File: /var/www/html/calendar-planning/app/utils/generate_events_from_openai.py
import os
from typing import List

from dotenv import load_dotenv

from .scrap_website_content import extract_text_from_url
from .google_search import search_web
from .openai_utils import generate_events
from .token_counter import str_token_counter

load_dotenv()
api_key = os.getenv("SEARCH_GOOGLE_API_KEY")
cx = os.getenv("CUSTOM_SEARCH_ENGINE_ID")
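
# Added sketch, not part of the original flow: warn early, in this module's
# existing print-based style, if the search credentials read above are missing,
# rather than letting search_web fail later with a vaguer error.
if not api_key or not cx:
    print("Warning: SEARCH_GOOGLE_API_KEY or CUSTOM_SEARCH_ENGINE_ID is not set")
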
def event_generator(categories: list, year: int, country: str) -> List[dict]:
    """Searches the web with Google Search, scrapes the content of each
    result URL, and generates event details using OpenAI."""
    event_results = []
    for category in categories:
        search_query = f"upcoming {category} events in {country} {year}"
        search_results = search_web(
            query=search_query,
            api_key=api_key,
            cx=cx,
            num=5
        )
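        # Assumed shape of search_web's response, inferred from how it is
        # read below: {'statusCode': <int>, 'data': [<url>, ...]}.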
        if search_results['statusCode'] != 200:
            print(f"Web search failed with status code: {search_results['statusCode']}")
            continue  # Skip this category but keep processing the rest
        valid_urls = search_results['data']
        for url in valid_urls:
            scrap_result = extract_text_from_url(url)
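            # Assumed shape of extract_text_from_url's response, inferred from
            # its usage here: {'statusCode': <int>, 'url': <str>, 'content': <str>}.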
            if scrap_result['statusCode'] != 200:
                print(f"Scraping failed for {scrap_result['url']} with status code: {scrap_result['statusCode']}")
            else:
website_content = ""
token_count = str_token_counter(scrap_result['content'])
if token_count > 3000:
website_content = scrap_result['content'][:9000] # First 3k tokens only
else:
website_content = scrap_result['content']
user_prompt = f"Category: {category}\nYear: {year}\nLocation: {country}\n{website_content}"
gpt_response = generate_events(user_prompt)
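                # Assumed shape of generate_events's structured response, inferred
                # from the attribute access below: an object with a `category` field
                # and an `event_detail` list whose items expose `date`, `name`, and
                # `location`.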
                for response in gpt_response.event_detail:
                    # The model may return 'NA' for unknown dates/locations;
                    # those values are passed through unchanged here.
                    data = {
                        "url": scrap_result['url'],
                        "date": response.date,
                        "event": response.name,
                        "category": gpt_response.category,
                        "location": response.location
                    }
                    event_results.append(data)
    return event_results
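

# Minimal usage sketch; the category list, year, and country below are
# illustrative values, not ones taken from this project.
if __name__ == "__main__":
    events = event_generator(
        categories=["music", "technology"],
        year=2025,
        country="United States",
    )
    for event in events:
        print(event)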