File: /var/www/html/calendar-planning/app/utils/generate_events_from_openai.py
import os
from dotenv import load_dotenv
from .scrap_website_content import extract_text_from_url
from .google_search import search_web
from .openai_utils import generate_events
from typing import List
from .token_counter import str_token_counter
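
# The local helper modules are assumed to follow the contracts used below
# (inferred from this file's usage, not verified against their source):
#   search_web(query, api_key, cx, num) -> {'statusCode': int, 'data': [url, ...]}
#   extract_text_from_url(url)          -> {'statusCode': int, 'url': str, 'content': str}
#   str_token_counter(text)             -> int (token count)
#   generate_events(prompt)             -> object with .category and .event_detail,
#                                          where each entry has .date, .name, .location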

load_dotenv()

api_key = os.getenv("SEARCH_GOOGLE_API_KEY")
cx = os.getenv("CUSTOM_SEARCH_ENGINE_ID")
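
# Both values are loaded from a .env file (or the process environment) via
# python-dotenv; a minimal .env would look like this (placeholder values, assumed):
#   SEARCH_GOOGLE_API_KEY=<google-api-key>
#   CUSTOM_SEARCH_ENGINE_ID=<search-engine-id>
# If either is unset, os.getenv returns None and the search_web call below
# will likely fail.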


def event_generator(categories: list, year: int, country: str) -> List[dict]:
    """Search the web with Google Custom Search, scrape the content from
    each result URL, and generate event details using OpenAI."""
    event_results = []
    for category in categories:
        search_query = f"upcoming {category} events in {country} {year}"

        search_results = search_web(
            query=search_query,
            api_key=api_key,
            cx=cx,
            num=5
        )
        
        if search_results['statusCode'] != 200:
            print(f"Web search failed with status code: {search_results['statusCode']}")
            continue  # skip this category but keep processing the rest

        # All URLs come from the same successful response, so no per-URL
        # status filtering is needed here.
        valid_urls = search_results['data']
        
        for url in valid_urls:
            scrap_result = extract_text_from_url(url)

            if scrap_result['statusCode'] != 200:
                print(f"Scraping failed for {scrap_result['url']} with status code: {scrap_result['statusCode']}")
                continue

            website_content = scrap_result['content']
            token_count = str_token_counter(website_content)
            if token_count > 3000:
                # Keep roughly the first 3k tokens; 9000 characters is a
                # ~3-characters-per-token heuristic, not an exact token cut.
                website_content = website_content[:9000]

            user_prompt = f"Category: {category}\nYear: {year}\nLocation: {country}\n{website_content}"
            gpt_response = generate_events(user_prompt)

            for response in gpt_response.event_detail:
                # The model may return 'NA' for unknown dates or locations;
                # those values are passed through as-is.
                data = {
                    "url": scrap_result['url'],
                    "date": response.date,
                    "event": response.name,
                    "category": gpt_response.category,
                    "location": response.location,
                }

                event_results.append(data)
                    
    return event_results
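

if __name__ == "__main__":
    # Minimal usage sketch: the categories, year, and country below are
    # illustrative placeholders, not values from the original project.
    events = event_generator(
        categories=["music", "technology"],
        year=2025,
        country="United States",
    )
    for event in events:
        print(event)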