import requests
from bs4 import BeautifulSoup
import os
import json
from datetime import datetime, timedelta
import feedparser
import yfinance as yf
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


def search_duckduckgo(topic):
    # try with https://duckduckgo.com/?q=tesla&iar=news&ia=news
    url = f"http://api.duckduckgo.com/?q={topic}&format=json"
    response = requests.get(url)
    # results = response.json().get('RelatedTopics', [])
    soup = BeautifulSoup(response.text, 'html.parser')
    page_text = soup.get_text(separator='\n', strip=True)

    # Fetch the news results page as well (the original code built this URL but never requested it)
    url = f"https://duckduckgo.com/?q={topic}&iar=news&ia=news"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    page_text2 = soup.get_text(separator='\n', strip=True)
    return page_text + page_text2


def search_newsapi(topic, api_key, from_param=None):
    endpoint = "https://newsapi.org/v2/everything"
    # Set up parameters including your API key and query parameters
    params = {
        'apiKey': api_key,
        'q': topic,
        'sortBy': 'publishedAt',
        'language': 'en',
    }
    # Add 'from' parameter only if 'from_param' is provided (format "YYYY-MM-DD")
    if from_param:
        params['from'] = from_param
    response = requests.get(endpoint, params=params)
    articles = response.json().get('articles', [])
    return articles


def parse_rss_feed(feed_url):
    feed = feedparser.parse(feed_url)
    articles = [{'title': entry.title, 'link': entry.link} for entry in feed.entries]
    return articles


def fetch_stock_data(ticker, interval='1d', period='1mo'):
    stock = yf.Ticker(ticker)
    hist = stock.history(interval=interval, period=period)
    return hist


def search_google_news(topic):
    options = Options()
    options.add_argument("--headless")  # 'options.headless = True' is deprecated in recent Selenium releases
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(f"https://www.google.com/search?q={topic}&tbm=nws")
        # Code to accept cookies or terms goes here
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        page_text = soup.get_text(separator='\n', strip=True)
        return page_text
    finally:
        driver.quit()


def get_google_search_results_old_requiresLogin(query):
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    # if response.status_code == 200:
    #     soup = BeautifulSoup(response.text, 'html.parser')
    #     page_text = soup.get_text(separator='\n', strip=True)
    #     return page_text
    # else:
    #     return f"Error: {response.status_code}"
    try:
        response = requests.get(f"https://www.google.com/search?q={query}", headers=headers)
        response.raise_for_status()
        # Ensure the correct character set is used
        response.encoding = response.apparent_encoding
        soup = BeautifulSoup(response.text, 'html.parser')
        page_text = soup.get_text(separator='\n', strip=True)
        return page_text
    except Exception as e:
        return f"Parsing Error: {e}"


def google_search_api_headlines(query, api_key, cx, daterange=None):
    try:
        # Set up the API endpoint
        endpoint = "https://www.googleapis.com/customsearch/v1"
        # Set up parameters including your API key and custom search engine ID (cx)
        params = {
            'key': api_key,
            'cx': cx,
            'q': query
        }
        # Add 'dateRestrict' parameter if provided
        if daterange:
            params['dateRestrict'] = daterange
        # Make the request to the Custom Search API
        response = requests.get(endpoint, params=params)
        response.raise_for_status()
        # Parse the JSON response
        search_results = response.json()
        # Extract and return headlines from the response
        items = search_results.get('items', [])
        headlines = [item.get('title', '') for item in items]
        return headlines
    except Exception as e:
        return f"API Request Error: {e}"


def get_news_api_results(query, api_key, from_param):
    try:
        # Set up the API endpoint
        endpoint = "https://newsapi.org/v2/everything"
        # Set up parameters including your API key and query parameters
        params = {
            'apiKey': api_key,
            'q': query,
            'from': from_param,  # Specify the date in the format "YYYY-MM-DD"
            'sortBy': 'publishedAt',
            'language': 'en',
        }
        # Make the request to the News API
        response = requests.get(endpoint, params=params)
        response.raise_for_status()
        # Parse the JSON response
        news_results = response.json()
        # Extract and return relevant information from the response
        articles = news_results.get('articles', [])
        headlines = [article.get('title', '') for article in articles]
        return headlines
    except Exception as e:
        return f"API Request Error: {e}"


def search_tavily(topic, api_key):
    url = f"https://app.tavily.com/api/search?q={topic}"
    headers = {
        "Authorization": f"Bearer {api_key}"
    }
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        return response.json()
    else:
        return {"error": response.text}


def search_news(topic):
    # DuckDuckGo Results
    duck_results = search_duckduckgo(topic)

    # NewsAPI Results
    current_date = datetime.now()
    from_date = current_date - timedelta(days=2)
    from_param = from_date.strftime('%Y-%m-%d')
    newsapi_key = "44721311c40147ea9fe19080621cdb8a"
    newsapi_results = search_newsapi(topic, newsapi_key, from_param)

    # RSS Feed Results
    rss_feeds = ['http://feeds.reuters.com/Reuters/worldNews']  # Add more RSS URLs
    rss_results = []
    for feed_url in rss_feeds:
        rss_results.extend(parse_rss_feed(feed_url))

    # Google News Results
    # google_results = search_google_news(topic)
    # google_results = get_google_search_results(topic)
    #
    # //t-air: AIzaSyBC5-h1-WFqwKeHhagB-I1pcjRVEkvUZp4 https://console.cloud.google.com/apis/credentials/key/15ab8371-c67b-4d3a-a9af-7106cb4015e5?authuser=0&project=t-air-1704714414235 cx=049ff6d98d29c4e67
    api_key = "AIzaSyBC5-h1-WFqwKeHhagB-I1pcjRVEkvUZp4"
    cx = "049ff6d98d29c4e67"
    query = topic
    daterange = "d1"  # Specify the date range according to Google's search syntax
    # d1: Past 24 hours
    # w1: Past week
    # daterange_8_hours = "h8"
    # daterange_3_days = "d3"
    google_results = google_search_api_headlines(query, api_key, cx, daterange)

    return {
        "duckduckgo": duck_results,
        "newsapi": newsapi_results,
        "rss": rss_results,
        "google": google_results
    }


def save_data(data, folder):
    if not os.path.exists(folder):
        os.makedirs(folder)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    file_path = os.path.join(folder, f"data_{timestamp}.json")
    with open(file_path, 'w') as file:
        json.dump(data, file)


def summarize_data(data):
    summary = "Summarized information"  # Replace with actual summarization logic
    return summary


def run_web_agent(topic, folder):
    print(f"[{datetime.now()}] Running web agent for topic: {topic}")
    news_data = search_news(topic)
    tavily_api_key = "YOUR_TAVILY_API_KEY"
    tavily_results = search_tavily(topic, tavily_api_key)
    news_data["tavily"] = tavily_results
    # Persist the combined results; without this the 'folder' argument was unused
    save_data(news_data, folder)
    return news_data
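# Example invocation: a minimal usage sketch, assuming the module is run directly.
# The topic string "tesla" and the output folder "web_agent_data" are illustrative
# values, not settings taken from the original code.
if __name__ == "__main__":
    results = run_web_agent("tesla", "web_agent_data")
    for source, payload in results.items():
        # Print a short preview of each source's raw payload.
        print(f"{source}: {str(payload)[:120]}")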