fix google search - using api
parent 7c0dd39227
commit 3d114d1a76
@@ -190,9 +190,21 @@ async def bad_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
 #------------------------- webagent --------------------------#
 import schedule
 import time
-from agents.webagent import run_web_agent
+from agents.webagent import run_web_agent, save_data
+
+
+async def run_web_agent_and_process_result(topic, folder):
+    print(f"Running web agent for topic: {topic}")
+    news_data = run_web_agent(topic, folder)
+    save_data(news_data, folder)
+    # summary = summarize_data(news_data)
+
+    # with open(os.path.join(folder, "summary_log.txt"), 'a') as log_file:
+    #     log_file.write(f"{datetime.now()}: {summary}\n")
+
+    # Process the result immediately after obtaining it
+    user_message = f"New data received: {news_data}"
+    query_result = await query_llm(user_message)
+    # Process the query_result as needed
+
 async def async_main():
     # Assuming this is your asynchronous main function with its full details
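The new run_web_agent_and_process_result coroutine assumes query_llm is already defined elsewhere in this bot module (it is awaited here but not imported in this hunk). A minimal sketch of driving the coroutine once outside the Telegram event loop, under that assumption:

    import asyncio

    # Hypothetical one-off invocation; topic and folder values are taken from
    # sync_main() below, and query_llm is assumed to be an awaitable helper.
    asyncio.run(run_web_agent_and_process_result("tesla news", "agent-py-bot/scrape/raw"))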
@@ -205,22 +217,17 @@ async def async_main():
 def sync_main():
     # Synchronous part for scheduling
     topic = "tesla news"
-    interval = 1 # in minutes
+    interval = 8 # in minutes
     folder = "agent-py-bot/scrape/raw"
 
-    schedule.every(interval).minutes.do(run_web_agent, topic=topic, folder=folder)
+    # schedule.every(interval).minutes.do(run_web_agent_and_process_result, topic=topic, folder=folder)
+    schedule.every(interval).hours.do(run_web_agent_and_process_result, topic=topic, folder=folder)
     # Run once at the start
-    news_json = run_web_agent(topic=topic, folder=folder)
+    news_json = await run_web_agent_and_process_result(topic=topic, folder=folder)
 
     while True:
         schedule.run_pending()
         time.sleep(1)
-        # Check if there's new data obtained from web agent
-        new_data, new_summary = run_web_agent(topic=topic, folder=folder)
-
-        # Use the new data to call the async function
-        user_message = f"New data received: {new_data}"
-        query_result = query_llm(user_message)
-
 if __name__ == '__main__':
     loop = asyncio.get_event_loop()
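Two caveats about this hunk as written: await inside def sync_main() is a SyntaxError unless the function becomes async, and schedule invokes its job synchronously, so handing a coroutine function to .do() only produces an un-awaited coroutine object. A minimal sketch of one way to bridge the two, assuming the names from the diff above:

    def run_job(topic, folder):
        # Synchronous wrapper so `schedule` can run the async job to completion.
        asyncio.run(run_web_agent_and_process_result(topic=topic, folder=folder))

    schedule.every(interval).hours.do(run_job, topic=topic, folder=folder)
    run_job(topic, folder)  # run once at the start, without awaiting in a sync function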
@@ -2,7 +2,7 @@ import requests
 from bs4 import BeautifulSoup
 import os
 import json
-from datetime import datetime
+from datetime import datetime, timedelta
 import feedparser
 
 def search_duckduckgo(topic):
@@ -20,10 +20,25 @@ def search_duckduckgo(topic):
 
     return page_text + page_text2
 
-def search_newsapi(topic, api_key):
-    url = f"https://newsapi.org/v2/everything?q={topic}&apiKey={api_key}"
-    response = requests.get(url)
+def search_newsapi(topic, api_key, from_param=None):
+    endpoint = "https://newsapi.org/v2/everything"
+
+    # Set up parameters including your API key and query parameters
+    params = {
+        'apiKey': api_key,
+        'q': topic,
+        'from': from_param, # Specify the date in the format "YYYY-MM-DD"
+        'sortBy': 'publishedAt',
+        'language': 'en',
+    }
+
+    # Add 'from' parameter only if 'from_param' is provided
+    if from_param:
+        params['from'] = from_param
+
+    response = requests.get(endpoint, params=params)
     articles = response.json().get('articles', [])
+    headlines = [article.get('title', '') for article in articles]
     return articles
 
 def parse_rss_feed(feed_url):
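Note that the rewritten search_newsapi builds a headlines list but still returns the full articles list. A minimal caller sketch, assuming newsapi_key holds a valid NewsAPI key:

    from datetime import datetime, timedelta

    # Hypothetical caller: restrict results to the last two days.
    from_param = (datetime.now() - timedelta(days=2)).strftime('%Y-%m-%d')
    articles = search_newsapi("tesla news", newsapi_key, from_param)
    titles = [a.get('title', '') for a in articles]  # titles must be re-derived; the function returns articles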
@@ -51,7 +66,7 @@ def search_google_news(topic):
 
 
 
-def get_google_search_results(query):
+def get_google_search_results_old_requiresLogin(query):
     headers = {
         'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
 
@@ -61,7 +76,6 @@ def get_google_search_results(query):
     # return page_text
     # else:
     #     return f"Error: {response.status_code}"
-
     try:
         response = requests.get(f"https://www.google.com/search?q={query}", headers=headers)
         response.raise_for_status()
@@ -75,13 +89,76 @@ def get_google_search_results(query):
     except Exception as e:
         return f"Parsing Error: {e}"
 
 
+def google_search_api_headlines(query, api_key, cx, daterange=None):
+    try:
+        # Set up the API endpoint
+        endpoint = "https://www.googleapis.com/customsearch/v1"
+
+        # Set up parameters including your API key and custom search engine ID (cx)
+        params = {
+            'key': api_key,
+            'cx': cx,
+            'q': query
+        }
+
+        # Add 'dateRestrict' parameter if provided
+        if daterange:
+            params['dateRestrict'] = daterange
+
+        # Make the request to the Custom Search API
+        response = requests.get(endpoint, params=params)
+        response.raise_for_status()
+
+        # Parse the JSON response
+        search_results = response.json()
+
+        # Extract and return headlines from the response
+        items = search_results.get('items', [])
+        headlines = [item.get('title', '') for item in items]
+        return headlines
+    except Exception as e:
+        return f"API Request Error: {e}"
+
+def get_news_api_results(query, api_key, from_param):
+    try:
+        # Set up the API endpoint
+        endpoint = "https://newsapi.org/v2/everything"
+
+        # Set up parameters including your API key and query parameters
+        params = {
+            'apiKey': api_key,
+            'q': query,
+            'from': from_param, # Specify the date in the format "YYYY-MM-DD"
+            'sortBy': 'publishedAt',
+            'language': 'en',
+        }
+
+        # Make the request to the News API
+        response = requests.get(endpoint, params=params)
+        response.raise_for_status()
+
+        # Parse the JSON response
+        news_results = response.json()
+
+        # Extract and return relevant information from the response
+        articles = news_results.get('articles', [])
+        headlines = [article.get('title', '') for article in articles]
+        return headlines
+    except Exception as e:
+        return f"API Request Error: {e}"
+
 def search_news(topic):
     # DuckDuckGo Results
     duck_results = search_duckduckgo(topic)
 
     # NewsAPI Results
+    current_date = datetime.now()
+    from_date = current_date - timedelta(days=2)
+    from_param = from_date.strftime('%Y-%m-%d')
+
     newsapi_key = "44721311c40147ea9fe19080621cdb8a"
-    newsapi_results = search_newsapi(topic, newsapi_key)
+    newsapi_results = search_newsapi(topic, newsapi_key, from_param)
 
     # RSS Feed Results
     rss_feeds = ['http://feeds.reuters.com/Reuters/worldNews',] # Add more RSS URLs
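Both new helpers return a list of headline strings on success but a plain error string ("API Request Error: ...") on failure, so callers need a type check before iterating. A minimal usage sketch with placeholder credentials (GOOGLE_API_KEY and GOOGLE_CX are stand-ins, not values from the commit):

    headlines = google_search_api_headlines("tesla news", GOOGLE_API_KEY, GOOGLE_CX, daterange="d1")
    if isinstance(headlines, str):
        # Error path: the helper swallowed the exception and returned a message string.
        print(headlines)
    else:
        for title in headlines:
            print(title)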
@@ -91,7 +168,18 @@ def search_news(topic):
 
     # Google News Results
     # google_results = search_google_news(topic)
-    google_results = get_google_search_results(topic)
+    # google_results = get_google_search_results(topic)
+
+    # # //t-air: AIzaSyBC5-h1-WFqwKeHhagB-I1pcjRVEkvUZp4 https://console.cloud.google.com/apis/credentials/key/15ab8371-c67b-4d3a-a9af-7106cb4015e5?authuser=0&project=t-air-1704714414235 cx=049ff6d98d29c4e67
+    api_key = "AIzaSyBC5-h1-WFqwKeHhagB-I1pcjRVEkvUZp4"
+    cx = "049ff6d98d29c4e67"
+    query = topic
+    daterange = "d1" # Specify the date range according to Google's search syntax
+    # d1: Past 24 hours
+    # w1: Past week
+    # daterange_8_hours = "h8"
+    # daterange_3_days = "d3"
+    google_results = google_search_api_headlines(query, api_key, cx, daterange)
 
     return {
         "duckduckgo": duck_results,
@@ -117,8 +205,8 @@ def summarize_data(data):
 def run_web_agent(topic, folder):
     print(f"Running web agent for topic: {topic}")
     news_data = search_news(topic)
-    save_data(news_data, folder)
-    summary = summarize_data(news_data)
-    with open(os.path.join(folder, "summary_log.txt"), 'a') as log_file:
-        log_file.write(f"{datetime.now()}: {summary}\n")
+    # save_data(news_data, folder)
+    # summary = summarize_data(news_data)
+    # with open(os.path.join(folder, "summary_log.txt"), 'a') as log_file:
+    #     log_file.write(f"{datetime.now()}: {summary}\n")
     return news_data
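With save_data and summarize_data commented out here, run_web_agent is reduced to fetching; persistence now happens on the caller side, as wired up in the bot file earlier in this commit. In outline:

    # Caller side (bot file), per the first hunk of this commit:
    news_data = run_web_agent(topic, folder)  # fetch only; no longer saves or summarizes
    save_data(news_data, folder)              # persistence moved to the caller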