diff --git a/agent-py-bot/agent.py b/agent-py-bot/agent.py index c3cf49a..0bce74e 100644 --- a/agent-py-bot/agent.py +++ b/agent-py-bot/agent.py @@ -204,17 +204,23 @@ async def async_main(): def sync_main(): # Synchronous part for scheduling - topic = "TSLA" + topic = "tesla news" interval = 1 # in minutes folder = "agent-py-bot/scrape/raw" schedule.every(interval).minutes.do(run_web_agent, topic=topic, folder=folder) # Run once at the start - run_web_agent(topic=topic, folder=folder) + news_json = run_web_agent(topic=topic, folder=folder) while True: schedule.run_pending() time.sleep(1) + # Check if there's new data obtained from web agent + new_data, new_summary = run_web_agent(topic=topic, folder=folder) + + # Use the new data to call the async function + user_message = f"New data received: {new_data}" + query_result = query_llm(user_message) if __name__ == '__main__': loop = asyncio.get_event_loop() diff --git a/agent-py-bot/agents/webagent.py b/agent-py-bot/agents/webagent.py index 425cb46..3759a3e 100644 --- a/agent-py-bot/agents/webagent.py +++ b/agent-py-bot/agents/webagent.py @@ -3,14 +3,102 @@ from bs4 import BeautifulSoup import os import json from datetime import datetime +import feedparser + +def search_duckduckgo(topic): + # try with https://duckduckgo.com/?q=tesla&iar=news&ia=news + url = f"http://api.duckduckgo.com/?q={topic}&format=json" + response = requests.get(url) + #results = response.json().get('RelatedTopics', []) + + soup = BeautifulSoup(response.text, 'html.parser') + page_text = soup.get_text(separator='\n', strip=True) + + url = f"https://duckduckgo.com/?q={topic}&iar=news&ia=news" + soup = BeautifulSoup(response.text, 'html.parser') + page_text2 = soup.get_text(separator='\n', strip=True) + + return page_text + page_text2 + +def search_newsapi(topic, api_key): + url = f"https://newsapi.org/v2/everything?q={topic}&apiKey={api_key}" + response = requests.get(url) + articles = response.json().get('articles', []) + return articles + +def parse_rss_feed(feed_url): + feed = feedparser.parse(feed_url) + articles = [{'title': entry.title, 'link': entry.link} for entry in feed.entries] + return articles + +from selenium import webdriver +from selenium.webdriver.chrome.options import Options + +def search_google_news(topic): + options = Options() + options.headless = True + driver = webdriver.Chrome(options=options) + + try: + driver.get(f"https://www.google.com/search?q={topic}&tbm=nws") + # Code to accept cookies or terms goes here + + soup = BeautifulSoup(driver.page_source, 'html.parser') + page_text = soup.get_text(separator='\n', strip=True) + return page_text + finally: + driver.quit() + + + +def get_google_search_results(query): + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'} + + # if response.status_code == 200: + # soup = BeautifulSoup(response.text, 'html.parser') + # page_text = soup.get_text(separator='\n', strip=True) + # return page_text + # else: + # return f"Error: {response.status_code}" + + try: + response = requests.get(f"https://www.google.com/search?q={query}", headers=headers) + response.raise_for_status() + + # Ensure the correct character set is used + response.encoding = response.apparent_encoding + + soup = BeautifulSoup(response.text, 'html.parser') + page_text = soup.get_text(separator='\n', strip=True) + return page_text + except Exception as e: + return f"Parsing Error: {e}" def search_news(topic): - url = f"https://www.google.com/search?q={topic}" - response = requests.get(url) - soup = BeautifulSoup(response.text, 'html.parser') + # DuckDuckGo Results + duck_results = search_duckduckgo(topic) - news_data = [] # Extract relevant information here - return news_data + # NewsAPI Results + newsapi_key = "44721311c40147ea9fe19080621cdb8a" + newsapi_results = search_newsapi(topic, newsapi_key) + + # RSS Feed Results + rss_feeds = ['http://feeds.reuters.com/Reuters/worldNews',] # Add more RSS URLs + rss_results = [] + for feed_url in rss_feeds: + rss_results.extend(parse_rss_feed(feed_url)) + + # Google News Results + # google_results = search_google_news(topic) + google_results = get_google_search_results(topic) + + return { + "duckduckgo": duck_results, + "newsapi": newsapi_results, + "rss": rss_results, + "google": google_results + } def save_data(data, folder): if not os.path.exists(folder): @@ -33,3 +121,4 @@ def run_web_agent(topic, folder): summary = summarize_data(news_data) with open(os.path.join(folder, "summary_log.txt"), 'a') as log_file: log_file.write(f"{datetime.now()}: {summary}\n") + return news_data \ No newline at end of file diff --git a/agent-py-bot/scrape/raw/data_20231225_170201.json b/agent-py-bot/scrape/raw/data_20231225_170201.json new file mode 100644 index 0000000..3d0e911 --- /dev/null +++ b/agent-py-bot/scrape/raw/data_20231225_170201.json @@ -0,0 +1 @@ +{"duckduckgo": "{\"Abstract\":\"\",\"AbstractSource\":\"\",\"AbstractText\":\"\",\"AbstractURL\":\"\",\"Answer\":\"\",\"AnswerType\":\"\",\"Definition\":\"\",\"DefinitionSource\":\"\",\"DefinitionURL\":\"\",\"Entity\":\"\",\"Heading\":\"\",\"Image\":\"\",\"ImageHeight\":\"\",\"ImageIsLogo\":\"\",\"ImageWidth\":\"\",\"Infobox\":\"\",\"Redirect\":\"\",\"RelatedTopics\":[],\"Results\":[],\"Type\":\"\",\"meta\":{\"attribution\":null,\"blockgroup\":null,\"created_date\":\"2021-03-24\",\"description\":\"testing\",\"designer\":null,\"dev_date\":\"2021-03-24\",\"dev_milestone\":\"development\",\"developer\":[{\"name\":\"zt\",\"type\":\"duck.co\",\"url\":\"https://duck.co/user/zt\"}],\"example_query\":\"\",\"id\":\"just_another_test\",\"is_stackexchange\":0,\"js_callback_name\":\"another_test\",\"live_date\":null,\"maintainer\":{\"github\":\"\"},\"name\":\"Just Another Test\",\"perl_module\":\"DDG::Lontail::AnotherTest\",\"producer\":null,\"production_state\":\"offline\",\"repo\":\"fathead\",\"signal_from\":\"just_another_test\",\"src_domain\":\"how about there\",\"src_id\":null,\"src_name\":\"hi there\",\"src_options\":{\"directory\":\"\",\"is_fanon\":0,\"is_mediawiki\":0,\"is_wikipedia\":0,\"language\":\"\",\"min_abstract_length\":null,\"skip_abstract\":0,\"skip_abstract_paren\":0,\"skip_icon\":0,\"skip_image_name\":0,\"skip_qr\":\"\",\"src_info\":\"\",\"src_skip\":\"\"},\"src_url\":\"Hello there\",\"status\":null,\"tab\":\"is this source\",\"topic\":[],\"unsafe\":null}}{\"Abstract\":\"\",\"AbstractSource\":\"\",\"AbstractText\":\"\",\"AbstractURL\":\"\",\"Answer\":\"\",\"AnswerType\":\"\",\"Definition\":\"\",\"DefinitionSource\":\"\",\"DefinitionURL\":\"\",\"Entity\":\"\",\"Heading\":\"\",\"Image\":\"\",\"ImageHeight\":\"\",\"ImageIsLogo\":\"\",\"ImageWidth\":\"\",\"Infobox\":\"\",\"Redirect\":\"\",\"RelatedTopics\":[],\"Results\":[],\"Type\":\"\",\"meta\":{\"attribution\":null,\"blockgroup\":null,\"created_date\":\"2021-03-24\",\"description\":\"testing\",\"designer\":null,\"dev_date\":\"2021-03-24\",\"dev_milestone\":\"development\",\"developer\":[{\"name\":\"zt\",\"type\":\"duck.co\",\"url\":\"https://duck.co/user/zt\"}],\"example_query\":\"\",\"id\":\"just_another_test\",\"is_stackexchange\":0,\"js_callback_name\":\"another_test\",\"live_date\":null,\"maintainer\":{\"github\":\"\"},\"name\":\"Just Another Test\",\"perl_module\":\"DDG::Lontail::AnotherTest\",\"producer\":null,\"production_state\":\"offline\",\"repo\":\"fathead\",\"signal_from\":\"just_another_test\",\"src_domain\":\"how about there\",\"src_id\":null,\"src_name\":\"hi there\",\"src_options\":{\"directory\":\"\",\"is_fanon\":0,\"is_mediawiki\":0,\"is_wikipedia\":0,\"language\":\"\",\"min_abstract_length\":null,\"skip_abstract\":0,\"skip_abstract_paren\":0,\"skip_icon\":0,\"skip_image_name\":0,\"skip_qr\":\"\",\"src_info\":\"\",\"src_skip\":\"\"},\"src_url\":\"Hello there\",\"status\":null,\"tab\":\"is this source\",\"topic\":[],\"unsafe\":null}}", "newsapi": [{"source": {"id": "the-verge", "name": "The Verge"}, "author": "Andrew J. Hawkins", "title": "Tesla Cybertruck will usher in a new \u2018Powershare\u2019 bidirectional charging feature", "description": "Tesla\u2019s Cybertruck will be the company\u2019s first vehicle to feature vehicle-to-load, or bidirectional charging. That allows customers to charge equipment, another EV, or even power their whole home from their Cybertruck.", "url": "https://www.theverge.com/2023/11/30/23983226/tesla-cybertruck-powershare-bidirectional-vehicle-to-load", "urlToImage": "https://cdn.vox-cdn.com/thumbor/b8pqGPSF6FhbjfA_Uv-DGznEBR4=/0x0:2226x948/1200x628/filters:focal(1113x474:1114x475)/cdn.vox-cdn.com/uploads/chorus_asset/file/25123625/Screen_Shot_2023_11_30_at_4.17.14_PM.png", "publishedAt": "2023-11-30T21:47:26Z", "content": "Tesla Cybertruck will usher in a new Powershare bidirectional charging feature\r\nTesla Cybertruck will usher in a new Powershare bidirectional charging feature\r\n / The EV maker finally jumps on the ve\u2026 [+2497 chars]"}, {"source": {"id": "the-verge", "name": "The Verge"}, "author": "Mia Sato", "title": "DealBook Summit 2023: Elon Musk, Bob Iger, and more", "description": "Live coverage of this year\u2019s event featuring Vice President Kamala Harris, Disney CEO Bob Iger, FTC Chair Lina Khan, Warner Bros. Discovery CEO David Zaslav, and Tesla CEO Elon Musk, among others.", "url": "https://www.theverge.com/2023/11/29/23980877/new-york-times-dealbook-summit-elon-musk-bob-iger-david-zaslav", "urlToImage": "https://cdn.vox-cdn.com/thumbor/wW2zI39ydY5ptMMdvjwcwrzhapc=/0x0:7324x4883/1200x628/filters:focal(3652x2539:3653x2540)/cdn.vox-cdn.com/uploads/chorus_asset/file/25119833/1445788868.jpg", "publishedAt": "2023-11-29T15:40:00Z", "content": "Every year, The New York Times DealBook Summit features a roster of major speakers sitting for interviews with Andrew Ross Sorkin. Last year, the conference gave us that disastrous Sam Bankman-Fried \u2026 [+723 chars]"}, {"source": {"id": "business-insider", "name": "Business Insider"}, "author": "Jyoti Mann", "title": "Elon Musk thinks Tesla has 'the best real-world AI,' but an ex-worker raised concerns about how it's powering the Autopilot feature", "description": "Whistleblower and former Tesla technician Lukasz Krupski told BBC News that he didn't think its self-driving feature, Autopilot, was ready to be used on public roads.", "url": "https://www.businessinsider.com/tesla-employee-self-driving-feature-autopilot-safety-elon-musk-2023-12", "urlToImage": "https://i.insider.com/656efaf858e7c0c29a2990ad?width=1200&format=jpeg", "publishedAt": "2023-12-05T11:40:28Z", "content": "Tesla CEO Elon Musk introducing the Cybertruck in November 2019.Frederic J. Brown/AFP via Getty Images\r\n