From 17d7316ef9739cd1475ea6bd984391f7d4734ebc Mon Sep 17 00:00:00 2001 From: Dobromir Popov Date: Mon, 8 Jan 2024 19:00:58 +0200 Subject: [PATCH] doing summary with AI --- .gitignore | 4 +- agent-py-bot/agent.py | 75 +++++++++++-------------- agent-py-bot/agents/webagent.py | 2 +- agent-py-bot/scrape/raw/summary_log.txt | 11 ---- 4 files changed, 37 insertions(+), 55 deletions(-) diff --git a/.gitignore b/.gitignore index 45e83d7..2c73ccf 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,6 @@ node_modules/ package-lock.json rec/* */__pycache__/* -__pycache__ \ No newline at end of file +__pycache__ +agent-py-bot/scrape/raw/summary_log.txt +agent-py-bot/scrape/raw/* \ No newline at end of file diff --git a/agent-py-bot/agent.py b/agent-py-bot/agent.py index f0fad9a..09f463a 100644 --- a/agent-py-bot/agent.py +++ b/agent-py-bot/agent.py @@ -11,6 +11,7 @@ from selenium import webdriver from selenium.webdriver.chrome.options import Options from io import BytesIO from PIL import Image +from datetime import datetime, timedelta # Apply nest_asyncio nest_asyncio.apply() @@ -78,13 +79,18 @@ import re from agents.runner import execute_python_code #https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion -async def query_llm(user_message): +async def query_llm(user_message, model=None): """Query the LLM with the user's message.""" + # use the model if provided, otherwise use the default llama2 + if model is None: + model = "llama2:latest" + data = { - "model": "llama2:latest", + "model": model, "messages": [{"role": "user", "content": user_message}], "stream": False } + response = requests.post(LLM_ENDPOINT, json=data) if response.status_code == 200: response_data = response.json() @@ -99,15 +105,15 @@ async def query_llm(user_message): # Find and execute all code blocks code_blocks = re.findall(r"```(.*?)```", content, re.DOTALL) - for code in code_blocks: - execution_result = execute_python_code(code.strip()) - if APPEND_RESULTS: - # Append the result after the code block - content = content.replace(f"```{code}```", f"```{code}```\n```{execution_result}```") - else: - # Replace the code block with its result - content = content.replace(f"```{code}```", f"```{execution_result}```") - + if code_blocks: + for code in code_blocks: + execution_result = execute_python_code(code.strip()) + if APPEND_RESULTS: + # Append the result after the code block + content = content.replace(f"```{code}```", f"```{code}```\n```{execution_result}```") + else: + # Replace the code block with its result + content = content.replace(f"```{code}```", f"```{execution_result}```") return content else: @@ -193,50 +199,35 @@ import time from agents.webagent import run_web_agent, save_data async def run_web_agent_and_process_result(topic, folder): - print(f"Running web agent for topic: {topic}") news_data = run_web_agent(topic, folder) - save_data(news_data, folder) # summary = summarize_data(news_data) # with open(os.path.join(folder, "summary_log.txt"), 'a') as log_file: # log_file.write(f"{datetime.now()}: {summary}\n") # Process the result immediately after obtaining it - user_message = f"New data received: {news_data}" - query_result = await query_llm(user_message) + user_message = f"Summarize these news and make sentiment analysis on each news and one overall: {news_data}" + + print(f"[{datetime.now()}] Doing sentiment analysis with AI model.") + start = time.time() + query_result = await query_llm(user_message, "openhermes") + print(f"[{datetime.now()}] AI call returned in {time.time() - start} seconds.") + news_data["summary"] = query_result + save_data(news_data, folder) + + with open(os.path.join(folder, "summary_log.txt"), 'a') as log_file: + log_file.write(f"{datetime.now()}: {query_result}\n") + # Process the query_result as needed async def async_main(): - # Assuming this is your asynchronous main function with its full details - loop = asyncio.get_event_loop() - if loop.is_running(): - loop.create_task(main()) - else: - await main() - -def sync_main(): - # Synchronous part for scheduling topic = "tesla news" - interval = 8 # in minutes + interval = 1 # in hours folder = "agent-py-bot/scrape/raw" - # schedule.every(interval).minutes.do(run_web_agent_and_process_result, topic=topic, folder=folder) - schedule.every(interval).hours.do(run_web_agent_and_process_result, topic=topic, folder=folder) - # Run once at the start - news_json = await run_web_agent_and_process_result(topic=topic, folder=folder) - while True: - schedule.run_pending() - time.sleep(1) + await run_web_agent_and_process_result(topic=topic, folder=folder) + await asyncio.sleep(interval * 60) # Convert hours to seconds if __name__ == '__main__': - loop = asyncio.get_event_loop() - - # Run the asynchronous part - if loop.is_running(): - loop.create_task(async_main()) - else: - loop.run_until_complete(async_main()) - - # Run the synchronous part - sync_main() \ No newline at end of file + asyncio.run(async_main()) \ No newline at end of file diff --git a/agent-py-bot/agents/webagent.py b/agent-py-bot/agents/webagent.py index 41e6e97..d60321a 100644 --- a/agent-py-bot/agents/webagent.py +++ b/agent-py-bot/agents/webagent.py @@ -203,7 +203,7 @@ def summarize_data(data): return summary def run_web_agent(topic, folder): - print(f"Running web agent for topic: {topic}") + print(f"[{datetime.now()}] Running web agent for topic: {topic}") news_data = search_news(topic) # save_data(news_data, folder) # summary = summarize_data(news_data) diff --git a/agent-py-bot/scrape/raw/summary_log.txt b/agent-py-bot/scrape/raw/summary_log.txt index a3879a5..e69de29 100644 --- a/agent-py-bot/scrape/raw/summary_log.txt +++ b/agent-py-bot/scrape/raw/summary_log.txt @@ -1,11 +0,0 @@ -2023-12-23 01:18:42.922812: Summarized information -2023-12-25 17:02:01.477567: Summarized information -2024-01-08 13:12:04.190959: Summarized information -2024-01-08 13:13:03.437567: Summarized information -2024-01-08 13:14:04.749784: Summarized information -2024-01-08 13:15:06.100403: Summarized information -2024-01-08 13:16:07.387491: Summarized information -2024-01-08 13:17:09.016139: Summarized information -2024-01-08 13:18:10.384559: Summarized information -2024-01-08 13:19:12.129203: Summarized information -2024-01-08 13:20:13.569597: Summarized information