From 317a876ec492d362ad4ffbd53727b36342693a40 Mon Sep 17 00:00:00 2001
From: Dobromir Popov <d-popov@abv.bg>
Date: Sat, 23 Dec 2023 00:41:19 +0000
Subject: [PATCH] agent scraper

---
 .gitignore                      |  2 +
 agent-py-bot/agent.py           | 72 +++++++++++++++++++++++++++------
 agent-py-bot/agents/runner.py   | 13 ++++++
 agent-py-bot/agents/webagent.py | 35 ++++++++++++++++
 agent-py-bot/cmd.sh             | 12 +++++-
 agent-py-bot/prompts.py         |  9 ++++-
 agent-py-bot/requirements.txt   |  3 ++
 7 files changed, 132 insertions(+), 14 deletions(-)
 create mode 100644 agent-py-bot/agents/runner.py
 create mode 100644 agent-py-bot/agents/webagent.py

diff --git a/.gitignore b/.gitignore
index f847d39..45e83d7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 node_modules/
 package-lock.json
 rec/*
+*/__pycache__/*
+__pycache__
\ No newline at end of file
diff --git a/agent-py-bot/agent.py b/agent-py-bot/agent.py
index 34cd32f..c3cf49a 100644
--- a/agent-py-bot/agent.py
+++ b/agent-py-bot/agent.py
@@ -3,9 +3,7 @@ import asyncio, nest_asyncio
 from telegram import Bot, Message, Update
 from telegram.constants import ParseMode
 from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes, CallbackContext
-
-# import "gopkg.in/telebot.v3/middleware"
-
+import os
 import requests
 import json
 import base64
@@ -33,10 +31,7 @@ DEVELOPER_CHAT_ID = "777826553"
 # LLM API Endpoint
 LLM_ENDPOINT = "http://192.168.0.11:11434/api/chat"
 
-#! Selenium WebDriver setup for screenshots
-#chrome_options = Options()
-#chrome_options.add_argument("--headless")
-#driver = webdriver.Chrome(options=chrome_options)
+APPEND_RESULTS = os.getenv('APPEND_RESULTS', 'True').strip().lower() == 'true'  # case-insensitive: 'true'/'TRUE' also enable it
 
 async def start(update: Update, context: CallbackContext):
     await context.bot.send_message(chat_id=update.effective_chat.id, text="Hi! I'm your AI bot. Ask me aything with /ask")
@@ -78,7 +73,11 @@ async def ok(update: Update, context: CallbackContext):
     context.chat_data['messages'] = []
     await update.message.reply_text("Exiting ask mode.")
 
-    #https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion
+#                CODE RUNNER
+import re
+from agents.runner import execute_python_code
+
+#https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion
 async def query_llm(user_message):
     """Query the LLM with the user's message."""
     data = {
@@ -95,7 +94,22 @@ async def query_llm(user_message):
                 logger.error(f"LLM Error: {error_message}")
                 # Return a user-friendly error message
                 return "Sorry, there was an error processing your request."
-            return response_data.get('message', {}).get('content', 'No response from AI')
+            # handle response
+            content = response_data.get('message', {}).get('content', 'No response')
+
+            # Find and execute all code blocks
+            code_blocks = re.findall(r"```(.*?)```", content, re.DOTALL)
+            for code in code_blocks:
+                # The capture includes any fence info string; drop a leading python tag before running.
+                source = re.sub(r"\A(?:python3?|py)[ \t]*\n", "", code, flags=re.IGNORECASE)
+                execution_result = execute_python_code(source.strip())
+                # Handled per block, inside the loop (a reply without code blocks is left untouched):
+                # append the result after the code block, or replace the code block with its result.
+                shown = f"```{code}```\n" if APPEND_RESULTS else ""
+                content = content.replace(f"```{code}```", f"{shown}```{execution_result}```")
+
+
+            return content
     else:    
         logger.error(f"Error reaching LLM: {response.text}")
         return "Error: Unable to reach the AI agent."
@@ -172,10 +186,44 @@ async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> N
 async def bad_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
     """Raise an error to trigger the error handler."""
     await context.bot.wrong_method_name()  # type: ignore[attr-defined]
-    
-if __name__ == '__main__':
+
+#------------------------- webagent --------------------------# 
+import schedule
+import time
+from agents.webagent import run_web_agent
+
+
+
+async def async_main():
+    # NOTE(review): while this coroutine runs, its loop is already running, so the create_task branch always fires and main() is scheduled but never awaited; the `await main()` branch looks unreachable - confirm intent.
     loop = asyncio.get_event_loop()
     if loop.is_running():
         loop.create_task(main())
     else:
-        asyncio.run(main())
+        await main()
+
+def sync_main(topic="TSLA", interval=1, folder="agent-py-bot/scrape/raw"):
+    """Scrape ``topic`` once immediately, then every ``interval`` minutes, forever.
+
+    Blocks the calling thread and does not return. ``topic`` and ``folder``
+    are passed through unchanged to ``run_web_agent`` on every run.
+    """
+    schedule.every(interval).minutes.do(run_web_agent, topic=topic, folder=folder)
+    # Run once at the start instead of waiting a full interval for the first job.
+    run_web_agent(topic=topic, folder=folder)
+
+    while True:
+        schedule.run_pending()
+        time.sleep(1)  # poll for due jobs once per second
+
+if __name__ == '__main__':
+    loop = asyncio.get_event_loop()
+
+    # Run the asynchronous part (async_main, which hands off to main() - presumably the Telegram bot)
+    if loop.is_running():
+        loop.create_task(async_main())
+    else:
+        loop.run_until_complete(async_main())
+
+    # NOTE(review): sync_main() loops forever with time.sleep() on this thread, so the event loop gets no further turns once it starts - confirm the async part can still make progress.
+    sync_main()
\ No newline at end of file
diff --git a/agent-py-bot/agents/runner.py b/agent-py-bot/agents/runner.py
new file mode 100644
index 0000000..d165923
--- /dev/null
+++ b/agent-py-bot/agents/runner.py
@@ -0,0 +1,13 @@
+import subprocess
+import re
+
+
+def execute_python_code(code_block):
+    """Run ``code_block`` in a child interpreter; return stdout (stderr if stdout is empty) or an error string."""
+    import sys  # local import: sys.executable is the running interpreter; a bare 'python' may be absent from PATH
+    # SECURITY: code_block comes from LLM output and runs unsandboxed; the 5 s timeout is the only limit here.
+    try:
+        result = subprocess.run([sys.executable, '-c', code_block], capture_output=True, text=True, timeout=5)
+        return result.stdout or result.stderr
+    except Exception as e:  # deliberate catch-all: callers always get a string back (timeout, spawn failure, ...)
+        return f"Execution error: {str(e)}"
\ No newline at end of file
diff --git a/agent-py-bot/agents/webagent.py b/agent-py-bot/agents/webagent.py
new file mode 100644
index 0000000..425cb46
--- /dev/null
+++ b/agent-py-bot/agents/webagent.py
@@ -0,0 +1,35 @@
+import requests
+from bs4 import BeautifulSoup
+import os
+import json
+from datetime import datetime
+
+def search_news(topic):
+    """Fetch the Google results page for ``topic``; extraction is still a stub, so ``[]`` is returned."""
+    # params= URL-encodes the topic; the timeout stops a stalled connection from hanging the caller forever.
+    response = requests.get("https://www.google.com/search", params={"q": topic}, timeout=10)
+    soup = BeautifulSoup(response.text, 'html.parser')  # parsed but not consumed yet
+    news_data = []  # Extract relevant information here
+    return news_data
+
+def save_data(data, folder):
+    """Write ``data`` as JSON to ``<folder>/data_<YYYYmmdd_HHMMSS>.json``, creating ``folder`` if missing."""
+    # exist_ok avoids the check-then-create race of a separate os.path.exists() test.
+    os.makedirs(folder, exist_ok=True)
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    file_path = os.path.join(folder, f"data_{timestamp}.json")
+    # Explicit encoding so the file does not depend on the platform's locale default.
+    with open(file_path, 'w', encoding='utf-8') as file:
+        json.dump(data, file)
+
+def summarize_data(data):
+    """Placeholder summarizer: ignores ``data`` and returns a fixed string (TODO: real summarization logic)."""
+    return "Summarized information"
+
+def run_web_agent(topic, folder):  # one scrape cycle: search, save raw data, log a summary line
+    print(f"Running web agent for topic: {topic}")
+    found = search_news(topic)
+    save_data(found, folder)  # also creates ``folder``, which the log append below relies on
+    digest = summarize_data(found)
+    with open(os.path.join(folder, "summary_log.txt"), 'a') as summary_log:
+        summary_log.write(f"{datetime.now()}: {digest}\n")
diff --git a/agent-py-bot/cmd.sh b/agent-py-bot/cmd.sh
index 20c3a5c..f7f52aa 100644
--- a/agent-py-bot/cmd.sh
+++ b/agent-py-bot/cmd.sh
@@ -20,4 +20,14 @@ make the telegram bot think while getting the response
 
 /ask who are you
 /ai test
-/ask how to print numbers 1 to 100 in python? mark your answers in python code blocks
\ No newline at end of file
+/ask how to print numbers 1 to 100 in python? mark your answers in python code blocks
+
+
+
+
+# prompts
+In Python, create an app that searches for news about a specific topic on a time-based interval (it should also be possible to subscribe to updates). The information sources should be versatile and flexible: web search engines, Reddit, Twitter, transcriptions of the latest YouTube videos, and of course well-known news and information websites. All information found will be stored in a separate folder, and then we'll use an LLM to summarize all of it into a simple trend. A log of all the summaries will be appended at each interval.
+
+
+
+
diff --git a/agent-py-bot/prompts.py b/agent-py-bot/prompts.py
index 5844a87..fe077c1 100644
--- a/agent-py-bot/prompts.py
+++ b/agent-py-bot/prompts.py
@@ -10,4 +10,11 @@ data = {
 #        "content": "what is in this image?",
 #      "images": ["iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQ
nS4jDSsxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8j
rUwKPqsmVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC"],
         "stream": False
-}
\ No newline at end of file
+}
+
+# SYS: Write full run.py that can be executed in brand new environment
+# USER: /ask how to print numbers 1 to 100 in python?
+
+# SYS: you are an AI agent that has different tools to work with. for example you can visit and read websites using the following format <a href="the url" request="text|snapshot">
+# response will be in the same format with result tag: <a href="the url" request="text|snapshot" result="">. wait for the results in the next user message before answering.
+# USER: what is the news today?
\ No newline at end of file
diff --git a/agent-py-bot/requirements.txt b/agent-py-bot/requirements.txt
index a193252..648b8ef 100644
--- a/agent-py-bot/requirements.txt
+++ b/agent-py-bot/requirements.txt
@@ -19,3 +19,6 @@ trio-websocket==0.11.1
 urllib3==2.1.0
 wsproto==1.2.0
 nest_asyncio
+requests
+beautifulsoup4
+schedule
\ No newline at end of file