agent scraper
This commit is contained in:
parent
73803096b0
commit
317a876ec4
2
.gitignore
vendored
2
.gitignore
vendored
@ -1,3 +1,5 @@
|
||||
node_modules/
|
||||
package-lock.json
|
||||
rec/*
|
||||
*/__pycache__/*
|
||||
__pycache__
|
@ -3,9 +3,7 @@ import asyncio, nest_asyncio
|
||||
from telegram import Bot, Message, Update
|
||||
from telegram.constants import ParseMode
|
||||
from telegram.ext import Application, CommandHandler, MessageHandler, filters, ContextTypes, CallbackContext
|
||||
|
||||
# import "gopkg.in/telebot.v3/middleware"
|
||||
|
||||
import os
|
||||
import requests
|
||||
import json
|
||||
import base64
|
||||
@ -33,10 +31,7 @@ DEVELOPER_CHAT_ID = "777826553"
|
||||
# LLM API Endpoint
|
||||
LLM_ENDPOINT = "http://192.168.0.11:11434/api/chat"
|
||||
|
||||
#! Selenium WebDriver setup for screenshots
|
||||
#chrome_options = Options()
|
||||
#chrome_options.add_argument("--headless")
|
||||
#driver = webdriver.Chrome(options=chrome_options)
|
||||
APPEND_RESULTS = os.getenv('APPEND_RESULTS', 'True') == 'True'
|
||||
|
||||
async def start(update: Update, context: CallbackContext):
|
||||
await context.bot.send_message(chat_id=update.effective_chat.id, text="Hi! I'm your AI bot. Ask me aything with /ask")
|
||||
@ -78,7 +73,11 @@ async def ok(update: Update, context: CallbackContext):
|
||||
context.chat_data['messages'] = []
|
||||
await update.message.reply_text("Exiting ask mode.")
|
||||
|
||||
#https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion
|
||||
# CODE RUNNER
|
||||
import re
|
||||
from agents.runner import execute_python_code
|
||||
|
||||
#https://github.com/jmorganca/ollama/blob/main/docs/api.md#generate-a-completion
|
||||
async def query_llm(user_message):
|
||||
"""Query the LLM with the user's message."""
|
||||
data = {
|
||||
@ -95,7 +94,22 @@ async def query_llm(user_message):
|
||||
logger.error(f"LLM Error: {error_message}")
|
||||
# Return a user-friendly error message
|
||||
return "Sorry, there was an error processing your request."
|
||||
return response_data.get('message', {}).get('content', 'No response from AI')
|
||||
# handle response
|
||||
content = response_data.get('message', {}).get('content', 'No response')
|
||||
|
||||
# Find and execute all code blocks
|
||||
code_blocks = re.findall(r"```(.*?)```", content, re.DOTALL)
|
||||
for code in code_blocks:
|
||||
execution_result = execute_python_code(code.strip())
|
||||
if APPEND_RESULTS:
|
||||
# Append the result after the code block
|
||||
content = content.replace(f"```{code}```", f"```{code}```\n```{execution_result}```")
|
||||
else:
|
||||
# Replace the code block with its result
|
||||
content = content.replace(f"```{code}```", f"```{execution_result}```")
|
||||
|
||||
|
||||
return content
|
||||
else:
|
||||
logger.error(f"Error reaching LLM: {response.text}")
|
||||
return "Error: Unable to reach the AI agent."
|
||||
@ -172,10 +186,44 @@ async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE) -> N
|
||||
async def bad_command(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Raise an error to trigger the error handler.

    The handler does nothing except await a bot attribute named
    ``wrong_method_name``; ``update`` is accepted but not used.
    """
    # NOTE(review): presumably `wrong_method_name` does not exist on the bot,
    # so this line fails on purpose -- the type-checker suppression below
    # points the same way. Confirm before "fixing" the name.
    await context.bot.wrong_method_name() # type: ignore[attr-defined]
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
#------------------------- webagent --------------------------#
|
||||
import schedule
|
||||
import time
|
||||
from agents.webagent import run_web_agent
|
||||
|
||||
|
||||
|
||||
async def async_main():
    """Asynchronous entry point: run the bot's ``main()`` coroutine once.

    Returns whatever point ``main()`` completes at; nothing is returned to
    the caller.
    """
    # A coroutine only executes inside a running event loop, so probing
    # `loop.is_running()` here is always true: doing so and scheduling
    # `main()` as a task *in addition* to awaiting it would start the bot
    # twice. Await it exactly once instead.
    await main()
|
||||
|
||||
def sync_main(topic="TSLA", interval=1, folder="agent-py-bot/scrape/raw"):
    """Run the web agent immediately, then again on a fixed interval, forever.

    Args:
        topic: Search topic handed to ``run_web_agent``.
        interval: Number of minutes between scheduled runs.
        folder: Folder handed to ``run_web_agent`` for its output.

    This function never returns: it polls the ``schedule`` job queue once
    per second in an endless loop, blocking the calling thread.
    """
    schedule.every(interval).minutes.do(run_web_agent, topic=topic, folder=folder)

    # Run once at the start instead of waiting a full interval for the
    # first scheduled execution.
    run_web_agent(topic=topic, folder=folder)

    while True:
        schedule.run_pending()
        time.sleep(1)
|
||||
|
||||
if __name__ == '__main__':
    import threading

    # Run the synchronous part.
    # sync_main() never returns (it sleeps/polls in a `while True`), so it
    # cannot share the main thread with the asyncio event loop: whichever
    # started first would starve the other. Give the scheduler its own
    # daemon thread and keep the main thread for the event loop.
    scheduler_thread = threading.Thread(
        target=sync_main,
        name="web-agent-scheduler",
        daemon=True,
    )
    scheduler_thread.start()

    loop = asyncio.get_event_loop()

    # Run the asynchronous part
    if loop.is_running():
        # Already inside a running loop: just schedule the coroutine and
        # return control so the loop can actually execute it.
        loop.create_task(async_main())
    else:
        loop.run_until_complete(async_main())
        # If the async part ever finishes, keep the process alive for the
        # scheduler (a daemon thread would otherwise die with the process).
        scheduler_thread.join()
|
13
agent-py-bot/agents/runner.py
Normal file
13
agent-py-bot/agents/runner.py
Normal file
@ -0,0 +1,13 @@
|
||||
import subprocess
|
||||
import re
|
||||
|
||||
|
||||
def execute_python_code(code_block):
    """Run a Python snippet in a child interpreter and return its output.

    Args:
        code_block: Python source text; passed to the interpreter via ``-c``.

    Returns:
        The child's stdout if it wrote any, otherwise its stderr (which may
        be an empty string). If the child cannot be started or exceeds the
        5-second timeout, a string starting with ``"Execution error: "``.

    SECURITY: the snippet runs unsandboxed with this process's privileges.
    Callers that feed it model-generated or user-supplied text are executing
    untrusted code -- isolate this (container, restricted user, etc.) before
    exposing it beyond a trusted environment.
    """
    import sys  # local import: only needed to locate the interpreter

    # Use the interpreter running this process; a bare "python" may be
    # absent from PATH or resolve to a different version / virtualenv.
    interpreter = sys.executable or 'python'

    try:
        result = subprocess.run(
            [interpreter, '-c', code_block],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except Exception as e:
        # Deliberate catch-all: the error text is reported back to the
        # caller instead of crashing it (timeouts, launch failures, bad
        # arguments such as embedded NUL bytes).
        return f"Execution error: {str(e)}"

    return result.stdout or result.stderr
|
||||
|
||||
|
35
agent-py-bot/agents/webagent.py
Normal file
35
agent-py-bot/agents/webagent.py
Normal file
@ -0,0 +1,35 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
import os
|
||||
import json
|
||||
from datetime import datetime
|
||||
|
||||
def search_news(topic):
    """Fetch a Google search results page for ``topic``.

    Args:
        topic: Free-text search query.

    Returns:
        A list of extracted news items. Extraction is not implemented yet,
        so the list is currently always empty.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    response = requests.get(
        "https://www.google.com/search",
        # Let requests URL-encode the query so topics containing spaces,
        # '&', '#', etc. cannot corrupt the URL.
        params={"q": topic},
        # Without a timeout a stalled connection would hang the caller
        # indefinitely.
        timeout=10,
    )
    # Parsed page, kept ready for the extraction step below.
    soup = BeautifulSoup(response.text, 'html.parser')

    news_data = []  # Extract relevant information here
    return news_data
|
||||
|
||||
def save_data(data, folder):
    """Write ``data`` as JSON to a timestamped file inside ``folder``.

    Args:
        data: Any JSON-serialisable object.
        folder: Target directory; created (with parents) if missing.

    The file is named ``data_<YYYYmmdd_HHMMSS>.json`` from the local time,
    so two calls within the same second write to the same file and the
    later one wins.
    """
    # exist_ok avoids the check-then-create race of a separate
    # os.path.exists() test when two runs start at the same time.
    os.makedirs(folder, exist_ok=True)

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    file_path = os.path.join(folder, f"data_{timestamp}.json")

    # Explicit encoding keeps the output independent of the platform locale.
    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(data, file)
|
||||
|
||||
def summarize_data(data):
    """Return a summary for ``data``.

    Stub: the argument is currently ignored and a fixed placeholder string
    is returned.
    """
    # Replace with actual summarization logic
    return "Summarized information"
|
||||
|
||||
def run_web_agent(topic, folder):
    """Execute one search -> save -> summarise cycle for ``topic``.

    Args:
        topic: Query passed to ``search_news``.
        folder: Directory that receives the saved data and the running
            ``summary_log.txt`` file.

    Each call appends one timestamped summary line to the log.
    """
    print(f"Running web agent for topic: {topic}")

    articles = search_news(topic)
    save_data(articles, folder)

    log_path = os.path.join(folder, "summary_log.txt")
    digest = summarize_data(articles)

    # Append mode: the log accumulates one entry per run.
    with open(log_path, 'a') as log:
        log.write(f"{datetime.now()}: {digest}\n")
|
@ -20,4 +20,14 @@ make the telegram bot think while getting the response
|
||||
|
||||
/ask who are you
|
||||
/ai test
|
||||
/ask how to print numbers 1 to 100 in python? mark your answers in python code blocks
|
||||
/ask how to print numbers 1 to 100 in python? mark your answers in python code blocks
|
||||
|
||||
|
||||
|
||||
|
||||
# prompts
|
||||
In Python, create an app that searches for news about a specific topic on a time-based interval (it should also be possible to subscribe to updates). The information sources should be versatile and flexible: we can use web search engines, Reddit, Twitter, transcriptions of the latest YouTube videos and, of course, well-known news and information websites. All found information will be stored in a separate folder, and then we'll use an LLM to summarize all of it into a simple trend. A log of all the summaries will be appended to at each interval.
|
||||
|
||||
|
||||
|
||||
|
||||
|
@ -10,4 +10,11 @@ data = {
|
||||
# "content": "what is in this image?",
|
||||
# "images": ["iVBORw0KGgoAAAANSUhEUgAAAG0AAABmCAYAAADBPx+VAAAACXBIWXMAAAsTAAALEwEAmpwYAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAA3VSURBVHgB7Z27r0zdG8fX743i1bi1ikMoFMQloXRpKFFIqI7LH4BEQ+NWIkjQuSWCRIEoULk0gsK1kCBI0IhrQVT7tz/7zZo888yz1r7MnDl7z5xvsjkzs2fP3uu71nNfa7lkAsm7d++Sffv2JbNmzUqcc8m0adOSzZs3Z+/XES4ZckAWJEGWPiCxjsQNLWmQsWjRIpMseaxcuTKpG/7HP27I8P79e7dq1ars/yL4/v27S0ejqwv+cUOGEGGpKHR37tzJCEpHV9tnT58+dXXCJDdECBE2Ojrqjh071hpNECjx4cMHVycM1Uhbv359B2F79+51586daxN/+pyRkRFXKyRDAqxEp4yMlDDzXG1NPnnyJKkThoK0VFd1ELZu3TrzXKxKfW7dMBQ6bcuWLW2v0VlHjx41z717927ba22U9APcw7Nnz1oGEPeL3m3p2mTAYYnFmMOMXybPPXv2bNIPpFZr1NHn4HMw0KRBjg9NuRw95s8PEcz/6DZELQd/09C9QGq5RsmSRybqkwHGjh07OsJSsYYm3ijPpyHzoiacg35MLdDSIS/O1yM778jOTwYUkKNHWUzUWaOsylE00MyI0fcnOwIdjvtNdW/HZwNLGg+sR1kMepSNJXmIwxBZiG8tDTpEZzKg0GItNsosY8USkxDhD0Rinuiko2gfL/RbiD2LZAjU9zKQJj8RDR0vJBR1/Phx9+PHj9Z7REF4nTZkxzX4LCXHrV271qXkBAPGfP/atWvu/PnzHe4C97F48eIsRLZ9+3a3f/9+87dwP1JxaF7/3r17ba+5l4EcaVo0lj3SBq5kGTJSQmLWMjgYNei2GPT1MuMqGTDEFHzeQSP2wi/jGnkmPJ/nhccs44jvDAxpVcxnq0F6eT8h4ni/iIWpR5lPyA6ETkNXoSukvpJAD3AsXLiwpZs49+fPn5ke4j10TqYvegSfn0OnafC+Tv9ooA/JPkgQysqQNBzagXY55nO/oa1F7qvIPWkRL12WRpMWUvpVDYmxAPehxWSe8ZEXL20sadYIozfmNch4QJPAfeJgW3rNsnzphBKNJM2KKODo1rVOMRYik5ETy3ix4qWNI81qAAirizgMIc+yhTytx0JWZuNI03qsrgWlGtwjoS9XwgUhWGyhUaRZZQNNIEwCiXD16tXcAHUs79co0vSD8rrJCIW98pzvxpAWyyo3HYwqS0+H0BjStClcZJT5coMm6D2LOF8TolGJtK9fvyZpyiC5ePFi9nc/oJU4eiEP0jVoAnHa9wyJycITMP78+eMeP37sXrx44d6+fdt6f82aNdkx1pg9e3Zb5W+RSRE+n+VjksQWifvVaTKFhn5O8my63K8Qabdv33b379/PiAP//vuvW7BggZszZ072/+TJk91YgkafPn166zXB1rQHFvouAWHq9z3SEevSUerqCn2/dDCeta2jxYbr69evk4MHDyY7d+7MjhMnTiTPnz9Pfv/+nfQT2ggpO2dMF8cghuoM7Ygj5iWCqRlGFml0QC/ftGmTmzt3rmsaKDsgBSPh0/8yPeLLBihLkOKJc0jp8H8vUzcxIA1k6QJ/c78tWEyj5P3o4u9+jywNPdJi5rAH9x0KHcl4Hg570eQp3+vHXGyrmEeigzQsQsjavXt38ujRo44LQuDDhw+TW7duRS1HGgMxhNXHgflaNTOsHyKvHK5Ijo2jbFjJBQK9YwFd6RVMzfgRBmEfP37suBBm/p49e1qjEP2mwTViNRo0VJWH1deMXcNK08uUjVUu7s/zRaL+oLNxz1bpANco4npUgX4G2eFbpDFyQoQxojBCpEGSytmOH8qrH5Q9vuzD6ofQylkCUmh8DBAr+q8JCyVNtWQIidKQE9wNtLSQnS4jDS
sxNHogzFuQBw4cyM61UKVsjfr3ooBkPSqqQHesUPWVtzi9/vQi1T+rJj7WiTz4Pt/l3LxUkr5P2VYZaZ4URpsE+st/dujQoaBBYokbrz/8TJNQYLSonrPS9kUaSkPeZyj1AWSj+d+VBoy1pIWVNed8P0Ll/ee5HdGRhrHhR5GGN0r4LGZBaj8oFDJitBTJzIZgFcmU0Y8ytWMZMzJOaXUSrUs5RxKnrxmbb5YXO9VGUhtpXldhEUogFr3IzIsvlpmdosVcGVGXFWp2oU9kLFL3dEkSz6NHEY1sjSRdIuDFWEhd8KxFqsRi1uM/nz9/zpxnwlESONdg6dKlbsaMGS4EHFHtjFIDHwKOo46l4TxSuxgDzi+rE2jg+BaFruOX4HXa0Nnf1lwAPufZeF8/r6zD97WK2qFnGjBxTw5qNGPxT+5T/r7/7RawFC3j4vTp09koCxkeHjqbHJqArmH5UrFKKksnxrK7FuRIs8STfBZv+luugXZ2pR/pP9Ois4z+TiMzUUkUjD0iEi1fzX8GmXyuxUBRcaUfykV0YZnlJGKQpOiGB76x5GeWkWWJc3mOrK6S7xdND+W5N6XyaRgtWJFe13GkaZnKOsYqGdOVVVbGupsyA/l7emTLHi7vwTdirNEt0qxnzAvBFcnQF16xh/TMpUuXHDowhlA9vQVraQhkudRdzOnK+04ZSP3DUhVSP61YsaLtd/ks7ZgtPcXqPqEafHkdqa84X6aCeL7YWlv6edGFHb+ZFICPlljHhg0bKuk0CSvVznWsotRu433alNdFrqG45ejoaPCaUkWERpLXjzFL2Rpllp7PJU2a/v7Ab8N05/9t27Z16KUqoFGsxnI9EosS2niSYg9SpU6B4JgTrvVW1flt1sT+0ADIJU2maXzcUTraGCRaL1Wp9rUMk16PMom8QhruxzvZIegJjFU7LLCePfS8uaQdPny4jTTL0dbee5mYokQsXTIWNY46kuMbnt8Kmec+LGWtOVIl9cT1rCB0V8WqkjAsRwta93TbwNYoGKsUSChN44lgBNCoHLHzquYKrU6qZ8lolCIN0Rh6cP0Q3U6I6IXILYOQI513hJaSKAorFpuHXJNfVlpRtmYBk1Su1obZr5dnKAO+L10Hrj3WZW+E3qh6IszE37F6EB+68mGpvKm4eb9bFrlzrok7fvr0Kfv727dvWRmdVTJHw0qiiCUSZ6wCK+7XL/AcsgNyL74DQQ730sv78Su7+t/A36MdY0sW5o40ahslXr58aZ5HtZB8GH64m9EmMZ7FpYw4T6QnrZfgenrhFxaSiSGXtPnz57e9TkNZLvTjeqhr734CNtrK41L40sUQckmj1lGKQ0rC37x544r8eNXRpnVE3ZZY7zXo8NomiO0ZUCj2uHz58rbXoZ6gc0uA+F6ZeKS/jhRDUq8MKrTho9fEkihMmhxtBI1DxKFY9XLpVcSkfoi8JGnToZO5sU5aiDQIW716ddt7ZLYtMQlhECdBGXZZMWldY5BHm5xgAroWj4C0hbYkSc/jBmggIrXJWlZM6pSETsEPGqZOndr2uuuR5rF169a2HoHPdurUKZM4CO1WTPqaDaAd+GFGKdIQkxAn9RuEWcTRyN2KSUgiSgF5aWzPTeA/lN5rZubMmR2bE4SIC4nJoltgAV/dVefZm72AtctUCJU2CMJ327hxY9t7EHbkyJFseq+EJSY16RPo3Dkq1kkr7+q0bNmyDuLQcZBEPYmHVdOBiJyIlrRDq41YPWfXOxUysi5fvtyaj+2BpcnsUV/oSoEMOk2CQGlr4ckhBwaetBhjCwH0ZHtJROPJkyc7UjcYLDjmrH7ADTEBXFfOYmB0k9oYBOjJ8b4aOYSe7QkKcYhFlq3QYLQhSidNmtS2RATwy8YOM3EQJsUjKiaWZ+vZToUQgzhkHXudb/PW5YMHD9yZM2faPsMwoc7RciYJXbGuBqJ1UIGKKLv915jsvgtJxCZDubdXr165mzdvtr1Hz5LONA8jrUwKPq
smVesKa49S3Q4WxmRPUEYdTjgiUcfUwLx589ySJUva3oMkP6IYddq6HMS4o55xBJBUeRjzfa4Zdeg56QZ43LhxoyPo7Lf1kNt7oO8wWAbNwaYjIv5lhyS7kRf96dvm5Jah8vfvX3flyhX35cuX6HfzFHOToS1H4BenCaHvO8pr8iDuwoUL7tevX+b5ZdbBair0xkFIlFDlW4ZknEClsp/TzXyAKVOmmHWFVSbDNw1l1+4f90U6IY/q4V27dpnE9bJ+v87QEydjqx/UamVVPRG+mwkNTYN+9tjkwzEx+atCm/X9WvWtDtAb68Wy9LXa1UmvCDDIpPkyOQ5ZwSzJ4jMrvFcr0rSjOUh+GcT4LSg5ugkW1Io0/SCDQBojh0hPlaJdah+tkVYrnTZowP8iq1F1TgMBBauufyB33x1v+NWFYmT5KmppgHC+NkAgbmRkpD3yn9QIseXymoTQFGQmIOKTxiZIWpvAatenVqRVXf2nTrAWMsPnKrMZHz6bJq5jvce6QK8J1cQNgKxlJapMPdZSR64/UivS9NztpkVEdKcrs5alhhWP9NeqlfWopzhZScI6QxseegZRGeg5a8C3Re1Mfl1ScP36ddcUaMuv24iOJtz7sbUjTS4qBvKmstYJoUauiuD3k5qhyr7QdUHMeCgLa1Ear9NquemdXgmum4fvJ6w1lqsuDhNrg1qSpleJK7K3TF0Q2jSd94uSZ60kK1e3qyVpQK6PVWXp2/FC3mp6jBhKKOiY2h3gtUV64TWM6wDETRPLDfSakXmH3w8g9Jlug8ZtTt4kVF0kLUYYmCCtD/DrQ5YhMGbA9L3ucdjh0y8kOHW5gU/VEEmJTcL4Pz/f7mgoAbYkAAAAAElFTkSuQmCC"],
|
||||
"stream": False
|
||||
}
|
||||
}
|
||||
|
||||
SYS: Write full run.py that can be executed in brand new environment
|
||||
USER: /ask how to print numbers 1 to 100 in python?
|
||||
|
||||
SYS: You are an AI agent that has different tools to work with. For example, you can visit and read websites using the following format: <a href="the url" request="text|snapshot">
|
||||
The response will be in the same format, with a result attribute: <a href="the url" request="text|snapshot" result="">. Wait for the results in the next user message before answering.
|
||||
USER: what is the news today?
|
@ -19,3 +19,6 @@ trio-websocket==0.11.1
|
||||
urllib3==2.1.0
|
||||
wsproto==1.2.0
|
||||
nest_asyncio
|
||||
requests
|
||||
beautifulsoup4
|
||||
schedule
|
Loading…
x
Reference in New Issue
Block a user