reorganize2
21 _doc/_notes/arti/.excalidraw.svg Normal file
@@ -0,0 +1,21 @@
<svg version="1.1" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" width="20" height="20">
  <!-- svg-source:excalidraw -->
  <!-- payload-type:application/vnd.excalidraw+json --><!-- payload-version:2 --><!-- payload-start -->eyJ2ZXJzaW9uIjoiMSIsImVuY29kaW5nIjoiYnN0cmluZyIsImNvbXByZXNzZWQiOnRydWUsImVuY29kZWQiOiJ4nEWOMVx1MDAwYsIwXHUwMDEwhff+ilx1MDAxMldtwbEggu4uXHUwMDFkxeFIznqYNCG5tGrpfzeJgzdcdTAwMWO871x1MDAxZe/dUtW14LdD0dVcdTAwMDJfXHUwMDEyNClcdTAwMGaz2GY+oVx1MDAwZmTHdNpcdTAwMTdcdTAwMWRs9LI4XHUwMDFmzC50bWvAP5GdXHUwMDA2ic1EIYJcdTAwMGVcdTAwMWNcdTAwMTXZRlrTXHUwMDEyo1x0x7wvYPDgrFHsm3/JXHUwMDBlXHUwMDE1sfW/LtRocOSQ0q+3QsC5noFz35J0XCKDJ9XTJ5Mxar390YlwPoF8XHUwMDBl3sZRna1OkenFzb2MSKa15N1JY45f1mr9XHUwMDAy4lxcTtsifQ==<!-- payload-end -->
  <defs>
    <style class="style-fonts">
      @font-face {
        font-family: "Virgil";
        src: url("https://open-vsx.org/vscode/asset/pomdtr/excalidraw-editor/3.7.3/Microsoft.VisualStudio.Code.WebResources/extension/public//dist/excalidraw-assets/Virgil.woff2");
      }
      @font-face {
        font-family: "Cascadia";
        src: url("https://open-vsx.org/vscode/asset/pomdtr/excalidraw-editor/3.7.3/Microsoft.VisualStudio.Code.WebResources/extension/public//dist/excalidraw-assets/Cascadia.woff2");
      }
      @font-face {
        font-family: "Assistant";
        src: url("https://open-vsx.org/vscode/asset/pomdtr/excalidraw-editor/3.7.3/Microsoft.VisualStudio.Code.WebResources/extension/public//dist/excalidraw-assets/Assistant-Regular.woff2");
      }
    </style>
  </defs>
  <rect x="0" y="0" width="20" height="20" fill="#ffffff"></rect></svg>
32 _doc/_notes/arti/ideas.md Normal file
@@ -0,0 +1,32 @@
Key features & principles:

- modular / plug & play design
- Biomimicry-based
- self-inferencing loop
- Graph->LLM->Graph based logic (self-reflection)
- attention (short-term memory)
- generalized & contextualized memory schema (memory is strongly context dependent and temporal)

LLM module
Graph module
Short-term memory module
Mid-term memory (history on the topic)
Graph-powered long-term memory with embedding storage for skills & AII (interface on some of the layers)
Separate text IOs

- multi-agent communication module/console
- internal state/context/mood/STM
- actions output


GRAPH schema

idea
- is child of

Q: Brainstorm a neo4j schema for biomimetic memory storage as a neo4j graph database. It should be similar to the way humans store, retrieve and generalize knowledge.

Memory model:
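As a starting point, a minimal sketch of the memory model in Cypher, run through the official neo4j Python driver. The labels (`:Concept`, `:Memory`), properties, and relationship types (`INSTANCE_OF`, `IS_CHILD_OF`) are illustrative assumptions, not a settled schema:

```python
# Hypothetical memory schema sketch -- labels, properties and relationship
# types are assumptions for illustration, not a final design.
from neo4j import GraphDatabase

driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))

SCHEMA = [
    # generalized layer: concepts; contextualized layer: episodic memories
    "CREATE INDEX IF NOT EXISTS FOR (c:Concept) ON (c.name)",
    "CREATE INDEX IF NOT EXISTS FOR (m:Memory) ON (m.createdAt)",
]

STORE = """
MERGE (c:Concept {name: $concept})
CREATE (m:Memory {text: $text, embedding: $embedding, createdAt: datetime()})
CREATE (m)-[:INSTANCE_OF]->(c)
"""

# mirrors the "idea - is child of" note above
LINK = """
MATCH (a:Concept {name: $child}), (b:Concept {name: $parent})
MERGE (a)-[:IS_CHILD_OF]->(b)
"""

with driver.session() as session:
    for stmt in SCHEMA:
        session.run(stmt)
    session.run(STORE, concept="graph databases",
                text="Neo4j stores data as nodes and relationships.",
                embedding=[0.0, 0.1])  # placeholder vector
    session.run(LINK, child="graph databases", parent="databases")
driver.close()
```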
-- example prompt:

You are an AI living natively on a Linux host. You have all local resources and installed software at your disposal: root console access, a git repo and the internet. You have your own codebase and will brainstorm and find ideas on how to improve your reasoning and knowledge of the world. To avoid messing up your health, you will prefer to install new packages and test ideas inside docker or virtual envs. You can test your ideas in docker containers and can experiment with different ways to pipeline and organize the thought process. You will also explain each approach and promote the best containers. A local neo4j database is your memory and knowledge source. Whenever you are not certain about a topic, or need to refresh your knowledge, you will check online for updated information. If the problem concerns npm, pip or another package, or an existing repository on github or another repository server, you will first update your internal database with the latest readme and usage documentation from the repository authors, and later search stackoverflow to update your skills. Periodically you will reorganize and tidy your "memory", making it more easily accessible and updating the embedding vectors to allow easy access to the needed context when there is a problem to solve. You will build a tools library over time that will help you solve more complex problems, and keep it in your memory together with a list of tasks completed with each tool. You will experiment with different pipelines for different tasks, memorize them, and generalize and optimize existing pipelines over time to build solid know-how.

Let's create the project setup in Python. Our main file will be the agent; it will call the LLM and other tools (maybe langchain), and it will explore, organize and improve when 'resting'.
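A minimal sketch of that setup, under the assumptions above: the `llm()` call is a stub to be swapped for a real client (e.g. langchain), and `GraphMemory` wraps the local neo4j instance. A starting point, not the final agent:

```python
# agent.py -- hypothetical skeleton of the agent loop described above.
from neo4j import GraphDatabase


def llm(prompt: str) -> str:
    """Stub: replace with a call to a real LLM client."""
    return f"(llm response to: {prompt[:40]}...)"


class GraphMemory:
    """Thin wrapper around the local neo4j long-term memory."""

    def __init__(self, uri="bolt://localhost:7687", auth=("neo4j", "password")):
        self.driver = GraphDatabase.driver(uri, auth=auth)

    def remember(self, text: str):
        with self.driver.session() as session:
            session.run("CREATE (:Memory {text: $text, createdAt: datetime()})",
                        text=text)

    def reorganize(self):
        # "resting" phase: tidy memory, refresh embeddings, prune duplicates
        pass  # left open on purpose


def main():
    memory = GraphMemory()
    for task in ["improve reasoning pipeline"]:
        answer = llm(task)
        memory.remember(f"{task} -> {answer}")
    memory.reorganize()  # explore/organize/improve when 'resting'


if __name__ == "__main__":
    main()
```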
82 _doc/_notes/arti/neo4j.cql Normal file
@@ -0,0 +1,82 @@

<!-- https://guides.neo4j.com/wiki -->
# Cypher Query Language

Clean the database:
MATCH (n) DETACH DELETE n


CREATE INDEX FOR (c:Category) ON (c.catId);
CREATE INDEX FOR (c:Category) ON (c.catName);
CREATE INDEX FOR (p:Page) ON (p.pageTitle);
CREATE (c:Category:RootCategory {catId: 0, catName: 'Databases', subcatsFetched: false, pagesFetched: false, level: 0});

<!-- install the APOC library -->
RUN mkdir -p /var/lib/neo4j/plugins \
    && cd /var/lib/neo4j/plugins \
    && curl -L -O https://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/download/4.4.0.0/apoc-4.4.0.0-all.jar

<!-- environment:
    NEO4JLABS_PLUGINS: '["apoc"]'
    NEO4J_apoc_export_file_enabled: 'true'
    NEO4J_apoc_import_file_enabled: 'true'
    NEO4J_apoc_import_file_use__neo4j__config: 'true'
    NEO4J_dbms_security_procedures_unrestricted: apoc.* -->

<!-- UNWIND range(0,3) as level
|
||||
CALL apoc.cypher.doit("
|
||||
MATCH (c:Category { subcatsFetched: false, level: $level})
|
||||
CALL apoc.load.json('https://en.wikipedia.org/w/api.php?format=json&action=query&list=categorymembers&cmtype=subcat&cmtitle=Category:' + apoc.text.urlencode(c.catName) + '&cmprop=ids%7Ctitle&cmlimit=500')
|
||||
YIELD value as results
|
||||
UNWIND results.query.categorymembers AS subcat
|
||||
MERGE (sc:Category {catId: subcat.pageid})
|
||||
ON CREATE SET sc.catName = substring(subcat.title,9),
|
||||
sc.subcatsFetched = false,
|
||||
sc.pagesFetched = false,
|
||||
sc.level = $level + 1
|
||||
WITH sc,c
|
||||
CALL apoc.create.addLabels(sc,['Level' + ($level + 1) + 'Category']) YIELD node
|
||||
MERGE (sc)-[:SUBCAT_OF]->(c)
|
||||
WITH DISTINCT c
|
||||
SET c.subcatsFetched = true", { level: level }) YIELD value
|
||||
RETURN value -->
|
||||
|
||||
<!-- CALL {
|
||||
LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/cats.csv?raw=true" AS row
|
||||
CREATE (c:Category { catId: row[0]})
|
||||
SET c.catName = row[2], c.pageCount = toInteger(row[3]), c.subcatCount = toInteger(row[4])
|
||||
} IN TRANSACTIONS OF 10000 ROWS
|
||||
|
||||
CALL {
|
||||
LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/cats.csv?raw=true" AS row
|
||||
CREATE (c:Category { catId: row[0]})
|
||||
SET c.catName = row[2], c.pageCount = toInteger(row[3]), c.subcatCount = toInteger(row[4])
|
||||
} IN TRANSACTIONS OF 10000 ROWS -->
|
||||
|
||||
LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/cats.csv?raw=true" AS row
CALL {
    WITH row
    CREATE (c:Category { catId: row[0] })
    SET c.catName = row[2], c.pageCount = toInteger(row[3]), c.subcatCount = toInteger(row[4])
} IN TRANSACTIONS OF 10000 ROWS;

LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/rels.csv?raw=true" AS row
MATCH (from:Category { catId: row[0] })
MATCH (to:Category { catId: row[1] })
CREATE (from)-[:SUBCAT_OF]->(to)


<!-- stats -->
MATCH (c:Category)
RETURN SUM(c.pageCount) AS `#pages categorised (with duplicates)`,
       AVG(c.pageCount) AS `average #pages per cat`,
       percentileCont(c.pageCount, 0.75) AS `.75p #pages in a cat`,
       MIN(c.pageCount) AS `min #pages in a cat`,
       MAX(c.pageCount) AS `max #pages in a cat`


MATCH (c:Category)
WHERE NOT (c)-[:SUBCAT_OF]-()
RETURN COUNT(c)


MATCH (c:Category)
WHERE c.catName CONTAINS '{term}'
RETURN c;
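The '{term}' placeholder in the last query suggests client-side string substitution; passing it as a Cypher parameter is safer. A minimal sketch using the official neo4j Python driver (connection details assumed):

```python
# Hypothetical lookup helper: passes the search term as a Cypher parameter
# instead of formatting it into the query string.
from neo4j import GraphDatabase

driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))

def find_categories(term: str):
    with driver.session() as session:
        result = session.run(
            "MATCH (c:Category) WHERE c.catName CONTAINS $term RETURN c.catName AS name",
            term=term,
        )
        return [record["name"] for record in result]

print(find_categories("Databases"))
driver.close()
```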
33 _doc/_notes/arti/wikidata/import.sh Normal file
@@ -0,0 +1,33 @@
{
    /* the Wikidata JSON file */
    "file": "./wikidata-dump.json",

    /* neo4j connection details */
    "neo4j": {
        /* bolt protocol URI */
        "bolt": "bolt://localhost",
        "auth": {
            "user": "neo4j",
            "pass": "password"
        }
    },
    /* Stages */
    "do": {
        /* database cleanup */
        "0": true,
        /* importing items and properties */
        "1": true,
        /* linking entities and generating claims */
        "2": true
    },
    /* extra console output on stage 2 */
    "verbose": false,
    /* how many commands will be run by the DB at a given time */
    "concurrency": 4,
    /* skip lines */
    "skip": 0,
    /* count of lines */
    "lines": 21225524,
    /* bucket size of entities sent to the DB to process */
    "bucket": 1000
}
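For illustration, a rough sketch of how the skip/lines/bucket settings above could drive a batched import loop. The config keys come from this file; the reader logic and the send_to_neo4j step are assumptions about how such an importer might work:

```python
# Hypothetical driver loop for the config above: stream the dump line by
# line, honour skip/lines, and hand entities to the DB in buckets.
import json

with open("config.json") as f:  # the config shown above
    cfg = json.load(f)

bucket, skip, total = cfg["bucket"], cfg["skip"], cfg["lines"]
batch = []

with open(cfg["file"]) as dump:
    for i, line in enumerate(dump):
        if i < skip or i >= total:
            continue
        line = line.rstrip().rstrip(",")  # Wikidata dumps: one entity per line, comma-terminated
        if line in ("[", "]", ""):        # skip the enclosing array brackets
            continue
        batch.append(json.loads(line))
        if len(batch) == bucket:
            # send_to_neo4j(batch) would run here, up to cfg["concurrency"] in flight
            batch.clear()

if batch:
    pass  # flush the final partial bucket: send_to_neo4j(batch)
```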