memory node, neo4j graph db

This commit is contained in:
Dobromir Popov
2024-03-20 08:54:14 +00:00
parent 97b0f9b64f
commit c9f77a6001
18 changed files with 625 additions and 0 deletions

View File

@ -0,0 +1,21 @@
<svg version="1.1" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 20 20" width="20" height="20">
<!-- svg-source:excalidraw -->
<!-- payload-type:application/vnd.excalidraw+json --><!-- payload-version:2 --><!-- payload-start -->eyJ2ZXJzaW9uIjoiMSIsImVuY29kaW5nIjoiYnN0cmluZyIsImNvbXByZXNzZWQiOnRydWUsImVuY29kZWQiOiJ4nEWOMVx1MDAwYsIwXHUwMDEwhff+ilx1MDAxMldtwbEggu4uXHUwMDFkxeFIznqYNCG5tGrpfzeJgzdcdTAwMWO871x1MDAxZe/dUtW14LdD0dVcdTAwMDJfXHUwMDEyNClcdTAwMGaz2GY+oVx1MDAwZmTHdNpcdTAwMTdcdTAwMWRs9LI4XHUwMDFmzC50bWvAP5GdXHUwMDA2ic1EIYJcdTAwMGVcdTAwMWNcdTAwMTXZRlrTXHUwMDEyo1x0x7wvYPDgrFHsm3/JXHUwMDBlXHUwMDE1sfW/LtRocOSQ0q+3QsC5noFz35J0XCKDJ9XTJ5Mxar390YlwPoF8XHUwMDBl3sZRna1OkenFzb2MSKa15N1JY45f1mr9XHUwMDAy4lxcTtsifQ==<!-- payload-end -->
<defs>
<style class="style-fonts">
@font-face {
font-family: "Virgil";
src: url("https://open-vsx.org/vscode/asset/pomdtr/excalidraw-editor/3.7.3/Microsoft.VisualStudio.Code.WebResources/extension/public//dist/excalidraw-assets/Virgil.woff2");
}
@font-face {
font-family: "Cascadia";
src: url("https://open-vsx.org/vscode/asset/pomdtr/excalidraw-editor/3.7.3/Microsoft.VisualStudio.Code.WebResources/extension/public//dist/excalidraw-assets/Cascadia.woff2");
}
@font-face {
font-family: "Assistant";
src: url("https://open-vsx.org/vscode/asset/pomdtr/excalidraw-editor/3.7.3/Microsoft.VisualStudio.Code.WebResources/extension/public//dist/excalidraw-assets/Assistant-Regular.woff2");
}
</style>
</defs>
<rect x="0" y="0" width="20" height="20" fill="#ffffff"></rect></svg>

After

Width:  |  Height:  |  Size: 1.6 KiB

27
_notes/arti/ideas.md Normal file
View File

@ -0,0 +1,27 @@
Key features & principles:
- modular/plug & play design
- Biomimicry based
- self inferencing loop
- Graph->LLM->Graph based logic (Self reflect)
- attention (Short term memory)
- generalized & contextualized memory schema (memory is strongly context dependent and temporal)
LLM module
Graph module
Short term memory module
mid-term memory (history on the topic)
graph-powered long-term memory with embedding storage for skills & AII (interface on some of the layers)
separate text IOs
- multi agent communication module/console/
- internal state/context/mood/STM
- actions output
GRAPH schema
idea
- is child of
Q: Brainstorm a Neo4j schema for biomimetic memory storage as a Neo4j graph database. It should be similar to the way humans store, retrieve, and generalize knowledge.
Memory model:

82
_notes/arti/neo4j.cql Normal file
View File

@ -0,0 +1,82 @@
<!-- https://guides.neo4j.com/wiki -->
# Cypher Query Language
Runs a simple command to clean the database
// Wipe every node together with its relationships before re-importing.
MATCH (n) DETACH DELETE n;
// DETACH DELETE removes data but not schema, so IF NOT EXISTS keeps the
// index creation re-runnable after a wipe.
CREATE INDEX IF NOT EXISTS FOR (c:Category) ON (c.catId);
CREATE INDEX IF NOT EXISTS FOR (c:Category) ON (c.catName);
CREATE INDEX IF NOT EXISTS FOR (p:Page) ON (p.pageTitle);
// Seed the root category at level 0, flagged as not yet fetched.
CREATE (c:Category:RootCategory {catId: 0, catName: 'Databases', subcatsFetched: false, pagesFetched: false, level: 0});
<!-- install APOC library (Dockerfile step); curl -f fails on an HTTP error instead of saving the error page as the jar -->
RUN mkdir -p /var/lib/neo4j/plugins \
&& cd /var/lib/neo4j/plugins \
&& curl -fL -O https://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/download/4.4.0.0/apoc-4.4.0.0-all.jar
<!-- environment:
NEO4JLABS_PLUGINS: '["apoc"]'
NEO4J_apoc_export_file_enabled: 'true'
NEO4J_apoc_import_file_enabled: 'true'
NEO4J_apoc_import_file_use__neo4j__config: 'true'
NEO4J_dbms_security_procedures_unrestricted: apoc.* -->
<!-- UNWIND range(0,3) as level
CALL apoc.cypher.doit("
MATCH (c:Category { subcatsFetched: false, level: $level})
CALL apoc.load.json('https://en.wikipedia.org/w/api.php?format=json&action=query&list=categorymembers&cmtype=subcat&cmtitle=Category:' + apoc.text.urlencode(c.catName) + '&cmprop=ids%7Ctitle&cmlimit=500')
YIELD value as results
UNWIND results.query.categorymembers AS subcat
MERGE (sc:Category {catId: subcat.pageid})
ON CREATE SET sc.catName = substring(subcat.title,9),
sc.subcatsFetched = false,
sc.pagesFetched = false,
sc.level = $level + 1
WITH sc,c
CALL apoc.create.addLabels(sc,['Level' + ($level + 1) + 'Category']) YIELD node
MERGE (sc)-[:SUBCAT_OF]->(c)
WITH DISTINCT c
SET c.subcatsFetched = true", { level: level }) YIELD value
RETURN value -->
<!-- CALL {
LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/cats.csv?raw=true" AS row
CREATE (c:Category { catId: row[0]})
SET c.catName = row[2], c.pageCount = toInteger(row[3]), c.subcatCount = toInteger(row[4])
} IN TRANSACTIONS OF 10000 ROWS
CALL {
LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/cats.csv?raw=true" AS row
CREATE (c:Category { catId: row[0]})
SET c.catName = row[2], c.pageCount = toInteger(row[3]), c.subcatCount = toInteger(row[4])
} IN TRANSACTIONS OF 10000 ROWS -->
// Import categories: one Category node per row of cats.csv.
// row[0] becomes catId, row[2] catName, row[3] pageCount, row[4] subcatCount.
// NOTE(review): row[0] is stored without toInteger(), so catId is presumably
// a string here — confirm this matches how catId is used elsewhere.
// NOTE(review): this CALL block has no IN TRANSACTIONS clause, unlike the
// commented-out variant above — confirm that is intended.
CALL {
LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/cats.csv?raw=true" AS row
CREATE (c:Category { catId: row[0]})
SET c.catName = row[2], c.pageCount = toInteger(row[3]), c.subcatCount = toInteger(row[4])
}
// Import hierarchy: for each row of rels.csv, link the category whose catId is
// row[0] to the category whose catId is row[1] with a SUBCAT_OF relationship.
// NOTE(review): no `;` separates this from the CALL block above — confirm
// whether the two are meant to run as one query or as separate statements.
LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/rels.csv?raw=true" AS row
MATCH (from:Category { catId: row[0]})
MATCH (to:Category { catId: row[1]})
CREATE (from)-[:SUBCAT_OF]->(to)
<!-- stats -->
// Page-count statistics aggregated over all Category nodes.
MATCH (c:Category)
RETURN SUM(c.pageCount) AS `#pages categorised (with duplicates)`,
AVG(c.pageCount) AS `average #pages per cat`,
percentileCont(c.pageCount, 0.75) AS `.75p #pages in a cat`,
MIN(c.pageCount) AS `min #pages in a cat`,
MAX(c.pageCount) AS `max #pages in a cat`;
// Count categories that have no SUBCAT_OF relationship in either direction.
MATCH (c:Category)
WHERE NOT (c)-[:SUBCAT_OF]-()
RETURN COUNT(c);
// Categories whose name contains the search text.
// NOTE(review): '{term}' is presumably a placeholder substituted before running — confirm.
MATCH (c:Category)
WHERE c.catName CONTAINS '{term}'
RETURN c;

View File

@ -0,0 +1,33 @@
{
/* the Wikidata JSON file */
"file": "./wikidata-dump.json",
/* neo4j connection details */
"neo4j": {
/* bolt protocol URI */
"bolt": "bolt://localhost",
"auth": {
"user": "neo4j",
"pass": "password"
}
},
/* Stages */
"do": {
/* database cleanup */
"0": true,
/* importing items and properties */
"1": true,
/* linking entities and generating claims */
"2": true
},
/* extra console output on stage 2 */
"verbose": false,
/* how many commands will be run by the DB at a given time */
"concurrency": 4,
/* skip lines */
"skip": 0,
/* count of lines */
"lines": 21225524,
/* bucket size of entities sent to DB to process */
"bucket": 1000
}