gogo2/_doc/_notes/arti/neo4j.cql
2024-03-23 01:07:39 +02:00

83 lines
3.1 KiB
SQL

<!-- https://guides.neo4j.com/wiki -->
# Cypher Query Language
Wipe the database: delete every node together with all of its relationships.
// Reset the graph: remove every node together with its relationships.
// This does not drop indexes, so the index statements below must tolerate
// indexes left over from a previous run.
MATCH (n) DETACH DELETE n;

// Indexes on the properties used to look up categories and pages.
// IF NOT EXISTS keeps these statements re-runnable after a wipe.
CREATE INDEX IF NOT EXISTS FOR (c:Category) ON (c.catId);
CREATE INDEX IF NOT EXISTS FOR (c:Category) ON (c.catName);
CREATE INDEX IF NOT EXISTS FOR (p:Page) ON (p.pageTitle);

// Seed the root category at level 0 with both "fetched" flags cleared.
CREATE (c:Category:RootCategory {catId: 0, catName: 'Databases', subcatsFetched: false, pagesFetched: false, level: 0});
<!-- install APOC library -->
# Download the APOC 4.4.0.0 "all" jar into the Neo4j plugins directory.
# -f makes curl exit non-zero on an HTTP error, so a failed download breaks
# the build instead of silently saving an error page under the jar's name.
RUN mkdir -p /var/lib/neo4j/plugins \
&& cd /var/lib/neo4j/plugins \
&& curl -fL -O https://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/download/4.4.0.0/apoc-4.4.0.0-all.jar
<!-- environment:
NEO4JLABS_PLUGINS: '["apoc"]'
NEO4J_apoc_export_file_enabled: 'true'
NEO4J_apoc_import_file_enabled: 'true'
NEO4J_apoc_import_file_use__neo4j__config: 'true'
NEO4J_dbms_security_procedures_unrestricted: apoc.* -->
<!-- UNWIND range(0,3) as level
CALL apoc.cypher.doit("
MATCH (c:Category { subcatsFetched: false, level: $level})
CALL apoc.load.json('https://en.wikipedia.org/w/api.php?format=json&action=query&list=categorymembers&cmtype=subcat&cmtitle=Category:' + apoc.text.urlencode(c.catName) + '&cmprop=ids%7Ctitle&cmlimit=500')
YIELD value as results
UNWIND results.query.categorymembers AS subcat
MERGE (sc:Category {catId: subcat.pageid})
ON CREATE SET sc.catName = substring(subcat.title,9),
sc.subcatsFetched = false,
sc.pagesFetched = false,
sc.level = $level + 1
WITH sc,c
CALL apoc.create.addLabels(sc,['Level' + ($level + 1) + 'Category']) YIELD node
MERGE (sc)-[:SUBCAT_OF]->(c)
WITH DISTINCT c
SET c.subcatsFetched = true", { level: level }) YIELD value
RETURN value -->
<!-- CALL {
LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/cats.csv?raw=true" AS row
CREATE (c:Category { catId: row[0]})
SET c.catName = row[2], c.pageCount = toInteger(row[3]), c.subcatCount = toInteger(row[4])
} IN TRANSACTIONS OF 10000 ROWS
CALL {
LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/cats.csv?raw=true" AS row
CREATE (c:Category { catId: row[0]})
SET c.catName = row[2], c.pageCount = toInteger(row[3]), c.subcatCount = toInteger(row[4])
} IN TRANSACTIONS OF 10000 ROWS -->
// Create one Category node per row of cats.csv:
//   column 0 -> catId (kept as the raw CSV string, which is what the
//               relationship import matches on),
//   column 2 -> catName,
//   columns 3 and 4 -> pageCount and subcatCount, converted to integers.
// NOTE(review): without an `IN TRANSACTIONS OF ... ROWS` suffix this CALL {}
// wrapper does no batching, so the whole file is loaded in a single
// transaction. The batched form presumably needs an auto-commit transaction
// (e.g. the `:auto` prefix in Neo4j Browser) - confirm before restoring it.
CALL {
  LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/cats.csv?raw=true" AS row
  CREATE (c:Category { catId: row[0]})
  SET c.catName = row[2],
      c.pageCount = toInteger(row[3]),
      c.subcatCount = toInteger(row[4])
};
// Link categories from rels.csv: column 0 is the catId of the sub-category,
// column 1 the catId of the category it belongs to.
// Rows whose endpoints are not both present as Category nodes match nothing
// and therefore create no relationship.
LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/rels.csv?raw=true" AS row
MATCH (from:Category { catId: row[0]})
MATCH (to:Category { catId: row[1]})
CREATE (from)-[:SUBCAT_OF]->(to);
<!-- stats -->
// Summary statistics over the pageCount property of all Category nodes:
// total, mean, 75th percentile (interpolated), minimum and maximum.
MATCH (cat:Category)
RETURN
  sum(cat.pageCount)                  AS `#pages categorised (with duplicates)`,
  avg(cat.pageCount)                  AS `average #pages per cat`,
  percentileCont(cat.pageCount, 0.75) AS `.75p #pages in a cat`,
  min(cat.pageCount)                  AS `min #pages in a cat`,
  max(cat.pageCount)                  AS `max #pages in a cat`;
// Count the categories that take part in no SUBCAT_OF relationship at all.
// The pattern is undirected, so a category is counted only if it is neither
// a sub-category of something nor has any sub-category of its own.
MATCH (c:Category)
WHERE NOT (c)-[:SUBCAT_OF]-()
RETURN COUNT(c);
// Return every category whose name contains the search text (case-sensitive
// substring match via CONTAINS).
// NOTE(review): '{term}' looks like a placeholder substituted into the query
// text by the caller before execution - confirm. If so, a Cypher parameter
// ($term) would avoid injecting raw input into the statement.
MATCH (category:Category)
WHERE category.catName CONTAINS '{term}'
RETURN category AS c;