83 lines
3.1 KiB
SQL
83 lines
3.1 KiB
SQL
|
|
<!-- https://guides.neo4j.com/wiki -->
|
|
# Cypher Query Language
|
|
|
|
Deletes every node, together with its relationships, to empty the database:
|
|
// Wipe the graph: remove every node together with any relationships attached to it.
MATCH (anyNode)
DETACH DELETE anyNode
|
|
|
|
|
|
// Indexes on the Category and Page lookup properties.
// IF NOT EXISTS keeps these statements re-runnable: the clean-up command only
// deletes nodes and relationships, so the indexes survive it and a plain
// CREATE INDEX would fail the second time round.
CREATE INDEX IF NOT EXISTS FOR (c:Category) ON (c.catId);
CREATE INDEX IF NOT EXISTS FOR (c:Category) ON (c.catName);
CREATE INDEX IF NOT EXISTS FOR (p:Page) ON (p.pageTitle);
|
|
// Root of the category tree: the level-0 "Databases" category, flagged as
// not yet fetched for either sub-categories or pages.
CREATE (root:Category:RootCategory {
  catId: 0,
  catName: 'Databases',
  level: 0,
  subcatsFetched: false,
  pagesFetched: false
});
|
|
|
|
<!-- install APOC library -->
|
|
# Download the APOC 4.4.0.0 "all" jar into the Neo4j plugins directory.
# curl flags: -f makes an HTTP error (e.g. 404) fail the build instead of
# saving the error page as the jar; -L follows redirects; -O keeps the
# remote file name.
RUN mkdir -p /var/lib/neo4j/plugins \
    && cd /var/lib/neo4j/plugins \
    && curl -fL -O https://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/download/4.4.0.0/apoc-4.4.0.0-all.jar
|
|
<!-- environment:
|
|
NEO4JLABS_PLUGINS: '["apoc"]'
|
|
NEO4J_apoc_export_file_enabled: 'true'
|
|
NEO4J_apoc_import_file_enabled: 'true'
|
|
NEO4J_apoc_import_file_use__neo4j__config: 'true'
|
|
NEO4J_dbms_security_procedures_unrestricted: apoc.* -->
|
|
|
|
<!-- UNWIND range(0,3) as level
|
|
CALL apoc.cypher.doit("
|
|
MATCH (c:Category { subcatsFetched: false, level: $level})
|
|
CALL apoc.load.json('https://en.wikipedia.org/w/api.php?format=json&action=query&list=categorymembers&cmtype=subcat&cmtitle=Category:' + apoc.text.urlencode(c.catName) + '&cmprop=ids%7Ctitle&cmlimit=500')
|
|
YIELD value as results
|
|
UNWIND results.query.categorymembers AS subcat
|
|
MERGE (sc:Category {catId: subcat.pageid})
|
|
ON CREATE SET sc.catName = substring(subcat.title,9),
|
|
sc.subcatsFetched = false,
|
|
sc.pagesFetched = false,
|
|
sc.level = $level + 1
|
|
WITH sc,c
|
|
CALL apoc.create.addLabels(sc,['Level' + ($level + 1) + 'Category']) YIELD node
|
|
MERGE (sc)-[:SUBCAT_OF]->(c)
|
|
WITH DISTINCT c
|
|
SET c.subcatsFetched = true", { level: level }) YIELD value
|
|
RETURN value -->
|
|
|
|
<!-- CALL {
|
|
LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/cats.csv?raw=true" AS row
|
|
CREATE (c:Category { catId: row[0]})
|
|
SET c.catName = row[2], c.pageCount = toInteger(row[3]), c.subcatCount = toInteger(row[4])
|
|
} IN TRANSACTIONS OF 10000 ROWS
|
|
|
|
CALL {
|
|
LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/cats.csv?raw=true" AS row
|
|
CREATE (c:Category { catId: row[0]})
|
|
SET c.catName = row[2], c.pageCount = toInteger(row[3]), c.subcatCount = toInteger(row[4])
|
|
} IN TRANSACTIONS OF 10000 ROWS -->
|
|
|
|
// Load the Category nodes from cats.csv. The file is read without
// WITH HEADERS, so columns are addressed by position:
//   row[0] -> catId (stored as read, i.e. as a string)
//   row[2] -> catName
//   row[3] -> pageCount   (converted to integer)
//   row[4] -> subcatCount (converted to integer)
//
// LOAD CSV sits outside the subquery so that each CSV line is an outer row and
// IN TRANSACTIONS can commit in batches of 10000 rows instead of building the
// whole import in a single transaction. This form must be run in an implicit
// (auto-commit) transaction, e.g. prefixed with :auto in Neo4j Browser.
LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/cats.csv?raw=true" AS row
CALL {
  WITH row
  CREATE (c:Category { catId: row[0] })
  SET c.catName = row[2],
      c.pageCount = toInteger(row[3]),
      c.subcatCount = toInteger(row[4])
} IN TRANSACTIONS OF 10000 ROWS
|
|
|
|
// Load the SUBCAT_OF relationships from rels.csv. Each line holds two
// category ids: row[0] is the sub-category, row[1] the category it belongs to.
// Both ends are looked up by catId; a line whose ids do not both match an
// existing Category node creates nothing.
//
// The per-row work is wrapped in CALL { ... } IN TRANSACTIONS so the import is
// committed in batches of 10000 rows rather than in one large transaction.
// This form must be run in an implicit (auto-commit) transaction, e.g.
// prefixed with :auto in Neo4j Browser.
LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/rels.csv?raw=true" AS row
CALL {
  WITH row
  MATCH (from:Category { catId: row[0] })
  MATCH (to:Category { catId: row[1] })
  CREATE (from)-[:SUBCAT_OF]->(to)
} IN TRANSACTIONS OF 10000 ROWS
|
|
|
|
<!-- stats -->
|
|
// Page-count statistics over all Category nodes. The total is a plain sum of
// the per-category counts (hence "with duplicates" in its column name).
MATCH (cat:Category)
WITH cat.pageCount AS pages
RETURN
  sum(pages) AS `#pages categorised (with duplicates)`,
  avg(pages) AS `average #pages per cat`,
  percentileCont(pages, 0.75) AS `.75p #pages in a cat`,
  min(pages) AS `min #pages in a cat`,
  max(pages) AS `max #pages in a cat`
|
|
|
|
// Number of categories that have no SUBCAT_OF relationship in either direction.
MATCH (c:Category)
WHERE NOT EXISTS { MATCH (c)-[:SUBCAT_OF]-() }
RETURN COUNT(c)
|
|
|
|
|
|
|
|
// Find categories whose name contains the search term (CONTAINS is a
// case-sensitive substring match). The term is supplied as the $term query
// parameter; the previous '{term}' was a quoted string literal, so it only
// matched names containing the text "{term}" itself.
MATCH (c:Category)
WHERE c.catName CONTAINS $term
RETURN c;
|