// Cypher Query Language
//
// Script for building and inspecting a graph of Wikipedia categories.
// Every statement is terminated with ';' so the file can be fed to a
// multi-statement runner one statement at a time.

// Runs a simple command to clean the database: deletes every node together
// with all of its relationships.
MATCH (n)
DETACH DELETE n;

// Indexes on the properties that the statements below match or filter on.
CREATE INDEX FOR (c:Category) ON (c.catId);
CREATE INDEX FOR (c:Category) ON (c.catName);
CREATE INDEX FOR (p:Page) ON (p.pageTitle);

// Root category node.
// NOTE(review): catId is the integer 0 here, while the CSV import below sets
// catId from row[0], which LOAD CSV yields as a string -- confirm the two
// representations are never expected to match each other.
CREATE (c:Category:RootCategory {
  catId: 0,
  catName: 'Databases',
  subcatsFetched: false,
  pagesFetched: false,
  level: 0
});

// The following is NOT Cypher. It appears to be a Dockerfile instruction that
// downloads the APOC 4.4.0.0 plugin jar into the Neo4j plugins directory; it
// is kept here as a comment so the rest of the file stays valid Cypher.
//
//   RUN mkdir -p /var/lib/neo4j/plugins \
//       && cd /var/lib/neo4j/plugins \
//       && curl -L -O https://github.com/neo4j-contrib/neo4j-apoc-procedures/releases/download/4.4.0.0/apoc-4.4.0.0-all.jar

// Load categories: one Category node per CSV row.
// Column 0 -> catId, 2 -> catName, 3 -> pageCount, 4 -> subcatCount.
CALL {
  LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/cats.csv?raw=true" AS row
  CREATE (c:Category { catId: row[0]})
  SET c.catName = row[2],
      c.pageCount = toInteger(row[3]),
      c.subcatCount = toInteger(row[4])
};

// Load the hierarchy: column 0 is the sub-category's catId, column 1 the
// catId of the category it belongs to. Rows whose endpoints are not found by
// the two MATCH clauses create nothing.
LOAD CSV FROM "https://github.com/jbarrasa/datasets/blob/master/wikipedia/data/rels.csv?raw=true" AS row
MATCH (from:Category { catId: row[0]})
MATCH (to:Category { catId: row[1]})
CREATE (from)-[:SUBCAT_OF]->(to);

// Summary statistics over the pageCount property of all categories.
MATCH (c:Category)
RETURN SUM(c.pageCount) AS `#pages categorised (with duplicates)`,
       AVG(c.pageCount) AS `average #pages per cat`,
       percentileCont(c.pageCount, 0.75) AS `.75p #pages in a cat`,
       MIN(c.pageCount) AS `min #pages in a cat`,
       MAX(c.pageCount) AS `max #pages in a cat`;

// Number of categories with no SUBCAT_OF relationship in either direction.
MATCH (c:Category)
WHERE NOT (c)-[:SUBCAT_OF]-()
RETURN COUNT(c);

// Categories whose name contains a search term.
// NOTE(review): '{term}' is a quoted literal; presumably a placeholder that
// is substituted into the text before the query is sent. If the term comes
// from user input, prefer a query parameter (CONTAINS $term) over text
// substitution -- confirm against whatever consumes this file.
MATCH (c:Category)
WHERE c.catName CONTAINS '{term}'
RETURN c;