Database
Neo4j
Overview
Neo4j is the world's leading graph database management system. It represents data as nodes (entities) and edges (relationships), optimized for storing, querying, and analyzing data with complex relationships. It uses the declarative query language "Cypher" to query graph patterns with intuitive syntax close to natural language.
Details
Neo4j was developed by Neo Technology (now Neo4j Inc.) in 2007. Using native graph structures, it efficiently processes relationship-heavy data that would be complex to model and query in a traditional relational database. It is widely used in applications where relationships are important, such as social networks, recommendation systems, fraud detection, and knowledge graphs.
Key features of Neo4j:
- Native graph storage and engine
- Full ACID properties support
- Declarative query language Cypher
- Indexing and query optimization
- High availability clustering
- Rich visualization tools
- Extensive programming language support
- Real-time transaction processing
- Scalable architecture
- Encryption and access control
Advantages and Disadvantages
Advantages
- Intuitive modeling: Natural data representation with graph structures
- Fast relationship queries: Excellent performance without JOINs
- Flexibility: Schema-optional (constraints can be added where needed), adapts to evolving data models
- Visualization: Intuitive visual representation and browsing of graphs
- Cypher queries: SQL-like, easy-to-learn language
- Ecosystem: Rich tools and libraries
- Real-time: Immediate graph traversal
Disadvantages
- Learning cost: Need to learn graph database concepts
- Memory usage: High memory consumption for large graphs
- Specialized: Inefficient for tabular data
- Complex aggregation: Statistical processing is more complex than in a relational database
- Tool limitations: Limited BI tool integration
Key Links
Code Examples
Installation & Setup
# Start Neo4j in Docker: publish the HTTP (7474) and Bolt (7687) ports,
# set the initial credentials, and keep the data in the "neo4j-data" volume.
docker run \
    --name neo4j-container \
    --publish 7474:7474 \
    --publish 7687:7687 \
    --env NEO4J_AUTH=neo4j/password \
    --volume neo4j-data:/data \
    neo4j:latest

# Alternative: Neo4j Desktop
# Download from https://neo4j.com/download/

# Neo4j Browser is served at:
# http://localhost:7474

# Client drivers
pip install neo4j          # Python
npm install neo4j-driver   # Node.js
Basic Operations (CRUD)
// ---- Create ----
// Person, company and their relationship are created in ONE statement so that
// the variables p and c are still bound when the relationship is added.
CREATE (p:Person {name: 'John Doe', age: 30, email: 'john.doe@example.com'})
CREATE (c:Company {name: 'Tech Corp', industry: 'IT'})
CREATE (p)-[:WORKS_FOR {since: 2020, position: 'Engineer'}]->(c);

// Create multiple nodes and a relationship with a single CREATE clause.
// The friendship date is stored as a temporal value, not a string.
CREATE
  (alice:Person {name: 'Alice', age: 25}),
  (bob:Person {name: 'Bob', age: 28}),
  (alice)-[:FRIENDS_WITH {since: date('2019-01-15')}]->(bob);

// ---- Read ----
// All Person nodes
MATCH (p:Person)
RETURN p;

// Query with specific conditions
MATCH (p:Person {name: 'John Doe'})
RETURN p;

// Query including relationships
MATCH (p:Person)-[r:WORKS_FOR]->(c:Company)
RETURN p.name, r.position, c.name;

// ---- Update ----
// Update node properties
MATCH (p:Person {name: 'John Doe'})
SET p.age = 31, p.location = 'New York'
RETURN p;

// Update relationship properties
MATCH (p:Person {name: 'John Doe'})-[r:WORKS_FOR]->(c:Company)
SET r.position = 'Senior Engineer';

// ---- Delete ----
// Plain DELETE is rejected for a node that still has relationships,
// so only match the node when it is isolated.
MATCH (p:Person {name: 'John Doe'})
WHERE NOT (p)--()
DELETE p;

// Delete the node together with all of its relationships
MATCH (p:Person {name: 'John Doe'})
DETACH DELETE p;
Data Modeling
// User and product relationship model.
// The purchase date is stored as a temporal value (like the VIEWED timestamp)
// rather than as a string.
CREATE
  (user:User {id: 1, name: 'Jane Smith', email: 'jane.smith@example.com'}),
  (product1:Product {id: 101, name: 'Laptop', price: 800}),
  (product2:Product {id: 102, name: 'Mouse', price: 20}),
  (category:Category {name: 'Computer'}),
  (user)-[:PURCHASED {date: date('2024-01-15'), quantity: 1}]->(product1),
  (user)-[:VIEWED {timestamp: datetime()}]->(product2),
  (product1)-[:BELONGS_TO]->(category),
  (product2)-[:BELONGS_TO]->(category);

// Social network model.
// NOTE: CREATE always adds new nodes; if Person nodes named 'Alice' or 'Bob'
// already exist, this produces additional nodes with the same names.
CREATE
  (alice:Person {name: 'Alice', age: 25, city: 'New York'}),
  (bob:Person {name: 'Bob', age: 28, city: 'Los Angeles'}),
  (charlie:Person {name: 'Charlie', age: 30, city: 'New York'}),
  (alice)-[:FOLLOWS]->(bob),
  (bob)-[:FOLLOWS]->(charlie),
  (charlie)-[:FOLLOWS]->(alice),
  (alice)-[:FRIENDS_WITH]->(charlie);
Indexing & Optimization
// Create indexes (IF NOT EXISTS keeps the script safe to re-run)
CREATE INDEX person_name_index IF NOT EXISTS FOR (p:Person) ON (p.name);
CREATE INDEX product_id_index IF NOT EXISTS FOR (p:Product) ON (p.id);

// Composite index
CREATE INDEX person_name_age_index IF NOT EXISTS FOR (p:Person) ON (p.name, p.age);

// Create constraints
CREATE CONSTRAINT person_email_unique IF NOT EXISTS
FOR (p:Person) REQUIRE p.email IS UNIQUE;

// NOTE: property existence constraints are an Enterprise Edition feature.
CREATE CONSTRAINT product_id_exists IF NOT EXISTS
FOR (p:Product) REQUIRE p.id IS NOT NULL;

// Show indexes
SHOW INDEXES;

// Show constraints
SHOW CONSTRAINTS;

// Query plan analysis
EXPLAIN MATCH (p:Person {name: 'John Doe'}) RETURN p;
PROFILE MATCH (p:Person)-[:WORKS_FOR]->(c:Company) RETURN p, c;

// Database statistics.
// db.stats.retrieve requires the name of the section to return.
CALL db.stats.retrieve('GRAPH COUNTS');
Practical Examples
// Path finding (shortest path).
// Variables are named source/target because START and END are Cypher keywords.
MATCH path = shortestPath(
  (source:Person {name: 'Alice'})-[*]-(target:Person {name: 'Charlie'})
)
RETURN path, length(path);

// Recommendation system (collaborative filtering):
// products bought by users who share a purchase with Jane Smith,
// excluding what she has already bought, ranked by number of occurrences.
MATCH (user:User {name: 'Jane Smith'})-[:PURCHASED]->(product:Product)<-[:PURCHASED]-(otherUser:User)
MATCH (otherUser)-[:PURCHASED]->(recommendation:Product)
WHERE NOT (user)-[:PURCHASED]->(recommendation)
RETURN recommendation.name, count(*) AS score
ORDER BY score DESC
LIMIT 5;

// Fraud detection (anomaly pattern detection):
// accounts that received more than 10 transfers above 100000 in the last day.
MATCH (account:Account)-[t:TRANSFER]->(suspicious:Account)
WHERE t.amount > 100000
  AND t.timestamp > datetime() - duration('P1D')
WITH suspicious, count(t) AS transfer_count
WHERE transfer_count > 10
RETURN suspicious;

// Social network analysis: friend-of-friend suggestions.
// People who are already direct friends are excluded from the suggestions.
MATCH (person:Person {name: 'Alice'})-[:FRIENDS_WITH*2]-(friendOfFriend:Person)
WHERE person <> friendOfFriend
  AND NOT (person)-[:FRIENDS_WITH]-(friendOfFriend)
RETURN DISTINCT friendOfFriend.name AS suggestions;

// Influence analysis (centrality): top 10 people by follower count.
// Followers are the nodes with a FOLLOWS relationship pointing AT p,
// so the pattern uses the incoming direction.
MATCH (p:Person)<-[:FOLLOWS]-(follower:Person)
RETURN p.name, count(follower) AS followers
ORDER BY followers DESC
LIMIT 10;
Best Practices
// Transaction management.
// :begin / :commit are cypher-shell client commands, not Cypher clauses;
// application code uses the driver's transaction API instead.
:begin
CREATE (p:Person {name: 'New User', email: 'new.user@example.com'})
CREATE (p)-[:WORKS_FOR]->(c:Company {name: 'New Company'});
:commit

// Batch processing (LOAD CSV).
// Rows are committed in batches of 1000 so a large file does not have to fit
// into a single transaction. Must run as an auto-commit query
// (prefix with :auto in Neo4j Browser).
LOAD CSV WITH HEADERS FROM 'file:///users.csv' AS row
CALL {
  WITH row
  CREATE (p:Person {
    id: toInteger(row.id),
    name: row.name,
    email: row.email,
    age: toInteger(row.age)
  })
} IN TRANSACTIONS OF 1000 ROWS;

// Performance optimization
// Using MERGE (avoiding duplicates): create the node only if it is missing
MERGE (p:Person {email: 'john.doe@example.com'})
ON CREATE SET p.created = datetime()
ON MATCH SET p.lastSeen = datetime();

// WITH clause for result filtering: filter people before expanding to companies
MATCH (p:Person)
WITH p
WHERE p.age > 25
MATCH (p)-[:WORKS_FOR]->(c:Company)
RETURN p.name, c.name;

// Using parameters: the value of $personName is supplied by the caller
MATCH (p:Person {name: $personName})
RETURN p;
Python Usage Example
from neo4j import GraphDatabase
# Database connection: Bolt endpoint on localhost with user/password credentials
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
def create_person(tx, name, age):
    """Create a ``Person`` node and return it.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        name: Value stored in the node's ``name`` property.
        age: Value stored in the node's ``age`` property.

    Returns:
        The first field of the single record produced by the query,
        i.e. the value bound to ``p``.
    """
    # Values are passed as query parameters, never interpolated into the text.
    result = tx.run(
        "CREATE (p:Person {name: $name, age: $age}) RETURN p",
        name=name,
        age=age,
    )
    return result.single()[0]
def find_person(tx, name):
    """Return every ``Person`` node whose ``name`` property equals ``name``.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        name: Name to match, passed as the ``$name`` query parameter.

    Returns:
        A list with the ``p`` field of each returned record; empty when
        nothing matches.
    """
    result = tx.run(
        "MATCH (p:Person {name: $name}) RETURN p",
        name=name,
    )
    # Materialise the records into a list before returning to the caller.
    return [record["p"] for record in result]
# Transaction execution.
# try/finally guarantees the driver is closed even when a transaction raises.
try:
    with driver.session() as session:
        # Write transaction
        person = session.execute_write(create_person, "John Doe", 30)

        # Read transaction
        persons = session.execute_read(find_person, "John Doe")

        # A separate loop variable keeps the write result above from being shadowed.
        for found in persons:
            print(f"Name: {found['name']}, Age: {found['age']}")
finally:
    # Close connection
    driver.close()
Configuration and Tuning
# Key neo4j.conf settings.
# Neo4j 5 setting names are used here; Neo4j 4.x used the dbms.* equivalents
# (dbms.memory.heap.initial_size, dbms.connector.bolt.listen_address, ...).
server.memory.heap.initial_size=1G
server.memory.heap.max_size=2G
server.memory.pagecache.size=1G

# Security settings
dbms.security.auth_enabled=true
# NOTE(review): enabling the Bolt SSL policy presumably also needs certificates
# and a matching TLS level configured — confirm against the deployment.
dbms.ssl.policy.bolt.enabled=true

# Network settings
# 0.0.0.0 listens on every network interface.
server.default_listen_address=0.0.0.0
server.bolt.listen_address=:7687
server.http.listen_address=:7474