Technology Catalog | Developer's Catalog

Database

Neo4j

Overview

Neo4j is the world's leading graph database management system. It represents data as nodes (entities) and edges (relationships), optimized for storing, querying, and analyzing data with complex relationships. It uses the declarative query language "Cypher" to query graph patterns with intuitive syntax close to natural language.

Details

Neo4j was developed by Neo Technology (now Neo4j Inc.) in 2007. Using native graph structures, it efficiently processes relationship-heavy data that would be complex to model and query in traditional relational databases. It is widely used in applications where relationships are important, such as social networks, recommendation systems, fraud detection, and knowledge graphs.

Key features of Neo4j:

Native graph storage and engine
Full ACID properties support
Declarative query language Cypher
Indexing and query optimization
High availability clustering
Rich visualization tools
Extensive programming language support
Real-time transaction processing
Scalable architecture
Encryption and access control

Advantages and Disadvantages

Advantages

Intuitive modeling: Natural data representation with graph structures
Fast relationship queries: Excellent performance without JOINs
Flexibility: Schema-optional (indexes and constraints can be added when needed), adapts to evolving data models
Visualization: Intuitive visual representation and browsing of graphs
Cypher queries: A declarative, SQL-like language that is easy to learn
Ecosystem: Rich tools and libraries
Real-time: Immediate graph traversal

Disadvantages

Learning cost: Need to learn graph database concepts
Memory usage: High memory consumption for large graphs
Specialized: Inefficient for tabular data
Complex aggregation: Statistical processing is more complex than in a relational database
Tool limitations: Limited BI tool integration

Key Links

Code Examples

Installation & Setup

# Start Neo4j in Docker: HTTP (Browser) on 7474, Bolt on 7687,
# initial credentials neo4j/password, data kept in the neo4j-data volume
docker run \
  --name neo4j-container \
  --publish 7474:7474 \
  --publish 7687:7687 \
  --env NEO4J_AUTH=neo4j/password \
  --volume neo4j-data:/data \
  neo4j:latest

# Alternative: Neo4j Desktop
# Installer available at https://neo4j.com/download/

# Neo4j Browser UI (once the server is up)
# http://localhost:7474

# Python driver
pip install neo4j

# Node.js driver
npm install neo4j-driver

Basic Operations (CRUD)

// Create nodes (Create)
CREATE (p:Person {name: 'John Doe', age: 30, email: 'john.doe@example.com'});
CREATE (c:Company {name: 'Tech Corp', industry: 'IT'});

// Create relationships
// Variables are scoped to a single statement, so the nodes are matched again
// here; a bare CREATE (p)-[...]->(c) would create two new, empty nodes.
MATCH (p:Person {name: 'John Doe'})
MATCH (c:Company {name: 'Tech Corp'})
CREATE (p)-[:WORKS_FOR {since: 2020, position: 'Engineer'}]->(c);

// Create multiple nodes and a relationship between them in one statement
CREATE
  (alice:Person {name: 'Alice', age: 25}),
  (bob:Person {name: 'Bob', age: 28}),
  (alice)-[:FRIENDS_WITH {since: '2019-01-15'}]->(bob);

// Read: fetch every Person node
MATCH (p:Person)
RETURN p

// Read: filter on a property value
MATCH (p:Person)
WHERE p.name = 'John Doe'
RETURN p

// Read: traverse a relationship and project fields from both ends
MATCH (p:Person)-[r:WORKS_FOR]->(c:Company)
RETURN
  p.name,
  r.position,
  c.name

// Update: set or overwrite properties on a matched node
// (+= leaves every property not listed in the map untouched)
MATCH (p:Person)
WHERE p.name = 'John Doe'
SET p += {age: 31, location: 'New York'}
RETURN p

// Update: change a property on a matched relationship
MATCH (p:Person)-[r:WORKS_FOR]->(c:Company)
WHERE p.name = 'John Doe'
SET r.position = 'Senior Engineer'

// Delete: remove a node
// (Neo4j rejects a plain DELETE while the node still has relationships)
MATCH (p:Person)
WHERE p.name = 'John Doe'
DELETE p

// Delete: remove a node together with all of its relationships
MATCH (p:Person)
WHERE p.name = 'John Doe'
DETACH DELETE p

Data Modeling

// User and product relationship model:
// a user who bought one product and viewed another, both in one category
CREATE
  (user:User {id: 1, name: 'Jane Smith', email: 'jane.smith@example.com'}),
  (product1:Product {id: 101, name: 'Laptop', price: 800}),
  (product2:Product {id: 102, name: 'Mouse', price: 20}),
  (category:Category {name: 'Computer'}),
  // Stored as a temporal DATE (not a string) so it can be compared and
  // used in date arithmetic, like the datetime() value on VIEWED below
  (user)-[:PURCHASED {date: date('2024-01-15'), quantity: 1}]->(product1),
  (user)-[:VIEWED {timestamp: datetime()}]->(product2),
  (product1)-[:BELONGS_TO]->(category),
  (product2)-[:BELONGS_TO]->(category);

// Social network model: create three people first, then the links between them
CREATE
  (alice:Person {name: 'Alice', age: 25, city: 'New York'}),
  (bob:Person {name: 'Bob', age: 28, city: 'Los Angeles'}),
  (charlie:Person {name: 'Charlie', age: 30, city: 'New York'})
CREATE
  (alice)-[:FOLLOWS]->(bob),
  (bob)-[:FOLLOWS]->(charlie),
  (charlie)-[:FOLLOWS]->(alice),
  (alice)-[:FRIENDS_WITH]->(charlie)

Indexing & Optimization

// Create indexes
// IF NOT EXISTS makes each statement safe to re-run (e.g. from a setup script)
CREATE INDEX person_name_index IF NOT EXISTS FOR (p:Person) ON (p.name);
CREATE INDEX product_id_index IF NOT EXISTS FOR (p:Product) ON (p.id);

// Composite index
CREATE INDEX person_name_age_index IF NOT EXISTS FOR (p:Person) ON (p.name, p.age);

// Create constraints
// Uniqueness: fails to create if existing Person nodes already share an email
CREATE CONSTRAINT person_email_unique IF NOT EXISTS FOR (p:Person) REQUIRE p.email IS UNIQUE;
// Property existence constraints require Neo4j Enterprise Edition
CREATE CONSTRAINT product_id_exists IF NOT EXISTS FOR (p:Product) REQUIRE p.id IS NOT NULL;

// Show indexes
SHOW INDEXES;

// Show constraints
SHOW CONSTRAINTS;

// Query plan analysis
// EXPLAIN shows the plan without running the query;
// PROFILE runs the query and reports per-operator row counts and db hits
EXPLAIN MATCH (p:Person {name: 'John Doe'}) RETURN p;
PROFILE MATCH (p:Person)-[:WORKS_FOR]->(c:Company) RETURN p, c;

// Database statistics
// db.stats.retrieve requires a section name; 'GRAPH COUNTS' returns node,
// relationship, index and constraint counts
CALL db.stats.retrieve('GRAPH COUNTS');

Practical Examples

// Path finding: shortest path between two people,
// over any relationship type, in either direction, with no length limit
MATCH
  (source:Person {name: 'Alice'}),
  (target:Person {name: 'Charlie'})
MATCH path = shortestPath((source)-[*]-(target))
RETURN path, length(path)

// Recommendation system (collaborative filtering):
// products bought by users who share a purchase with Jane Smith,
// excluding anything she has already purchased; ranked by co-purchase count
MATCH (user:User)-[:PURCHASED]->(shared:Product)<-[:PURCHASED]-(peer:User)
WHERE user.name = 'Jane Smith'
MATCH (peer)-[:PURCHASED]->(recommendation:Product)
WHERE NOT EXISTS { (user)-[:PURCHASED]->(recommendation) }
RETURN recommendation.name, count(*) AS score
ORDER BY score DESC
LIMIT 5

// Fraud detection (anomaly pattern detection):
// accounts that received more than 10 transfers above 100000
// within the last 24 hours
MATCH (account:Account)-[t:TRANSFER]->(suspicious:Account)
WHERE t.timestamp > datetime() - duration({days: 1})
  AND t.amount > 100000
WITH suspicious, count(t) AS transferCount
WHERE transferCount > 10
RETURN suspicious

// Social network analysis: people exactly two FRIENDS_WITH hops from Alice
// (direction ignored), excluding Alice herself
MATCH (person:Person {name: 'Alice'})-[:FRIENDS_WITH*2..2]-(friendOfFriend:Person)
WHERE friendOfFriend <> person
RETURN DISTINCT friendOfFriend.name AS suggestions

// Influence analysis (in-degree centrality): top 10 people by follower count.
// The arrow points INTO p: (follower)-[:FOLLOWS]->(p) means "follower follows p".
// Matching (p)-[:FOLLOWS]->(other) would instead count how many people p follows.
MATCH (p:Person)<-[:FOLLOWS]-(follower:Person)
RETURN p.name, count(follower) AS followers
ORDER BY followers DESC
LIMIT 10

Best Practices

// Transaction management (cypher-shell)
// :begin / :commit / :rollback are cypher-shell client commands, not Cypher
// keywords; plain BEGIN / COMMIT is rejected as a syntax error.
// Application code should use the driver's transaction API instead.
:begin
// Both CREATE clauses form one statement, so p is still bound in the second
CREATE (p:Person {name: 'New User', email: 'new.user@example.com'})
CREATE (p)-[:WORKS_FOR]->(c:Company {name: 'New Company'});
:commit

// Batch processing (LOAD CSV): one Person node per row of users.csv.
// CSV fields arrive as strings, so numeric columns are converted explicitly.
LOAD CSV WITH HEADERS FROM 'file:///users.csv' AS row
CREATE (p:Person)
SET
  p.id = toInteger(row.id),
  p.name = row.name,
  p.email = row.email,
  p.age = toInteger(row.age)

// Performance optimization
// Using MERGE (avoiding duplicates): match the node by its key property,
// or create it when no such node exists
MERGE (p:Person {email: 'john.doe@example.com'})
// Runs only when the node was just created
ON CREATE SET p.created = datetime()
// Runs only when an existing node was matched
ON MATCH SET p.lastSeen = datetime()

// WITH clause for result filtering: narrow the Person rows first,
// then expand only the survivors to their companies
MATCH (p:Person)
WITH p WHERE p.age > 25
MATCH (p)-[:WORKS_FOR]->(c:Company)
RETURN
  p.name,
  c.name

// Using parameters: the value is supplied by the client at execution time
// instead of being written into the query text
MATCH (p:Person)
WHERE p.name = $personName
RETURN p

Python Usage Example

from neo4j import GraphDatabase

# Open a driver for the local server over Bolt using basic (user, password) auth
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))

def create_person(tx, name, age):
    """Create a Person node inside the transaction ``tx`` and return it.

    ``name`` and ``age`` are passed as query parameters rather than being
    interpolated into the Cypher text. The return value is the first column
    of the single record produced by the query.
    """
    query = "CREATE (p:Person {name: $name, age: $age}) RETURN p"
    record = tx.run(query, name=name, age=age).single()
    return record[0]

def find_person(tx, name):
    """Return a list with the ``p`` value of every record matching ``name``.

    The query runs inside the transaction ``tx`` with ``name`` bound as a
    query parameter; an empty list is returned when nothing matches.
    """
    query = "MATCH (p:Person {name: $name}) RETURN p"
    matches = []
    for record in tx.run(query, name=name):
        matches.append(record["p"])
    return matches

# Transaction execution
# try/finally guarantees the driver (and its connection pool) is released
# even when one of the transactions raises.
try:
    with driver.session() as session:
        # Write transaction: the function may be retried by the driver on
        # transient errors, so it should only contain the database work
        session.execute_write(create_person, "John Doe", 30)

        # Read transaction
        persons = session.execute_read(find_person, "John Doe")

        for person in persons:
            print(f"Name: {person['name']}, Age: {person['age']}")
finally:
    # Close connection
    driver.close()

Configuration and Tuning

# Key neo4j.conf settings (Neo4j 5.x names; 4.x used the dbms.* prefix,
# e.g. dbms.memory.heap.initial_size, dbms.connector.bolt.listen_address)

# Memory: JVM heap bounds and the page cache used for graph data on disk
server.memory.heap.initial_size=1G
server.memory.heap.max_size=2G
server.memory.pagecache.size=1G

# Security settings
dbms.security.auth_enabled=true
# Enabling the Bolt SSL policy also requires certificate/key files to be
# configured for that policy
dbms.ssl.policy.bolt.enabled=true

# Network settings
# 0.0.0.0 listens on every interface; restrict this on hosts reachable from
# untrusted networks
server.default_listen_address=0.0.0.0
server.bolt.listen_address=:7687
server.http.listen_address=:7474