Database
TigerGraph
Overview
TigerGraph is an ultra-fast graph analytics platform specialized for real-time analytics and machine learning. With its proprietary GSQL query language and Native Parallel Graph™ (NPG) architecture, it delivers high-speed query processing on large-scale graph data. Supporting HTAP (Hybrid Transactional/Analytical Processing), it can simultaneously execute real-time graph updates and analytical processing.
Details
TigerGraph was developed by a company founded in 2012 as an enterprise-grade graph database. Unlike traditional graph databases, it adopts a design optimized for parallel processing from storage to compute layers. It can handle tens of terabytes of data with trillions of edges and is widely used in fraud detection, customer 360 analytics, IoT, and AI/machine learning applications.
Key features of TigerGraph:
- Native Parallel Graph™ (NPG) architecture
- GSQL Turing-complete query language
- Real-time HTAP (Hybrid processing)
- Ultra-fast parallel compute engine
- Built-in graph data science library
- Real-time incremental graph updates
- Scalable cloud-native design
- CoPilot AI for knowledge graph enhancement
- Enterprise security and governance
- Visual development environment GraphStudio
Advantages and Disadvantages
Advantages
- Ultra-fast processing: Sub-second response for tens of millions of entities/relationships
- Real-time analytics: Simultaneous execution of transactional and analytical processing
- Scalability: Linear scaling up to tens of TB and trillions of edges
- GSQL language: SQL-like syntax that's easy to learn and Turing-complete
- Machine learning integration: Built-in graph feature extraction and analytics
- Parallel processing: Optimized parallel execution at every layer
- Enterprise-ready: High availability, security, and governance
- AI enhancement: Automatic knowledge graph expansion with CoPilot AI
Disadvantages
- Learning curve: Need to master GSQL language and graph concepts
- Resource requirements: Requires significant compute resources for high performance
- License cost: Expensive enterprise licensing
- Ecosystem: Limited third-party tools compared to Neo4j
- Operational complexity: Complex operations management in large-scale environments
Key Links
Code Examples
Installation & Setup
# Docker execution — maps SSH (14022->22), REST++ API (9000), and GraphStudio (14240);
# the raised nofile ulimit is required by the TigerGraph engine.
docker run -d -p 14022:22 -p 9000:9000 -p 14240:14240 \
--name tigergraph \
--ulimit nofile=1000000:1000000 \
tigergraph/tigergraph:latest
# TigerGraph Cloud usage
# Create account at https://tgcloud.tigergraph.com/
# Local installation (Linux)
# NOTE(review): curl -O saves the response as "download.php", so the tar glob
# below may not match — confirm the real download URL/filename before use.
curl -O https://dl.tigergraph.com/download.php
tar -xzf tigergraph-*.tar.gz
sudo ./install.sh
# Python Client installation
pip install pyTigerGraph
# Java Client installation (Maven)
# Add dependency to pom.xml
Basic Operations (CRUD)
// Schema definition: create the graph and make it the active graph.
CREATE GRAPH SocialNetwork()
USE GRAPH SocialNetwork
// Vertex types — PRIMARY_ID is the unique key for each vertex.
CREATE VERTEX Person (PRIMARY_ID name STRING, age INT, city STRING)
CREATE VERTEX Company (PRIMARY_ID company_name STRING, industry STRING)
// Edge types — directed employment edge, undirected friendship edge.
CREATE DIRECTED EDGE WORKS_FOR (FROM Person, TO Company, since INT, position STRING)
CREATE UNDIRECTED EDGE FRIENDS_WITH (FROM Person, TO Person, since STRING)
// Data insertion (Create) — values follow declared attribute order.
INSERT INTO Person VALUES ("John Doe", 30, "New York")
INSERT INTO Person VALUES ("Jane Smith", 28, "Los Angeles")
INSERT INTO Company VALUES ("Tech Corp", "IT")
// Relationship insertion — values are (FROM key, TO key, edge attributes).
INSERT INTO WORKS_FOR VALUES ("John Doe", "Tech Corp", 2020, "Engineer")
INSERT INTO FRIENDS_WITH VALUES ("John Doe", "Jane Smith", "2019-05-15")
// Batch insertion
INSERT INTO Person VALUES
("Bob Johnson", 25, "Chicago"),
("Alice Brown", 32, "Houston"),
("Mike Wilson", 29, "Phoenix")
// Data reading (Read)
// NOTE(review): the SELECT/UPDATE/DELETE statements below use GSQL's
// SQL-like DML; availability varies by TigerGraph version — confirm.
SELECT * FROM Person
// Conditional query
SELECT * FROM Person WHERE age > 25
// JOIN-style traversal across WORKS_FOR edges ("==" is GSQL equality).
SELECT p.name, p.age, c.company_name, e.position
FROM Person p -(WORKS_FOR:e)-> Company c
WHERE p.city == "New York"
// Data update (Update)
UPDATE Person SET age = 31 WHERE name == "John Doe"
// Data deletion (Delete)
DELETE FROM Person WHERE name == "John Doe"
Query Analytics (Advanced Pattern Matching)
// Path finding
// Unweighted shortest-path length (hop count) between two Person vertices,
// computed by BFS-style relaxation over FRIENDS_WITH edges.
// Fix vs. original: the declared OrAccum @visited was never used — removed.
CREATE QUERY findShortestPath(VERTEX<Person> source, VERTEX<Person> target) {
  MinAccum<INT> @minDist = 999999;   // sentinel meaning "not yet reached"
  source.@minDist = 0;
  S = {source};
  // Frontier expansion: keep only vertices whose distance improves, so the
  // loop terminates once no vertex can be relaxed further.
  WHILE S.size() > 0 DO
    S = SELECT v FROM S:s -(FRIENDS_WITH:e)- Person:v
        WHERE v.@minDist > s.@minDist + 1
        ACCUM v.@minDist += s.@minDist + 1;   // MinAccum "+=" keeps the smaller value
  END;
  PRINT target.@minDist;   // still 999999 if target is unreachable from source
}
// Influence analysis (centrality calculation)
// Degree centrality: count each Person's incident FRIENDS_WITH edges and
// report people in descending order of degree.
CREATE QUERY calculateCentrality() {
  SumAccum<INT> @degree_centrality;
  AllPersons = {Person.*};
  // One increment per incident friendship edge traversed from p.
  Result = SELECT p
           FROM AllPersons:p -(FRIENDS_WITH:fr)- Person:other
           ACCUM p.@degree_centrality += 1
           ORDER BY p.@degree_centrality DESC;
  PRINT Result[Result.name, Result.@degree_centrality];
}
// Community detection
CREATE QUERY detectCommunities() {
GroupByAccum<STRING city, SetAccum<STRING>> @@community_members;
Start = {Person.*};
Result = SELECT s FROM Start:s
ACCUM @@community_members += (s.city -> s.name);
PRINT @@community_members;
}
// Real-time recommendation
CREATE QUERY realTimeRecommendation(VERTEX<Person> user) {
SumAccum<INT> @score;
// Collaborative filtering
Friends = SELECT t FROM user:s -(FRIENDS_WITH:e)- Person:t;
Recommendations = SELECT c FROM Friends:f -(WORKS_FOR:e)- Company:c
WHERE c != ANY(SELECT comp FROM user -(WORKS_FOR:ew)- Company:comp)
ACCUM c.@score += 1
ORDER BY c.@score DESC
LIMIT 5;
PRINT Recommendations[Recommendations.company_name, Recommendations.@score];
}
Advanced Features (Machine Learning & Analytics)
// Graph feature extraction
// Extract per-vertex graph features for one Person, suitable as ML input.
// NOTE(review): uses target_person directly as the FROM seed; GSQL normally
// requires a seed set (Start = {target_person}) — confirm on the deployed
// TigerGraph version.
CREATE QUERY extractGraphFeatures(VERTEX<Person> target_person) {
// Feature accumulators, all attached to the target vertex.
SumAccum<INT> @neighbor_count;
AvgAccum @avg_neighbor_age;
SetAccum<STRING> @neighbor_cities;
MaxAccum<INT> @max_connection_strength;
// First-degree neighbor features: degree, average friend age, distinct cities.
Level1 = SELECT t FROM target_person:s -(FRIENDS_WITH:e)- Person:t
ACCUM
s.@neighbor_count += 1,
s.@avg_neighbor_age += t.age,
s.@neighbor_cities += t.city;
// Second-degree reach. NOTE(review): "+=" on a MaxAccum keeps the maximum of
// old and new values, so repeated "+= 1" yields at most 1 — if a count of
// friend-of-friend edges was intended, a SumAccum would be needed; confirm.
Level2 = SELECT t2 FROM Level1:l1 -(FRIENDS_WITH:e2)- Person:t2
WHERE t2 != target_person
ACCUM target_person.@max_connection_strength += 1;
// Emit the feature vector for the target person.
PRINT target_person.name,
target_person.@neighbor_count as direct_friends,
target_person.@avg_neighbor_age as avg_friend_age,
target_person.@neighbor_cities.size() as unique_cities,
target_person.@max_connection_strength as network_reach;
}
// Fraud detection analysis
CREATE QUERY fraudDetection(VERTEX<Person> account) {
SumAccum<FLOAT> @risk_score;
MaxAccum<INT> @max_transaction_amount;
SetAccum<STRING> @transaction_patterns;
// Transaction pattern analysis
Transactions = SELECT t FROM account:s -(TRANSACTION:e)- Account:t
WHERE e.timestamp > now() - 86400 // Within 24 hours
ACCUM
s.@risk_score += (CASE WHEN e.amount > 100000 THEN 10 ELSE 1 END),
s.@max_transaction_amount += e.amount,
s.@transaction_patterns += (e.type + "_" + to_string(e.amount));
// Risk score calculation
IF account.@risk_score > 50 OR account.@max_transaction_amount > 500000 THEN
PRINT account.name, account.@risk_score, "HIGH_RISK";
ELSE
PRINT account.name, account.@risk_score, "LOW_RISK";
END;
}
// Real-time alerting
CREATE QUERY realTimeAlert(VERTEX<Person> user, STRING alert_type) {
TYPEDEF TUPLE<STRING user_name, FLOAT score, STRING alert_level> AlertRecord;
ListAccum<AlertRecord> @@alerts;
// Alert condition check
Current = {user};
Result = SELECT s FROM Current:s
POST-ACCUM
IF alert_type == "FRAUD" AND s.risk_score > 75 THEN
@@alerts += AlertRecord(s.name, s.risk_score, "CRITICAL")
ELSE IF alert_type == "ACTIVITY" AND s.activity_count > 100 THEN
@@alerts += AlertRecord(s.name, s.activity_count, "WARNING")
END;
// Send alert
PRINT @@alerts;
}
Optimization & Performance
// Secondary indexes to speed attribute-filtered lookups.
CREATE INDEX ON VERTEX Person(age)
CREATE INDEX ON VERTEX Company(industry)
CREATE INDEX ON EDGE WORKS_FOR(since)
// Partitioning configuration.
// NOTE(review): illustrative — confirm the distributed_storage_config keys
// against the cluster-configuration reference for the deployed version.
ALTER GRAPH SocialNetwork SET distributed_storage_config = '{
"replication_factor": 3,
"partition_count": 8,
"partition_key": "name"
}'
// Query optimization
// Batch-partitioned scan: visit every Person in 1000 hash slices and
// return a summary string with the total vertex count.
// NOTE(review): assumes Person has a numeric "id" attribute usable with %,
// but the schema above keys Person by name — confirm before installing.
CREATE QUERY optimizedQuery() RETURNS (STRING) {
// Global set of all vertices touched across batches.
SetAccum<VERTEX<Person>> @@processed_vertices;
// Batch processing for efficiency
Start = {Person.*};
// RANGE[1, 1000] iterates batch_size = 1..1000 inclusive.
FOREACH batch_size IN RANGE[1, 1000] DO
CurrentBatch = SELECT s FROM Start:s
WHERE s.id % 1000 == batch_size - 1
ACCUM @@processed_vertices += s;
END;
RETURN "Processed " + to_string(@@processed_vertices.size()) + " vertices";
}
// Memory usage optimization
CREATE QUERY memoryOptimizedQuery() {
// Streaming processing
Start = {Person.*};
// Step-by-step processing for memory efficiency
FOREACH vertex_set IN RANGE[1, 10] DO
Subset = SELECT s FROM Start:s
WHERE s.id % 10 == vertex_set - 1;
// Process in small batches
ProcessedSubset = SELECT s FROM Subset:s -(FRIENDS_WITH)- Person:t
ACCUM s.@local_count += 1;
END;
}
// Statistics gathering — the query must already be installed on the graph.
RUN QUERY getGraphStatistics()
Practical Examples (Enterprise Use Cases)
// Customer 360 analysis
CREATE QUERY customer360Analysis(VERTEX<Customer> customer_id) {
// Analysis of all customer touchpoints
// Purchase history
Purchases = SELECT p FROM customer_id:c -(PURCHASED:e)- Product:p
ACCUM c.@total_spent += e.amount;
// Support history
SupportTickets = SELECT t FROM customer_id:c -(CREATED:e)- Ticket:t;
// Digital touchpoints
DigitalInteractions = SELECT i FROM customer_id:c -(INTERACTED:e)- DigitalChannel:i;
// Segment analysis
CASE
WHEN customer_id.@total_spent > 100000 THEN
UPDATE customer_id SET segment = "Premium"
WHEN customer_id.@total_spent > 50000 THEN
UPDATE customer_id SET segment = "Gold"
ELSE
UPDATE customer_id SET segment = "Standard"
END;
PRINT customer_id, Purchases, SupportTickets, DigitalInteractions;
}
// Supply chain analysis
CREATE QUERY supplyChainAnalysis() {
// Risk propagation analysis
SumAccum<FLOAT> @risk_propagation;
HighRiskSuppliers = {Supplier.* WHERE risk_level > 0.7};
// Downstream risk propagation
AffectedProducts = SELECT p FROM HighRiskSuppliers:s -(SUPPLIES:e)- Product:p
ACCUM p.@risk_propagation += s.risk_level * e.dependency_weight;
// Alternative path finding
CREATE QUERY findAlternativeSuppliers(VERTEX<Product> product) {
Alternatives = SELECT alt FROM product:p <-(SUPPLIES:e)- Supplier:alt
WHERE alt.risk_level < 0.3
ORDER BY e.cost_efficiency DESC;
RETURN Alternatives;
}
}
// IoT data streaming analysis
CREATE QUERY iotStreamAnalysis() {
// Real-time sensor data processing
SensorData = {IoTDevice.* WHERE last_update > now() - 300}; // Within 5 minutes
// Anomaly detection
Anomalies = SELECT d FROM SensorData:d
WHERE d.temperature > d.threshold_max OR
d.temperature < d.threshold_min
ACCUM d.@alert_count += 1;
// Predictive maintenance
MaintenanceNeeded = SELECT d FROM SensorData:d
WHERE d.vibration_level > 0.8 AND
d.operating_hours > 1000;
PRINT Anomalies, MaintenanceNeeded;
}
Python Client Usage
# pyTigerGraph wraps TigerGraph's REST++ and GSQL endpoints.
import pyTigerGraph as tg
# Connection setup — one shared connection reused by all helpers below.
# NOTE(review): credentials are hard-coded placeholders; load them from the
# environment or a secrets store in a real deployment.
conn = tg.TigerGraphConnection(
host="https://your-instance.i.tgcloud.io",
graphname="SocialNetwork",
username="tigergraph",
password="your_password",
apiToken="your_api_token"
)
# Vertex insertion
def create_person(name, age, city):
result = conn.upsertVertex("Person", name, {
"age": age,
"city": city
})
return result
# Edge insertion
def create_friendship(person1, person2, since):
result = conn.upsertEdge("Person", person1, "FRIENDS_WITH", "Person", person2, {
"since": since
})
return result
# Query execution
def run_custom_query(query_name, params={}):
result = conn.runInstalledQuery(query_name, params)
return result
# Batch data loading
def batch_load_data(data_file):
job = conn.gsql(f'''
CREATE LOADING JOB load_persons FOR GRAPH SocialNetwork {{
DEFINE FILENAME f1 = "{data_file}";
LOAD f1 TO VERTEX Person VALUES ($0, $1, $2)
USING header="true", separator=",";
}}
''')
# Execute job
conn.gsql("RUN LOADING JOB load_persons")
# Real-time analysis
def real_time_analysis():
# Streaming data processing
while True:
# Get new data
new_data = get_streaming_data()
# Update graph
for record in new_data:
create_person(record['name'], record['age'], record['city'])
# Execute real-time analysis
result = run_custom_query("realTimeAnalysis", {
"threshold": 100,
"time_window": 3600
})
# Alert processing
if result['alert_level'] == 'HIGH':
send_alert(result)
time.sleep(1) # 1-second interval
# Usage example
# Usage example: seed a little data, run an installed query, then start the
# (blocking) real-time loop.
if __name__ == "__main__":
    # Data insertion
    create_person("John Doe", 30, "New York")
    create_person("Jane Smith", 28, "Los Angeles")
    create_friendship("John Doe", "Jane Smith", "2024-01-15")
    # Execute analysis
    centrality_result = run_custom_query("calculateCentrality")
    print("Centrality analysis result:", centrality_result)
    # Start real-time analysis — never returns until interrupted
    real_time_analysis()
Performance Configuration
# TigerGraph configuration file (tigergraph.cfg)
# NOTE(review): illustrative settings — confirm valid section/key names
# against the configuration reference for the installed version.
[System]
# Engine memory ceiling, worker parallelism, and data directory.
MemoryLimit=32GB
ThreadCount=16
StorageRoot=/data/tigergraph
[GSQL]
# Per-query timeout (seconds) and response size cap.
QueryTimeout=300
MaxResultSize=100MB
EnableParallelLoading=true
[GPE]
# Graph Processing Engine: worker pool size and write batching.
WorkerThreads=8
BatchSize=10000
EnableRealTimeUpdate=true
[REST]
# REST++ endpoint limits.
MaxConcurrentRequests=1000
RequestTimeout=60
# System optimization — kernel settings for the engine's large memory
# mappings; sysctl -p applies them without a reboot.
echo 'vm.overcommit_memory=1' >> /etc/sysctl.conf
echo 'vm.max_map_count=262144' >> /etc/sysctl.conf
sysctl -p
# JVM settings (for the GSQL server component)
export JAVA_OPTIONS="-Xms8g -Xmx16g -XX:+UseG1GC"