GitHub Overview
asg017/sqlite-vec
A vector search SQLite extension that runs anywhere!
Repository: https://github.com/asg017/sqlite-vec
Stars: 5,927
Watchers: 61
Forks: 221
Created: April 20, 2024
Language: C
License: Apache License 2.0
Topics
sqlite, sqlite-extension
Star History
Data as of: 7/30/2025, 02:37 AM
Database
SQLite + sqlite-vec
Overview
sqlite-vec is an extension that adds vector search capabilities to SQLite, the world's most widely used embedded database. It is serverless, lightweight, and requires zero configuration, which makes vector search practical in mobile apps, desktop applications, and edge devices.
Details
sqlite-vec was developed by Alex Garcia and is provided as a SQLite extension. Implemented in pure C, it provides vector search functionality without additional dependencies. While lightweight, it offers basic vector similarity search and indexing capabilities, making it ideal for local applications and edge computing environments.
Key features of sqlite-vec:
- Works as SQLite extension
- Zero dependencies
- Lightweight (few hundred KB)
- Brute-force and ANN indexing
- L2 distance, cosine similarity, inner product support
- JSON vector storage
- Vector operations via SQL functions
- Cross-platform support
- WASM support (browser execution)
- Mobile-friendly
Implementation Features
- Pure C implementation
- SQLite virtual table mechanism
- Memory-efficient design
- Simple API
Pros and Cons
Pros
- Ultra-lightweight: Operates with minimal footprint
- Easy deployment: Just load SQLite extension
- Zero configuration: No additional setup or server required
- Portability: Works everywhere (mobile, edge, browser)
- SQLite integration: Easily add to existing SQLite apps
- Offline capable: No network connection required
Cons
- Performance limitations: Slow with large datasets
- Feature limitations: Basic vector search functionality only
- Scalability: Single-file database constraints
- Concurrency: SQLite write lock limitations
- Index options: Limited indexing choices
Key Links
Usage Examples
Setup and Installation
import sqlite3
import numpy as np
import json
# SQLite connection and extension loading
conn = sqlite3.connect(':memory:')

# Extension loading is switched on only for the duration of the load call and
# switched off again afterwards, so later SQL run on this connection cannot
# pull in arbitrary shared libraries.
conn.enable_load_extension(True)
try:
    # Load sqlite-vec extension
    # Linux: ./vec0.so
    # macOS: ./vec0.dylib
    # Windows: ./vec0.dll
    conn.load_extension('./vec0')
finally:
    conn.enable_load_extension(False)

# Create vector table: one 768-dimensional float vector per document, stored
# next to its title and content.
conn.execute("""
CREATE VIRTUAL TABLE documents USING vec0(
id INTEGER PRIMARY KEY,
title TEXT,
content TEXT,
embedding FLOAT[768]
)
""")
Basic Vector Operations
# Insert document
def insert_document(conn, title, content, embedding):
    """Insert a single document row and commit right away.

    The embedding is serialised to a JSON array string; the ``vec_f32`` SQL
    function converts it on the database side.

    Args:
        conn: Connection with a ``documents`` table and the ``vec_f32``
            SQL function available.
        title: Document title.
        content: Document body text.
        embedding: Vector object exposing ``tolist()`` (e.g. a NumPy array).
    """
    statement = (
        "\n"
        "INSERT INTO documents(title, content, embedding)\n"
        "VALUES (?, ?, vec_f32(?))\n"
    )
    # Convert vector to JSON format
    serialized = json.dumps(embedding.tolist())
    conn.execute(statement, (title, content, serialized))
    conn.commit()
# Insert sample data: one random 768-dimensional float32 vector, matching the
# FLOAT[768] column of the documents table.
embedding = np.random.rand(768).astype(np.float32)
insert_document(
    conn,
    title="SQLite Vector Search",
    content="Vector search in lightweight embedded database",
    embedding=embedding,
)
# Vector search (cosine similarity)
def vector_search_cosine(conn, query_vector, limit=10):
    """Return the documents closest to ``query_vector`` by cosine distance.

    Every row of ``documents`` is scored with ``vec_distance_cosine`` and the
    rows are returned in ascending distance order.

    Args:
        conn: Connection with the ``documents`` table and the ``vec_f32`` /
            ``vec_distance_cosine`` SQL functions available.
        query_vector: Vector object exposing ``tolist()``.
        limit: Maximum number of rows to return.

    Returns:
        A list of ``(id, title, content, distance)`` tuples.
    """
    sql = (
        "\n"
        "SELECT\n"
        "id,\n"
        "title,\n"
        "content,\n"
        "vec_distance_cosine(embedding, vec_f32(?)) as distance\n"
        "FROM documents\n"
        "ORDER BY distance\n"
        "LIMIT ?\n"
    )
    encoded_query = json.dumps(query_vector.tolist())
    return conn.execute(sql, (encoded_query, limit)).fetchall()
# L2 distance search
def vector_search_l2(conn, query_vector, limit=10):
    """Return the documents closest to ``query_vector`` by L2 distance.

    Every row of ``documents`` is scored with ``vec_distance_l2`` and the rows
    are returned in ascending distance order.

    Args:
        conn: Connection with the ``documents`` table and the ``vec_f32`` /
            ``vec_distance_l2`` SQL functions available.
        query_vector: Vector object exposing ``tolist()``.
        limit: Maximum number of rows to return.

    Returns:
        A list of ``(id, title, content, distance)`` tuples.
    """
    sql = (
        "\n"
        "SELECT\n"
        "id,\n"
        "title,\n"
        "content,\n"
        "vec_distance_l2(embedding, vec_f32(?)) as distance\n"
        "FROM documents\n"
        "ORDER BY distance\n"
        "LIMIT ?\n"
    )
    encoded_query = json.dumps(query_vector.tolist())
    return conn.execute(sql, (encoded_query, limit)).fetchall()
Index Creation and Management
# No separate index is created here.
#
# The statement that used to stand here,
#   CREATE INDEX idx_embedding ON documents(embedding)
#   USING vec_ann(metric='cosine', trees=10)
# cannot run: SQLite's CREATE INDEX grammar has no USING clause, and an index
# cannot be created on a virtual table such as a vec0 table, so executing it
# always raised sqlite3.OperationalError.
#
# NOTE(review): per the sqlite-vec documentation, vec0 tables answer
# nearest-neighbour queries themselves by brute force and expose no ANN index
# to build -- confirm against the sqlite-vec release in use.
# A KNN query against the vec0 table has the form:
#   SELECT rowid, distance FROM documents
#   WHERE embedding MATCH vec_f32(?) AND k = ?
#   ORDER BY distance
# NOTE(review): to rank that query by cosine distance the column would need to
# be declared as "embedding FLOAT[768] distance_metric=cosine" when the table
# is created -- verify before changing the schema.
# Vector search with metadata
def search_with_metadata(conn, query_vector, category=None, limit=10):
    """Search documents by cosine distance, optionally filtered by category.

    Args:
        conn: Connection with the ``documents`` table and the ``vec_f32`` /
            ``vec_distance_cosine`` SQL functions available. When a category
            is given, a ``metadata`` table with ``doc_id``, ``category`` and
            ``tags`` columns is joined in as well.
        query_vector: Vector object exposing ``tolist()``.
        category: If truthy, only documents whose metadata row has this
            category are returned. ``None`` or an empty string disables the
            filter.
        limit: Maximum number of rows to return. Defaults to 10, the value
            that was previously fixed in the SQL, and mirrors the ``limit``
            parameter of the other search helpers.

    Returns:
        With a category: a list of
        ``(id, title, content, distance, category, tags)`` tuples.
        Without: a list of ``(id, title, content, distance)`` tuples.
        Both are in ascending distance order.
    """
    query_json = json.dumps(query_vector.tolist())

    if category:
        filtered_sql = (
            "\n"
            "SELECT\n"
            "d.id,\n"
            "d.title,\n"
            "d.content,\n"
            "vec_distance_cosine(d.embedding, vec_f32(?)) as distance,\n"
            "m.category,\n"
            "m.tags\n"
            "FROM documents d\n"
            "JOIN metadata m ON d.id = m.doc_id\n"
            "WHERE m.category = ?\n"
            "ORDER BY distance\n"
            "LIMIT ?\n"
        )
        cursor = conn.execute(filtered_sql, (query_json, category, limit))
    else:
        unfiltered_sql = (
            "\n"
            "SELECT\n"
            "d.id,\n"
            "d.title,\n"
            "d.content,\n"
            "vec_distance_cosine(d.embedding, vec_f32(?)) as distance\n"
            "FROM documents d\n"
            "ORDER BY distance\n"
            "LIMIT ?\n"
        )
        cursor = conn.execute(unfiltered_sql, (query_json, limit))

    return cursor.fetchall()
Batch Processing and Optimization
# Batch insert
def batch_insert_documents(conn, documents):
    """Insert many documents inside one explicit transaction.

    Either every document is stored or none is: if any insert fails the
    transaction is rolled back before the exception propagates, so the
    connection is never left inside a half-finished transaction.

    Args:
        conn: Connection with a ``documents`` table and the ``vec_f32``
            SQL function available. It must not already be inside a
            transaction, because an explicit ``BEGIN`` is issued.
        documents: Iterable of mappings with ``'title'``, ``'content'`` and
            ``'embedding'`` keys; each embedding must expose ``tolist()``.

    Raises:
        KeyError: If a document lacks a required key. Raised before the
            transaction is opened, so nothing is written.
        sqlite3.Error: If an insert fails. The transaction is rolled back.
    """
    insert_sql = (
        "\n"
        "INSERT INTO documents(title, content, embedding)\n"
        "VALUES (?, ?, vec_f32(?))\n"
    )
    # Serialise everything up front so malformed input fails before BEGIN.
    rows = [
        (doc['title'], doc['content'], json.dumps(doc['embedding'].tolist()))
        for doc in documents
    ]

    conn.execute("BEGIN TRANSACTION")
    try:
        conn.executemany(insert_sql, rows)
    except Exception:
        # SQLite may already have ended the transaction for some errors;
        # only roll back if one is still open so the original error surfaces.
        if conn.in_transaction:
            conn.execute("ROLLBACK")
        raise
    conn.execute("COMMIT")
# Memory optimization settings
def optimize_sqlite_settings(conn):
    """Apply a fixed set of tuning PRAGMAs to ``conn``.

    The statements are executed in order; their result rows are not read.

    Args:
        conn: Open SQLite connection to configure.
    """
    tuning_pragmas = (
        # Set page cache size
        "PRAGMA cache_size = 10000",
        # Enable WAL mode
        "PRAGMA journal_mode = WAL",
        # Adjust synchronous mode
        "PRAGMA synchronous = NORMAL",
        # Enable memory-mapped I/O (268435456 bytes = 256 MiB)
        "PRAGMA mmap_size = 268435456",
    )
    for pragma in tuning_pragmas:
        conn.execute(pragma)
# Get vector statistics
def get_vector_stats(conn):
    """Return aggregate statistics over the ``documents`` table.

    Args:
        conn: Connection with the ``documents`` table and the ``vec_length``
            SQL function available.

    Returns:
        A single ``(total_vectors, avg_vector_length)`` row: the row count and
        the average of ``vec_length(embedding)`` across all rows. The average
        is ``None`` when the table is empty.
    """
    stats_sql = (
        "\n"
        "SELECT\n"
        "COUNT(*) as total_vectors,\n"
        "AVG(vec_length(embedding)) as avg_vector_length\n"
        "FROM documents\n"
    )
    return conn.execute(stats_sql).fetchone()
Mobile/Edge Implementation
# Lightweight vector search class
class LiteVectorSearch:
    """Small vector store backed by one SQLite file and the sqlite-vec extension.

    Two tables are kept in step: ``vectors`` holds the JSON-encoded embedding
    and optional JSON metadata, and the ``vec_index`` vec0 virtual table holds
    the float vectors used for distance computation. Rows are paired through
    ``vec_index.rowid == vectors.id``.

    Instances can be used as context managers; leaving the ``with`` block
    closes the connection.
    """

    def __init__(self, db_path, extension_path='./vec0', dimensions=384):
        """Open the database, load the extension and create missing tables.

        Args:
            db_path: Path of the SQLite database file (or ``':memory:'``).
            extension_path: Location of the sqlite-vec shared library.
            dimensions: Length of every stored vector. The default of 384 is
                the dimension used for lightweight models.

        Raises:
            ValueError: If ``dimensions`` is not a positive integer.
        """
        # The value is interpolated into DDL, so force it to a plain int.
        self._dimensions = int(dimensions)
        if self._dimensions <= 0:
            raise ValueError("dimensions must be a positive integer")

        self.conn = sqlite3.connect(db_path)
        # Allow extension loading only while the library is being loaded.
        self.conn.enable_load_extension(True)
        try:
            self.conn.load_extension(extension_path)
        finally:
            self.conn.enable_load_extension(False)
        self._setup_tables()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Close the underlying database connection."""
        self.conn.close()

    def _setup_tables(self):
        """Create the ``vectors`` table and the ``vec_index`` vec0 table if absent."""
        self.conn.execute(
            "\n"
            "CREATE TABLE IF NOT EXISTS vectors (\n"
            "id INTEGER PRIMARY KEY AUTOINCREMENT,\n"
            "embedding BLOB,\n"
            "metadata TEXT\n"
            ")\n"
        )
        # The dimension note lives here as a Python comment: SQLite does not
        # treat '#' as a comment marker, so it must not appear inside the SQL.
        self.conn.execute(
            "\n"
            "CREATE VIRTUAL TABLE IF NOT EXISTS vec_index USING vec0(\n"
            f"embedding FLOAT[{self._dimensions}]\n"
            ")\n"
        )

    def add_vector(self, embedding, metadata=None):
        """Store one vector together with optional metadata.

        Both inserts run in a single transaction that is committed on success
        and rolled back if either insert fails.

        Args:
            embedding: Vector object exposing ``tolist()``.
            metadata: JSON-serialisable value, or ``None`` for no metadata.
                Empty containers are stored as given rather than dropped.

        Returns:
            The ``vectors.id`` assigned to the new row.
        """
        embedding_json = json.dumps(embedding.tolist())
        metadata_json = None if metadata is None else json.dumps(metadata)

        with self.conn:
            cursor = self.conn.execute(
                "\n"
                "INSERT INTO vectors(embedding, metadata)\n"
                "VALUES (?, ?)\n",
                (embedding_json, metadata_json),
            )
            row_id = cursor.lastrowid
            # Reuse the id as the vec_index rowid so the join in search()
            # pairs the right rows even if the two tables' automatic
            # numbering has drifted apart.
            self.conn.execute(
                "\n"
                "INSERT INTO vec_index(rowid, embedding)\n"
                "VALUES (?, vec_f32(?))\n",
                (row_id, embedding_json),
            )
        return row_id

    def search(self, query_vector, k=5):
        """Return the ``k`` stored vectors nearest to ``query_vector``.

        Args:
            query_vector: Vector object exposing ``tolist()``.
            k: Maximum number of results.

        Returns:
            A list of dicts with keys ``'id'``, ``'metadata'`` (decoded JSON,
            or ``None`` when no metadata was stored) and ``'distance'``
            (cosine distance), in ascending distance order.
        """
        query_json = json.dumps(query_vector.tolist())
        cursor = self.conn.execute(
            "\n"
            "SELECT\n"
            "v.id,\n"
            "v.metadata,\n"
            "vec_distance_cosine(vi.embedding, vec_f32(?)) as distance\n"
            "FROM vec_index vi\n"
            "JOIN vectors v ON vi.rowid = v.id\n"
            "ORDER BY distance\n"
            "LIMIT ?\n",
            (query_json, k),
        )
        return [
            {
                'id': row_id,
                'metadata': None if raw_meta is None else json.loads(raw_meta),
                'distance': distance,
            }
            for row_id, raw_meta, distance in cursor
        ]