Overview
Pinecone is a high-performance, fully managed vector database service designed for AI applications. It provides storage, indexing, and fast similarity search for vector embeddings, enabling AI applications to retrieve relevant knowledge reliably in large-scale production environments.
Details
Architecture
Pinecone employs a next-generation serverless architecture with the following characteristics:
Control Plane and Data Plane
- Control Plane: Manages indexes and collections (create, delete, configure)
- Data Plane: Handles vector operations (store, query, fetch, delete, update), as illustrated in the sketch below
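To make the split concrete, the following minimal sketch (assuming the Python SDK; "demo-index" is a placeholder name) shows control-plane calls managing an index and data-plane calls operating on its vectors:
from pinecone import Pinecone, ServerlessSpec
pc = Pinecone(api_key="YOUR_API_KEY")
# Control plane: create, inspect, and delete indexes
pc.create_index(
    name="demo-index",
    dimension=8,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1")
)
print(pc.list_indexes().names())
print(pc.describe_index("demo-index"))
# Data plane: store and query vectors inside the index
index = pc.Index("demo-index")
index.upsert(vectors=[("v1", [0.1] * 8)])
print(index.query(vector=[0.1] * 8, top_k=1))
# Control plane again: tear the index down
pc.delete_index("demo-index")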
Serverless Architecture Features
- Separation of Storage and Compute: Compute resources are used only when needed
- File-based Architecture: Index data is organized into files held in object storage
- Disk-based Metadata Filtering: Efficient filtering even over high-cardinality metadata
- Real-time Indexing: Writes become visible to queries in near real time
Index Types
Serverless Indexes
- Fully managed with automatic scaling
- Usage-based pricing model
- High availability and reliability
Pod-based Indexes
- Customizable pod types and counts
- Optimized for specific workloads
- Fixed per-minute pricing (the sketch below shows how each index type is created)
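To make the difference concrete, here is a minimal sketch (index names, regions, and pod sizing are placeholder choices) of creating each index type with the Python SDK:
from pinecone import Pinecone, ServerlessSpec, PodSpec
pc = Pinecone(api_key="YOUR_API_KEY")
# Serverless index: no capacity planning, usage-based billing
pc.create_index(
    name="serverless-example",
    dimension=1536,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1")
)
# Pod-based index: explicit pod type, pod count, and replicas
pc.create_index(
    name="pod-example",
    dimension=1536,
    metric="cosine",
    spec=PodSpec(
        environment="us-east-1-aws",  # pod environment
        pod_type="p1.x1",             # pod type and size
        pods=1,                       # number of pods
        replicas=1                    # replicas for throughput and availability
    )
)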
Key Features
Search Capabilities
- Semantic Search: Similarity search using dense vectors
- Lexical Search: Keyword matching using sparse vectors
- Hybrid Search: Combination of semantic and lexical search (combined with filtering and namespaces in the sketch after this list)
- Metadata Filtering: Advanced filtering with complex queries
- Namespaces: Logical data separation and multi-tenancy
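The sketch below combines these capabilities in one query: a dense vector for semantic similarity, a sparse vector for lexical matching (single-index hybrid search assumes the index uses the dotproduct metric), a compound metadata filter, and a namespace to scope the search. The index name and vector values are placeholders.
from pinecone import Pinecone
pc = Pinecone(api_key="YOUR_API_KEY")
index = pc.Index("hybrid-example")
results = index.query(
    vector=[0.12, 0.34, 0.56],  # dense query vector (truncated example)
    sparse_vector={             # sparse lexical signal as index/value pairs
        "indices": [10, 45, 160],
        "values": [0.5, 0.5, 0.2]
    },
    filter={                    # compound metadata filter
        "$and": [
            {"category": {"$in": ["technology", "science"]}},
            {"year": {"$gte": 2023}}
        ]
    },
    namespace="tech-docs",      # logical partition / tenant
    top_k=5,
    include_metadata=True
)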
AI Integration Features
- Pinecone Inference: Built-in embedding models and reranking (see the sketch after this list)
- Pinecone Assistant: Rapid development of chatbots and agent applications
- Multiple Embedding Models: OpenAI, Cohere, Sentence Transformers, and more
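As a brief sketch of Pinecone Inference (model names are examples from the hosted catalog; input texts are placeholders), both embedding and reranking are exposed on the same client:
from pinecone import Pinecone
pc = Pinecone(api_key="YOUR_API_KEY")
# Generate embeddings with a hosted model
embeddings = pc.inference.embed(
    model="multilingual-e5-large",
    inputs=["Vector databases store embeddings.", "Pinecone is fully managed."],
    parameters={"input_type": "passage", "truncate": "END"}
)
print(len(embeddings[0].values))  # 1024 dimensions for multilingual-e5-large
# Rerank candidate documents against a query with a hosted reranker
reranked = pc.inference.rerank(
    model="bge-reranker-v2-m3",
    query="What is a vector database?",
    documents=[
        {"id": "d1", "text": "A vector database stores and searches embeddings."},
        {"id": "d2", "text": "A relational database stores rows and columns."}
    ],
    top_n=1,
    return_documents=True
)
for row in reranked.data:
    print(row.index, row.score)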
Enterprise Features
Security
- Encryption at rest and in transit
- Hierarchical encryption keys
- Private networking
- SOC 2 Type II and ISO 27001 certified; supports GDPR and HIPAA compliance
BYOC (Bring Your Own Cloud)
- Deploy private Pinecone regions on AWS
- Ensure data sovereignty and compliance
- Maintain benefits of fully managed SaaS
Pros and Cons
Pros
- Fully Managed: No infrastructure management required
- Rapid Setup: Launch vector databases in seconds
- Auto-scaling: Resources automatically adjust to demand
- High Reliability: Robust design for production use
- Rich Integrations: Works with LangChain, OpenAI, Hugging Face, and more
- Global Deployment: Multiple cloud providers and regions
Cons
- Vendor Lock-in: Dependency on proprietary service
- Cost: Can be expensive for large-scale usage
- Limited Customization: Less flexible compared to open-source solutions
- No Offline Usage: Requires constant cloud connectivity
Key Links
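- Official site: https://www.pinecone.io
- Documentation: https://docs.pinecone.io
- Python SDK: https://github.com/pinecone-io/pinecone-python-client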
Code Examples
Basic Usage
from pinecone import Pinecone, ServerlessSpec, CloudProvider, AwsRegion
# Initialize Pinecone client
pc = Pinecone(api_key="YOUR_API_KEY")
# Create a serverless index
index_config = pc.create_index(
    name="semantic-search",
    dimension=1536,  # OpenAI ada-002 dimension
    metric="cosine",
    spec=ServerlessSpec(
        cloud=CloudProvider.AWS,
        region=AwsRegion.US_EAST_1
    )
)
# Connect to the index
index = pc.Index(host=index_config.host)
# Upsert vectors (insert/update)
index.upsert(
    vectors=[
        (
            "doc1",                                                 # ID
            [0.1, 0.2, 0.3, ...],                                   # 1536-dimensional vector
            {"title": "AI Fundamentals", "category": "technology"}  # Metadata
        ),
        (
            "doc2",
            [0.2, 0.3, 0.4, ...],
            {"title": "Introduction to ML", "category": "technology"}
        )
    ],
    namespace="tech-docs"
)
# Query vectors
query_embedding = [0.15, 0.25, 0.35, ...] # Query vector
results = index.query(
    vector=query_embedding,
    top_k=5,
    namespace="tech-docs",
    filter={"category": {"$eq": "technology"}},
    include_metadata=True
)
# Display results
for match in results.matches:
print(f"ID: {match.id}, Score: {match.score}, Metadata: {match.metadata}")
Using Integrated Embedding Models
from pinecone import Pinecone
# Create an index with an integrated embedding model
# (uses the integrated-inference API available in recent versions of the Python SDK)
pc = Pinecone(api_key="YOUR_API_KEY")
index_config = pc.create_index_for_model(
    name="text-search",
    cloud="aws",
    region="us-east-1",
    # dimension (1024) and metric are determined by the chosen model
    embed={
        "model": "multilingual-e5-large",  # integrated embedding model
        "field_map": {"text": "text"}      # record field that gets embedded
    }
)
index = pc.Index(host=index_config.host)
# Upsert text records directly (vectorized automatically on the server)
index.upsert_records(
    "articles",  # namespace
    [
        {
            "_id": "article1",
            "text": "Artificial intelligence is transforming our lives.",
            "language": "en",
            "topic": "AI"
        },
        {
            "_id": "article2",
            "text": "Machine learning learns from large amounts of data.",
            "language": "en",
            "topic": "ML"
        }
    ]
)
# Query with text (vectorized automatically on the server)
results = index.search(
    namespace="articles",
    query={
        "inputs": {"text": "Tell me about recent AI trends"},
        "top_k": 3
    }
)
Asynchronous Operations
import asyncio
from pinecone import PineconeAsyncio
async def async_vector_operations():
    # Use the async client
    async with PineconeAsyncio(api_key="YOUR_API_KEY") as pc:
        idx = pc.IndexAsyncio(host="YOUR_INDEX_HOST")
        # Async upsert vectors
        await idx.upsert(vectors=[
            ("async1", [1.0, 2.0, 3.0, ...]),
            ("async2", [2.0, 3.0, 4.0, ...])
        ])
        # Async query
        results = await idx.query(
            vector=[1.5, 2.5, 3.5, ...],
            top_k=10
        )
        return results
# Run async function
results = asyncio.run(async_vector_operations())
RAG Application Example
from pinecone import Pinecone
from openai import OpenAI
# Initialize OpenAI and Pinecone clients
openai_client = OpenAI(api_key="YOUR_OPENAI_API_KEY")
pc = Pinecone(api_key="YOUR_PINECONE_API_KEY")
index = pc.Index("knowledge-base")
def generate_embedding(text):
    """Generate a text embedding using the OpenAI API."""
    response = openai_client.embeddings.create(
        model="text-embedding-ada-002",
        input=text
    )
    return response.data[0].embedding

def search_knowledge_base(query, top_k=5):
    """Search the knowledge base for relevant context."""
    query_embedding = generate_embedding(query)
    results = index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True
    )
    contexts = []
    for match in results.matches:
        contexts.append(match.metadata["text"])
    return contexts
def generate_answer(query, contexts):
    """Generate an answer grounded in the retrieved context."""
    context_str = "\n\n".join(contexts)
    prompt = f"""Answer the question using the following context.

Context:
{context_str}

Question: {query}

Answer:"""
    response = openai_client.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt}
        ]
    )
    return response.choices[0].message.content
# Use RAG system
query = "Tell me about Pinecone's serverless architecture"
contexts = search_knowledge_base(query)
answer = generate_answer(query, contexts)
print(answer)