Examples & Patterns
Complete end-to-end examples demonstrating real-world use cases of the Search API.
Example 1: E-commerce Product Search
A complete example showing how to build a product search with filters, ranking, and pagination.
from chromadb import Search, K, Knn, And
def search_products(collection, user_query, min_price=None, max_price=None,
                    category=None, in_stock_only=True, page=0, page_size=20):
    """
    Search for products with semantic search and filters.

    Args:
        collection: Chroma collection
        user_query: Natural language search query (e.g., "wireless headphones")
        min_price: Minimum price filter (inclusive); None disables it
        max_price: Maximum price filter (inclusive); None disables it
        category: Product category filter; a falsy value disables it
        in_stock_only: Only show in-stock items
        page: Page number (0-indexed)
        page_size: Results per page

    Returns:
        A list of dicts with the keys id, name, description, price, category,
        rating, image_url and relevance_score, in the order the search
        returned them.

    Raises:
        ValueError: If page is negative or page_size is not positive.
    """
    # Reject pagination values that would produce a negative offset or an
    # empty page before any query is built.
    if page < 0:
        raise ValueError("page must be >= 0")
    if page_size <= 0:
        raise ValueError("page_size must be > 0")

    # Collect only the conditions that were actually requested
    conditions = []
    if in_stock_only:
        # `==` on K builds a filter expression here, not a Python boolean,
        # so it must not be rewritten as `is True`.
        conditions.append(K("in_stock") == True)  # noqa: E712
    if category:
        conditions.append(K("category") == category)
    if min_price is not None:
        conditions.append(K("price") >= min_price)
    if max_price is not None:
        conditions.append(K("price") <= max_price)

    # Build search; attach a filter only when there is something to filter on,
    # so the search never receives an empty AND expression.
    search = Search()
    if conditions:
        combined_filter = conditions[0]
        for condition in conditions[1:]:
            combined_filter &= condition
        search = search.where(combined_filter)

    search = (search
              .rank(Knn(query=user_query))
              .limit(page_size, offset=page * page_size)
              .select(K.DOCUMENT, K.SCORE, "name", "price", "category", "rating", "image_url"))

    # Execute search; a single search was submitted, so take the first row list
    results = collection.search(search)
    rows = results.rows()[0]

    # Format results for display
    products = []
    for row in rows:
        metadata = row["metadata"]
        # assumes rows are dict-like and may lack a document - TODO confirm
        document = row.get("document") or ""
        # Add the ellipsis only when the text was actually cut
        description = document[:200] + "..." if len(document) > 200 else document
        products.append({
            "id": row["id"],
            "name": metadata["name"],
            "description": description,
            "price": metadata["price"],
            "category": metadata["category"],
            "rating": metadata["rating"],
            "image_url": metadata["image_url"],
            "relevance_score": row["score"],
        })
    return products
# Example usage: first page of electronics priced 50-300 for a travel query
products = search_products(
    collection,
    page_size=20,
    page=0,
    category="electronics",
    max_price=300,
    min_price=50,
    user_query="noise cancelling headphones for travel",
)

# One call per product; the trailing empty field yields the blank separator line
for i, product in enumerate(products, 1):
    print(
        f"{i}. {product['name']}",
        f" Price: ${product['price']:.2f} | Rating: {product['rating']}/5",
        f" {product['description']}",
        f" Relevance: {product['relevance_score']:.3f}",
        "",
        sep="\n",
    )
Example output:
1. Sony WH-1000XM5 Wireless Headphones
 Price: $279.99 | Rating: 4.8/5
 Premium noise cancelling headphones with exceptional sound quality, perfect for long flights and commutes. Features 30-hour battery life...
 Relevance: 0.234

2. Bose QuietComfort 45
 Price: $249.99 | Rating: 4.7/5
 Industry-leading noise cancellation with comfortable over-ear design. Ideal for frequent travelers with adjustable ANC levels...
 Relevance: 0.267
Example 2: Content Recommendation System
Build a personalized content recommendation system that excludes already-seen items and respects user preferences.
from chromadb import Search, K, Knn, Rrf
def get_recommendations(collection, user_id, user_preferences,
                        seen_content_ids, num_recommendations=10):
    """
    Get personalized content recommendations for a user.

    Args:
        collection: Chroma collection
        user_id: User identifier (not used by the query itself; kept so the
            call signature stays stable)
        user_preferences: Dict with user interests and preferences. Keys read
            here: "categories", "language", "min_rating" (default 3.5),
            "interests" and "favorite_topics"
        seen_content_ids: Content IDs the user has already seen; may be empty
        num_recommendations: Number of recommendations to return

    Returns:
        A list of dicts with the keys id, title, description, category,
        author, rating, published_date, thumbnail_url and relevance_score.
        Empty when num_recommendations is not positive.
    """
    # Nothing was asked for, so skip the query entirely
    if num_recommendations <= 0:
        return []

    # Only show published content at or above the minimum rating
    min_rating = user_preferences.get("min_rating", 3.5)
    combined_filter = K("status") == "published"
    combined_filter &= K("rating") >= min_rating

    # Exclude seen content. An empty exclusion list adds nothing and may be
    # rejected as an invalid filter - TODO confirm against the filter docs.
    if seen_content_ids:
        combined_filter &= K.ID.not_in(list(seen_content_ids))

    # Filter by preferred categories
    if user_preferences.get("categories"):
        combined_filter &= K("category").is_in(user_preferences["categories"])

    # Filter by language preference
    if user_preferences.get("language"):
        combined_filter &= K("language") == user_preferences["language"]

    # Signal 1: user interests ("general" when none are given or the list is empty)
    interests = user_preferences.get("interests") or ["general"]
    ranks = [Knn(query=" ".join(interests), return_rank=True, limit=200)]
    weights = [0.6]  # User interests weighted higher

    # Signal 2: favorite topics, added only when present so that an empty
    # string is never sent as a query
    favorite_topics = user_preferences.get("favorite_topics") or []
    if favorite_topics:
        ranks.append(Knn(query=" ".join(favorite_topics), return_rank=True, limit=200))
        weights.append(0.4)

    # Use RRF to combine the available signals
    hybrid_rank = Rrf(ranks=ranks, weights=weights, k=60)

    search = (Search()
              .where(combined_filter)
              .rank(hybrid_rank)
              .limit(num_recommendations)
              .select(K.DOCUMENT, K.SCORE, "title", "category", "author",
                      "rating", "published_date", "thumbnail_url"))

    # A single search was submitted, so take the first row list
    results = collection.search(search)
    rows = results.rows()[0]

    # Format recommendations
    recommendations = []
    for row in rows:
        metadata = row["metadata"]
        # assumes rows are dict-like and may lack a document - TODO confirm
        document = row.get("document") or ""
        # Add the ellipsis only when the text was actually cut
        description = document[:150] + "..." if len(document) > 150 else document
        recommendations.append({
            "id": row["id"],
            "title": metadata["title"],
            "description": description,
            "category": metadata["category"],
            "author": metadata["author"],
            "rating": metadata["rating"],
            "published_date": metadata["published_date"],
            "thumbnail_url": metadata["thumbnail_url"],
            "relevance_score": row["score"],
        })
    return recommendations
# Example usage: recommendations for a user interested in ML topics
user_preferences = {
    "interests": ["machine learning", "artificial intelligence", "data science"],
    "favorite_topics": ["neural networks", "deep learning", "transformers"],
    "categories": ["technology", "science", "research"],
    "language": "en",
    "min_rating": 4.0,
}
seen_content = ["content_001", "content_045", "content_123"]

recommendations = get_recommendations(
    collection,
    user_id="user_42",
    user_preferences=user_preferences,
    seen_content_ids=seen_content,
    num_recommendations=10,
)

print("Personalized Recommendations:")
for i, rec in enumerate(recommendations, 1):
    print(f"\n{i}. {rec['title']}")
    print(f" Category: {rec['category']} | Author: {rec['author']}")
    print(f" Rating: {rec['rating']}/5 | Published: {rec['published_date']}")
    print(f" {rec['description']}")
    # Four decimals: the sample output shows scores such as -0.0342, and
    # three decimals would make close scores print identically.
    print(f" Match Score: {rec['relevance_score']:.4f}")
Example output:
Personalized Recommendations:

1. Advanced Transformer Architectures in 2024
 Category: technology | Author: Dr. Sarah Chen
 Rating: 4.5/5 | Published: 2024-10-15
 An in-depth exploration of the latest transformer models and their applications in modern NLP tasks. This article covers attention mechanisms, positional encodings...
 Match Score: -0.0342

2. Practical Guide to Neural Network Optimization
 Category: research | Author: Prof. James Wilson
 Rating: 4.7/5 | Published: 2024-09-28
 Learn cutting-edge techniques for optimizing deep neural networks, including adaptive learning rates, batch normalization strategies, and efficient backpropagation...
 Match Score: -0.0389
Example 3: Multi-Category Search with Batch Operations
Use batch operations to search across multiple categories simultaneously and compare results.
from chromadb import Search, K, Knn
def search_across_categories(collection, user_query, categories, results_per_category=5):
    """
    Search across multiple categories with one batched search call.

    Args:
        collection: Chroma collection
        user_query: User's search query
        categories: Iterable of categories to search
        results_per_category: Number of results per category

    Returns:
        A dict mapping each category to a list of result dicts with the keys
        id, title, description, date and score. Empty when no categories are
        given.
    """
    # Materialize once: the categories are walked twice (to build the searches
    # and to label the results), which a one-shot iterable would not survive.
    categories = list(categories)
    if not categories:
        return {}

    # Build a search for each category
    searches = [
        (Search()
         .where(K("category") == category)
         .rank(Knn(query=user_query))
         .limit(results_per_category)
         .select(K.DOCUMENT, K.SCORE, "title", "category", "date"))
        for category in categories
    ]

    # Execute all searches in one batch
    results = collection.search(searches)

    # Fetch the row lists once instead of once per category; they come back
    # in the same order the searches were submitted.
    rows_per_search = results.rows()

    # Process results by category
    category_results = {}
    for category, rows in zip(categories, rows_per_search):
        formatted = []
        for row in rows:
            # assumes rows are dict-like and may lack a document - TODO confirm
            document = row.get("document") or ""
            # Add the ellipsis only when the text was actually cut
            description = document[:100] + "..." if len(document) > 100 else document
            formatted.append({
                "id": row["id"],
                "title": row["metadata"]["title"],
                "description": description,
                "date": row["metadata"]["date"],
                "score": row["score"],
            })
        category_results[category] = formatted
    return category_results
# Example usage: top three hits per category for one query
query = "latest developments in renewable energy"
categories = ["technology", "science", "news", "research"]
results_by_category = search_across_categories(
    collection,
    results_per_category=3,
    categories=categories,
    user_query=query,
)

# Display results: a banner per category, then its hits or a placeholder line
for category, results in results_by_category.items():
    print(f"\n{'='*60}", f"Category: {category.upper()}", '='*60, sep="\n")
    if results:
        for i, result in enumerate(results, 1):
            print(
                f"\n {i}. {result['title']}",
                f" Date: {result['date']}",
                f" {result['description']}",
                f" Relevance: {result['score']:.3f}",
                sep="\n",
            )
    else:
        print(" No results found")
Example output:
============================================================
Category: TECHNOLOGY
============================================================

 1. Solar Panel Efficiency Breakthrough
 Date: 2024-10-20
 New silicon-carbon composite cells achieve 31% efficiency, setting industry records. Researchers at MIT have developed...
 Relevance: 0.245

 2. Wind Turbine Design Innovations
 Date: 2024-10-15
 Advanced blade designs increase energy capture by 18% while reducing noise pollution. The new turbines feature...
 Relevance: 0.289

============================================================
Category: SCIENCE
============================================================

 1. Photosynthesis-Inspired Energy Storage
 Date: 2024-10-18
 Scientists develop bio-inspired battery system that mimics natural photosynthesis for efficient solar energy storage...
 Relevance: 0.256
Best Practices
Based on these examples, here are key best practices:
- Build filters incrementally - Construct complex filters by combining simpler conditions
- Use batch operations - When searching multiple variations, use batch operations for better performance
- Select only needed fields - Reduce data transfer by selecting only the fields you'll use
- Handle empty results gracefully - Always check if results exist before processing
- Use hybrid search for personalization - Combine multiple ranking signals with RRF for better recommendations
- Paginate large result sets - Use limit and offset for efficient pagination
- Format results for your use case - Transform raw results into application-specific formats
Next Steps
- Review Search Basics for core concepts
- Learn about Filtering for advanced filter expressions
- Explore Ranking for custom scoring strategies
- See Hybrid Search for combining multiple ranking methods