SuperlinkedRetriever Examples
This notebook demonstrates how to build a Superlinked App and Query Descriptor and use them with the LangChain `SuperlinkedRetriever`.
Install the integration from PyPI:
pip install -U langchain-superlinked superlinked
Setup
Install the integration and its peer dependency:
pip install -U langchain-superlinked superlinked
Instantiation
See below for creating a Superlinked App (`sl_client`) and a `QueryDescriptor` (`sl_query`), then wiring them into `SuperlinkedRetriever`.
Usage
Call `retriever.invoke(query_text, **params)` to retrieve `Document` objects. The examples below show single-space and multi-space setups.
Use within a chain
The retriever can be used in LangChain chains by piping it into your prompt and model. See the main Superlinked retriever page for a full RAG example.
API reference
Refer to the API docs for `SuperlinkedRetriever`.
import superlinked.framework as sl
from langchain_superlinked import SuperlinkedRetriever
from datetime import timedelta
# Define schema
class DocumentSchema(sl.Schema):
    # Schema for the single-space example: a record id plus one text field.
    id: sl.IdField  # identifier of each record
    content: sl.String  # free text of each record


# Schema instance; its fields are what spaces and queries refer to.
doc_schema = DocumentSchema()
# Space + index
# Text-similarity space built over the schema's `content` field.
text_space = sl.TextSimilaritySpace(
    model="sentence-transformers/all-MiniLM-L6-v2",
    text=doc_schema.content,
)

# Index containing that single space.
doc_index = sl.Index([text_space])
# Query descriptor
# Built step by step; each call is applied to the result of the previous one.
query = sl.Query(doc_index).find(doc_schema)
# Compare the text space against the runtime parameter `query_text`.
query = query.similar(text_space.text, sl.Param("query_text"))
# Select the `content` field.
query = query.select([doc_schema.content])
# The result count is supplied at call time through the `limit` parameter.
query = query.limit(sl.Param("limit"))
# Minimal app
# In-memory source for the schema, wired to an in-memory executor.
source = sl.InMemorySource(schema=doc_schema)
executor = sl.InMemoryExecutor(
    indices=[doc_index],
    sources=[source],
)
app = executor.run()

# Data
# Three sample records with ids "1".."3", loaded after the app is running.
source.put(
    [
        {"id": str(position), "content": text}
        for position, text in enumerate(
            (
                "Machine learning algorithms process data efficiently.",
                "Natural language processing understands human language.",
                "Deep learning models require significant compute.",
            ),
            start=1,
        )
    ]
)
# Retriever
# Wrap the running app and the query descriptor; `content` is passed as the
# page-content field.
retriever = SuperlinkedRetriever(
    sl_query=query,
    sl_client=app,
    page_content_field="content",
)

# The positional argument is the query text; `limit` fills the query's
# `limit` parameter.
retriever.invoke("artificial intelligence", limit=2)
# Multi-space example (blog posts)
class BlogPostSchema(sl.Schema):
    # Schema for the multi-space example: a record id plus four typed fields.
    id: sl.IdField  # identifier of each post
    title: sl.String  # post title
    content: sl.String  # post body text
    category: sl.String  # category label
    published_date: sl.Timestamp  # publication time


# Schema instance; its fields are what spaces and queries refer to.
blog = BlogPostSchema()
# One text-similarity space per text field, both using the same model.
content_space = sl.TextSimilaritySpace(
    model="sentence-transformers/all-MiniLM-L6-v2",
    text=blog.content,
)
title_space = sl.TextSimilaritySpace(
    model="sentence-transformers/all-MiniLM-L6-v2",
    text=blog.title,
)

# Categorical space over `category` with a fixed list of categories.
cat_space = sl.CategoricalSimilaritySpace(
    categories=["technology", "science", "business"],
    category_input=blog.category,
)

# Recency space over `published_date` with 30-day and 90-day periods.
recency_space = sl.RecencySpace(
    period_time_list=[
        sl.PeriodTime(timedelta(days=window_days)) for window_days in (30, 90)
    ],
    timestamp=blog.published_date,
)

# Index combining all four spaces.
blog_index = sl.Index([content_space, title_space, cat_space, recency_space])
# Query over the blog index; each space's weight is a runtime parameter.
blog_query = sl.Query(
    blog_index,
    weights={
        content_space: sl.Param("content_weight"),
        title_space: sl.Param("title_weight"),
        cat_space: sl.Param("category_weight"),
        recency_space: sl.Param("recency_weight"),
    },
)
blog_query = blog_query.find(blog)
# Compare the content space against the runtime parameter `query_text`.
blog_query = blog_query.similar(content_space.text, sl.Param("query_text"))
# Select the four non-id fields.
blog_query = blog_query.select(
    [blog.title, blog.content, blog.category, blog.published_date]
)
# The result count is supplied at call time through the `limit` parameter.
blog_query = blog_query.limit(sl.Param("limit"))
from datetime import datetime

# In-memory source and app for the blog schema (rebinds `source` and `app`).
source = sl.InMemorySource(schema=blog)
app = sl.InMemoryExecutor(indices=[blog_index], sources=[source]).run()

# Two sample posts; `published_date` is an integer Unix timestamp computed as
# "now minus N days".
source.put(
    [
        {
            "id": post_id,
            "title": post_title,
            "content": post_body,
            "category": post_category,
            "published_date": int(
                (datetime.now() - timedelta(days=age_in_days)).timestamp()
            ),
        }
        for post_id, post_title, post_body, post_category, age_in_days in (
            ("p1", "Intro to ML", "Machine learning 101", "technology", 5),
            ("p2", "AI in Healthcare", "Transforming diagnosis", "science", 15),
        )
    ]
)
# Retriever for the blog example: `content` is passed as the page-content
# field, and three further fields are passed as metadata fields.
blog_retriever = SuperlinkedRetriever(
    sl_query=blog_query,
    sl_client=app,
    page_content_field="content",
    metadata_fields=["title", "category", "published_date"],
)

# Keyword arguments fill the query's named parameters.
# NOTE(review): the query also declares `title_weight` and `category_weight`
# params that are not passed here; confirm how Superlinked treats unset
# weight params.
blog_retriever.invoke(
    "machine learning",
    limit=2,
    content_weight=1.0,
    recency_weight=0.5,
)
Related
- Retriever conceptual guide
- Retriever how-to guides