Overview
An ideal multilingual model when you need high performance while staying with open source. It works well on messy data and is well suited to short queries that are expected to return medium-length passages of text (1-2 paragraphs).
Installation
pip install --upgrade pinecone
Create index
from pinecone import Pinecone
# Authenticate the client; replace the placeholder with a real API key.
pc = Pinecone(api_key="YOUR_API_KEY")

# Create a dense index with integrated inference
index_name = "multilingual-e5-large"

# Only create the index when it does not exist yet, so this snippet can be
# re-run without failing on an "already exists" error.
if not pc.has_index(index_name):
    pc.create_index_for_model(
        name=index_name,
        cloud="aws",
        region="us-east-1",
        embed={
            "model": "multilingual-e5-large",
            "field_map": {
                "text": "text",  # Map the record field to be embedded
            },
        },
    )

# Handle to the index, used by the upsert and search calls below.
index = pc.Index(index_name)
Embed & upsert
# Example sentences that mix the fruit and the company senses of "Apple".
sample_texts = (
    "Apple is a popular fruit known for its sweetness and crisp texture.",
    "The tech company Apple is known for its innovative products like the iPhone.",
    "Many people enjoy eating apples as a healthy snack.",
    "Apple Inc. has revolutionized the tech industry with its sleek designs and user-friendly interfaces.",
    "An apple a day keeps the doctor away, as the saying goes.",
    "Apple Computer Company was founded on April 1, 1976, by Steve Jobs, Steve Wozniak, and Ronald Wayne as a partnership.",
)

# One record per sentence, with sequential ids "vec1" .. "vec6". The "text"
# field is the one named in the index's field_map.
data = [
    {"id": f"vec{position}", "text": sentence}
    for position, sentence in enumerate(sample_texts, start=1)
]

# Send the raw-text records to the index under the example namespace.
index.upsert_records(namespace="example-namespace", records=data)
Query
# Search request: the query text to match against, and how many hits to return.
query_payload = dict(
    inputs=dict(text="Tell me about the tech company known as Apple."),
    top_k=3,
)

# Run the search in the same namespace the records were upserted into.
results = index.search(namespace="example-namespace", query=query_payload)

print(results)