This project is for research purposes only. Third-party datasets may be subject to additional terms and conditions under their associated licenses. Please refer to specific papers for more details:
import torch
import torch.nn.functional as F
from torch import Tensor
from transformers import AutoTokenizer, AutoModel
def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Pool each sequence down to the hidden state of its last attended token.

    If the final column of ``attention_mask`` sums to the batch size, the
    batch is treated as left-padded and the final position is returned for
    every row. Otherwise each row is indexed at
    ``attention_mask.sum(dim=1) - 1``.

    Args:
        last_hidden_states: Hidden states, indexed as ``[batch, position, ...]``.
        attention_mask: Mask indexed as ``[batch, position]``; presumably 1
            for real tokens and 0 for padding (confirm against the tokenizer).

    Returns:
        One hidden-state entry per row of the batch.
    """
    # Every row ends in an attended position -> padding (if any) is on the left.
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        # NOTE(review): this assumes the attended tokens of each row are
        # contiguous and start at position 0 (right padding).
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix a query with its task instruction.

    Args:
        task_description: Instruction text placed after ``Instruct: ``.
        query: Query text placed after ``Query: `` on the following line.

    Returns:
        The string ``Instruct: <task_description>`` and ``Query: <query>``
        joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'


# Each query must come with a one-sentence instruction that describes the task
# Instruction text shared by both example queries.
task = 'Given a web search query, retrieve relevant passages that answer the query'
queries = [
    get_detailed_instruct(task, question)
    for question in ('How to bake a chocolate cake', 'Symptoms of the flu')
]

# Passages are used verbatim; only the queries carry the instruction prefix.
passages = [
    "To bake a delicious chocolate cake, you'll need the following ingredients: all-purpose flour, sugar, cocoa powder, baking powder, baking soda, salt, eggs, milk, vegetable oil, and vanilla extract. Start by preheating your oven to 350°F (175°C). In a mixing bowl, combine the dry ingredients (flour, sugar, cocoa powder, baking powder, baking soda, and salt). In a separate bowl, whisk together the wet ingredients (eggs, milk, vegetable oil, and vanilla extract). Gradually add the wet mixture to the dry ingredients, stirring until well combined. Pour the batter into a greased cake pan and bake for 30-35 minutes. Let it cool before frosting with your favorite chocolate frosting. Enjoy your homemade chocolate cake!",
    "The flu, or influenza, is an illness caused by influenza viruses. Common symptoms of the flu include a high fever, chills, cough, sore throat, runny or stuffy nose, body aches, headache, fatigue, and sometimes nausea and vomiting. These symptoms can come on suddenly and are usually more severe than the common cold. It's important to get plenty of rest, stay hydrated, and consult a healthcare professional if you suspect you have the flu. In some cases, antiviral medications can help alleviate symptoms and reduce the duration of the illness.",
]
# Load the pretrained tokenizer and model by name.
tokenizer = AutoTokenizer.from_pretrained('Salesforce/SFR-Embedding-Mistral')
model = AutoModel.from_pretrained('Salesforce/SFR-Embedding-Mistral')

# Tokenize queries and passages together as one padded, truncated batch.
max_length = 4096
input_texts = queries + passages
batch_dict = tokenizer(input_texts, max_length=max_length, padding=True, truncation=True, return_tensors="pt")
outputs = model(**batch_dict)
embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])

# L2-normalize so the matrix product below yields cosine similarities.
embeddings = F.normalize(embeddings, p=2, dim=1)
# The first two rows are the queries; the remaining rows are the passages.
scores = (embeddings[:2] @ embeddings[2:].T) * 100
print(scores.tolist())
# [[86.7153549194336, 36.64569091796875], [35.00493621826172, 82.0738525390625]]
Sentence Transformers
from sentence_transformers import SentenceTransformer, util
# Build the model through the Sentence Transformers wrapper, using the same
# model name as the transformers example.
model = SentenceTransformer("Salesforce/SFR-Embedding-Mistral")
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix a query with its task instruction.

    Args:
        task_description: Instruction text placed after ``Instruct: ``.
        query: Query text placed after ``Query: `` on the following line.

    Returns:
        The string ``Instruct: <task_description>`` and ``Query: <query>``
        joined by a single newline.
    """
    return f'Instruct: {task_description}\nQuery: {query}'


# Each query must come with a one-sentence instruction that describes the task
# Instruction text shared by both example queries.
task = 'Given a web search query, retrieve relevant passages that answer the query'
queries = [
    get_detailed_instruct(task, question)
    for question in ('How to bake a chocolate cake', 'Symptoms of the flu')
]

# Passages are used verbatim; only the queries carry the instruction prefix.
passages = [
    "To bake a delicious chocolate cake, you'll need the following ingredients: all-purpose flour, sugar, cocoa powder, baking powder, baking soda, salt, eggs, milk, vegetable oil, and vanilla extract. Start by preheating your oven to 350°F (175°C). In a mixing bowl, combine the dry ingredients (flour, sugar, cocoa powder, baking powder, baking soda, and salt). In a separate bowl, whisk together the wet ingredients (eggs, milk, vegetable oil, and vanilla extract). Gradually add the wet mixture to the dry ingredients, stirring until well combined. Pour the batter into a greased cake pan and bake for 30-35 minutes. Let it cool before frosting with your favorite chocolate frosting. Enjoy your homemade chocolate cake!",
    "The flu, or influenza, is an illness caused by influenza viruses. Common symptoms of the flu include a high fever, chills, cough, sore throat, runny or stuffy nose, body aches, headache, fatigue, and sometimes nausea and vomiting. These symptoms can come on suddenly and are usually more severe than the common cold. It's important to get plenty of rest, stay hydrated, and consult a healthcare professional if you suspect you have the flu. In some cases, antiviral medications can help alleviate symptoms and reduce the duration of the illness.",
]
# Encode queries and passages in a single call.
embeddings = model.encode(queries + passages)
# Rows 0-1 are the queries, the rest are the passages; similarities are scaled by 100.
scores = util.cos_sim(embeddings[:2], embeddings[2:]) * 100
print(scores.tolist())
# [[86.71537780761719, 36.645721435546875], [35.00497055053711, 82.07388305664062]]
SFR-Embedding Team (* indicates lead contributors).
Rui Meng*
Ye Liu*
Shafiq Rayhan Joty
Caiming Xiong
Yingbo Zhou
Semih Yavuz
Citation
@misc{SFRAIResearch2024,
title={SFR-Embedding-Mistral: Enhance Text Retrieval with Transfer Learning},
author={Rui Meng and Ye Liu and Shafiq Rayhan Joty and Caiming Xiong and Yingbo Zhou and Semih Yavuz},
howpublished={Salesforce AI Research Blog},
year={2024},
url={https://blog.salesforceairesearch.com/sfr-embedded-mistral/}
}
Runs of Salesforce SFR-Embedding-Mistral on huggingface.co
34.9K
Total runs
0
24-hour runs
795
3-day runs
3.0K
7-day runs
-1.3K
30-day runs
More Information About SFR-Embedding-Mistral huggingface.co Model
SFR-Embedding-Mistral is an AI model hosted on huggingface.co, where the Salesforce SFR-Embedding-Mistral model can be used instantly. huggingface.co supports a free trial of the SFR-Embedding-Mistral model and also offers paid usage. The model can be called through an API, including from Node.js, Python, and plain HTTP.
The SFR-Embedding-Mistral page on huggingface.co is an online trial and API platform that exposes the model's capabilities, including API services, and provides a free online trial. You can try SFR-Embedding-Mistral online for free by clicking the link below.
Salesforce SFR-Embedding-Mistral online free url in huggingface.co:
SFR-Embedding-Mistral is an open-source model from GitHub that is free to install; any user can find SFR-Embedding-Mistral on GitHub and install it. In addition, huggingface.co hosts an installed instance of SFR-Embedding-Mistral that users can use directly for debugging and trial. Free installation through the API is also supported.
SFR-Embedding-Mistral install url in huggingface.co: