Test / embedder.py
ArunSamespace's picture
Upload 8 files
9921884 verified
from typing import List
import requests
from langchain.pydantic_v1 import BaseModel
from langchain.schema.embeddings import Embeddings
from retry import retry
from tqdm import tqdm
# @dataclass
class CustomEmbeddings(BaseModel, Embeddings):
"""Wrapper around OpenAI embedding models.
To use, you should have the ``openai`` python package installed, and the
environment variable ``OPENAI_API_KEY`` set with your API key or pass it
as a named parameter to the constructor.
Example:
.. code-block:: python
from langchain.embeddings import OpenAIEmbeddings
openai = OpenAIEmbeddings(model_name="davinci", openai_api_key="my-api-key")
"""
model: str = ""
model_url: str = ""
api_key: str = "EMPTY"
# engine: str = None
# api_type: str = None
def _embedding_func(self, text: str) -> List[float]:
"""Call out to OpenAI's embedding endpoint."""
# replace newlines, which can negatively affect performance.
text = text.replace("\n", " ")
result = self.api_call(input_text=text)
return result['data'][0]['embedding']
@retry(tries=3, delay=2, backoff=2, exceptions=(requests.RequestException,))
def api_call(self, input_text: str):
data = {
"input": input_text,
"model": self.model
}
response = requests.post(
self.model_url,
headers={
"Content-Type": "application/json",
# "Authorization": f"Bearer {self.api_key}",
"api-key": self.api_key
},
json=data
)
if response.status_code == 200:
return response.json()
else:
response.raise_for_status()
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Call out to OpenAI's embedding endpoint for embedding search docs.
Args:
texts: The list of texts to embed.
Returns:
List of embeddings, one for each text.
"""
return [self._embedding_func(text) for text in tqdm(texts)]
def embed_query(self, text: str) -> List[float]:
"""Call out to OpenAI's embedding endpoint for embedding query text.
Args:
text: The text to embed.
Returns:
Embeddings for the text.
"""
return self._embedding_func(text)