emb-rep / FastAPI /app /utils /embedding.py
abadesalex's picture
bar graphs
1ad4e76
raw
history blame contribute delete
699 Bytes
import os
from fastapi import HTTPException
import gensim.downloader as api
# Read the gensim data directory from the environment, falling back to a
# fixed default path when GENSIM_DATA_DIR is unset.
# NOTE(review): this only reads the variable; nothing in the visible code
# writes it back to os.environ or passes it on, so the fallback path does
# not appear to influence where gensim stores its data — confirm intent.
gensim_data_dir = os.getenv("GENSIM_DATA_DIR", "/home/user/gensim-data")
# Load the pre-trained GloVe vectors once, at import time, so every call to
# get_embedding() shares the same in-memory model. Importing this module
# therefore blocks until the load finishes.
# NOTE(review): presumably 50-dimensional vectors (per the model name), and
# gensim's downloader presumably fetches and caches them on first use — verify.
model = api.load("glove-wiki-gigaword-50")
def get_embedding(word: str) -> list:
    """Return the embedding of *word*, linearly rescaled by fixed bounds.

    The word is lower-cased and looked up in the module-level ``model``.
    Every component of the resulting vector is then mapped with a min-max
    transform that sends ``-5.4593`` to ``-1`` and ``5.3101`` to ``1``.

    Args:
        word: The word to embed; it is lower-cased before the lookup.

    Returns:
        The rescaled vector.
        NOTE(review): the element-wise arithmetic below means the model
        yields an array-like object (presumably a NumPy array), not a plain
        ``list`` as the annotation says — confirm and convert at the call
        site if JSON serialisation is needed.

    Raises:
        HTTPException: With status 404 when the lower-cased word is not in
            the model's vocabulary.
    """
    # Fixed bounds of the min-max transform; presumably the smallest and
    # largest component values found in the model — TODO confirm.
    lower_bound = -5.4593
    upper_bound = 5.3101

    # Only the vocabulary lookup can raise KeyError, so keep the try minimal.
    try:
        raw_vector = model[word.lower()]
    except KeyError:
        print("Word not in vocabulary")
        raise HTTPException(status_code=404, detail="Word not in vocabulary")

    # Map [lower_bound, upper_bound] linearly onto [-1, 1].
    return (raw_vector - lower_bound) / (upper_bound - lower_bound) * 2 - 1