File size: 699 Bytes
2a00cb3
1ad4e76
2a00cb3
8765030
2a00cb3
1ad4e76
2a00cb3
 
 
8765030
 
1ad4e76
 
 
8765030
1ad4e76
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import os
from fastapi import HTTPException
import gensim.downloader as api

# Read GENSIM_DATA_DIR, falling back to a default path when it is unset.
# NOTE(review): this only reads the variable into `gensim_data_dir`; it does
# not set it, and the value is not used anywhere else in the visible code.
# `gensim.downloader` has already been imported above, so if it resolves its
# data directory at import time this line cannot influence where the model
# is stored -- confirm, and export the variable before the import if needed.
gensim_data_dir = os.getenv("GENSIM_DATA_DIR", "/home/user/gensim-data")

# Load the GloVe model once at import time so that get_embedding() can reuse
# it on every call.
# NOTE(review): presumably this downloads the model on first use, making the
# first import slow and network-dependent -- confirm for deployment.
model = api.load("glove-wiki-gigaword-50")


def get_embedding(word: str) -> list:
    """Return the embedding of *word*, rescaled linearly towards [-1, 1].

    The word is lower-cased before it is looked up in the module-level
    ``model``. Each component is then mapped with a fixed min-max
    transform, ``(x - min_val) / (max_val - min_val) * 2 - 1``, so that
    ``min_val`` maps to -1 and ``max_val`` maps to 1.

    Args:
        word: The word to look up; case is ignored.

    Returns:
        The rescaled vector, in whatever type the model lookup and the
        arithmetic produce.
        NOTE(review): with gensim KeyedVectors this is presumably a NumPy
        array rather than a ``list`` as annotated; confirm what callers
        expect before converting.

    Raises:
        HTTPException: With status 404 when the lower-cased word is not in
            the model's vocabulary.
    """
    # Hard-coded bounds for the min-max rescaling.
    # NOTE(review): presumably the extreme component values of the
    # glove-wiki-gigaword-50 vectors -- confirm. Components outside these
    # bounds are not clipped and would fall outside [-1, 1].
    min_val = -5.4593
    max_val = 5.3101

    # Keep the try body to the lookup alone so that only a vocabulary miss
    # is translated into a 404.
    try:
        embedding = model[word.lower()]
    except KeyError:
        print("Word not in vocabulary")
        # The KeyError adds nothing to the HTTP response, so suppress it.
        raise HTTPException(
            status_code=404, detail="Word not in vocabulary"
        ) from None

    # Min-max normalize from [min_val, max_val] to [-1, 1].
    return (embedding - min_val) / (max_val - min_val) * 2 - 1