Spaces:
Runtime error
Runtime error
File size: 1,265 Bytes
9e80f82 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
from embedding import embedding
from preprocessing import preprocess
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import streamlit as st
def pipeline(input_doc: str, ori_documents, embedding_type='bert'):
    """Rank ``ori_documents`` by cosine similarity to ``input_doc``.

    The query text and every document's ``'content'`` are gathered into one
    array (query first), passed through ``preprocess`` and ``embedding``, and
    the similarity of the query vector against each document vector is
    computed with ``cosine_similarity``.

    Args:
        input_doc: The query text every document is compared against.
        ori_documents: Sequence of mappings, each providing at least the
            keys ``'content'`` (text to compare) and ``'name'`` (label
            copied into the result).
        embedding_type: Forwarded to ``embedding`` as its ``embedding``
            keyword argument.

    Returns:
        A tuple ``(results, result_pairwise)``:

        * ``results`` -- list of dicts ordered from most to least similar,
          each with keys ``'rank'``, ``'name'`` and ``'similarity'``.
          NOTE: ``'rank'`` holds the document's index in ``ori_documents``,
          not its ordinal position in the ranking.
        * ``result_pairwise`` -- the similarity scores in the same
          (descending) order as ``results``.
    """
    # Assemble the corpus as a list and convert it to an array in one step.
    # Inserting the query into an already-built string array (np.insert)
    # casts it to that array's fixed-width dtype, which silently truncates a
    # query longer than the longest document, and fails outright when
    # ``ori_documents`` is empty (the empty array is float64).
    corpus = [input_doc]
    corpus.extend(doc['content'] for doc in ori_documents)
    documents = np.array(corpus)

    preprocessed_documents = preprocess(documents)

    # NOTE(review): this message is printed for every ``embedding_type``,
    # not only 'bert'.
    print("Encoding with BERT...")
    documents_vectors = embedding(preprocessed_documents, embedding=embedding_type)
    print("Encoding finished")

    # Row 0 is the query; column 0 is its similarity with itself, so drop it
    # and keep only query-vs-document scores.
    pairwise = cosine_similarity(documents_vectors)
    pairwise = pairwise[0, 1:]

    # argsort is ascending; reverse it for most-similar-first order.
    sorted_idx = np.argsort(pairwise)[::-1]
    result_pairwise = pairwise[sorted_idx]

    results = []
    print('Resume ranking:')
    for idx, score in zip(sorted_idx, result_pairwise):
        results.append({
            'rank': idx,
            'name': ori_documents[idx]['name'],
            'similarity': score.item(),
        })
        print(f'Resume of candidite {idx}')
        print(f'Cosine Similarity: {score}\n')
    return results, result_pairwise