import gradio as gr
from bs4 import BeautifulSoup
from langchain.embeddings import HuggingFaceEmbeddings
import pickle
import torch
import io
class CPU_Unpickler(pickle.Unpickler):
    """Unpickler that remaps torch tensors saved on GPU so they load on CPU."""
    def find_class(self, module, name):
        if module == 'torch.storage' and name == '_load_from_bytes':
            return lambda b: torch.load(io.BytesIO(b), map_location='cpu')
        return super().find_class(module, name)
def get_hugging_face_model():
    model_name = "mchochlov/codebert-base-cd-ft"
    hf = HuggingFaceEmbeddings(model_name=model_name)
    return hf
def get_db():
    # Load the pre-built vector store; CPU_Unpickler keeps it usable on CPU-only hosts.
    with open("codesearchdb.pickle", "rb") as f:
        db = CPU_Unpickler(f).load()
    return db
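# Note: "codesearchdb.pickle" is assumed to be a pre-built LangChain vector store
# (e.g. FAISS) whose documents' page_content is HTML containing <a href> links to
# Leetcode questions. A rough, illustrative sketch of how such a store might be
# created and pickled (not executed by this app; the document list is an assumption):
#
#     from langchain.vectorstores import FAISS
#     docs = [...]  # langchain Document objects with HTML page_content
#     db = FAISS.from_documents(docs, get_hugging_face_model())
#     with open("codesearchdb.pickle", "wb") as f:
#         pickle.dump(db, f)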
def get_similar_links(query, db, embeddings):
    embedding_vector = embeddings.embed_query(query)
    docs_and_scores = db.similarity_search_by_vector(embedding_vector, k=10)
    # Each retrieved document stores HTML; collect every link it contains.
    links = []
    for doc in docs_and_scores:
        soup = BeautifulSoup(doc.page_content, 'html.parser')
        links.extend(a['href'] for a in soup.find_all('a', href=True))
    return links
def find_similar_questions(text_input):
    embeddings = get_hugging_face_model()
    db = get_db()
    links = get_similar_links(text_input, db, embeddings)
    return "\n".join(set(links))
iface = gr.Interface(
    fn=find_similar_questions,
    inputs=gr.Textbox(lines=20, label="Enter a Code Example From Leetcode"),
    outputs=gr.Textbox(label="Similar Questions on Leetcode"),
    title="DSASearch Engine",
    description="Find similar questions on Leetcode based on a code example.",
    allow_flagging="never",
)
iface.launch()