PinoCorgi committed on
Commit
2a1f5d0
•
1 Parent(s): 995d9e6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -0
app.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from bs4 import BeautifulSoup
3
+ from langchain.embeddings import HuggingFaceEmbeddings
4
+ import pickle
5
+ import torch
6
+ import io
7
+
8
class CPU_Unpickler(pickle.Unpickler):
    """Unpickler that loads torch storages with ``map_location='cpu'``.

    Presumably used so that objects pickled on a GPU machine can be restored
    on a CPU-only host -- confirm against how the pickle file was produced.
    """

    def find_class(self, module, name):
        """Resolve ``module.name`` for the unpickler.

        The ``torch.storage._load_from_bytes`` hook is replaced by a loader
        that passes ``map_location='cpu'`` to ``torch.load``; every other
        lookup is delegated unchanged to ``pickle.Unpickler.find_class``.
        """
        is_storage_loader = (
            module == 'torch.storage' and name == '_load_from_bytes'
        )
        if not is_storage_loader:
            return super().find_class(module, name)

        def _load_on_cpu(raw_bytes):
            return torch.load(io.BytesIO(raw_bytes), map_location='cpu')

        return _load_on_cpu
13
+
14
+
15
# Embedding models already constructed in this process, keyed by model name.
_EMBEDDING_MODELS = {}


def get_hugging_face_model(model_name="mchochlov/codebert-base-cd-ft"):
    """Return the HuggingFace embedding model, constructing it on first use.

    The instance is cached per ``model_name`` so that repeated calls (for
    example one per incoming query) reuse the same object instead of
    rebuilding the model every time.

    Args:
        model_name: Name passed to ``HuggingFaceEmbeddings``; defaults to the
            model this application was written for.

    Returns:
        The ``HuggingFaceEmbeddings`` instance for ``model_name``.
    """
    model = _EMBEDDING_MODELS.get(model_name)
    if model is None:
        model = HuggingFaceEmbeddings(model_name=model_name)
        _EMBEDDING_MODELS[model_name] = model
    return model
19
+
20
+
21
# Databases already unpickled in this process, keyed by file path.
_LOADED_DBS = {}


def get_db(path="codesearchdb.pickle"):
    """Return the pickled search database, loading it from disk on first use.

    The unpickled object is cached per ``path`` so the file is read and
    deserialized once per process rather than once per call.

    Args:
        path: Location of the pickle file; defaults to the file this
            application was written for.

    Returns:
        The object stored in the pickle file, loaded through
        ``CPU_Unpickler``.

    Raises:
        OSError: If the file cannot be opened.
    """
    if path not in _LOADED_DBS:
        # NOTE(security): unpickling can execute arbitrary code. Only point
        # this at a pickle file that comes from a trusted source.
        with open(path, "rb") as f:
            _LOADED_DBS[path] = CPU_Unpickler(f).load()
    return _LOADED_DBS[path]
25
+
26
+
27
def get_similar_links(query, db, embeddings, k=10):
    """Collect the hyperlinks found in the documents most similar to ``query``.

    Args:
        query: Text to embed and search with.
        db: Object exposing ``similarity_search_by_vector(vector, k=...)``
            whose results have a ``page_content`` attribute holding HTML.
        embeddings: Object exposing ``embed_query(text)``.
        k: Number of documents to request from ``db`` (defaults to 10).

    Returns:
        A flat list of every ``href`` value found in the returned documents,
        in the order the documents were returned. Duplicates are kept.
    """
    query_vector = embeddings.embed_query(query)
    documents = db.similarity_search_by_vector(query_vector, k=k)
    links = []
    for document in documents:
        soup = BeautifulSoup(document.page_content, 'html.parser')
        # Only anchors that actually carry an href attribute are considered.
        links.extend(a['href'] for a in soup.find_all('a', href=True))
    return links
41
+
42
+
43
def find_similar_questions(text_input):
    """Return links related to ``text_input``, one per line.

    Args:
        text_input: The text entered by the user.

    Returns:
        A newline-joined string of unique links, in the order returned by
        ``get_similar_links``. An empty string is returned for empty or
        whitespace-only input.
    """
    # Nothing meaningful to search for; skip loading the model and database.
    if not text_input or not text_input.strip():
        return ""

    embeddings = get_hugging_face_model()
    db = get_db()
    links = get_similar_links(text_input, db, embeddings)
    # dict.fromkeys removes duplicates while keeping first-seen order; a set
    # would discard the ranking and give a different order on every run.
    return "\n".join(dict.fromkeys(links))
49
+
50
+
51
# Gradio UI: a multi-line text box in, a text box of matching links out.
# Components are taken from the top-level ``gr`` namespace and flagging is
# disabled with the string "never"; the ``gr.inputs`` / ``gr.outputs``
# namespaces and a boolean ``allow_flagging`` are deprecated in Gradio.
iface = gr.Interface(
    fn=find_similar_questions,
    inputs=gr.Textbox(lines=20, label="Enter a Code Example"),
    outputs=gr.Textbox(label="Similar Questions on Leetcode"),
    title="📢 DSASearch Engine 🤖",
    description="Find similar questions on Leetcode based on a code example.",
    allow_flagging="never",
)

# Start the server only when executed as a script, so importing this module
# does not launch a web app as a side effect.
if __name__ == "__main__":
    iface.launch()