File size: 5,297 Bytes
18c08a2 b7d0764 18c08a2 b7d0764 18c08a2 b7d0764 24d6a96 b7d0764 2d0606c b7d0764 18c08a2 b7d0764 1cdbe99 b7d0764 18c08a2 b7d0764 24d6a96 b7d0764 18c08a2 b7d0764 18c08a2 568fef8 2b88e44 ac09253 2b88e44 6cfc625 2b88e44 6cfc625 18c08a2 2d0606c 18c08a2 2d0606c 18c08a2 b7d0764 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
import difflib
import json
import numpy as np
import streamlit as st
from pyserini.search.lucene import LuceneSearcher
def read_json(file_name):
    """Load and return the parsed contents of a JSON file.

    Args:
        file_name: Path of the JSON file to read.

    Returns:
        The decoded JSON document (whatever ``json.load`` produces for it).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; do not rely on the platform's default encoding,
    # which breaks non-ASCII content on some systems.
    with open(file_name, "r", encoding="utf-8") as f:
        return json.load(f)
class SearchApplication:
    """Streamlit page that searches a Lucene index of ChatGPT repositories.

    Constructing an instance renders the whole page: the header, the query
    box and search button, the row of popular-word shortcut buttons, and
    finally either the search results or spelling suggestions.

    Attributes:
        title: Page title, also used as the visible header.
        searcher: ``LuceneSearcher`` opened on ``INDEX_DIR``.
        query: Current query text (typed, or set by a popular-word button).
        search_button: ``True`` when the search button was clicked.
        candidate_words: Words used to build suggestions for empty results.
    """

    # Location of the Lucene index and of the suggestion vocabulary.
    INDEX_DIR = "indexes/docs"
    CANDIDATE_WORDS_FILE = "candidate_words.json"
    # Button labels; the query issued for each is the lower-cased label.
    POPULAR_WORDS = ("Prompt", "Chatbot", "Langchain", "Extension", "LLMs", "API")
    # Upper bound on hits requested from the searcher.
    MAX_SEARCH_RESULTS = 500
    # Number of "did you mean" words shown when nothing matches.
    NUM_SUGGESTIONS = 6

    def __init__(self):
        """Build the page top to bottom and render results for the query."""
        self.title = "Awesome ChatGPT repositories search"
        # Page config is applied before any other Streamlit element is drawn.
        self.set_page_config()
        self.searcher = self.set_searcher()

        st.header(self.title)
        col1, col2 = st.columns(2)
        with col1:
            self.query = st.text_input("Search English words", value="")
        with col2:
            # "#" appears to be used as a vertical spacer so the button
            # lines up with the text input.
            st.write("#")
            self.search_button = st.button("🔎")

        st.caption(
            "You can search for open-source software from [1250+ "
            " repositories](https://github.com/taishi-i/awesome-ChatGPT-repositories)."
        )
        st.write("#")

        candidate_words_json = read_json(self.CANDIDATE_WORDS_FILE)
        self.candidate_words = candidate_words_json["candidate_words"]

        # Popular-word buttons may overwrite ``self.query``, so they must be
        # processed before the results are rendered.
        self.show_popular_words()
        self.show_search_results()

    def set_page_config(self):
        """Apply the browser-tab title, icon and layout for the page."""
        st.set_page_config(
            page_title=self.title,
            page_icon="😎",
            layout="centered",
        )

    def set_searcher(self):
        """Open and return a ``LuceneSearcher`` on the local index."""
        return LuceneSearcher(self.INDEX_DIR)

    def show_popular_words(self):
        """Render one shortcut button per popular word.

        Clicking a button replaces ``self.query`` with the lower-cased label.
        """
        st.caption("Popular words")
        columns = st.columns(len(self.POPULAR_WORDS))
        for column, label in zip(columns, self.POPULAR_WORDS):
            with column:
                if st.button(label):
                    self.query = label.lower()

    def show_search_results(self):
        """Run the search and render the hits, or suggestions if none match.

        Nothing is rendered unless there is a query or the search button was
        clicked. Hits are listed by descending ``freq``.
        """
        if not (self.query or self.search_button):
            return

        st.write("#")
        hits = self.searcher.search(self.query, k=self.MAX_SEARCH_RESULTS)
        num_hits = len(hits)
        st.write(f"A total of {num_hits} repositories found.")

        if num_hits > 0:
            records = self._load_records(hits)
            records.sort(key=self._frequency, reverse=True)
            for record in records:
                self._render_record(record)
        elif self.query:
            self._render_suggestions()

    def _load_records(self, hits):
        """Return the decoded JSON document stored for each search hit."""
        records = []
        for hit in hits:
            doc = self.searcher.doc(hit.docid)
            if doc is None:
                # The index returned a hit whose stored document is gone;
                # skip it instead of crashing the whole page.
                continue
            records.append(json.loads(doc.raw()))
        return records

    @staticmethod
    def _frequency(record):
        """Sort key: the record's ``freq`` value, 0 when it is missing."""
        return record.get("freq", 0)

    @staticmethod
    def _format_info(record):
        """Return the "language / license" caption line for a record.

        Missing or empty values are replaced by an explicit "Unknown" label.
        """
        language = record.get("language") or "Language: Unknown"
        license_name = record.get("license") or "License: Unknown"
        return " / ".join((language, license_name))

    def _render_record(self, record):
        """Render one repository: linked title, description and metadata."""
        st.write("---")
        st.subheader(f"[{record['project_name']}]({record['url']})")
        st.write(record["description"])
        st.caption(self._format_info(record))

    @staticmethod
    def _suggest_words(query, candidate_words, limit=6):
        """Return up to ``limit`` distinct candidates most similar to ``query``.

        Similarity is ``difflib.SequenceMatcher(None, query, word).ratio()``.
        Candidates are de-duplicated before ranking so duplicates cannot
        shrink the list, and ties keep their order in ``candidate_words``.
        """
        unique_words = list(dict.fromkeys(candidate_words))
        if not unique_words or limit <= 0:
            return []
        scores = [
            difflib.SequenceMatcher(None, query, word).ratio()
            for word in unique_words
        ]
        # Negate for descending order; a stable sort makes ties deterministic.
        ranked = np.argsort(-np.asarray(scores), kind="stable")[:limit]
        return [unique_words[i] for i in ranked]

    def _render_suggestions(self):
        """Render a numbered list of candidate words close to the query."""
        suggestions = self._suggest_words(
            self.query, self.candidate_words, self.NUM_SUGGESTIONS
        )
        st.caption("Suggestions")
        for i, word in enumerate(suggestions, start=1):
            st.write(f"{i}: {word}")
def main() -> None:
    """Build the search page.

    Constructing ``SearchApplication`` renders the page as a side effect of
    its ``__init__``; the instance itself is not needed afterwards.
    """
    SearchApplication()
# Build the page only when this file is executed as a script, so importing
# the module does not render anything.
if __name__ == "__main__":
    main()
|