# Hugging Face Space: taishi-i/awesome-ChatGPT-repositories-search (status: Running; file size: 5,297 bytes)


import difflib
import json

import numpy as np
import streamlit as st
from pyserini.search.lucene import LuceneSearcher


def read_json(file_name):
    """Load and return the parsed contents of a JSON file.

    Args:
        file_name: Path of the JSON file to read.

    Returns:
        The deserialized JSON value (whatever ``json.load`` produces).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Pin the encoding: without it, open() falls back to the platform
    # locale and non-ASCII text may be decoded incorrectly.
    with open(file_name, "r", encoding="utf-8") as f:
        return json.load(f)


class SearchApplication:
    """Streamlit page that searches the repository index.

    Constructing an instance renders the whole page: it configures the
    page, opens the Lucene index under ``indexes/docs``, draws the query
    box and search button, loads the suggestion vocabulary from
    ``candidate_words.json``, draws the popular-word shortcuts and finally
    shows the search results (or spelling suggestions) for the query.
    """

    # Labels of the shortcut buttons. Clicking one searches for the
    # lower-cased label.
    POPULAR_WORDS = (
        "Prompt",
        "Chatbot",
        "Langchain",
        "Extension",
        "LLMs",
        "API",
    )

    # Maximum number of hits requested from the searcher per query.
    MAX_SEARCH_RESULTS = 500

    # Maximum number of suggestions shown when a query has no hits.
    NUM_CANDIDATE_WORDS = 6

    def __init__(self):
        """Render the page; see the class docstring for the sequence."""
        self.title = "Awesome ChatGPT repositories search"

        # Page configuration is issued before any other Streamlit call.
        self.set_page_config()
        self.searcher = self.set_searcher()

        st.header(self.title)
        col1, col2 = st.columns(2)
        with col1:
            self.query = st.text_input("Search English words", value="")

        with col2:
            # "#" renders as an empty heading and is used as a vertical spacer.
            st.write("#")
            self.search_button = st.button("🔎")

        st.caption(
            "You can search for open-source software from [1250+ "
            " repositories](https://github.com/taishi-i/awesome-ChatGPT-repositories)."
        )
        st.write("#")

        candidate_words_file = "candidate_words.json"
        candidate_words_json = read_json(candidate_words_file)
        self.candidate_words = candidate_words_json["candidate_words"]

        # The shortcuts may override self.query, so they run before the search.
        self.show_popular_words()
        self.show_search_results()

    def set_page_config(self):
        """Set the browser tab title, icon and page layout."""
        st.set_page_config(
            page_title=self.title,
            page_icon="😎",
            layout="centered",
        )

    def set_searcher(self):
        """Return a ``LuceneSearcher`` opened on ``indexes/docs``."""
        return LuceneSearcher("indexes/docs")

    def show_popular_words(self):
        """Draw one shortcut button per popular word.

        A clicked button replaces ``self.query`` with the lower-cased
        label for the current run.
        """
        st.caption("Popular words")

        columns = st.columns(len(self.POPULAR_WORDS))
        for column, label in zip(columns, self.POPULAR_WORDS):
            with column:
                if st.button(label):
                    self.query = label.lower()

    def show_search_results(self):
        """Run the query and render its hits, or suggestions if none.

        Nothing is rendered unless there is a query or the search button
        was pressed.
        """
        if not (self.query or self.search_button):
            return

        st.write("#")

        search_results = self.searcher.search(
            self.query, k=self.MAX_SEARCH_RESULTS
        )
        num_search_results = len(search_results)
        st.write(f"A total of {num_search_results} repositories found.")

        if num_search_results > 0:
            self._show_repositories(search_results)
        elif self.query:
            self._show_suggestions()

    def _show_repositories(self, search_results):
        """Render every hit, ordered by descending ``freq``."""
        # Each hit's stored raw document is a JSON object describing one
        # repository.
        repositories = [
            json.loads(self.searcher.doc(result.docid).raw())
            for result in search_results
        ]
        repositories.sort(key=lambda repo: repo["freq"], reverse=True)

        for repository in repositories:
            description = repository["description"]
            url = repository["url"]
            project_name = repository["project_name"]

            st.write("---")
            st.subheader(f"[{project_name}]({url})")
            st.write(description)
            st.caption(self._repository_info(repository))

    def _show_suggestions(self):
        """Render the candidate words most similar to the query."""
        suggestions = self._suggest_words(
            self.query, self.candidate_words, self.NUM_CANDIDATE_WORDS
        )
        st.caption("Suggestions")
        for i, word in enumerate(suggestions, start=1):
            st.write(f"{i}: {word}")

    @staticmethod
    def _repository_info(repository):
        """Return the ``"<language> / <license>"`` caption for a repository.

        A missing, ``None`` or empty value is replaced by an explicit
        "Unknown" placeholder so the caption never has a blank field.
        """
        language = repository.get("language") or "Language: Unknown"
        license_name = repository.get("license") or "License: Unknown"
        return " / ".join((language, license_name))

    @staticmethod
    def _suggest_words(query, candidate_words, limit):
        """Return up to ``limit`` candidate words most similar to ``query``.

        Similarity is ``difflib.SequenceMatcher(...).ratio()``. The result
        is ordered best match first and duplicate words are dropped,
        keeping the first occurrence.
        """
        scores = [
            difflib.SequenceMatcher(None, query, word).ratio()
            for word in candidate_words
        ]
        best_indexes = np.argsort(scores)[::-1][:limit]
        # dict.fromkeys de-duplicates while preserving ranking order.
        return list(dict.fromkeys(candidate_words[i] for i in best_indexes))


def main():
    """Render the search page by constructing a ``SearchApplication``."""
    SearchApplication()


# Build the page only when this file is executed as a script.
if __name__ == "__main__":
    main()