"""Streamlit page for creating, querying, and deleting a Pinecone index."""

import os

import streamlit as st
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer

# Sentence-transformers checkpoint used to embed search queries.
# NOTE(review): the default vector dimension offered in the UI (768) is
# presumably meant to match this model's output size -- confirm if either changes.
MODEL_NAME = 'msmarco-bert-base-dot-v5'

# st.session_state keys. Streamlit re-executes this script on every widget
# interaction, so the index handle must be stored here to survive from the
# "Create or Connect" click to a later "Search Query" click.
_INDEX_KEY = "pinecone_index"
_INDEX_NAME_KEY = "pinecone_index_name"

# Title of the Streamlit App
st.title("Pinecone Index Management with Streamlit")


def initialize_pinecone():
    """Build a Pinecone client from the PINECONE_API_KEY environment variable.

    Returns:
        A ``Pinecone`` client, or ``None`` when the variable is unset or
        empty (an error message is shown on the page in that case).
    """
    api_key = os.getenv('PINECONE_API_KEY')
    if not api_key:
        st.error(
            "Pinecone API key not found! "
            "Please set the PINECONE_API_KEY environment variable."
        )
        return None
    return Pinecone(api_key=api_key)


def create_or_connect_index(pc, index_name, dimension):
    """Create ``index_name`` if it does not exist yet, then open a handle to it.

    Args:
        pc: Pinecone client used to list, create, and open indexes.
        index_name: Name of the index to create or connect to.
        dimension: Vector dimension used only when the index is created.

    Returns:
        The object returned by ``pc.Index(index_name)``.
    """
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=dimension,
            metric='dotproduct',  # Change this based on your use case
            # Change to your cloud provider and region
            spec=ServerlessSpec(cloud='aws', region='us-west-2'),
        )
        st.success(f"Created new index '{index_name}'")
    else:
        st.info(f"Index '{index_name}' already exists.")

    return pc.Index(index_name)


def encode_query(model, query_text):
    """Encode ``query_text`` with ``model`` and return the result of ``.tolist()``."""
    return model.encode(query_text).tolist()


@st.cache_resource
def _load_model(model_name):
    """Load the sentence-transformers model once and reuse it across reruns."""
    return SentenceTransformer(model_name)


# Initialize Pinecone
pc = initialize_pinecone()

# If Pinecone initialized successfully, proceed with index management
if pc:
    # Default uses a hyphen: Pinecone index names may contain only lowercase
    # alphanumeric characters and '-', so an underscore would be rejected.
    index_name = st.text_input("Enter Index Name", "my-index")
    dimension = st.number_input("Enter Vector Dimension", min_value=1, value=768)

    # Button to create or connect to index
    if st.button("Create or Connect to Index"):
        connected = create_or_connect_index(pc, index_name, dimension)
        if connected is not None:
            # Remember the handle together with the name it belongs to.
            st.session_state[_INDEX_KEY] = connected
            st.session_state[_INDEX_NAME_KEY] = index_name
            st.success(f"Successfully connected to index '{index_name}'")

    # Recover the handle on reruns where the connect button was not the one
    # clicked. It is only reused while the name field still matches, so a
    # search never silently targets a different index than the one shown.
    index = None
    if st.session_state.get(_INDEX_NAME_KEY) == index_name:
        index = st.session_state.get(_INDEX_KEY)

    # Query input
    query_text = st.text_input(
        "Enter a Query to Search",
        "Can clinicians use the PHQ-9 to assess depression?",
    )

    # Button to encode query and search the Pinecone index
    if st.button("Search Query"):
        if query_text and index is not None:
            # Loaded lazily and cached, so the model is not rebuilt on
            # every rerun of the script.
            model = _load_model(MODEL_NAME)
            dense_vector = encode_query(model, query_text)
            st.write(f"Encoded Query Vector: {dense_vector}")

            # Search the index (sparse values can be added here as well)
            results = index.query(
                vector=dense_vector,
                top_k=5,
                include_metadata=True,
            )

            st.write("Search Results:")
            for match in results.matches:
                st.write(
                    f"ID: {match.id}, Score: {match.score}, Metadata: {match.metadata}"
                )
        else:
            st.error("Please enter a query and ensure the index is initialized.")

# Option to delete index. The `pc and` guard short-circuits before
# `index_name` is read, which matters because that name is only bound when
# the client was created successfully.
if st.button("Delete Index"):
    if pc and index_name in pc.list_indexes().names():
        pc.delete_index(index_name)
        # Drop the stored handle so a later search cannot hit a deleted index.
        if st.session_state.get(_INDEX_NAME_KEY) == index_name:
            st.session_state.pop(_INDEX_KEY, None)
            st.session_state.pop(_INDEX_NAME_KEY, None)
        st.success(f"Index '{index_name}' deleted successfully.")
    else:
        st.error("Index not found.")