# Spaces: Sleeping, Sleeping (appears to be page-status text captured with the file; not program code)
import os | |
import streamlit as st | |
from pinecone import Pinecone, ServerlessSpec | |
from sentence_transformers import SentenceTransformer | |
# Handle to the connected Pinecone index; None means no index is connected.
index = None

# Render the page heading.
st.title("Pinecone Index Management with Streamlit")
def initialize_pinecone():
    """Build a Pinecone client from the ``PINECONE_API_KEY`` environment variable.

    Returns:
        A ``Pinecone`` client when the variable holds a non-empty value;
        otherwise ``None``, after an error message has been shown on the
        Streamlit page.
    """
    pinecone_key = os.environ.get('PINECONE_API_KEY')

    # Guard clause: an unset or empty key cannot be used to build a client.
    if not pinecone_key:
        st.error("Pinecone API key not found! Please set the PINECONE_API_KEY environment variable.")
        return None

    return Pinecone(api_key=pinecone_key)
def create_or_connect_index(pc, index_name, dimension, *, metric='dotproduct',
                            cloud='aws', region='us-west-2'):
    """Return a handle to ``index_name``, creating the index first if needed.

    Args:
        pc: Pinecone client used to list, create and open indexes.
        index_name: Name of the index to create or connect to.
        dimension: Vector dimension used when the index has to be created.
            Coerced to ``int`` because UI number widgets may hand back a float.
        metric: Similarity metric for a newly created index.
        cloud: Cloud provider passed to ``ServerlessSpec`` for a new index.
        region: Cloud region passed to ``ServerlessSpec`` for a new index.

    Returns:
        The object returned by ``pc.Index(index_name)``.

    ``metric``, ``cloud`` and ``region`` are only used when the index does not
    exist yet; an existing index is opened as-is.
    """
    existing_names = pc.list_indexes().names()

    if index_name in existing_names:
        st.info(f"Index '{index_name}' already exists.")
    else:
        pc.create_index(
            name=index_name,
            dimension=int(dimension),
            metric=metric,
            spec=ServerlessSpec(cloud=cloud, region=region),
        )
        st.success(f"Created new index '{index_name}'")

    # Connect to the (new or pre-existing) index.
    return pc.Index(index_name)
def encode_query(model, query_text):
    """Encode ``query_text`` with ``model`` and return the result as a list.

    Calls ``model.encode`` on the text and converts whatever it returns with
    ``.tolist()``.
    """
    embedding = model.encode(query_text)
    return embedding.tolist()
# --- Page body ---------------------------------------------------------------
# Streamlit re-executes this script from the top on every widget interaction,
# so state that must survive a rerun (the connected index handle) is kept in
# st.session_state instead of a module-level variable. No `global` statement is
# used: at module scope it is unnecessary, and declaring a name global after it
# has already been assigned is a SyntaxError.


@st.cache_resource
def _load_encoder(model_name):
    """Load the sentence-transformers model once and reuse it across reruns."""
    return SentenceTransformer(model_name)


# Initialize Pinecone
pc = initialize_pinecone()

# If Pinecone initialized successfully, proceed with index management
if pc:
    # Pinecone index names may only contain lowercase letters, digits and '-',
    # so the default must not contain an underscore.
    index_name = st.text_input("Enter Index Name", "my-index")
    dimension = st.number_input("Enter Vector Dimension", min_value=1, value=768)

    # Button to create or connect to index
    if st.button("Create or Connect to Index"):
        connected = create_or_connect_index(pc, index_name, int(dimension))
        if connected is not None:
            # Remember the handle (and which index it belongs to) for later reruns.
            st.session_state["index"] = connected
            st.session_state["connected_index_name"] = index_name
            st.success(f"Successfully connected to index '{index_name}'")

    # Handle from this run or an earlier one; None if nothing is connected yet.
    index = st.session_state.get("index")

    # Model for query encoding (cached so it is not reloaded on every rerun)
    model = _load_encoder('msmarco-bert-base-dot-v5')

    # Query input
    query_text = st.text_input("Enter a Query to Search", "Can clinicians use the PHQ-9 to assess depression?")

    # Button to encode query and search the Pinecone index
    if st.button("Search Query"):
        if query_text and index is not None:
            dense_vector = encode_query(model, query_text)
            st.write(f"Encoded Query Vector: {dense_vector}")

            # Search the index (sparse values can be added here as well)
            results = index.query(
                vector=dense_vector,
                top_k=5,
                include_metadata=True,
            )

            st.write("Search Results:")
            for match in results.matches:
                st.write(f"ID: {match.id}, Score: {match.score}, Metadata: {match.metadata}")
        else:
            st.error("Please enter a query and ensure the index is initialized.")

    # Option to delete index
    if st.button("Delete Index"):
        if index_name in pc.list_indexes().names():
            pc.delete_index(index_name)
            # Drop the stored handle if it pointed at the index just deleted.
            if st.session_state.get("connected_index_name") == index_name:
                st.session_state.pop("index", None)
                st.session_state.pop("connected_index_name", None)
            st.success(f"Index '{index_name}' deleted successfully.")
        else:
            st.error("Index not found.")