import json
import logging
import os

from fastapi import HTTPException, status
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.llms.openai import OpenAI
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec

from config import PINECONE_CONFIG

logger = logging.getLogger(__name__)


class IndexManager:
    """Create and load a Pinecone-backed LlamaIndex ``VectorStoreIndex``.

    Attributes:
        vector_index: The most recently built or loaded ``VectorStoreIndex``,
            or ``None`` until ``build_indexes`` / ``load_existing_indexes``
            succeeds.
        index_name: Name of the Pinecone index this manager operates on.
        dimension: Vector dimension used when the Pinecone index is created.
    """

    DEFAULT_INDEX_NAME = "summarizer-semantic-index"
    # NOTE(review): presumably matches the embedding model's output size
    # (1536 is the size of several OpenAI embedding models) - confirm.
    DEFAULT_DIMENSION = 1536
    METRIC = "cosine"
    CLOUD = "aws"
    REGION = "us-east-1"

    def __init__(
        self,
        index_name: str = DEFAULT_INDEX_NAME,
        dimension: int = DEFAULT_DIMENSION,
    ) -> None:
        """Initialize the manager without contacting Pinecone.

        Args:
            index_name: Pinecone index to create or load.
            dimension: Vector dimension to use if the index must be created.
        """
        self.vector_index = None
        self.index_name = index_name
        self.dimension = dimension

    def _get_pinecone_client(self):
        """Return a Pinecone client built from ``PINECONE_CONFIG``.

        Raises:
            ValueError: If ``PINECONE_CONFIG.PINECONE_API_KEY`` is empty.
        """
        api_key = PINECONE_CONFIG.PINECONE_API_KEY
        if not api_key:
            raise ValueError(
                "Pinecone API key is missing. Please set it in environment variables."
            )
        return Pinecone(api_key=api_key)

    def _create_pinecone_index(self, client):
        """Return a handle to the Pinecone index, creating it if it is absent.

        Args:
            client: Pinecone client as returned by ``_get_pinecone_client``.

        Returns:
            The object returned by ``client.Index(self.index_name)``.
        """
        if self.index_name not in client.list_indexes().names():
            client.create_index(
                name=self.index_name,
                dimension=self.dimension,
                metric=self.METRIC,
                spec=ServerlessSpec(cloud=self.CLOUD, region=self.REGION),
            )
        return client.Index(self.index_name)

    def _initialize_vector_store(self, pinecone_index):
        """Wrap a Pinecone index in a LlamaIndex ``StorageContext``.

        Args:
            pinecone_index: Index handle from ``_create_pinecone_index``.

        Returns:
            A ``StorageContext`` whose vector store is backed by the index.
        """
        vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
        return StorageContext.from_defaults(vector_store=vector_store)

    def build_indexes(self, nodes) -> str:
        """Build the vector index from ``nodes`` and store it on the manager.

        The Pinecone index is created first if it does not exist yet. On
        success the new index is kept in ``self.vector_index`` with index id
        ``"vector"``.

        Args:
            nodes: Nodes passed straight to ``VectorStoreIndex``.

        Returns:
            A JSON string with ``"status"`` and ``"message"`` keys.

        Raises:
            HTTPException: Re-raised unchanged if one is raised while
                building; any other failure is wrapped in a 500 response.
        """
        try:
            client = self._get_pinecone_client()
            pinecone_index = self._create_pinecone_index(client)
            storage_context = self._initialize_vector_store(pinecone_index)
            self.vector_index = VectorStoreIndex(
                nodes, storage_context=storage_context
            )
            self.vector_index.set_index_id("vector")
        except HTTPException:
            # Re-raise HTTPExceptions untouched to ensure FastAPI handles them.
            raise
        except Exception as exc:
            logger.exception("Error building vector index")
            # Chain the cause so the original traceback is not lost.
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=f"Error building indexes: {exc}",
            ) from exc

        logger.info("Vector Index ID: %s", self.vector_index.index_id)
        logger.info("Vector Index created successfully.")
        response = {
            "status": "success",
            "message": "Vector Index created successfully.",
        }
        return json.dumps(response)

    def load_existing_indexes(self):
        """Load the existing Pinecone index as a ``VectorStoreIndex``.

        The loaded index is also stored in ``self.vector_index`` so the
        manager's state matches what ``build_indexes`` leaves behind.

        Returns:
            The ``VectorStoreIndex`` built from the existing vector store.

        Raises:
            Exception: Any error raised while connecting or loading is logged
                and re-raised unchanged.
        """
        try:
            client = self._get_pinecone_client()
            pinecone_index = client.Index(self.index_name)
            # The stats call is made regardless of log level, as the original
            # code always issued it before building the vector store.
            logger.info(
                "Pinecone index stats: %s", pinecone_index.describe_index_stats()
            )
            vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
            vector_index = VectorStoreIndex.from_vector_store(vector_store)
        except Exception:
            logger.exception("Error loading existing indexes")
            raise

        self.vector_index = vector_index
        logger.info("Existing Vector Index loaded successfully.")
        return vector_index