Bot_Development / script /get_topic.py
dsmultimedika's picture
Build Application
9002555
raw
history blame
No virus
2.44 kB
import nest_asyncio
import os
from dotenv import load_dotenv
from jinja2 import Template
from pydantic import BaseModel, Field
from pymongo.mongo_client import MongoClient
from llama_index.program.openai import OpenAIPydanticProgram
from llama_index.core.extractors import PydanticProgramExtractor
from llama_index.llms.openai import OpenAI
from core.prompt import ADD_METADATA_TEMPLATE
from core.summarization.summarizer import SummarizeGenerator
# Patch asyncio so that an already-running event loop can be re-entered
# (the documented purpose of nest_asyncio).
# NOTE(review): presumably needed because the LlamaIndex extractor is driven
# from a context that already has a running loop — confirm.
nest_asyncio.apply()
# Load variables from a .env file into the process environment;
# extract_topic() below reads MONGO_URI through os.getenv.
load_dotenv()
class NodeMetadata(BaseModel):
    """Metadata for nodes, capturing topic and subtopic from the book."""

    # Used as the output class of the OpenAIPydanticProgram built in
    # extract_topic().
    # NOTE(review): pydantic exposes the class docstring and the Field
    # descriptions in the generated schema, so they are presumably forwarded
    # to the LLM — treat their wording as prompt text, not as plain comments.

    # Required field (the `...` default means "no default value").
    topic: str = Field(
        ...,
        description="The main subject or category that the node is associated with, representing a broad theme within the book.",
    )
    # Required field; narrows `topic` down to a specific section.
    subtopic: str = Field(
        ...,
        description="A more specific aspect or section under the main topic, refining the context of the node within the book.",
    )
def extract_topic(references, content_table):
    """Persist a book's extracted content table and build a topic extractor.

    The function does two things:

    1. Runs ``SummarizeGenerator(references).extract_content_table`` on
       ``content_table`` and stores the resulting dictionary, together with
       the book title, in the ``topic_collection`` collection of the
       ``summarizer`` MongoDB database.
    2. Renders ``ADD_METADATA_TEMPLATE`` with the extracted output and uses it
       to configure a ``PydanticProgramExtractor`` that yields
       ``NodeMetadata`` (topic / subtopic) for nodes.

    Args:
        references: Mapping describing the book. It must contain a
            ``"title"`` key and is passed as-is to ``SummarizeGenerator``.
        content_table: The table of contents handed to
            ``SummarizeGenerator.extract_content_table``.
            NOTE(review): its expected type is not visible here — confirm
            against the summarizer module.

    Returns:
        The configured ``PydanticProgramExtractor``.

    Raises:
        KeyError: If ``references`` has no ``"title"`` entry.
        Exception: Errors from the summarizer or from the MongoDB insert
            propagate unchanged; only a failed connectivity ping is
            reported and ignored.
    """
    # NOTE(review): if MONGO_URI is unset this is None and pymongo presumably
    # falls back to its default host — confirm that is intended.
    uri = os.getenv("MONGO_URI")

    # Use the client as a context manager so its connections are released
    # once the insert is done (or on error) instead of leaking one client
    # per call.
    with MongoClient(uri) as client:
        try:
            client.admin.command('ping')
            print("Pinged your deployment. You successfully connected to MongoDB!")
        except Exception as e:
            # Best-effort connectivity check: report the problem and carry on;
            # a real outage will surface at insert_one below.
            print(e)

        # Access a specific database
        db = client["summarizer"]
        # Access a collection within the database
        collection = db["topic_collection"]

        generate_content_table = SummarizeGenerator(references)
        extractor_output, extractor_dics = generate_content_table.extract_content_table(content_table)
        print(extractor_output)

        data_to_insert = {
            "title": references["title"],
            # Unpack the extracted dictionary; a "title" key inside it would
            # override the one set above.
            **extractor_dics,
        }
        collection.insert_one(data_to_insert)

    # The rendered template embeds the extracted content table so the LLM can
    # pick topic/subtopic values for each node.
    add_metadata_template = str(
        Template(ADD_METADATA_TEMPLATE).render(extractor_output=extractor_output)
    )
    print("add metadata template : ", add_metadata_template)

    llm = OpenAI(temperature=0.1, model="gpt-4o-mini")
    openai_program = OpenAIPydanticProgram.from_defaults(
        output_cls=NodeMetadata,
        prompt_template_str="{input}",
        extract_template_str=add_metadata_template,
        llm=llm,
    )
    topic_extractor = PydanticProgramExtractor(
        program=openai_program,
        input_key="input",
        show_progress=True,
        extract_template_str=add_metadata_template,
        llm=llm,
    )
    return topic_extractor