Bot_Development / script /get_metadata.py
dsmultimedika's picture
Update Repository
0743bb0
# Build citation metadata from a reference record and attach it to documents
class Metadata:
    """Build citation metadata from a reference record and attach it to documents.

    The reference record is read with ``reference[key]`` for every key in
    ``_REQUIRED_FIELDS``; those values, plus a formatted citation string,
    are merged into each document's ``metadata`` dict together with a
    1-based page number.
    """

    # Keys read from ``self.reference``. The tuple order is also the key
    # order of the generated metadata dict.
    _REQUIRED_FIELDS = ("title", "author", "category", "year", "publisher")

    def __init__(self, reference):
        """Store the reference record used to generate metadata.

        Args:
            reference: Mapping that provides the keys in ``_REQUIRED_FIELDS``.
                It is not validated here; missing keys are reported when
                the metadata is generated.
        """
        self.reference = reference

    def add_metadata(self, documents, metadata):
        """Add metadata to each document and include its page number.

        The page number is the document's 1-based position in ``documents``.

        Args:
            documents: Iterable of objects. Any object whose ``metadata``
                attribute is missing or ``None`` gets a fresh empty dict.
            metadata: Extra fields merged into every document's metadata
                (anything ``dict.update`` accepts). ``None`` or an empty
                mapping adds only the page number.

        Returns:
            The same ``documents`` object that was passed in; the documents
            are modified in place.
        """
        extra = metadata or {}
        for page_number, document in enumerate(documents, start=1):
            # Ensure the document has a usable metadata dict.
            if getattr(document, "metadata", None) is None:
                document.metadata = {}
            # Set the page first so it keeps its position among the keys.
            document.metadata["page"] = page_number
            document.metadata.update(extra)
            # A "page" key in the extra fields must not clobber the page
            # number derived from the document's position.
            document.metadata["page"] = page_number
            print(f"Metadata added to page {page_number}")
        return documents

    def _generate_metadata(self):
        """Generate the metadata dict from ``self.reference`` and return it.

        Returns:
            A dict with the keys in ``_REQUIRED_FIELDS`` (copied from the
            reference) followed by ``"reference"``, an APA-style citation
            string built from author, year, title and publisher.

        Raises:
            KeyError: If one or more required fields are missing; the
                message names every missing field.
        """
        source = self.reference
        metadata = {}
        missing = []
        for name in self._REQUIRED_FIELDS:
            try:
                metadata[name] = source[name]
            except KeyError:
                # Keep going so the error can list every missing field.
                missing.append(name)
        if missing:
            raise KeyError(
                "reference is missing required field(s): " + ", ".join(missing)
            )
        # APA style reference
        metadata["reference"] = (
            f"{metadata['author']}. ({metadata['year']}). "
            f"*{metadata['title']}*. {metadata['publisher']}."
        )
        print("metadata is generated")
        return metadata

    def apply_metadata(self, documents):
        """Apply generated metadata to documents.

        Args:
            documents: Iterable of document objects, passed on to
                ``add_metadata``.

        Returns:
            Whatever ``add_metadata`` returns: the same ``documents`` object.
        """
        metadata = self._generate_metadata()
        print("metadata is applied")
        return self.add_metadata(documents, metadata)