Spaces:
Running
Running
from app.infrastructure.models.my_models import ChunksResponse | |
from app.infrastructure.repository.document_handeler_repository import ( | |
DocumentHandelerRepository, | |
) | |
class GetAllChunkedTextFeature:
    """Return every stored text chunk, grouped by the document it belongs to.

    The feature has a single collaborator, the repository handed to the
    constructor, which is queried once per call to
    :meth:`get_all_chunked_text`.
    """

    def __init__(self, document_handeler_repository: DocumentHandelerRepository):
        """Store the repository used to fetch the documents."""
        self.document_handeler_repository = document_handeler_repository

    async def get_all_chunked_text(self):
        """Group the stored chunks per document and order them by index.

        Each item read from the repository must expose a ``payload`` mapping
        with the keys ``"document_id"``, ``"chunk_index"`` and
        ``"chunk-text"``; a missing key raises ``KeyError``.

        Returns:
            ``ChunksResponse`` built with ``data`` set to a dict mapping each
            document id to its list of ``{"index": ..., "text": ...}``
            entries, sorted by ``"index"``.
        """

        def by_index(entry):
            return entry["index"]

        # NOTE(review): the method is declared ``async`` but nothing is
        # awaited here, so the repository call runs synchronously.
        repository_result = self.document_handeler_repository.get_all_documents()

        # Only the first element of the result is consumed.
        # NOTE(review): presumably a (points, next_offset) pair — confirm
        # against the repository that no further pages are left unread.
        points = repository_result[0]

        chunks_by_document = {}
        for point in points:
            # Keys are read in the same order as before so that a malformed
            # payload fails on the same lookup.
            owner_id = point.payload["document_id"]
            position = point.payload["chunk_index"]
            content = point.payload["chunk-text"]
            chunks_by_document.setdefault(owner_id, []).append(
                {"index": position, "text": content}
            )

        # Rebuild the mapping with each document's chunks in index order;
        # documents keep the order in which they were first encountered.
        ordered = {
            owner_id: sorted(entries, key=by_index)
            for owner_id, entries in chunks_by_document.items()
        }
        return ChunksResponse(data=ordered)