File size: 1,131 Bytes
47b5f0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from app.infrastructure.models.my_models import ChunksResponse
from app.infrastructure.repository.document_handeler_repository import (
    DocumentHandelerRepository,
)


class GetAllChunkedTextFeature:
    """Group every stored chunk under its parent document.

    The repository is injected; this class only reshapes whatever
    ``get_all_documents()`` hands back into a ``ChunksResponse``.
    """

    def __init__(self, document_handeler_repository: DocumentHandelerRepository):
        # Collaborator that supplies the raw stored records.
        self.document_handeler_repository = document_handeler_repository

    async def get_all_chunked_text(self):
        """Return all chunks, grouped per document and ordered by chunk index.

        Returns:
            A ``ChunksResponse`` whose ``data`` maps each ``document_id`` to a
            list of ``{"index": ..., "text": ...}`` dicts sorted ascending by
            ``"index"``. Entries with equal indexes keep the order in which
            the repository returned them (``sorted`` is stable).

        Raises:
            KeyError: If a record's payload is a mapping that lacks
                ``"document_id"``, ``"chunk_index"`` or ``"chunk-text"``.
        """
        # NOTE(review): the repository call is not awaited even though this
        # method is declared async -- confirm get_all_documents() is synchronous.
        fetched = self.document_handeler_repository.get_all_documents()

        # Only the first element of the result is consumed; presumably the
        # result is a (records, next_offset) pair -- TODO confirm with the
        # repository implementation.
        records = fetched[0]

        chunks_by_document = {}
        for record in records:
            payload = record.payload
            owner_id = payload["document_id"]
            # NOTE(review): "chunk-text" is hyphenated while the other keys use
            # underscores -- verify it matches the key written at ingest time.
            entry = {"index": payload["chunk_index"], "text": payload["chunk-text"]}
            chunks_by_document.setdefault(owner_id, []).append(entry)

        # Records are not assumed to arrive in chunk order, so each document's
        # entries are sorted before being returned.
        ordered = {
            owner_id: sorted(entries, key=lambda entry: entry["index"])
            for owner_id, entries in chunks_by_document.items()
        }

        return ChunksResponse(data=ordered)