Spaces:
Runtime error
Runtime error
ffreemt
committed on
Commit
•
21c3825
1
Parent(s):
e882a86
Add test.epub to docs
Browse files- .gitignore +3 -0
- app.py +4 -2
- docs/test.epub +0 -0
- epub_loader.py +38 -0
.gitignore
CHANGED
@@ -1,3 +1,6 @@
|
|
1 |
.venv
|
2 |
db
|
3 |
dummy
|
|
|
|
|
|
|
|
1 |
.venv
|
2 |
db
|
3 |
dummy
|
4 |
+
.ENV
|
5 |
+
.env
|
6 |
+
__pycache__
|
app.py
CHANGED
@@ -289,12 +289,12 @@ def ingest(
|
|
289 |
]
|
290 |
|
291 |
|
|
|
292 |
# https://huggingface.co/TheBloke/vicuna-7B-1.1-HF
|
293 |
def gen_local_llm(model_id="TheBloke/vicuna-7B-1.1-HF"):
|
294 |
"""Gen a local llm.
|
295 |
|
296 |
localgpt run_localgpt
|
297 |
-
|
298 |
https://medium.com/pytorch/bettertransformer-out-of-the-box-performance-for-huggingface-transformers-3fbe27d50ab2
|
299 |
with torch.device(“cuda”):
|
300 |
model = AutoModelForCausalLM.from_pretrained(“gpt2-large”, torch_dtype=torch.float16)
|
@@ -354,7 +354,9 @@ def load_qa(device=None, model_name: str = "hkunlp/instructor-base"):
|
|
354 |
llm = gen_local_llm() # "TheBloke/vicuna-7B-1.1-HF" 12G?
|
355 |
|
356 |
qa = RetrievalQA.from_chain_type(
|
357 |
-
llm=llm, chain_type="stuff",
|
|
|
|
|
358 |
)
|
359 |
|
360 |
logger.info("Done qa")
|
|
|
289 |
]
|
290 |
|
291 |
|
292 |
+
# TheBloke/Wizard-Vicuna-7B-Uncensored-HF
|
293 |
# https://huggingface.co/TheBloke/vicuna-7B-1.1-HF
|
294 |
def gen_local_llm(model_id="TheBloke/vicuna-7B-1.1-HF"):
|
295 |
"""Gen a local llm.
|
296 |
|
297 |
localgpt run_localgpt
|
|
|
298 |
https://medium.com/pytorch/bettertransformer-out-of-the-box-performance-for-huggingface-transformers-3fbe27d50ab2
|
299 |
with torch.device(“cuda”):
|
300 |
model = AutoModelForCausalLM.from_pretrained(“gpt2-large”, torch_dtype=torch.float16)
|
|
|
354 |
llm = gen_local_llm() # "TheBloke/vicuna-7B-1.1-HF" 12G?
|
355 |
|
356 |
qa = RetrievalQA.from_chain_type(
|
357 |
+
llm=llm, chain_type="stuff",
|
358 |
+
retriever=retriever,
|
359 |
+
return_source_documents=True,
|
360 |
)
|
361 |
|
362 |
logger.info("Done qa")
|
docs/test.epub
ADDED
Binary file (261 kB). View file
|
|
epub_loader.py
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Loads an epub file into a list of documents."""
|
2 |
+
from dataclasses import dataclass
|
3 |
+
from pathlib import Path
|
4 |
+
from typing import List, Union
|
5 |
+
|
6 |
+
from epub2txt import epub2txt
|
7 |
+
from langchain.docstore.document import Document
|
8 |
+
from langchain.document_loaders.base import BaseLoader
|
9 |
+
from loguru import logger
|
10 |
+
|
11 |
+
|
12 |
+
@dataclass
class EpubLoader(BaseLoader):
    """Load an epub file into a list of documents.

    Each chapter text returned by ``epub2txt`` is wrapped in one
    ``Document`` whose metadata records the source and the chapter title.

    Args:
        file_path: file path or url to epub

    Returns:
        self.load() -> list of Documents
    """

    file_path: Union[str, Path]

    def load(self) -> List[Document]:
        """Load data into document objects.

        Returns:
            One ``Document`` per (chapter title, chapter text) pair. The
            pairs are built with ``zip``, so the result is as long as the
            shorter of the two sequences.

        Raises:
            Exception: Any error raised while converting the epub is logged
                and re-raised unchanged.
        """
        try:
            texts = epub2txt(self.file_path, outputlist=True)
            # The chapter titles are read from an attribute on the epub2txt
            # callable itself, so this must run after the call above.
            ch_titles = epub2txt.content_titles
        except Exception as exc:
            logger.error(exc)
            raise

        # Store the source as a plain string: a ``Path`` object in metadata
        # is not JSON-serializable and is rejected by stores that only
        # accept primitive metadata values.
        source = str(self.file_path)

        return [
            Document(
                page_content=text,
                metadata={"source": source, "ch.": title},
            )
            for title, text in zip(ch_titles, texts)
        ]
|