Spaces:
Runtime error
Runtime error
Daryl Fung
committed on
Commit
•
932db78
1
Parent(s):
6103344
added imgur bucket
Browse files- app.py +10 -1
- db/__init__.py +4 -0
- db/audio_db/is3/demo.py +32 -0
- db/audio_db/is3/is3.py +146 -0
- db/audio_db/is3/requirements.txt +0 -0
- db/audio_db/is3/utils.py +108 -0
- db/audio_db/is3/wrapper.py +97 -0
- db/create_db.py +52 -0
- db/db_connect.py +16 -0
- db/generate_audio.py +45 -0
- db/load_db.py +27 -0
- db/responses.txt +87 -0
- requirements.txt +3 -1
- responses.txt +0 -29
app.py
CHANGED
@@ -1,14 +1,23 @@
|
|
1 |
from fastapi import FastAPI
|
2 |
import uvicorn
|
|
|
3 |
from sentence_transformers import SentenceTransformer
|
4 |
|
5 |
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
|
|
|
|
|
|
|
6 |
|
7 |
app = FastAPI()
|
8 |
|
|
|
|
|
9 |
@app.post('/tts')
|
10 |
async def transcribe(text: str):
|
11 |
-
embeddings = model.encode([text]
|
|
|
|
|
|
|
12 |
return embeddings[0]
|
13 |
|
14 |
|
|
|
1 |
from fastapi import FastAPI
|
2 |
import uvicorn
|
3 |
+
import faiss
|
4 |
from sentence_transformers import SentenceTransformer
|
5 |
|
6 |
# Sentence-embedding model shared by every request handler in this module.
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# FAISS L2 index sized to the model's embedding dimension.
index = faiss.IndexFlatL2(model.get_sentence_embedding_dimension())  # build the index

# Seed the index with the embedding of a single placeholder sentence.
index.add(model.encode(['hello']))

app = FastAPI()
|
12 |
|
13 |
+
|
14 |
+
|
15 |
@app.post('/tts')
async def transcribe(text: str):
    """Embed ``text`` with the module-level model and return its embedding.

    Args:
        text: The sentence to embed.

    Returns:
        The embedding of ``text`` as a plain list of floats.
    """
    embeddings = model.encode([text])

    # store the text to a file

    # ``model.encode`` yields a numpy array; FastAPI's JSON encoder cannot
    # serialise numpy arrays, so convert the row to a plain Python list.
    return embeddings[0].tolist()
|
22 |
|
23 |
|
db/__init__.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dotenv import load_dotenv
|
2 |
+
|
3 |
+
load_dotenv('./.env')
|
4 |
+
|
db/audio_db/is3/demo.py
ADDED
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
|
3 |
+
|
4 |
+
async def demo():
    """Walk through the bucket API: stage, commit, load, fetch and delete."""
    import is3

    # creating a new bucket
    bucket = is3.Bucket('my-is3-bucket')

    # adding items to a bucket and uploading them
    some_object = {'hello': b'world', ('foo', 'bar'): {'baz'}}
    bucket.stage_obj(some_object, 'my-object')
    bucket.stage_obj(['another', 'one'], 'my-other-object')

    await bucket.commit()

    # loading a bucket from disk
    bucket = is3.Bucket.load('my-is3-bucket')

    # retrieving items stored in a bucket
    retrieved_object = await bucket.get_obj('my-object')
    assert retrieved_object == {'hello': b'world', ('foo', 'bar'): {'baz'}}

    # delete a specific item in a bucket
    await bucket.delete_obj('my-object')

    # delete an entire bucket and its contents
    await bucket.delete()
|
29 |
+
|
30 |
+
|
31 |
+
# asyncio.run creates a fresh event loop, runs the demo and closes the loop
# afterwards (the hand-made loop used previously was never closed).
asyncio.run(demo())
|
db/audio_db/is3/is3.py
ADDED
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
import asyncio
|
3 |
+
from pathlib import Path
|
4 |
+
from pydantic import BaseModel
|
5 |
+
|
6 |
+
from typing import Any
|
7 |
+
from PIL import Image
|
8 |
+
ObjectName = ObjectId = BucketName = str
|
9 |
+
|
10 |
+
from .utils import image_to_object, object_to_image, write_compressed, read_compressed
|
11 |
+
from .wrapper import ImgurClient as Imgur
|
12 |
+
|
13 |
+
# Folder, located next to this module, that holds serialized bucket files.
BUCKETS_FOLDER = Path(__file__).parent / 'buckets'
# Extension appended to a bucket name to form its file name.
EXTENSION = '.bkt'


def filename(name: str) -> str:
    """Return the file name used to store the bucket called ``name``."""
    return name + EXTENSION
|
18 |
+
|
19 |
+
|
20 |
+
class UploadedObject(BaseModel):
    """Represents an object that has been uploaded"""
    name: ObjectName
    obj_id: ObjectId
    deletehash: str
    # In-memory copy of the wrapped object; stripped from pickled state.
    cached_obj: Any = None

    def __getstate__(self):
        """Return the pickle state with ``cached_obj`` cleared.

        The state is built from copies so that pickling does not modify the
        live instance. The previous implementation edited the containers
        returned by ``super().__getstate__()`` in place, which (in pydantic)
        are the instance's own ``__dict__`` and fields-set, so every pickle
        also wiped the in-memory cache.
        """
        state = dict(super().__getstate__())
        state['__dict__'] = {**state['__dict__'], 'cached_obj': None}
        # '__fields_set__' is the key the original code relied on; the
        # '__pydantic_fields_set__' spelling is handled defensively in case a
        # newer pydantic is installed -- NOTE(review): confirm pinned version.
        for key in ('__fields_set__', '__pydantic_fields_set__'):
            if state.get(key) is not None:
                state[key] = set(state[key]) - {'cached_obj'}
        return state

    async def download(self) -> Any:
        """Return the wrapped object, downloading it only if not cached."""
        if self.cached_obj is not None:
            return self.cached_obj

        async with Imgur() as imgur:
            img = await imgur.download_image(self.obj_id)

        # Remember the decoded object so later calls skip the download.
        obj = self.cached_obj = image_to_object(img)
        return obj

    async def delete(self) -> None:
        """Delete the uploaded object"""
        async with Imgur() as imgur:
            await imgur.delete_image(self.deletehash)
|
48 |
+
|
49 |
+
|
50 |
+
class StagedObject(BaseModel):
    """Represents a bucket object that has been added to a bucket but not yet
    uploaded"""
    name: ObjectName
    obj: Any

    def image(self) -> Image.Image:
        """Return the wrapped object encoded as an image."""
        return object_to_image(self.obj)

    async def upload(self) -> UploadedObject:
        """Upload the wrapped object and return an UploadedObject.

        The wrapped object is cached on the UploadedObject so that a retrieval
        during the same runtime does not need to download the object.
        """
        async with Imgur() as imgur:
            oid, delete = await imgur.upload_image(self.image())

        return UploadedObject(
            name=self.name,
            obj_id=oid,
            deletehash=delete,
            cached_obj=self.obj,
        )
|
74 |
+
|
75 |
+
|
76 |
+
class Bucket:
    """A named collection of objects, tracked as pending or uploaded.

    The bucket is pickled to ``BUCKETS_FOLDER`` after every change to its
    pending or uploaded objects.
    """

    def __init__(self, name) -> None:
        self.name = name
        self.uploaded: dict[ObjectName, UploadedObject] = {}
        self.pending: dict[ObjectName, StagedObject] = {}

    def __repr__(self) -> str:
        n_pending = len(self.pending)
        n_uploaded = len(self.uploaded)
        return f'<Bucket {self.name} (pending: {n_pending}, uploaded: {n_uploaded})>'

    def _save(self):
        """Pickle and dump the bucket to the buckets folder"""
        # The folder may not exist yet on a fresh checkout; create it so the
        # write below does not fail with FileNotFoundError.
        BUCKETS_FOLDER.mkdir(parents=True, exist_ok=True)
        fn = filename(self.name)
        write_compressed(pickle.dumps(self), BUCKETS_FOLDER / fn)

    def stage_obj(self, obj: Any, name: str) -> None:
        """Register ``obj`` under ``name`` as pending upload and save."""
        self.pending[name] = StagedObject(obj=obj, name=name)

        self._save()

    def unstage_obj(self, name: str) -> None:
        """Drop the pending object called ``name`` and save.

        Raises:
            KeyError: if no pending object has that name.
        """
        del self.pending[name]
        self._save()

    async def commit(self):
        """Upload all staged objects

        Raises:
            Warning: if one or more staged objects failed to upload; they
                remain in ``pending``.
        """
        coros = [o.upload() for o in self.pending.values()]

        # upload concurrently and filter out errors
        results = await asyncio.gather(*coros, return_exceptions=True)
        uploaded = [e for e in results if isinstance(e, UploadedObject)]

        # remove successful uploads from pending
        for o in uploaded:
            del self.pending[o.name]

        # track uploaded objects
        self.uploaded.update({o.name: o for o in uploaded})

        self._save()

        # warn about unuploaded pending objects
        if self.pending:
            msg = (
                f"{len(self.pending)} objects failed to upload:\n" +
                '\n'.join(o.name for o in self.pending.values())
            )
            # NOTE(review): this *raises* Warning as an exception rather than
            # calling warnings.warn; kept because callers may rely on it.
            raise Warning(msg)

    async def get_obj(self, name: str) -> Any:
        """Return the uploaded object stored under ``name``.

        Raises:
            KeyError: if no uploaded object has that name.
        """
        return await self.uploaded[name].download()

    async def delete_obj(self, name: str) -> None:
        """Remove the object with the given name from uploaded objects"""
        o = self.uploaded.pop(name, None)
        if o is None:
            raise ValueError(f'No obj with name {name} found in {self}')

        await o.delete()
        self._save()

    async def delete(self):
        """Delete the bucket and all objects it holds"""
        coros = [o.delete() for o in self.uploaded.values()]
        await asyncio.gather(*coros)
        # A bucket that was never saved has no file; do not fail on that.
        (BUCKETS_FOLDER / filename(self.name)).unlink(missing_ok=True)

    @classmethod
    def load(cls, name: str) -> "Bucket":
        """Load the bucket called ``name`` from the buckets folder."""
        fn = filename(name)
        # NOTE: pickle.loads executes arbitrary code from the file; only load
        # bucket files that this application wrote itself.
        return pickle.loads(read_compressed(BUCKETS_FOLDER / fn))
|
db/audio_db/is3/requirements.txt
ADDED
Binary file (170 Bytes). View file
|
|
db/audio_db/is3/utils.py
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import io
|
2 |
+
import math
|
3 |
+
import zlib
|
4 |
+
import base64
|
5 |
+
import pickle
|
6 |
+
import numpy as np
|
7 |
+
from PIL import Image
|
8 |
+
|
9 |
+
from os import PathLike
|
10 |
+
from typing import Any
|
11 |
+
|
12 |
+
HEADER_SIZE = 4  # bytes


def object_to_image(obj: Any) -> Image.Image:
    """Take an object and convert it to an image.

    The object is pickled and compressed to bytes, a length header is
    prepended, then the bytes are padded and reshaped into an NxNx4 array and
    converted to an image.

    Example of a 1d byte array to image array (compression omitted):
    [1,2,3,4,5,6,7] ->
    [
        [[0,0,0,7], [1,2,3,4]],
        [[5,6,7,0], [0,0,0,0]]
    ]
    The array begins with a 4 byte big-endian header holding the length of
    the data. Zeroes are added to the end to ensure that the number of pixels
    is a square number.
    """
    data = compress(pickle.dumps(obj))
    header = len(data).to_bytes(length=HEADER_SIZE, byteorder='big')
    data = header + data

    # divide the data into 4-byte pixels, rounding up when it doesn't fit
    n_pixels = -(-len(data) // 4)

    # ensure n_pixels is a square number
    side_length = math.ceil(math.sqrt(n_pixels))
    n_pixels = side_length ** 2
    n_bytes = n_pixels * 4

    # right pad the data with zeros so it can be shaped to (n,n,4)
    data += b'\x00' * (n_bytes - len(data))

    # create (n,n,4) array from the padded data
    data_arr = np.frombuffer(data, dtype=np.uint8)
    img_arr = np.reshape(data_arr, (side_length, side_length, 4))

    return Image.fromarray(img_arr)
|
51 |
+
|
52 |
+
|
53 |
+
def image_to_object(image: Image.Image) -> Any:
    """Take a PIL Image and unpickle its data to an object

    Convert the image to an array, flatten to obtain serial bytes, read the
    length header, then decompress and unpickle the payload bytes.
    """
    data = np.array(image).flatten().tobytes()

    # number of bytes containing meaningful data
    length = int.from_bytes(data[:HEADER_SIZE], 'big')

    # slice off the header, then the zero padding (if any)
    payload = data[HEADER_SIZE:HEADER_SIZE + length]

    # NOTE: pickle.loads can execute arbitrary code; only decode images that
    # this application produced itself.
    return pickle.loads(decompress(payload))
|
71 |
+
|
72 |
+
|
73 |
+
def image_to_b64_string(img: Image.Image) -> str:
    """Return a str representing the image as b64 encoded bytes"""
    # save the image as PNG to an in-memory buffer
    buffer = io.BytesIO()
    img.save(buffer, 'png')

    # getvalue() returns the whole buffer regardless of the stream position
    return base64.b64encode(buffer.getvalue()).decode()
|
81 |
+
|
82 |
+
|
83 |
+
def bytes_to_image(b: bytes) -> Image.Image:
    """Create an Image using raw bytes"""
    # A freshly created BytesIO is already positioned at offset 0.
    return Image.open(io.BytesIO(b))
|
89 |
+
|
90 |
+
|
91 |
+
def compress(b: bytes, level=9) -> bytes:
    """Compress the bytes using zlib"""
    return zlib.compress(b, level)


def decompress(b: bytes) -> bytes:
    """Decompress the bytes using zlib"""
    return zlib.decompress(b)


def write_compressed(data: bytes, fp: PathLike) -> None:
    """Compress ``data`` and write it to the file at ``fp``."""
    with open(fp, 'wb') as f:
        f.write(compress(data))


def read_compressed(fp: PathLike) -> bytes:
    """Read the file at ``fp`` and return its decompressed contents."""
    with open(fp, 'rb') as f:
        return decompress(f.read())
|
db/audio_db/is3/wrapper.py
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
"""This module provides wrapper functionality for the imgur API"""
|
3 |
+
|
4 |
+
import dotenv, os
|
5 |
+
from PIL import Image
|
6 |
+
import asyncio
|
7 |
+
from aiohttp import ClientSession
|
8 |
+
|
9 |
+
from typing import Optional, Union, Tuple
|
10 |
+
|
11 |
+
from .utils import image_to_b64_string, bytes_to_image
|
12 |
+
|
13 |
+
dotenv.load_dotenv()
# Header sent with API requests; the client id is read from the environment.
AUTH_HEADER = {'Authorization': f"Client-ID {os.getenv('IS3_CLIENT_ID')}"}
API_ENDPOINTS = {
    'upload': 'https://api.imgur.com/3/upload/',
    # HTTPS: downloaded image data is later unpickled, so it must not travel
    # over an unauthenticated plain-HTTP connection.
    'download': 'https://i.imgur.com/',
    'info': 'https://api.imgur.com/3/image/',
    'delete': 'https://api.imgur.com/3/image/',
    'auth': 'https://api.imgur.com/oauth2/token',
}
|
22 |
+
|
23 |
+
# get access and refresh token
async def get_tokens():
    """Exchange the stored refresh token for a new token pair.

    Credentials are read from the ``IS3_REFRESH_TOKEN``, ``IS3_CLIENT_ID`` and
    ``IS3_CLIENT_SECRET`` environment variables.

    Returns:
        A ``(access_token, refresh_token)`` tuple taken from the JSON reply.
    """
    # Context managers close the session and release the response; the
    # session was previously left open.
    async with ClientSession() as session:
        async with session.request(
            method='post',
            url=API_ENDPOINTS['auth'],
            headers=AUTH_HEADER,
            data={
                'refresh_token': os.getenv("IS3_REFRESH_TOKEN"),
                'client_id': os.getenv("IS3_CLIENT_ID"),
                'client_secret': os.getenv("IS3_CLIENT_SECRET"),
                'grant_type': 'refresh_token',
            },
        ) as resp:
            payload = await resp.json()
    return payload['access_token'], payload['refresh_token']


ACCESS_TOKEN, REFRESH_TOKEN = asyncio.run(get_tokens())
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
class ImgurClient:
    """Class to interact with various API endpoints"""

    def __init__(self, session: Optional[ClientSession] = None) -> None:
        # Use the supplied session, or create one when none is given.
        self._session = session or ClientSession()

    async def __aenter__(self):
        return self

    async def __aexit__(self, *err):
        # Closes the session on exit, including one passed in by the caller.
        await self._session.close()

    async def _request(self, method: str, url: str, *args, **kwargs) -> Union[dict, bytes]:
        """Make a request with the specified method to the endpoint. All requests
        should either return raw image data as bytes or other data as JSON

        Raises:
            RuntimeError: if the response content type is neither
                ``image/png`` nor ``application/json``.
        """
        async with self._session.request(method, url, *args, **kwargs) as resp:
            content_type = resp.content_type
            if content_type == 'image/png':
                return await resp.read()
            if content_type == 'application/json':
                return (await resp.json())['data']
            raise RuntimeError(f'Unexpected response content-type "{content_type}"')

    async def upload_image(self, img: Image.Image) -> Tuple[str, str]:
        """Upload an image and return img id and deletehash"""
        data = image_to_b64_string(img)
        r = await self._request(
            method='post',
            url=API_ENDPOINTS['upload'],
            headers=AUTH_HEADER,
            data={'image': data, 'type': 'base64'},
        )
        return r['id'], r['deletehash']

    async def download_image(self, image_id: str) -> Image.Image:
        """Download the image with the given id and return it as an Image."""
        url = API_ENDPOINTS['download'] + image_id + '.png'
        data = await self._request('get', url)

        return bytes_to_image(data)

    async def delete_image(self, deletehash: str) -> None:
        """Delete an image using a deletehash string"""
        url = API_ENDPOINTS['delete'] + deletehash
        await self._request('delete', url, headers=AUTH_HEADER)
|
89 |
+
|
90 |
+
|
91 |
+
async def get_token():
    """Return a freshly issued access token.

    Delegates to ``get_tokens``. The previous body called
    ``ImgurClient.get_access_token``, which the class does not define, and it
    was run at import time, so importing this module raised AttributeError.
    """
    access_token, _refresh_token = await get_tokens()
    return access_token


if __name__ == '__main__':
    import asyncio
    # Manual check only; the module-level tokens are fetched by get_tokens().
    asyncio.run(get_token())
|
db/create_db.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from pymilvus import (
|
3 |
+
utility,
|
4 |
+
FieldSchema,
|
5 |
+
CollectionSchema,
|
6 |
+
DataType,
|
7 |
+
Collection,
|
8 |
+
)
|
9 |
+
|
10 |
+
from db_connect import connect
|
11 |
+
|
12 |
+
connect()

#region creating collections
### Create collections ###
# Index settings shared by the "embeddings" field of both collections.
index_params = {
    "metric_type": "COSINE",
    "index_type": "IVF_FLAT",
    "params": {"nlist": 1024}
}

fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
    FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=65535),
    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=384)
]
schema = CollectionSchema(fields, "Texts to generate audio for. "
                                  "This collection cache the texts needed to generate audio. "
                                  "We can then do offline generation for the audio file.")
# Any existing "Response" collection is dropped and recreated from scratch.
utility.drop_collection("Response")
response_collection = Collection("Response", schema)
response_collection.create_index(field_name='embeddings', index_params=index_params)
utility.index_building_progress("Response")


fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
    FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=65535),
    FieldSchema(name="filename", dtype=DataType.VARCHAR, max_length=65535),
    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=384)
]
audio_schema = CollectionSchema(fields, "The text that corresponds to the audio file.")
# Any existing "AudioResponse" collection is dropped and recreated as well.
utility.drop_collection("AudioResponse")
audio_response_collection = Collection("AudioResponse", audio_schema)
audio_response_collection.create_index(field_name='embeddings', index_params=index_params)
utility.index_building_progress("AudioResponse")
#endregion
|
db/db_connect.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from pymilvus import connections
|
3 |
+
|
4 |
+
|
5 |
+
def connect():
    """Open the Milvus connection configured through environment variables.

    Reads ``MILVUS_CLUSTER_ENDPOINT`` and ``MILVUS_TOKEN``; the connection
    alias comes from ``MILVUS_DEFAULT`` and falls back to ``"default"`` when
    that variable is unset.
    """
    # Replace uri and API key with your own
    connections.connect(
        alias=os.getenv('MILVUS_DEFAULT', 'default'),
        uri=os.getenv("MILVUS_CLUSTER_ENDPOINT"),  # Cluster endpoint obtained from the console
        token=os.getenv("MILVUS_TOKEN")  # API key or a colon-separated cluster username and password
    )


def disconnect():
    """Close the connection opened by ``connect``.

    Uses the same alias lookup as ``connect``; previously the literal string
    ``"MILVUS_DEFAULT"`` was passed, which is the variable's name rather than
    the alias ``connect`` registered.
    """
    connections.disconnect(os.getenv('MILVUS_DEFAULT', 'default'))
|
db/generate_audio.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sentence_transformers import SentenceTransformer
|
2 |
+
from pymilvus import Collection
|
3 |
+
import asyncio
|
4 |
+
|
5 |
+
from db_connect import connect
|
6 |
+
|
7 |
+
# for audio storage
|
8 |
+
from audio_db.is3 import is3
|
9 |
+
|
10 |
+
connect()


async def generate_audio():
    """Store audio for rows of "Response" and record them in "AudioResponse".

    For every row returned by the query iterator a bucket named after the row
    id is created, the audio bytes are staged and committed, and a
    ``[text, id, embeddings]`` record is collected. The collected records are
    inserted into "AudioResponse" at the end.
    """
    response = Collection("Response")
    audio_response = Collection("AudioResponse")

    data = []

    # NOTE(review): limit=1 is kept from the original call; confirm whether it
    # is meant as a per-batch size or as a cap on the total number of rows.
    response_iterator = response.query_iterator(limit=1, output_fields=['text', 'embeddings'])
    try:
        while True:
            res = response_iterator.next()
            if len(res) == 0:
                print("query iteration finished, close")
                break

            # Handle every row of the batch, not only the first one.
            for row in res:
                bucket = is3.Bucket(str(row['id']))

                # generate audio
                with open('445766006129375465.wav', 'rb') as audio_file:
                    audio_bytes = audio_file.read()

                # store the audio
                bucket.stage_obj(audio_bytes, 'audio')
                await bucket.commit()

                # save the audio record to AudioResponse
                data.append([row['text'], str(row['id']), row['embeddings']])
    finally:
        # close the iterator, also when an upload fails part-way through
        response_iterator.close()

    # Nothing to insert when the source collection yielded no rows.
    if data:
        audio_response.insert(list(zip(*data)))
        audio_response.flush()
|
41 |
+
|
42 |
+
|
43 |
+
if __name__ == '__main__':
    # asyncio.run creates the event loop, runs the coroutine and closes the
    # loop afterwards.
    asyncio.run(generate_audio())
|
db/load_db.py
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sentence_transformers import SentenceTransformer
|
2 |
+
# Connect using a MilvusClient object
|
3 |
+
from pymilvus import Collection
|
4 |
+
|
5 |
+
from db_connect import connect
|
6 |
+
|
7 |
+
#
|
8 |
+
with open('responses.txt', 'r', encoding='utf-8') as file:
    content = file.read()

# One response per blank-line-separated paragraph. Empty chunks (for example
# from trailing blank lines) are dropped so they are not embedded and stored.
contents = [chunk.strip() for chunk in content.split('\n\n') if chunk.strip()]

model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
embeddings = model.encode(contents)

# Column-wise data: texts first, then their embeddings.
data = [
    contents,
    embeddings
]

connect()

collection = Collection("Response")
collection.insert(data)
collection.flush()
|
25 |
+
|
26 |
+
|
27 |
+
|
db/responses.txt
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Our check-in time is at 3 PM, and check-out is at 11 AM. Let us know if you need any adjustments.
|
2 |
+
|
3 |
+
Yes, we offer complimentary Wi-Fi throughout the hotel for all our guests.
|
4 |
+
|
5 |
+
Currently, we offer a complimentary continental breakfast from 7 AM to 10 AM in the lobby area.
|
6 |
+
|
7 |
+
Yes, we provide free parking for our guests. There's no need for reservations for parking spaces.
|
8 |
+
|
9 |
+
Absolutely! Red Roof Inn is pet-friendly, and your furry friends can stay for free. Please be aware of our pet policy for any size or breed restrictions.
|
10 |
+
|
11 |
+
We have a fitness center open from 6 AM to 10 PM. However, we do not have a pool at this location.
|
12 |
+
|
13 |
+
Early check-in and late check-out requests are subject to availability. Please contact the front desk to arrange it.
|
14 |
+
|
15 |
+
You can book directly through our website, call us, or use any major hotel booking platform. We're happy to assist with your reservation!
|
16 |
+
|
17 |
+
Yes, we have various discounts including for AAA members, seniors, government employees, and military personnel. Please inquire for more details.
|
18 |
+
|
19 |
+
Certainly! There are several dining options and shops within a short distance. Our front desk can provide recommendations and directions.
|
20 |
+
|
21 |
+
Good afternoon! Welcome to our hotel. Our Wi-Fi is complimentary and available throughout the hotel. For breakfast, we offer a free continental breakfast in the lobby from 7 AM to 10 AM.
|
22 |
+
|
23 |
+
Yes, we provide free parking for our guests. There's no need to reserve a space; you can park anywhere available.
|
24 |
+
|
25 |
+
Absolutely! We're a pet-friendly hotel, so your furry friends are welcome to stay for free. There are some size and breed restrictions, so please let us know if you have any specific concerns.
|
26 |
+
|
27 |
+
Of course! There are several excellent restaurants within a short walking distance. I can provide you with a list and directions if you like.
|
28 |
+
|
29 |
+
You're welcome! If you have any more questions or need assistance during your stay, please don't hesitate to ask. Enjoy your stay!
|
30 |
+
|
31 |
+
Thank you for bringing this to our attention. We'll investigate and take the necessary action immediately.
|
32 |
+
|
33 |
+
I apologize for the delay. I'll check with our staff and ensure your luggage is delivered promptly.
|
34 |
+
|
35 |
+
We're sorry for the inconvenience. It's being repaired, and we're happy to assist you with the stairs or alternative routes.
|
36 |
+
|
37 |
+
I understand. Let's find you a quieter room further from the elevator.
|
38 |
+
|
39 |
+
I apologize for that oversight. We'll have it stocked immediately.
|
40 |
+
|
41 |
+
We're sorry for the discomfort. We'll have someone look at the thermostat or offer a different room.
|
42 |
+
|
43 |
+
Thank you for notifying us. We'll make sure it's cleaned right away for your safety and comfort.
|
44 |
+
|
45 |
+
We apologize for the inconvenience. We'll work on getting them repaired or replaced as soon as possible.
|
46 |
+
|
47 |
+
Let's review your bill together. We'll correct any discrepancies immediately.
|
48 |
+
|
49 |
+
That's unacceptable, and I apologize. We'll resolve this issue at once or offer you a different room.
|
50 |
+
|
51 |
+
We're sorry to hear that. We'll send someone to fix it or offer you a room with proper curtains.
|
52 |
+
|
53 |
+
Thank you for your patience while I look into this.
|
54 |
+
|
55 |
+
Just a brief moment, please.
|
56 |
+
|
57 |
+
I'll be right with you, please hold on.
|
58 |
+
|
59 |
+
Let me check that for you, please stay on the line.
|
60 |
+
|
61 |
+
I'll be back with you shortly, please bear with me.
|
62 |
+
|
63 |
+
Please allow me a moment to address your request.
|
64 |
+
|
65 |
+
I'll need a moment to find the answer, please hold tight.
|
66 |
+
|
67 |
+
I'll work on it, please hold the line.
|
68 |
+
|
69 |
+
I'll look into that right now, please stay on the call.
|
70 |
+
|
71 |
+
Just a quick pause while I verify the details.
|
72 |
+
|
73 |
+
I'll find the information, please wait.
|
74 |
+
|
75 |
+
I'm investigating that for you, please give me a second.
|
76 |
+
|
77 |
+
I'm going to check, please hold on for a moment.
|
78 |
+
|
79 |
+
I'll be with you shortly, thank you for your patience.
|
80 |
+
|
81 |
+
I'm actively working on your request, please stay on hold.
|
82 |
+
|
83 |
+
Please allow me a moment to research and provide an answer.
|
84 |
+
|
85 |
+
I'm gathering the necessary details, please bear with me.
|
86 |
+
|
87 |
+
I'll get back to you shortly, please remain on the line.
|
requirements.txt
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
sentence_transformers==2.2.2
|
2 |
fastapi==0.104.1
|
3 |
uvicorn==0.24.0.post1
|
4 |
-
python-multipart==0.0.6
|
|
|
|
|
|
1 |
sentence_transformers==2.2.2
|
2 |
fastapi==0.104.1
|
3 |
uvicorn==0.24.0.post1
|
4 |
+
python-multipart==0.0.6
|
5 |
+
faiss-cpu==1.7.4
|
6 |
+
pymilvus==2.3.3
|
responses.txt
DELETED
@@ -1,29 +0,0 @@
|
|
1 |
-
"Our check-in time is at 3 PM, and check-out is at 11 AM. Let us know if you need any adjustments."
|
2 |
-
|
3 |
-
"Yes, we offer complimentary Wi-Fi throughout the hotel for all our guests."
|
4 |
-
|
5 |
-
"Currently, we offer a complimentary continental breakfast from 7 AM to 10 AM in the lobby area."
|
6 |
-
|
7 |
-
"Yes, we provide free parking for our guests. There's no need for reservations for parking spaces."
|
8 |
-
|
9 |
-
"Absolutely! Red Roof Inn is pet-friendly, and your furry friends can stay for free. Please be aware of our pet policy for any size or breed restrictions."
|
10 |
-
|
11 |
-
"We have a fitness center open from 6 AM to 10 PM. However, we do not have a pool at this location."
|
12 |
-
|
13 |
-
"Early check-in and late check-out requests are subject to availability. Please contact the front desk to arrange it."
|
14 |
-
|
15 |
-
"You can book directly through our website, call us, or use any major hotel booking platform. We're happy to assist with your reservation!"
|
16 |
-
|
17 |
-
"Yes, we have various discounts including for AAA members, seniors, government employees, and military personnel. Please inquire for more details."
|
18 |
-
|
19 |
-
"Certainly! There are several dining options and shops within a short distance. Our front desk can provide recommendations and directions."
|
20 |
-
|
21 |
-
"Good afternoon! Welcome to our hotel. Our Wi-Fi is complimentary and available throughout the hotel. For breakfast, we offer a free continental breakfast in the lobby from 7 AM to 10 AM."
|
22 |
-
|
23 |
-
"Yes, we provide free parking for our guests. There's no need to reserve a space; you can park anywhere available."
|
24 |
-
|
25 |
-
"Absolutely! We're a pet-friendly hotel, so your furry friends are welcome to stay for free. There are some size and breed restrictions, so please let us know if you have any specific concerns."
|
26 |
-
|
27 |
-
"Of course! There are several excellent restaurants within a short walking distance. I can provide you with a list and directions if you like."
|
28 |
-
|
29 |
-
"You're welcome! If you have any more questions or need assistance during your stay, please don't hesitate to ask. Enjoy your stay!"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|