Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,65 +1,8 @@
 model = "gemma2:27b"
-
-import os
-
-os.system("
-
-import nest_asyncio
-nest_asyncio.apply()
-
-import asyncio
-
-# Run Async Ollama
-# Taken from: https://stackoverflow.com/questions/77697302/how-to-run-ollama-in-google-colab
-# NB: You may need to set these and get CUDA working, depending on which backend you are running.
-# Set environment variable for NVIDIA library
-# Set environment variables for CUDA
-os.environ['PATH'] += ':/usr/local/cuda/bin'
-# Set LD_LIBRARY_PATH to include both /usr/lib64-nvidia and CUDA lib directories
-os.environ['LD_LIBRARY_PATH'] = '/usr/lib64-nvidia:/usr/local/cuda/lib64'
-
-async def run_process(cmd):
-    print('>>> starting', *cmd)
-    process = await asyncio.create_subprocess_exec(
-        *cmd,
-        stdout=asyncio.subprocess.PIPE,
-        stderr=asyncio.subprocess.PIPE
-    )
-
-    # define an async pipe function
-    async def pipe(lines):
-        async for line in lines:
-            print(line.decode().strip())
-
-        await asyncio.gather(
-            pipe(process.stdout),
-            pipe(process.stderr),
-        )
-
-    # call it
-    await asyncio.gather(pipe(process.stdout), pipe(process.stderr))
-
-import threading
-
-async def start_ollama_serve():
-    await run_process(['ollama', 'serve'])
-
-def run_async_in_thread(loop, coro):
-    asyncio.set_event_loop(loop)
-    loop.run_until_complete(coro)
-    loop.close()
-
-# Create a new event loop that will run in a new thread
-new_loop = asyncio.new_event_loop()
-
-# Start ollama serve in a separate thread so the cell won't block execution
-thread = threading.Thread(target=run_async_in_thread, args=(new_loop, start_ollama_serve()))
-thread.start()
-
-# Load up model
-
-os.system(f"ollama pull {model}")
-
+import ollama
+import os
+ollama_path="/usr/local/lib/python3.10/site-packages/ollama"
+os.system(f"{ollama_path} ollama pull {model}")
 
 import copy
 import gradio as gr
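For context, the surviving imports (ollama, gradio, copy) suggest the rest of app.py drives the pulled model through the ollama Python client from a Gradio UI. Below is a minimal sketch of that pattern only; it is not the Space's actual code, and chat_fn plus all of its wiring are illustrative assumptions.

# Hypothetical sketch, NOT the Space's code: wiring a pulled model
# into a Gradio chat UI via the ollama Python client.
import ollama
import gradio as gr

model = "gemma2:27b"

def chat_fn(message, history):
    # Convert Gradio's (user, assistant) pair history into the
    # role/content message list the ollama client expects.
    messages = []
    for user_msg, bot_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    # Blocking call against the local Ollama server; assumes
    # `ollama serve` is running and the model has been pulled.
    response = ollama.chat(model=model, messages=messages)
    return response["message"]["content"]

gr.ChatInterface(chat_fn, title=f"Chat with {model}").launch()

The non-streaming call keeps the sketch short; a real app might pass stream=True to ollama.chat and yield partial tokens instead.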