Commit 45ce081
drakosfire committed
Parent(s): 36649b8

versioned llama.cpp, sorted pathing, updated dockerfile

Files changed:
- .dockerignore +1 -2
- .gitignore +1 -1
- Dockerfile +55 -14
- __pycache__/img2img.cpython-310.pyc +0 -0
- __pycache__/item_dict_gen.cpython-310.pyc +0 -0
- __pycache__/user_input.cpython-310.pyc +0 -0
- img2img.py +5 -5
- item_dict_gen.py +4 -4
- user_input.py +1 -0
.dockerignore
CHANGED
@@ -1,3 +1,2 @@
-
-MerchantBotCLI
+.get
 output
.gitignore
CHANGED
@@ -7,4 +7,4 @@ models/stable-diffusion/Loras/EnvyMimicXL01.safetensors
 models/stable-diffusion/Loras/add-detail-xl.safetensors
 models/stable-diffusion/Loras/blank-card-template-5.safetensors
 models/stable-diffusion/card-generator-v1.safetensors
-
+cuda_12.4.0_550.54.14_linux.run
Dockerfile
CHANGED
@@ -1,12 +1,47 @@
 # Stage 1: Build Cuda toolkit
-FROM
+FROM ubuntu:22.04 as cuda-setup
+
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Install necessary libraries including libxml2
+RUN apt-get update && \
+    apt-get install -y gcc libxml2 && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY cuda_12.4.0_550.54.14_linux.run .
+
+# Install wget, download cuda-toolkit and run
+RUN chmod +x cuda_12.4.0_550.54.14_linux.run && \
+    ./cuda_12.4.0_550.54.14_linux.run --silent --toolkit --override
+
+# Second Stage: Copy necessary CUDA directories install flash-attn
+FROM ubuntu:22.04 as base-layer
+
+# Copy the CUDA toolkit from the first stage
+COPY --from=cuda-setup /usr/local/cuda-12.4 /usr/local/cuda-12.4
+
+# Set environment variables to enable CUDA commands
+ENV PATH=/usr/local/cuda-12.4/bin:${PATH}
+ENV LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:${LD_LIBRARY_PATH}
+
+# Install Python, pip, and virtualenv
+RUN apt-get update && \
+    apt-get install -y python3 python3-pip python3-venv git && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Create a virtual environment and install dependencies
+RUN python3 -m venv /venv
+ENV PATH="/venv/bin:$PATH"
 
 # Llama.cpp requires the ENV variable be set to signal the CUDA build and be built with the CMAKE variables from pip for python use
 ENV LLAMA_CUBLAS=1
-RUN
-
+RUN pip install --no-cache-dir torch packaging wheel && \
+    pip install flash-attn && \
     pip install gradio && \
-    CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install
+    CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama_cpp_python==0.2.55 && \
     pip install pillow && \
     pip install diffusers && \
     pip install accelerate && \
@@ -14,23 +49,29 @@ RUN apt-get update && \
     pip install peft && \
     pip install pip install PyGithub
 
-FROM base-layer as final-layer
 
+FROM ubuntu:22.04 as final-layer
+
+COPY --from=base-layer /usr/local/cuda-12.4 /usr/local/cuda-12.4
+COPY --from=base-layer /venv /venv
-
-
-
 
 ENV PATH=/usr/local/cuda-12.4/bin:/venv/bin:${PATH}
 ENV LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:${LD_LIBRARY_PATH}
-
+ENV LLAMA_CPP_LIB=/venv/lib/python3.10/site-packages/llama_cpp/libllama.so
 ENV VIRTUAL_ENV=/venv
-RUN python3 -m venv $VIRTUAL_ENV
-ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
-#
-
+# Install Python and create a user
+RUN apt-get update && apt-get install -y python3 python3-venv && apt-get clean && rm -rf /var/lib/apt/lists/* && \
+    useradd -m -u 1000 user
+
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+# Set working directory and user
+COPY . /home/user/app
 WORKDIR /home/user/app
-
+RUN chown -R user:user /home/user/app/ && \
+    mkdir -p /home/user/app/output && \
+    chown -R user:user /home/user/app/image_temp && \
+    chown -R user:user /home/user/app/output
 
 USER user
 
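The pinned llama_cpp_python==0.2.55 wheel is compiled against the stage-one CUDA toolkit via CMAKE_ARGS="-DLLAMA_CUBLAS=on", and LLAMA_CPP_LIB points the loader at the resulting shared library in the copied venv. A minimal sanity check, not part of the commit, that can be run inside the final image:

import os
import llama_cpp

# The installed version should match the pin in the Dockerfile.
print(llama_cpp.__version__)  # expect 0.2.55

# LLAMA_CPP_LIB is set in the final layer; the .so must exist at that path.
lib = os.environ.get("LLAMA_CPP_LIB", "")
print(lib, os.path.exists(lib))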
__pycache__/img2img.cpython-310.pyc
CHANGED
Binary files a/__pycache__/img2img.cpython-310.pyc and b/__pycache__/img2img.cpython-310.pyc differ

__pycache__/item_dict_gen.cpython-310.pyc
CHANGED
Binary files a/__pycache__/item_dict_gen.cpython-310.pyc and b/__pycache__/item_dict_gen.cpython-310.pyc differ

__pycache__/user_input.cpython-310.pyc
CHANGED
Binary files a/__pycache__/user_input.cpython-310.pyc and b/__pycache__/user_input.cpython-310.pyc differ
img2img.py
CHANGED
@@ -9,11 +9,11 @@ from PIL import Image
 pipe = None
 start_time = time.time()
 torch.backends.cuda.matmul.allow_tf32 = True
-model_path = ("
-lora_path = "
-detail_lora_path = "
-mimic_lora_path = "
-temp_image_path = "
+model_path = ("./models/stable-diffusion/card-generator-v1.safetensors")
+lora_path = "./models/stable-diffusion/Loras/blank-card-template-5.safetensors"
+detail_lora_path = "./models/stable-diffusion/Loras/add-detail-xl.safetensors"
+mimic_lora_path = "./models/stable-diffusion/Loras/EnvyMimicXL01.safetensors"
+temp_image_path = "./image_temp/"
 card_pre_prompt = " blank magic card,high resolution, detailed intricate high quality border, textbox, high quality detailed magnum opus drawing of a "
 negative_prompts = "text, words, numbers, letters"
 image_list = []
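The commit message's "sorted pathing" shows up here: hard-coded paths are replaced with paths relative to the container's WORKDIR (/home/user/app). A hypothetical fail-fast check, not in the commit, that verifies the assets resolve before the pipeline loads:

from pathlib import Path

# Paths are taken from the committed module; the check itself is illustrative.
required = [
    "./models/stable-diffusion/card-generator-v1.safetensors",
    "./models/stable-diffusion/Loras/blank-card-template-5.safetensors",
    "./models/stable-diffusion/Loras/add-detail-xl.safetensors",
    "./models/stable-diffusion/Loras/EnvyMimicXL01.safetensors",
    "./image_temp/",
]
missing = [p for p in required if not Path(p).exists()]
if missing:
    raise FileNotFoundError(f"assets not found relative to the working directory: {missing}")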
item_dict_gen.py
CHANGED
@@ -3,18 +3,18 @@ import ast
 import gc
 import torch
 
-model_path = "
+model_path = "./models/starling-lm-7b-alpha.Q8_0.gguf"
 
 def load_llm(user_input):
     llm = Llama(
         model_path=model_path,
         n_ctx=8192, # The max sequence length to use - note that longer sequence lengths require much more resources
         n_threads=8, # The number of CPU threads to use, tailor to your system and the resulting performance
-        n_gpu_layers
-    )
+        n_gpu_layers=32 # The number of layers to offload to GPU, if you have GPU acceleration available
+    )
     return llm(
         f"GPT4 User: {prompt_instructions} the item is {user_input}: <|end_of_turn|>GPT4 Assistant:", # Prompt
-        max_tokens=
+        max_tokens=768, # Generate up to 512 tokens
         stop=["</s>"], # Example stop token - not necessarily correct for this specific model! Please check before using.
         echo=False # Whether to echo the prompt
     )
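The reply from load_llm is later parsed with igen.convert_to_dict, whose body this commit does not show, though item_dict_gen.py imports ast. A plausible sketch of that helper, assuming it uses ast.literal_eval and returns a falsy value on failure so the caller can retry:

import ast

def convert_to_dict(response: str):
    # Parse a dict literal out of the LLM reply; None signals the caller to retry.
    try:
        result = ast.literal_eval(response.strip())
        return result if isinstance(result, dict) else None
    except (ValueError, SyntaxError):
        return None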
user_input.py
CHANGED
@@ -64,6 +64,7 @@ def call_llm(user_input):
     response = response
 
     response = response.replace("GPT4 Assistant: ", "")
+    print(response)
     response = igen.convert_to_dict(response)
     if not response:
         response = call_llm(user_input)
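One caveat in the code above: call_llm re-invokes itself whenever convert_to_dict returns a falsy value, with no depth limit, so persistently malformed output would eventually raise RecursionError. A bounded variant, sketched with hypothetical generate/parse stand-ins for the Space's LLM call and igen.convert_to_dict:

def call_llm_bounded(user_input, generate, parse, retries=3):
    # Retry a fixed number of times instead of recursing without a limit.
    for _ in range(retries):
        raw = generate(user_input)
        parsed = parse(raw.replace("GPT4 Assistant: ", ""))
        if parsed:
            return parsed
    raise RuntimeError("LLM did not return a parsable item dict")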