Commit 45ce081
drakosfire committed
Parent(s): 36649b8

versioned llama.cpp, sorted pathing, updated dockerfile

Files changed:
- .dockerignore +1 -2
- .gitignore +1 -1
- Dockerfile +55 -14
- __pycache__/img2img.cpython-310.pyc +0 -0
- __pycache__/item_dict_gen.cpython-310.pyc +0 -0
- __pycache__/user_input.cpython-310.pyc +0 -0
- img2img.py +5 -5
- item_dict_gen.py +4 -4
- user_input.py +1 -0
.dockerignore
CHANGED
@@ -1,3 +1,2 @@
-
-MerchantBotCLI
+.get
 output
.gitignore
CHANGED
@@ -7,4 +7,4 @@ models/stable-diffusion/Loras/EnvyMimicXL01.safetensors
 models/stable-diffusion/Loras/add-detail-xl.safetensors
 models/stable-diffusion/Loras/blank-card-template-5.safetensors
 models/stable-diffusion/card-generator-v1.safetensors
-
+cuda_12.4.0_550.54.14_linux.run
Dockerfile
CHANGED
@@ -1,12 +1,47 @@
 # Stage 1: Build Cuda toolkit
-FROM
+FROM ubuntu:22.04 as cuda-setup
+
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+# Install necessary libraries including libxml2
+RUN apt-get update && \
+    apt-get install -y gcc libxml2 && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY cuda_12.4.0_550.54.14_linux.run .
+
+# Install wget, download cuda-toolkit and run
+RUN chmod +x cuda_12.4.0_550.54.14_linux.run && \
+    ./cuda_12.4.0_550.54.14_linux.run --silent --toolkit --override
+
+# Second Stage: Copy necessary CUDA directories install flash-attn
+FROM ubuntu:22.04 as base-layer
+
+# Copy the CUDA toolkit from the first stage
+COPY --from=cuda-setup /usr/local/cuda-12.4 /usr/local/cuda-12.4
+
+# Set environment variables to enable CUDA commands
+ENV PATH=/usr/local/cuda-12.4/bin:${PATH}
+ENV LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:${LD_LIBRARY_PATH}
+
+# Install Python, pip, and virtualenv
+RUN apt-get update && \
+    apt-get install -y python3 python3-pip python3-venv git && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Create a virtual environment and install dependencies
+RUN python3 -m venv /venv
+ENV PATH="/venv/bin:$PATH"
 
 # Llama.cpp requires the ENV variable be set to signal the CUDA build and be built with the CMAKE variables from pip for python use
 ENV LLAMA_CUBLAS=1
-RUN
-
+RUN pip install --no-cache-dir torch packaging wheel && \
+    pip install flash-attn && \
     pip install gradio && \
-    CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install
+    CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama_cpp_python==0.2.55 && \
     pip install pillow && \
     pip install diffusers && \
     pip install accelerate && \
@@ -14,23 +49,29 @@ RUN apt-get update && \
     pip install peft && \
     pip install pip install PyGithub
 
-FROM base-layer as final-layer
 
+FROM ubuntu:22.04 as final-layer
+
+COPY --from=base-layer /usr/local/cuda-12.4 /usr/local/cuda-12.4
+COPY --from=base-layer /venv /venv
-
-
-
 
 ENV PATH=/usr/local/cuda-12.4/bin:/venv/bin:${PATH}
 ENV LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:${LD_LIBRARY_PATH}
-
+ENV LLAMA_CPP_LIB=/venv/lib/python3.10/site-packages/llama_cpp/libllama.so
 ENV VIRTUAL_ENV=/venv
-RUN python3 -m venv $VIRTUAL_ENV
-ENV PATH="$VIRTUAL_ENV/bin:$PATH"
 
-#
-
+# Install Python and create a user
+RUN apt-get update && apt-get install -y python3 python3-venv && apt-get clean && rm -rf /var/lib/apt/lists/* && \
+    useradd -m -u 1000 user
+
+ENV PATH="$VIRTUAL_ENV/bin:$PATH"
+# Set working directory and user
+COPY . /home/user/app
 WORKDIR /home/user/app
-
+RUN chown -R user:user /home/user/app/ && \
+    mkdir -p /home/user/app/output && \
+    chown -R user:user /home/user/app/image_temp && \
+    chown -R user:user /home/user/app/output
 
 USER user
 
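The pinned llama_cpp_python==0.2.55 wheel is compiled against the stage-one CUDA toolkit via CMAKE_ARGS="-DLLAMA_CUBLAS=on", and LLAMA_CPP_LIB points the loader at the resulting shared library in the copied venv. A minimal sanity check, not part of the commit, that can be run inside the final image:

import os
import llama_cpp

# The installed version should match the pin in the Dockerfile.
print(llama_cpp.__version__)  # expect 0.2.55

# LLAMA_CPP_LIB is set in the final layer; the .so must exist at that path.
lib = os.environ.get("LLAMA_CPP_LIB", "")
print(lib, os.path.exists(lib))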
__pycache__/img2img.cpython-310.pyc
CHANGED
Binary files a/__pycache__/img2img.cpython-310.pyc and b/__pycache__/img2img.cpython-310.pyc differ

__pycache__/item_dict_gen.cpython-310.pyc
CHANGED
Binary files a/__pycache__/item_dict_gen.cpython-310.pyc and b/__pycache__/item_dict_gen.cpython-310.pyc differ

__pycache__/user_input.cpython-310.pyc
CHANGED
Binary files a/__pycache__/user_input.cpython-310.pyc and b/__pycache__/user_input.cpython-310.pyc differ
img2img.py
CHANGED
@@ -9,11 +9,11 @@ from PIL import Image
 pipe = None
 start_time = time.time()
 torch.backends.cuda.matmul.allow_tf32 = True
-model_path = ("
-lora_path = "
-detail_lora_path = "
-mimic_lora_path = "
-temp_image_path = "
+model_path = ("./models/stable-diffusion/card-generator-v1.safetensors")
+lora_path = "./models/stable-diffusion/Loras/blank-card-template-5.safetensors"
+detail_lora_path = "./models/stable-diffusion/Loras/add-detail-xl.safetensors"
+mimic_lora_path = "./models/stable-diffusion/Loras/EnvyMimicXL01.safetensors"
+temp_image_path = "./image_temp/"
 card_pre_prompt = " blank magic card,high resolution, detailed intricate high quality border, textbox, high quality detailed magnum opus drawing of a "
 negative_prompts = "text, words, numbers, letters"
 image_list = []
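The commit message's "sorted pathing" shows up here: hard-coded paths are replaced with paths relative to the container's WORKDIR (/home/user/app). A hypothetical fail-fast check, not in the commit, that verifies the assets resolve before the pipeline loads:

from pathlib import Path

# Paths are taken from the committed module; the check itself is illustrative.
required = [
    "./models/stable-diffusion/card-generator-v1.safetensors",
    "./models/stable-diffusion/Loras/blank-card-template-5.safetensors",
    "./models/stable-diffusion/Loras/add-detail-xl.safetensors",
    "./models/stable-diffusion/Loras/EnvyMimicXL01.safetensors",
    "./image_temp/",
]
missing = [p for p in required if not Path(p).exists()]
if missing:
    raise FileNotFoundError(f"assets not found relative to the working directory: {missing}")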
item_dict_gen.py
CHANGED
@@ -3,18 +3,18 @@ import ast
 import gc
 import torch
 
-model_path = "
+model_path = "./models/starling-lm-7b-alpha.Q8_0.gguf"
 
 def load_llm(user_input):
     llm = Llama(
         model_path=model_path,
         n_ctx=8192, # The max sequence length to use - note that longer sequence lengths require much more resources
         n_threads=8, # The number of CPU threads to use, tailor to your system and the resulting performance
-        n_gpu_layers
-    )
+        n_gpu_layers=32 # The number of layers to offload to GPU, if you have GPU acceleration available
+    )
     return llm(
         f"GPT4 User: {prompt_instructions} the item is {user_input}: <|end_of_turn|>GPT4 Assistant:", # Prompt
-        max_tokens=
+        max_tokens=768, # Generate up to 512 tokens
         stop=["</s>"], # Example stop token - not necessarily correct for this specific model! Please check before using.
         echo=False # Whether to echo the prompt
     )
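The reply from load_llm is later parsed with igen.convert_to_dict, whose body this commit does not show, though item_dict_gen.py imports ast. A plausible sketch of that helper, assuming it uses ast.literal_eval and returns a falsy value on failure so the caller can retry:

import ast

def convert_to_dict(response: str):
    # Parse a dict literal out of the LLM reply; None signals the caller to retry.
    try:
        result = ast.literal_eval(response.strip())
        return result if isinstance(result, dict) else None
    except (ValueError, SyntaxError):
        return None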
user_input.py
CHANGED
@@ -64,6 +64,7 @@ def call_llm(user_input):
     response = response
 
     response = response.replace("GPT4 Assistant: ", "")
+    print(response)
     response = igen.convert_to_dict(response)
     if not response:
         response = call_llm(user_input)
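One caveat in the code above: call_llm re-invokes itself whenever convert_to_dict returns a falsy value, with no depth limit, so persistently malformed output would eventually raise RecursionError. A bounded variant, sketched with hypothetical generate/parse stand-ins for the Space's LLM call and igen.convert_to_dict:

def call_llm_bounded(user_input, generate, parse, retries=3):
    # Retry a fixed number of times instead of recursing without a limit.
    for _ in range(retries):
        raw = generate(user_input)
        parsed = parse(raw.replace("GPT4 Assistant: ", ""))
        if parsed:
            return parsed
    raise RuntimeError("LLM did not return a parsable item dict")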