drakosfire commited on
Commit
45ce081
1 Parent(s): 36649b8

versioned llama.cpp, sorted pathing, updated dockerfile

Browse files
.dockerignore CHANGED
@@ -1,3 +1,2 @@
1
-
2
- MerchantBotCLI
3
  output
 
1
+ .git
 
2
  output
.gitignore CHANGED
@@ -7,4 +7,4 @@ models/stable-diffusion/Loras/EnvyMimicXL01.safetensors
7
  models/stable-diffusion/Loras/add-detail-xl.safetensors
8
  models/stable-diffusion/Loras/blank-card-template-5.safetensors
9
  models/stable-diffusion/card-generator-v1.safetensors
10
-
 
7
  models/stable-diffusion/Loras/add-detail-xl.safetensors
8
  models/stable-diffusion/Loras/blank-card-template-5.safetensors
9
  models/stable-diffusion/card-generator-v1.safetensors
10
+ cuda_12.4.0_550.54.14_linux.run
Dockerfile CHANGED
@@ -1,12 +1,47 @@
1
  # Stage 1: Build Cuda toolkit
2
- FROM drakosfire/cuda-base:latest as base-layer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  # Llama.cpp requires the ENV variable be set to signal the CUDA build and be built with the CMAKE variables from pip for python use
5
  ENV LLAMA_CUBLAS=1
6
- RUN apt-get update && \
7
- apt-get install -y python3 python3-pip python3-venv && \
8
  pip install gradio && \
9
- CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python && \
10
  pip install pillow && \
11
  pip install diffusers && \
12
  pip install accelerate && \
@@ -14,23 +49,29 @@ RUN apt-get update && \
14
  pip install peft && \
15
  pip install pip install PyGithub
16
 
17
- FROM base-layer as final-layer
18
 
19
- RUN useradd -m -u 1000 user
20
-
21
- # Set environment variables for copied builds of cuda and flash-attn in /venv
 
22
 
23
  ENV PATH=/usr/local/cuda-12.4/bin:/venv/bin:${PATH}
24
  ENV LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:${LD_LIBRARY_PATH}
25
-
26
  ENV VIRTUAL_ENV=/venv
27
- RUN python3 -m venv $VIRTUAL_ENV
28
- ENV PATH="$VIRTUAL_ENV/bin:$PATH"
29
 
30
- # Copy local files to working directory and activate user
31
- COPY . /home/user/app/
 
 
 
 
 
32
  WORKDIR /home/user/app
33
-
 
 
 
34
 
35
  USER user
36
 
 
1
  # Stage 1: Build Cuda toolkit
2
+ FROM ubuntu:22.04 as cuda-setup
3
+
4
+
5
+ ARG DEBIAN_FRONTEND=noninteractive
6
+
7
+ # Install necessary libraries including libxml2
8
+ RUN apt-get update && \
9
+ apt-get install -y gcc libxml2 && \
10
+ apt-get clean && \
11
+ rm -rf /var/lib/apt/lists/*
12
+
13
+ COPY cuda_12.4.0_550.54.14_linux.run .
14
+
15
+ # Make the CUDA toolkit installer executable and run it (toolkit only, no driver)
16
+ RUN chmod +x cuda_12.4.0_550.54.14_linux.run && \
17
+ ./cuda_12.4.0_550.54.14_linux.run --silent --toolkit --override
18
+
19
+ # Second Stage: Copy necessary CUDA directories install flash-attn
20
+ FROM ubuntu:22.04 as base-layer
21
+
22
+ # Copy the CUDA toolkit from the first stage
23
+ COPY --from=cuda-setup /usr/local/cuda-12.4 /usr/local/cuda-12.4
24
+
25
+ # Set environment variables to enable CUDA commands
26
+ ENV PATH=/usr/local/cuda-12.4/bin:${PATH}
27
+ ENV LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:${LD_LIBRARY_PATH}
28
+
29
+ # Install Python, pip, and virtualenv
30
+ RUN apt-get update && \
31
+ apt-get install -y python3 python3-pip python3-venv git && \
32
+ apt-get clean && \
33
+ rm -rf /var/lib/apt/lists/*
34
+
35
+ # Create a virtual environment and install dependencies
36
+ RUN python3 -m venv /venv
37
+ ENV PATH="/venv/bin:$PATH"
38
 
39
  # Llama.cpp requires the ENV variable be set to signal the CUDA build and be built with the CMAKE variables from pip for python use
40
  ENV LLAMA_CUBLAS=1
41
+ RUN pip install --no-cache-dir torch packaging wheel && \
42
+ pip install flash-attn && \
43
  pip install gradio && \
44
+ CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama_cpp_python==0.2.55 && \
45
  pip install pillow && \
46
  pip install diffusers && \
47
  pip install accelerate && \
 
49
  pip install peft && \
50
+ pip install PyGithub
51
 
 
52
 
53
+ FROM ubuntu:22.04 as final-layer
54
+
55
+ COPY --from=base-layer /usr/local/cuda-12.4 /usr/local/cuda-12.4
56
+ COPY --from=base-layer /venv /venv
57
 
58
  ENV PATH=/usr/local/cuda-12.4/bin:/venv/bin:${PATH}
59
  ENV LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64:${LD_LIBRARY_PATH}
60
+ ENV LLAMA_CPP_LIB=/venv/lib/python3.10/site-packages/llama_cpp/libllama.so
61
  ENV VIRTUAL_ENV=/venv
 
 
62
 
63
+ # Install Python and create a user
64
+ RUN apt-get update && apt-get install -y python3 python3-venv && apt-get clean && rm -rf /var/lib/apt/lists/* && \
65
+ useradd -m -u 1000 user
66
+
67
+ ENV PATH="$VIRTUAL_ENV/bin:$PATH"
68
+ # Set working directory and user
69
+ COPY . /home/user/app
70
  WORKDIR /home/user/app
71
+ RUN chown -R user:user /home/user/app/ && \
72
+ mkdir -p /home/user/app/output && \
73
+ chown -R user:user /home/user/app/image_temp && \
74
+ chown -R user:user /home/user/app/output
75
 
76
  USER user
77
 
__pycache__/img2img.cpython-310.pyc CHANGED
Binary files a/__pycache__/img2img.cpython-310.pyc and b/__pycache__/img2img.cpython-310.pyc differ
 
__pycache__/item_dict_gen.cpython-310.pyc CHANGED
Binary files a/__pycache__/item_dict_gen.cpython-310.pyc and b/__pycache__/item_dict_gen.cpython-310.pyc differ
 
__pycache__/user_input.cpython-310.pyc CHANGED
Binary files a/__pycache__/user_input.cpython-310.pyc and b/__pycache__/user_input.cpython-310.pyc differ
 
img2img.py CHANGED
@@ -9,11 +9,11 @@ from PIL import Image
9
  pipe = None
10
  start_time = time.time()
11
  torch.backends.cuda.matmul.allow_tf32 = True
12
- model_path = ("/home/user/app/models/stable-diffusion/card-generator-v1.safetensors")
13
- lora_path = "/home/user/app/models/stable-diffusion/Loras/blank-card-template-5.safetensors"
14
- detail_lora_path = "/home/user/app/models/stable-diffusion/Loras/add-detail-xl.safetensors"
15
- mimic_lora_path = "/home/user/app/models/stable-diffusion/Loras/EnvyMimicXL01.safetensors"
16
- temp_image_path = "/home/user/app/image_temp/"
17
  card_pre_prompt = " blank magic card,high resolution, detailed intricate high quality border, textbox, high quality detailed magnum opus drawing of a "
18
  negative_prompts = "text, words, numbers, letters"
19
  image_list = []
 
9
  pipe = None
10
  start_time = time.time()
11
  torch.backends.cuda.matmul.allow_tf32 = True
12
+ model_path = ("./models/stable-diffusion/card-generator-v1.safetensors")
13
+ lora_path = "./models/stable-diffusion/Loras/blank-card-template-5.safetensors"
14
+ detail_lora_path = "./models/stable-diffusion/Loras/add-detail-xl.safetensors"
15
+ mimic_lora_path = "./models/stable-diffusion/Loras/EnvyMimicXL01.safetensors"
16
+ temp_image_path = "./image_temp/"
17
  card_pre_prompt = " blank magic card,high resolution, detailed intricate high quality border, textbox, high quality detailed magnum opus drawing of a "
18
  negative_prompts = "text, words, numbers, letters"
19
  image_list = []
item_dict_gen.py CHANGED
@@ -3,18 +3,18 @@ import ast
3
  import gc
4
  import torch
5
 
6
- model_path = "/home/user/app/models/starling-lm-7b-alpha.Q8_0.gguf"
7
 
8
  def load_llm(user_input):
9
  llm = Llama(
10
  model_path=model_path,
11
  n_ctx=8192, # The max sequence length to use - note that longer sequence lengths require much more resources
12
  n_threads=8, # The number of CPU threads to use, tailor to your system and the resulting performance
13
- n_gpu_layers=-1 # The number of layers to offload to GPU, if you have GPU acceleration available
14
- )
15
  return llm(
16
  f"GPT4 User: {prompt_instructions} the item is {user_input}: <|end_of_turn|>GPT4 Assistant:", # Prompt
17
- max_tokens=512, # Generate up to 512 tokens
18
  stop=["</s>"], # Example stop token - not necessarily correct for this specific model! Please check before using.
19
  echo=False # Whether to echo the prompt
20
  )
 
3
  import gc
4
  import torch
5
 
6
+ model_path = "./models/starling-lm-7b-alpha.Q8_0.gguf"
7
 
8
  def load_llm(user_input):
9
  llm = Llama(
10
  model_path=model_path,
11
  n_ctx=8192, # The max sequence length to use - note that longer sequence lengths require much more resources
12
  n_threads=8, # The number of CPU threads to use, tailor to your system and the resulting performance
13
+ n_gpu_layers=32 # The number of layers to offload to GPU, if you have GPU acceleration available
14
+ )
15
  return llm(
16
  f"GPT4 User: {prompt_instructions} the item is {user_input}: <|end_of_turn|>GPT4 Assistant:", # Prompt
17
+ max_tokens=768, # Generate up to 768 tokens
18
  stop=["</s>"], # Example stop token - not necessarily correct for this specific model! Please check before using.
19
  echo=False # Whether to echo the prompt
20
  )
user_input.py CHANGED
@@ -64,6 +64,7 @@ def call_llm(user_input):
64
  response = response
65
 
66
  response = response.replace("GPT4 Assistant: ", "")
 
67
  response = igen.convert_to_dict(response)
68
  if not response:
69
  response = call_llm(user_input)
 
64
  response = response
65
 
66
  response = response.replace("GPT4 Assistant: ", "")
67
+ print(response)
68
  response = igen.convert_to_dict(response)
69
  if not response:
70
  response = call_llm(user_input)