Leri777 committed on
Commit a800c44
Parent(s): 15fd008

Update app.py

Files changed (1)
  1. app.py +33 -17
app.py CHANGED
@@ -2,6 +2,8 @@ import os
 import json
 import subprocess
 from threading import Thread
+import logging
+from logging.handlers import RotatingFileHandler
 
 import torch
 import spaces
@@ -10,21 +12,29 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
+log_file = '/tmp/app_debug.log'
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.DEBUG)
+file_handler = RotatingFileHandler(log_file, maxBytes=10*1024*1024, backupCount=5)
+file_handler.setLevel(logging.DEBUG)
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+file_handler.setFormatter(formatter)
+logger.addHandler(file_handler)
+
+logger.debug("Application started")
+
 MODEL_ID = "Qwen/Qwen2.5-Coder-7B-Instruct"
 CHAT_TEMPLATE = "ChatML"
 MODEL_NAME = MODEL_ID.split("/")[-1]
 CONTEXT_LENGTH = 16000
 
-# Setting values directly for the variables
-COLOR = "blue"  # Default interface color
-EMOJI = "🤖"  # Default emoji for the model
-DESCRIPTION = f"This is the {MODEL_NAME} model designed for coding assistance and general AI tasks."  # Default description
-
-
+COLOR = "blue"
+EMOJI = "🤖"
+DESCRIPTION = f"This is the {MODEL_NAME} model designed for coding assistance and general AI tasks."
 
 @spaces.GPU()
 def predict(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
-    # Format history with a given chat template
+    logger.debug(f"Received prediction request: message='{message}', system_prompt='{system_prompt}'")
     if CHAT_TEMPLATE == "Auto":
         stop_tokens = [tokenizer.eos_token_id]
         instruction = system_prompt + "\n\n"
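
The block added above configures a module-level logger with a rotating file handler: /tmp/app_debug.log rolls over once it reaches 10 MB, keeping up to five backups (app_debug.log.1 through .5). A minimal self-contained sketch of the same pattern, using a hypothetical tiny maxBytes chosen only so the rollover is easy to observe:

import logging
from logging.handlers import RotatingFileHandler

demo_logger = logging.getLogger("rotation_demo")
demo_logger.setLevel(logging.DEBUG)
handler = RotatingFileHandler("/tmp/demo.log", maxBytes=512, backupCount=2)  # tiny maxBytes, demo only
handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
demo_logger.addHandler(handler)

for i in range(100):
    demo_logger.debug("message %d", i)
# /tmp/demo.log now holds the newest records; /tmp/demo.log.1 and
# /tmp/demo.log.2 hold older, rotated ones.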
@@ -69,14 +79,17 @@ def predict(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
     t = Thread(target=model.generate, kwargs=generate_kwargs)
     t.start()
     outputs = []
-    for new_token in streamer:
-        outputs.append(new_token)
-        if new_token in stop_tokens:
-            break
-        yield "".join(outputs)
+    try:
+        for new_token in streamer:
+            outputs.append(new_token)
+            if new_token in stop_tokens:
+                break
+            yield "".join(outputs)
+        logger.debug(f"Prediction completed successfully for message: '{message}'")
+    except Exception as e:
+        logger.exception(f"Error during prediction for message '{message}': {str(e)}")
+        yield "An error occurred during processing."
 
-
-# Load model
 device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 quantization_config = BitsAndBytesConfig(
     load_in_4bit=True,
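
The try/except added in this hunk wraps a loop that consumes streamer and generate_kwargs, both created earlier in predict() and outside the visible context. A plausible sketch of that setup, assuming the standard TextIteratorStreamer pattern from transformers; the input_ids name and the sampling flags are illustrative, not copied from app.py:

from threading import Thread
from transformers import TextIteratorStreamer

streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generate_kwargs = dict(
    input_ids=input_ids,                 # tokenized prompt, assumed built earlier
    streamer=streamer,
    max_new_tokens=max_new_tokens,
    do_sample=True,
    temperature=temperature,
    top_k=top_k,
    top_p=top_p,
    repetition_penalty=repetition_penalty,
)
# generate() runs on a background thread; the streamer yields decoded text
# chunks, which the loop in the hunk above accumulates and yields to Gradio.
t = Thread(target=model.generate, kwargs=generate_kwargs)
t.start()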
@@ -90,12 +103,13 @@ model = AutoModelForCausalLM.from_pretrained(
     attn_implementation="flash_attention_2",
 )
 
-# Create Gradio interface
+logger.debug("Model and tokenizer loaded successfully")
+
 gr.ChatInterface(
     predict,
     title=EMOJI + " " + MODEL_NAME,
     description=DESCRIPTION,
-examples=[
+    examples=[
         ["Can you solve the equation 2x + 3 = 11 for x in Python?"],
         ["Write a Java program that checks if a number is even or odd."],
         ["How can I reverse a string in JavaScript?"],
@@ -117,4 +131,6 @@ examples=[
         gr.Slider(0, 1, 0.95, label="Top P sampling"),
     ],
     theme=gr.themes.Soft(primary_hue=COLOR),
-).queue().launch()
+).queue().launch()
+
+logger.debug("Chat interface initialized and launched")