FantasticGu committed
Commit 69b0f21
Parents: b4fe382, 8079c8b

Merge branch 'main' of https://huggingface.co/spaces/FantasticGNU/AnomalyGPT

.gitattributes CHANGED
@@ -36,3 +36,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
+hazelnut_cut.png filter=lfs diff=lfs merge=lfs -text
+capsule_crack.png filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,5 @@
+---
+license: cc-by-sa-4.0
+title: AnomalyGPT
+sdk: gradio
+---
app.py CHANGED
@@ -14,14 +14,13 @@ args = {
     'model': 'openllama_peft',
     'imagebind_ckpt_path': './pretrained_ckpt/imagebind_ckpt/imagebind_huge.pth',
     'vicuna_ckpt_path': './pretrained_ckpt/vicuna_ckpt/7b_v0',
-    'anomalygpt_ckpt_path': './ckpt/train_cn/pytorch_model.pt',
+    'anomalygpt_ckpt_path': './ckpt/train_supervised/pytorch_model.pt',
     'delta_ckpt_path': './pretrained_ckpt/pandagpt_ckpt/7b/pytorch_model.pt',
     'stage': 2,
     'max_tgt_len': 128,
     'lora_r': 32,
     'lora_alpha': 32,
-    'lora_dropout': 0.1,
-    'layers': [7,15,23,31]
+    'lora_dropout': 0.1
 }

 model = OpenLLAMAPEFTModel(**args)
@@ -29,10 +28,9 @@ delta_ckpt = torch.load(args['delta_ckpt_path'], map_location=torch.device('cpu'))
 model.load_state_dict(delta_ckpt, strict=False)
 delta_ckpt = torch.load(args['anomalygpt_ckpt_path'], map_location=torch.device('cpu'))
 model.load_state_dict(delta_ckpt, strict=False)
-model = model.eval()
-
-
-output = None
+model = model.eval().to(torch.bfloat16)#.half()#.cuda()
+# model.image_decoder = model.image_decoder.cuda()
+# model.prompt_learner = model.prompt_learner.cuda()

 """Override Chatbot.postprocess"""
 def postprocess(self, y):
@@ -127,7 +125,7 @@ def predict(
     history.append((input, response))


-    plt.imshow(pixel_output.reshape(224,224).detach().cpu(), cmap='binary_r')
+    plt.imshow(pixel_output.to(torch.float16).reshape(224,224).detach().cpu(), cmap='binary_r')
     plt.axis('off')
     plt.savefig('output.png',bbox_inches='tight',pad_inches = 0)

@@ -156,57 +154,48 @@ def predict(
     eroded_image = cv2.erode(image, kernel, iterations=1)
     cv2.imwrite('output.png', eroded_image)

-    global output
     output = PILImage.open('output.png').convert('L')


-    return chatbot, history, modality_cache
+    return chatbot, history, modality_cache, output


-def get_image():
-    global output
-    return output if output else "ffffff.png"
-

 def reset_user_input():
     return gr.update(value='')

-def reset_dialog():
-    return [], []

 def reset_state():
-    global output
-    output = None
-    return None, None, [], [], []
-
+    return gr.update(value=''), None, None, [], [], [], PILImage.open('ffffff.png')

+examples = ['hazelnut_cut.png','capsule_crack.png','carpet_normal.jpg']

 with gr.Blocks() as demo:
     gr.HTML("""<h1 align="center">Demo of AnomalyGPT</h1>""")

     with gr.Row():
         with gr.Column(scale=1):
-            with gr.Row(scale=3):
-                image_path = gr.Image(type="filepath", label="Query Image", value=None)
-            with gr.Row(scale=3):
-                normal_img_path = gr.Image(type="filepath", label="Normal Image", value=None)
             with gr.Row():
-                max_length = gr.Slider(0, 512, value=512, step=1.0, label="Maximum length", interactive=True)
+                image_path = gr.Image(type="filepath", label="Query Image", value=examples[0])
             with gr.Row():
-                top_p = gr.Slider(0, 1, value=0.01, step=0.01, label="Top P", interactive=True)
+                normal_img_path = gr.Image(type="filepath", label="Normal Image (optional)", value=None)
+            with gr.Row():
+                gr.Examples(examples=examples, inputs=[image_path])
             with gr.Row():
+                max_length = gr.Slider(0, 512, value=512, step=1.0, label="Max length", interactive=True)
+                top_p = gr.Slider(0, 1, value=0.01, step=0.01, label="Top P", interactive=True)
                 temperature = gr.Slider(0, 1, value=1.0, step=0.01, label="Temperature", interactive=True)


         with gr.Column(scale=3):
             with gr.Row():
                 with gr.Column(scale=6):
-                    chatbot = gr.Chatbot().style(height=415)
+                    chatbot = gr.Chatbot().style(height=440)
                 with gr.Column(scale=4):
                     # gr.Image(output)
-                    image_output = gr.Image(value=get_image, label="Localization Output", every=1.0, shape=[224,224])
+                    image_output = gr.Image(interactive=False, label="Localization Output", type='pil',value=PILImage.open('ffffff.png'))
             with gr.Row():
-                user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=10).style(container=False)
+                user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=12).style(container=False)
             with gr.Row():
                 with gr.Column(scale=2):
                     submitBtn = gr.Button("Submit", variant="primary")
@@ -230,19 +219,22 @@ with gr.Blocks() as demo:
     ], [
         chatbot,
         history,
-        modality_cache
+        modality_cache,
+        image_output
     ],
         show_progress=True
     )

     submitBtn.click(reset_user_input, [], [user_input])
     emptyBtn.click(reset_state, outputs=[
+        user_input,
         image_path,
         normal_img_path,
         chatbot,
         history,
-        modality_cache
+        modality_cache,
+        image_output
     ], show_progress=True)


-demo.queue().launch(server_port=24008)
+demo.queue().launch()
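
The net effect of the app.py changes is that the localization map now flows through Gradio's normal event wiring instead of a module-level global polled by gr.Image(value=get_image, every=1.0): predict returns the PIL image as an extra output bound to image_output, and reset_state returns one value per component it clears, ending with the blank placeholder. A minimal sketch of that pattern, assuming the Gradio 3.x API used here and placeholder logic in place of the real AnomalyGPT model call:

# Sketch only: predict() is a hypothetical stand-in; the real app builds `response` and
# writes the localization map to output.png via OpenLLAMAPEFTModel.
# Assumes output.png and ffffff.png exist in the working directory, as in the Space repo.
import gradio as gr
from PIL import Image as PILImage

def predict(user_input, chatbot, history):
    response = "..."                                     # model call omitted in this sketch
    output = PILImage.open('output.png').convert('L')    # map saved by the model step
    chatbot.append((user_input, response))
    history.append((user_input, response))
    return chatbot, history, output                      # returned directly, no global/polling

def reset_state():
    # One return value per component being cleared, ending with the blank placeholder image.
    return gr.update(value=''), [], [], PILImage.open('ffffff.png')

with gr.Blocks() as demo:
    chatbot = gr.Chatbot().style(height=440)
    image_output = gr.Image(interactive=False, type='pil', value=PILImage.open('ffffff.png'))
    user_input = gr.Textbox(show_label=False, placeholder="Input...").style(container=False)
    history = gr.State([])
    submitBtn = gr.Button("Submit", variant="primary")
    emptyBtn = gr.Button("Clear")

    submitBtn.click(predict, [user_input, chatbot, history], [chatbot, history, image_output])
    emptyBtn.click(reset_state, outputs=[user_input, chatbot, history, image_output])

demo.queue().launch()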
capsule_crack.png ADDED

Git LFS Details

  • SHA256: dd07c258e465acf0dc3770da851f3671fb4721df60bc460e053a95b9b21acccb
  • Pointer size: 132 Bytes
  • Size of remote file: 1.18 MB
carpet_normal.jpg ADDED
hazelnut_cut.png ADDED

Git LFS Details

  • SHA256: cd5d45c2c2a12aa99dac4e084a91fa21948238f660a70578dd28c34f5bb7325c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.25 MB
header.py CHANGED
@@ -25,7 +25,7 @@ import logging
 from copy import deepcopy
 import ipdb
 import argparse
-import data
+from model.ImageBind import data
 from transformers import LlamaTokenizer, LlamaForCausalLM, LlamaConfig
 from torch.nn.utils.rnn import pad_sequence
 from peft import LoraConfig, TaskType, get_peft_model
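
The import fix matters because a bare import data only resolves if model/ImageBind happens to be on sys.path; launching app.py from the repository root needs the package-qualified form. A one-line sketch of the difference (the comments are illustrative, not from the commit):

# Works from the repository root, where `model` is a package directory:
from model.ImageBind import data
# The old `import data` only resolved when model/ImageBind itself was on sys.path.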
model/ImageBind/data.py CHANGED
@@ -23,7 +23,7 @@ from torchvision.transforms._transforms_video import NormalizeVideo

 DEFAULT_AUDIO_FRAME_SHIFT_MS = 10 # in milliseconds

-BPE_PATH = "/data/guzhaopeng/PandaGPT/code/model/ImageBind/bpe/bpe_simple_vocab_16e6.txt.gz"
+BPE_PATH = "./model/ImageBind/bpe/bpe_simple_vocab_16e6.txt.gz"


 def waveform2melspec(waveform, sample_rate, num_mel_bins, target_length):
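
Pointing BPE_PATH at a path relative to the working directory makes the Space portable, but it assumes the app is always launched from the repository root. A working-directory-independent variant (a sketch of an alternative, not what this commit does) would anchor the path to the module file instead:

# Alternative sketch: resolve the BPE vocabulary relative to this module rather than the CWD.
import os
BPE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "bpe", "bpe_simple_vocab_16e6.txt.gz")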
model/openllama.py CHANGED
@@ -10,6 +10,8 @@ import kornia as K

 import torch
 from torch.nn.utils import rnn
+from transformers import AutoConfig, AutoModelForCausalLM
+from accelerate import init_empty_weights, load_checkpoint_and_dispatch, infer_auto_device_map

 CLASS_NAMES = ['bottle', 'cable', 'capsule', 'carpet', 'grid', 'hazelnut', 'leather', 'metal nut', 'pill', 'screw', 'tile', 'toothbrush', 'transistor', 'wood', 'zipper', 'object',
                'candle', 'cashew', 'chewinggum', 'fryum', 'macaroni', 'pcb', 'pipe fryum']
@@ -165,17 +167,21 @@ class OpenLLAMAPEFTModel(nn.Module):
         max_tgt_len = args['max_tgt_len']
         stage = args['stage']

+        self.device = torch.device('cpu') # torch.cuda.current_device()
+
         print (f'Initializing visual encoder from {imagebind_ckpt_path} ...')

         self.visual_encoder, self.visual_hidden_size = imagebind_model.imagebind_huge(args)
+        self.visual_encoder.to(self.device)
         imagebind_ckpt = torch.load(imagebind_ckpt_path, map_location=torch.device('cpu'))
         self.visual_encoder.load_state_dict(imagebind_ckpt, strict=True)
+

         self.iter = 0

-        self.image_decoder = LinearLayer(1280, 1024, 4)
+        self.image_decoder = LinearLayer(1280, 1024, 4).to(self.device)

-        self.prompt_learner = PromptLearner(1, 4096)
+        self.prompt_learner = PromptLearner(1, 4096).to(self.device)

         self.loss_focal = FocalLoss()
         self.loss_dice = BinaryDiceLoss()
@@ -199,11 +205,25 @@ class OpenLLAMAPEFTModel(nn.Module):
             target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj']
         )

-        self.llama_model = LlamaForCausalLM.from_pretrained(vicuna_ckpt_path)
+        # config = AutoConfig.from_pretrained(vicuna_ckpt_path)
+        # with init_empty_weights():
+        #     self.llama_model = AutoModelForCausalLM.from_config(config)
+
+        # # device_map = infer_auto_device_map(self.llama_model, no_split_module_classes=["OPTDecoderLayer"], dtype="float16")
+        # # print(device_map)
+        # device_map = {'model.embed_tokens': 0, 'model.layers.0': 0, 'model.layers.1': 0, 'model.layers.2': 0, 'model.layers.3': 0, 'model.layers.4': 0, 'model.layers.5': 0, 'model.layers.6': 0, 'model.layers.7': 0, 'model.layers.8': 0, 'model.layers.9': 0, 'model.layers.10.self_attn': 0, 'model.layers.10.mlp.gate_proj': 0, 'model.layers.10.mlp.down_proj': 'cpu', 'model.layers.10.mlp.up_proj': 'cpu', 'model.layers.10.mlp.act_fn': 'cpu', 'model.layers.10.input_layernorm': 'cpu', 'model.layers.10.post_attention_layernorm': 'cpu', 'model.layers.11': 'cpu', 'model.layers.12': 'cpu', 'model.layers.13': 'cpu', 'model.layers.14': 'cpu', 'model.layers.15': 'cpu', 'model.layers.16': 'cpu', 'model.layers.17': 'cpu', 'model.layers.18': 'cpu', 'model.layers.19': 'cpu', 'model.layers.20': 'cpu', 'model.layers.21': 'cpu', 'model.layers.22': 'cpu', 'model.layers.23': 'cpu', 'model.layers.24': 'disk', 'model.layers.25': 'disk', 'model.layers.26': 'disk', 'model.layers.27': 'disk', 'model.layers.28': 'disk', 'model.layers.29': 'disk', 'model.layers.30': 'disk', 'model.layers.31.self_attn': 'disk', 'model.layers.31.mlp.gate_proj': 'disk', 'model.layers.31.mlp.down_proj': 'disk', 'model.layers.31.mlp.up_proj': 'disk', 'model.layers.31.mlp.act_fn': 'disk', 'model.layers.31.input_layernorm': 'disk', 'model.layers.31.post_attention_layernorm': 'disk', 'model.norm': 'disk', 'lm_head': 'disk'}
+        # # self.llama_model = load_checkpoint_and_dispatch(self.llama_model, vicuna_ckpt_path, device_map=device_map, offload_folder="offload", offload_state_dict = True)
+        # # self.llama_model.to(torch.float16)
+        # # try:
+        self.llama_model = AutoModelForCausalLM.from_pretrained(vicuna_ckpt_path, torch_dtype=torch.bfloat16, device_map='auto', offload_folder="offload", offload_state_dict = True)
+        # # except:
+        # pass
+        # finally:
+        #     print(self.llama_model.hf_device_map)
         self.llama_model = get_peft_model(self.llama_model, peft_config)
         self.llama_model.print_trainable_parameters()

-        self.llama_tokenizer = LlamaTokenizer.from_pretrained(vicuna_ckpt_path, use_fast=False)
+        self.llama_tokenizer = LlamaTokenizer.from_pretrained(vicuna_ckpt_path, use_fast=False, torch_dtype=torch.bfloat16, device_map='auto', offload_folder="offload", offload_state_dict = True)
         self.llama_tokenizer.pad_token = self.llama_tokenizer.eos_token
         self.llama_tokenizer.padding_side = "right"
         print ('Language decoder initialized.')
@@ -213,7 +233,7 @@ class OpenLLAMAPEFTModel(nn.Module):
         )

         self.max_tgt_len = max_tgt_len
-        self.device = torch.device('cpu')#torch.cuda.current_device()
+


     def rot90_img(self,x,k):
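
The substantive change in model/openllama.py is how the Vicuna-7B backbone is loaded: instead of LlamaForCausalLM.from_pretrained(vicuna_ckpt_path) onto a single device, the commit switches to AutoModelForCausalLM in bfloat16 with device_map='auto', letting accelerate place layers across GPU and CPU and spill whatever does not fit into the offload folder; this is also why accelerate and sentencepiece (required by the slow LlamaTokenizer) are added to requirements.txt below. The torch_dtype/device_map/offload kwargs that the diff also passes to LlamaTokenizer.from_pretrained appear to have no effect for a tokenizer. A stripped-down sketch of the loading pattern, assuming ./pretrained_ckpt/vicuna_ckpt/7b_v0 is a standard Hugging Face checkpoint directory:

# Sketch of accelerate-managed loading; kwargs mirror the ones the commit adopts,
# and the checkpoint path is the one hard-coded in app.py.
import torch
from transformers import AutoModelForCausalLM, LlamaTokenizer

vicuna_ckpt_path = './pretrained_ckpt/vicuna_ckpt/7b_v0'

llama_model = AutoModelForCausalLM.from_pretrained(
    vicuna_ckpt_path,
    torch_dtype=torch.bfloat16,   # keep weights in bf16 to roughly halve memory
    device_map='auto',            # accelerate decides per-layer GPU/CPU placement
    offload_folder='offload',     # layers that fit nowhere are spilled to disk here
    offload_state_dict=True,      # also offload the temporary state dict while loading
)
llama_tokenizer = LlamaTokenizer.from_pretrained(vicuna_ckpt_path, use_fast=False)  # needs sentencepiece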
requirements.txt CHANGED
@@ -23,3 +23,5 @@ torchaudio==0.13.1
 torchvision==0.14.1
 tqdm==4.64.1
 transformers==4.29.1
+sentencepiece
+accelerate