Merge branch 'main' of https://huggingface.co/spaces/FantasticGNU/AnomalyGPT
Files changed:
- .gitattributes +2 -0
- README.md +5 -0
- app.py +24 -32
- capsule_crack.png +3 -0
- carpet_normal.jpg +0 -0
- hazelnut_cut.png +3 -0
- header.py +1 -1
- model/ImageBind/data.py +1 -1
- model/openllama.py +25 -5
- requirements.txt +2 -0
.gitattributes
CHANGED
@@ -36,3 +36,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
+hazelnut_cut.png filter=lfs diff=lfs merge=lfs -text
+capsule_crack.png filter=lfs diff=lfs merge=lfs -text
README.md
ADDED
@@ -0,0 +1,5 @@
+---
+license: cc-by-sa-4.0
+title: AnomalyGPT
+sdk: gradio
+---
app.py
CHANGED
@@ -14,14 +14,13 @@ args = {
     'model': 'openllama_peft',
     'imagebind_ckpt_path': './pretrained_ckpt/imagebind_ckpt/imagebind_huge.pth',
     'vicuna_ckpt_path': './pretrained_ckpt/vicuna_ckpt/7b_v0',
-    'anomalygpt_ckpt_path': './ckpt/
+    'anomalygpt_ckpt_path': './ckpt/train_supervised/pytorch_model.pt',
     'delta_ckpt_path': './pretrained_ckpt/pandagpt_ckpt/7b/pytorch_model.pt',
     'stage': 2,
     'max_tgt_len': 128,
     'lora_r': 32,
     'lora_alpha': 32,
-    'lora_dropout': 0.1
-    'layers': [7,15,23,31]
+    'lora_dropout': 0.1
 }
 
 model = OpenLLAMAPEFTModel(**args)
@@ -29,10 +28,9 @@ delta_ckpt = torch.load(args['delta_ckpt_path'], map_location=torch.device('cpu'))
 model.load_state_dict(delta_ckpt, strict=False)
 delta_ckpt = torch.load(args['anomalygpt_ckpt_path'], map_location=torch.device('cpu'))
 model.load_state_dict(delta_ckpt, strict=False)
-model = model.eval()
-
-
-output = None
+model = model.eval().to(torch.bfloat16)#.half()#.cuda()
+# model.image_decoder = model.image_decoder.cuda()
+# model.prompt_learner = model.prompt_learner.cuda()
 
 """Override Chatbot.postprocess"""
 def postprocess(self, y):
@@ -127,7 +125,7 @@ def predict(
     history.append((input, response))
 
 
-    plt.imshow(pixel_output.reshape(224,224).detach().cpu(), cmap='binary_r')
+    plt.imshow(pixel_output.to(torch.float16).reshape(224,224).detach().cpu(), cmap='binary_r')
     plt.axis('off')
     plt.savefig('output.png',bbox_inches='tight',pad_inches = 0)
 
@@ -156,57 +154,48 @@ def predict(
     eroded_image = cv2.erode(image, kernel, iterations=1)
     cv2.imwrite('output.png', eroded_image)
 
-    global output
     output = PILImage.open('output.png').convert('L')
 
 
-    return chatbot, history, modality_cache
+    return chatbot, history, modality_cache, output
 
 
-def get_image():
-    global output
-    return output if output else "ffffff.png"
-
 
 def reset_user_input():
     return gr.update(value='')
 
-def reset_dialog():
-    return [], []
 
 def reset_state():
-
-    output = None
-    return None, None, [], [], []
-
+    return gr.update(value=''), None, None, [], [], [], PILImage.open('ffffff.png')
 
+examples = ['hazelnut_cut.png','capsule_crack.png','carpet_normal.jpg']
 
 with gr.Blocks() as demo:
     gr.HTML("""<h1 align="center">Demo of AnomalyGPT</h1>""")
 
     with gr.Row():
         with gr.Column(scale=1):
-            with gr.Row(scale=3):
-                image_path = gr.Image(type="filepath", label="Query Image", value=None)
-            with gr.Row(scale=3):
-                normal_img_path = gr.Image(type="filepath", label="Normal Image", value=None)
             with gr.Row():
-
+                image_path = gr.Image(type="filepath", label="Query Image", value=examples[0])
             with gr.Row():
-
+                normal_img_path = gr.Image(type="filepath", label="Normal Image (optional)", value=None)
+            with gr.Row():
+                gr.Examples(examples=examples, inputs=[image_path])
             with gr.Row():
+                max_length = gr.Slider(0, 512, value=512, step=1.0, label="Max length", interactive=True)
+                top_p = gr.Slider(0, 1, value=0.01, step=0.01, label="Top P", interactive=True)
                 temperature = gr.Slider(0, 1, value=1.0, step=0.01, label="Temperature", interactive=True)
 
 
         with gr.Column(scale=3):
            with gr.Row():
                with gr.Column(scale=6):
-                    chatbot = gr.Chatbot().style(height=
+                    chatbot = gr.Chatbot().style(height=440)
                with gr.Column(scale=4):
                     # gr.Image(output)
-                    image_output = gr.Image(
+                    image_output = gr.Image(interactive=False, label="Localization Output", type='pil',value=PILImage.open('ffffff.png'))
            with gr.Row():
-                user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=
+                user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=12).style(container=False)
            with gr.Row():
                with gr.Column(scale=2):
                     submitBtn = gr.Button("Submit", variant="primary")
@@ -230,19 +219,22 @@ with gr.Blocks() as demo:
     ], [
         chatbot,
         history,
-        modality_cache
+        modality_cache,
+        image_output
     ],
     show_progress=True
     )
 
     submitBtn.click(reset_user_input, [], [user_input])
    emptyBtn.click(reset_state, outputs=[
+        user_input,
         image_path,
         normal_img_path,
         chatbot,
         history,
-        modality_cache
+        modality_cache,
+        image_output
     ], show_progress=True)
 
 
-demo.queue().launch(
+demo.queue().launch()
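A note on the dtype changes in app.py above: the model now runs in bfloat16 on CPU, and the predicted anomaly map is cast to float16 before plotting because NumPy has no bfloat16 dtype, so matplotlib cannot consume the tensor directly. A minimal sketch of that step, using a random placeholder in place of the real pixel_output:

import torch
import matplotlib.pyplot as plt

# Placeholder for the 224x224 pixel-level anomaly map (bfloat16, as in app.py).
pixel_output = torch.rand(1, 1, 224, 224).to(torch.bfloat16)

# Cast before handing the tensor to matplotlib; NumPy cannot represent bfloat16,
# so float16 (as in the diff) or float32 both work for plotting.
heatmap = pixel_output.to(torch.float16).reshape(224, 224).detach().cpu()

plt.imshow(heatmap, cmap='binary_r')
plt.axis('off')
plt.savefig('output.png', bbox_inches='tight', pad_inches=0)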
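The Gradio changes route the localization mask through the event system instead of a module-level global: predict() returns the PIL mask as an extra output mapped to image_output, reset_state() returns one value per component in the emptyBtn outputs list, and gr.Examples pre-fills the query image. A rough, self-contained sketch of that wiring pattern, assuming Gradio 3.x as used by this Space; predict_stub and the Clear button are stand-ins for the app's real predict() and emptyBtn:

import gradio as gr
from PIL import Image as PILImage

examples = ['hazelnut_cut.png', 'capsule_crack.png', 'carpet_normal.jpg']
# The Space ships these images in this commit; create blank stand-ins so the
# sketch also runs outside the repository.
for name in examples:
    PILImage.new('RGB', (224, 224), 'white').save(name)

def predict_stub(query, history):
    # Stand-in for the real predict(): appends a chat turn and returns a PIL
    # mask as the extra value that Gradio routes into image_output.
    history = history + [(query, "stub response")]
    return history, history, PILImage.new('L', (224, 224), 255)

def reset_state():
    # One return value per component listed in outputs=, in the same order.
    return gr.update(value=''), None, [], [], PILImage.new('L', (224, 224), 255)

with gr.Blocks() as demo:
    image_path = gr.Image(type="filepath", label="Query Image", value=examples[0])
    gr.Examples(examples=examples, inputs=[image_path])
    chatbot = gr.Chatbot()
    image_output = gr.Image(interactive=False, label="Localization Output", type='pil')
    user_input = gr.Textbox(show_label=False, placeholder="Input...")
    history = gr.State([])

    user_input.submit(predict_stub, [user_input, history],
                      [chatbot, history, image_output])
    gr.Button("Clear").click(reset_state,
                             outputs=[user_input, image_path, chatbot, history, image_output])

# demo.queue().launch()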
capsule_crack.png
ADDED (Git LFS)
carpet_normal.jpg
ADDED
hazelnut_cut.png
ADDED (Git LFS)
header.py
CHANGED
@@ -25,7 +25,7 @@ import logging
 from copy import deepcopy
 import ipdb
 import argparse
-import data
+from model.ImageBind import data
 from transformers import LlamaTokenizer, LlamaForCausalLM, LlamaConfig
 from torch.nn.utils.rnn import pad_sequence
 from peft import LoraConfig, TaskType, get_peft_model
model/ImageBind/data.py
CHANGED
@@ -23,7 +23,7 @@ from torchvision.transforms._transforms_video import NormalizeVideo
 
 DEFAULT_AUDIO_FRAME_SHIFT_MS = 10  # in milliseconds
 
-BPE_PATH = "
+BPE_PATH = "./model/ImageBind/bpe/bpe_simple_vocab_16e6.txt.gz"
 
 
 def waveform2melspec(waveform, sample_rate, num_mel_bins, target_length):
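The new BPE_PATH resolves against the process working directory, which works because app.py is launched from the repository root. A small alternative sketch (not what this commit does) that resolves the vocabulary file relative to data.py itself, so the import is independent of the CWD:

import os

# Resolve the BPE vocabulary next to this module rather than the current
# working directory; the committed code hard-codes "./model/ImageBind/..." instead.
BPE_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                        "bpe", "bpe_simple_vocab_16e6.txt.gz")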
model/openllama.py
CHANGED
@@ -10,6 +10,8 @@ import kornia as K
 
 import torch
 from torch.nn.utils import rnn
+from transformers import AutoConfig, AutoModelForCausalLM
+from accelerate import init_empty_weights, load_checkpoint_and_dispatch, infer_auto_device_map
 
 CLASS_NAMES = ['bottle', 'cable', 'capsule', 'carpet', 'grid', 'hazelnut', 'leather', 'metal nut', 'pill', 'screw', 'tile', 'toothbrush', 'transistor', 'wood', 'zipper', 'object',
                'candle', 'cashew', 'chewinggum', 'fryum', 'macaroni', 'pcb', 'pipe fryum']
@@ -165,17 +167,21 @@ class OpenLLAMAPEFTModel(nn.Module):
         max_tgt_len = args['max_tgt_len']
         stage = args['stage']
 
+        self.device = torch.device('cpu') # torch.cuda.current_device()
+
         print (f'Initializing visual encoder from {imagebind_ckpt_path} ...')
 
         self.visual_encoder, self.visual_hidden_size = imagebind_model.imagebind_huge(args)
+        self.visual_encoder.to(self.device)
         imagebind_ckpt = torch.load(imagebind_ckpt_path, map_location=torch.device('cpu'))
         self.visual_encoder.load_state_dict(imagebind_ckpt, strict=True)
+
 
         self.iter = 0
 
-        self.image_decoder = LinearLayer(1280, 1024, 4)
+        self.image_decoder = LinearLayer(1280, 1024, 4).to(self.device)
 
-        self.prompt_learner = PromptLearner(1, 4096)
+        self.prompt_learner = PromptLearner(1, 4096).to(self.device)
 
         self.loss_focal = FocalLoss()
         self.loss_dice = BinaryDiceLoss()
@@ -199,11 +205,25 @@ class OpenLLAMAPEFTModel(nn.Module):
             target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj']
         )
 
-
+        # config = AutoConfig.from_pretrained(vicuna_ckpt_path)
+        # with init_empty_weights():
+        #     self.llama_model = AutoModelForCausalLM.from_config(config)
+
+        # # device_map = infer_auto_device_map(self.llama_model, no_split_module_classes=["OPTDecoderLayer"], dtype="float16")
+        # # print(device_map)
+        # device_map = {'model.embed_tokens': 0, 'model.layers.0': 0, 'model.layers.1': 0, 'model.layers.2': 0, 'model.layers.3': 0, 'model.layers.4': 0, 'model.layers.5': 0, 'model.layers.6': 0, 'model.layers.7': 0, 'model.layers.8': 0, 'model.layers.9': 0, 'model.layers.10.self_attn': 0, 'model.layers.10.mlp.gate_proj': 0, 'model.layers.10.mlp.down_proj': 'cpu', 'model.layers.10.mlp.up_proj': 'cpu', 'model.layers.10.mlp.act_fn': 'cpu', 'model.layers.10.input_layernorm': 'cpu', 'model.layers.10.post_attention_layernorm': 'cpu', 'model.layers.11': 'cpu', 'model.layers.12': 'cpu', 'model.layers.13': 'cpu', 'model.layers.14': 'cpu', 'model.layers.15': 'cpu', 'model.layers.16': 'cpu', 'model.layers.17': 'cpu', 'model.layers.18': 'cpu', 'model.layers.19': 'cpu', 'model.layers.20': 'cpu', 'model.layers.21': 'cpu', 'model.layers.22': 'cpu', 'model.layers.23': 'cpu', 'model.layers.24': 'disk', 'model.layers.25': 'disk', 'model.layers.26': 'disk', 'model.layers.27': 'disk', 'model.layers.28': 'disk', 'model.layers.29': 'disk', 'model.layers.30': 'disk', 'model.layers.31.self_attn': 'disk', 'model.layers.31.mlp.gate_proj': 'disk', 'model.layers.31.mlp.down_proj': 'disk', 'model.layers.31.mlp.up_proj': 'disk', 'model.layers.31.mlp.act_fn': 'disk', 'model.layers.31.input_layernorm': 'disk', 'model.layers.31.post_attention_layernorm': 'disk', 'model.norm': 'disk', 'lm_head': 'disk'}
+        # # self.llama_model = load_checkpoint_and_dispatch(self.llama_model, vicuna_ckpt_path, device_map=device_map, offload_folder="offload", offload_state_dict = True)
+        # # self.llama_model.to(torch.float16)
+        # # try:
+        self.llama_model = AutoModelForCausalLM.from_pretrained(vicuna_ckpt_path, torch_dtype=torch.bfloat16, device_map='auto', offload_folder="offload", offload_state_dict = True)
+        # # except:
+        #     pass
+        # finally:
+        #     print(self.llama_model.hf_device_map)
         self.llama_model = get_peft_model(self.llama_model, peft_config)
         self.llama_model.print_trainable_parameters()
 
-        self.llama_tokenizer = LlamaTokenizer.from_pretrained(vicuna_ckpt_path, use_fast=False)
+        self.llama_tokenizer = LlamaTokenizer.from_pretrained(vicuna_ckpt_path, use_fast=False, torch_dtype=torch.bfloat16, device_map='auto', offload_folder="offload", offload_state_dict = True)
         self.llama_tokenizer.pad_token = self.llama_tokenizer.eos_token
         self.llama_tokenizer.padding_side = "right"
         print ('Language decoder initialized.')
@@ -213,7 +233,7 @@ class OpenLLAMAPEFTModel(nn.Module):
         )
 
         self.max_tgt_len = max_tgt_len
-
+
 
 
     def rot90_img(self,x,k):
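The openllama.py change replaces the plain LlamaForCausalLM load with AutoModelForCausalLM plus accelerate-backed dispatch, so the 7B Vicuna weights are loaded in bfloat16 and split across the available devices with CPU/disk offload instead of being materialized in fp32 on a single device. A minimal sketch of that loading pattern, assuming a local Vicuna-7B checkpoint directory; the tokenizer itself is plain Python on CPU and needs no dtype or device placement:

import torch
from transformers import AutoModelForCausalLM, LlamaTokenizer

# Assumed local checkpoint path; any Vicuna-7B directory would do.
vicuna_ckpt_path = './pretrained_ckpt/vicuna_ckpt/7b_v0'

# device_map='auto' lets accelerate place layers on GPU/CPU (and disk via
# offload_folder) instead of loading the whole model onto one device.
llama_model = AutoModelForCausalLM.from_pretrained(
    vicuna_ckpt_path,
    torch_dtype=torch.bfloat16,
    device_map='auto',
    offload_folder='offload',
    offload_state_dict=True,
)

# The slow (sentencepiece-based) tokenizer, as selected by use_fast=False.
llama_tokenizer = LlamaTokenizer.from_pretrained(vicuna_ckpt_path, use_fast=False)
llama_tokenizer.pad_token = llama_tokenizer.eos_token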
requirements.txt
CHANGED
@@ -23,3 +23,5 @@ torchaudio==0.13.1
 torchvision==0.14.1
 tqdm==4.64.1
 transformers==4.29.1
+sentencepiece
+accelerate
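The two new requirements back the code changes above: sentencepiece is needed by the slow LlamaTokenizer (use_fast=False), and accelerate provides the device_map='auto' dispatch and offloading. A tiny, optional sanity check for the Space's environment:

# Both packages are pulled in indirectly by transformers at model/tokenizer load time.
import sentencepiece
import accelerate

print("sentencepiece", sentencepiece.__version__)
print("accelerate", accelerate.__version__)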