Spaces:

ruslanmv
/

TextToVideo-Dalle

Runtime error

App Files Files Community

Ruslan Magana Vsevolodovna committed on Aug 21, 2022

Commit

4b68d40

•

1 Parent(s): 2cc68f0

fixing gpu

Browse files

Files changed (2) hide show

app.py +15 -17
requirements.txt +5 -3

app.py CHANGED Viewed

@@ -3,15 +3,12 @@ from moviepy.editor import *
 from PIL import Image
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM,pipeline
 import gradio as gr
-import torch
-from huggingface_hub import snapshot_download
-from PIL import Image
 from min_dalle import MinDalle
-import torch
 from PIL import Image, ImageDraw, ImageFont
 import textwrap
 from mutagen.mp3 import MP3
-# to speech conversion
 from gtts import gTTS
 from pydub import AudioSegment
 from os import getcwd
@@ -23,13 +20,14 @@ title = "Video Story Generator with Audio by using dalle-mini and distilbart and
 tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
 model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-cnn-12-6")
-#device = "cuda:0" if torch.cuda.is_available() else "cpu"
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 print(device)
-#device = torch.device('cuda')
-# transfer model
-#model.to(device)
 def get_output_video(text):
   inputs = tokenizer(text,
@@ -62,13 +60,12 @@ def get_output_video(text):
       model = MinDalle(
           is_mega=is_mega,
           models_root=models_root,
-          is_reusable=False,
           is_verbose=True,
-          #dtype=torch.float16 if fp16 else torch.float32
-          dtype=torch.float32,
-          #dtype=torch.float16,
-          device='cpu' #'cuda'
       )
       image = model.generate_image(
           text,
@@ -86,11 +83,12 @@ def get_output_video(text):
       is_mega= True,
       text=senten,
       seed=1,
-      grid_size=1,
-      top_k=256,
       image_path='generated',
       models_root='pretrained',
-      fp16=256,)
     generated_images.append(image)
   # Step 4- Creation of the subtitles

 from PIL import Image
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM,pipeline
 import gradio as gr
+import torch, torch.backends.cudnn, torch.backends.cuda
 from min_dalle import MinDalle
+from huggingface_hub import snapshot_download
 from PIL import Image, ImageDraw, ImageFont
 import textwrap
 from mutagen.mp3 import MP3
 from gtts import gTTS
 from pydub import AudioSegment
 from os import getcwd
 tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
 model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-cnn-12-6")
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 print(device)
def log_gpu_memory():
    """Print the current ``nvidia-smi`` report, or a notice if it cannot run.

    This is a diagnostic helper only. It never raises for a missing or
    failing ``nvidia-smi``, so the application can still start on hosts
    where the device selection falls back to CPU.

    Returns:
        None
    """
    # Imported locally so the helper is self-contained and does not depend
    # on the module header importing ``subprocess``.
    import subprocess

    try:
        report = subprocess.check_output(['nvidia-smi'])
    except (OSError, subprocess.CalledProcessError) as exc:
        # OSError covers a missing/non-executable binary (CPU-only host);
        # CalledProcessError covers a non-zero exit (e.g. driver problems).
        print(f'nvidia-smi unavailable, skipping GPU memory log: {exc}')
        return
    # Replace undecodable bytes instead of failing on unexpected output.
    print(report.decode('utf-8', errors='replace'))


log_gpu_memory()
 def get_output_video(text):
   inputs = tokenizer(text,
       model = MinDalle(
           is_mega=is_mega,
           models_root=models_root,
+          is_reusable=True,
           is_verbose=True,
+          dtype=torch.float16 if fp16 else torch.float32 #param ["float32", "float16", "bfloat16"] #float32 is faster than float16 but uses more GPU memory.
+          device='cuda' #'cpu'
       )
+      log_gpu_memory()
       image = model.generate_image(
           text,
       is_mega= True,
       text=senten,
       seed=1,
+      grid_size=1, #param {type:"integer"}
+      top_k=128, #param {type:"integer"}
       image_path='generated',
       models_root='pretrained',
+      fp16=256,)
     generated_images.append(image)
   # Step 4- Creation of the subtitles

requirements.txt CHANGED Viewed

@@ -1,7 +1,7 @@
 gradio
-min-dalle
 transformers
-torch
 requests
 moviepy
 huggingface_hub
@@ -12,4 +12,6 @@ gTTS
 mutagen
 nltk
 accelerate
-nvidia-ml-py3

+min-dalle==0.4.6
+emoji==1.7.0
 gradio
 transformers
 requests
 moviepy
 huggingface_hub
 mutagen
 nltk
 accelerate
+nvidia-ml-py3
+--find-links https://download.pytorch.org/whl/torch_stable.html
+torch==1.12.1+cu116