Spaces:

namnh2002
/

video-summarization_timesformer

Sleeping

App Files Files Community

nam_nguyenhoai_AI committed on Jun 7

Commit

4808241

•

1 Parent(s): e16c706

update src

Browse files

Files changed (3) hide show

algorithm.py +2 -44
app.py +11 -18
utils.py +2 -2

algorithm.py CHANGED Viewed

@@ -3,7 +3,7 @@ from sklearn.metrics import pairwise_distances_argmin_min
 import random
 from utils import *
-def kmeans(number_of_clusters, features):
     # Cluster the frames using K-Means
     # K-means from sklearn
@@ -32,49 +32,7 @@ def kmeans(number_of_clusters, features):
     return closest_clips_frames
-def tt01(features, threshold):
-    i = 0
-    clips = []
-    # compare the sum of squared difference between clips i and j
-    for j in range(1, len(features)):
-        if sum_of_squared_difference(features[i], features[j]) > threshold:
-            clip = []
-            # add frames from clip i to j-1 to the clip list
-            for b in range(i*8, j*8):
-                clip.append(b)
-            # randomly select 15% of the frames from the clip list
-            random_num = round(len(clip)*0.15)
-            # sort the frames in the clip list to ensure the order of the frames
-            random_Frames = sorted(random.sample(clip, random_num))
-            i = j
-            clips.extend(random_Frames)
-    # add the last clip to the clip list
-    clip = []
-    if i==j:
-        for c in range(j*8, j*8+8):
-            clip.append(c)
-            random_num = round(len(clip)*0.15)
-            random_Frames = sorted(random.sample(clip, random_num))
-        #print("i == j")
-    else: # (i<j)
-        for c in range(i*8, (j+1)*8):
-            clip.append(c)
-            random_num = round(len(clip)*0.15)
-            random_Frames = sorted(random.sample(clip, random_num))
-        #print(f"{i} with {j}")
-    clips.extend(random_Frames)
-    return clips
-def tt02(features, threshold):
     i = 0
     previous = i

 import random
 from utils import *
+def offline(number_of_clusters, features):
     # Cluster the frames using K-Means
     # K-means from sklearn
     return closest_clips_frames
+def online(features, threshold):
     i = 0
     previous = i

app.py CHANGED Viewed

@@ -6,15 +6,12 @@ import numpy as np
 from utils import *
 from algorithm import *
-def make_video(video_path, outdir='./summarized_video', algorithm='Kmeans', model_version='K600'):
-    if algorithm not in ["Kmeans", "Sum of Squared Difference 01", "Sum of Squared Difference 02"]:
-        algorithm = "Kmeans"
-    if model_version not in ["K600", "K400", "SSv2"]:
-        model_version = "K600"
     # nen them vao cac truong hop mo hinh khac
-    model, processor, device = load_model(model_version)
     # total_params = sum(param.numel() for param in model.parameters())
     # print('Total parameters: {:.2f}M'.format(total_params / 1e6))
@@ -101,12 +98,10 @@ def make_video(video_path, outdir='./summarized_video', algorithm='Kmeans', mode
         print("Shape of each clip: ", features[0].shape)
         selected_frames = []
-        if algorithm == "Kmeans":
-            selected_frames = kmeans(number_of_clusters, features)
-        elif algorithm == "Sum of Squared Difference 01":
-            selected_frames = tt01(features, 400)
         else:
-            selected_frames = tt02(features, 400)
         print("Selected frame: ", selected_frames)
@@ -145,20 +140,18 @@ with gr.Blocks(css=css) as demo:
     with gr.Row():
         input_video = gr.Video(label="Input Video")
-        algorithm_type = gr.Dropdown(["Kmeans", "Sum of Squared Difference 01", "Sum of Squared Difference 02"], type="value", label='Algorithm')
-        model_type = gr.Dropdown(["K600", "K400", "SSv2"], type="value", label='Model Type')
     submit = gr.Button("Submit")
     processed_video = gr.Video(label="Summarized Video")
-    def on_submit(uploaded_video, algorithm_type, model_type):
         print("Algorithm: ", algorithm_type)
-        print("Model Type: ", model_type)
         # Process the video and get the path of the output video
-        output_video_path = make_video(uploaded_video, algorithm=algorithm_type, model_version=model_type)
         return output_video_path
-    submit.click(on_submit, inputs=[input_video, algorithm_type, model_type], outputs=processed_video)
 if __name__ == '__main__':
     demo.queue().launch(share=True)

 from utils import *
 from algorithm import *
+def make_video(video_path, outdir='./summarized_video', algorithm='Offline (KMeans)'):
+    if algorithm not in ["Offline (KMeans)", "Online (Sum of Squared Difference)"]:
+        algorithm = "Offline (KMeans)"
     # nen them vao cac truong hop mo hinh khac
+    model, processor, device = load_model()
     # total_params = sum(param.numel() for param in model.parameters())
     # print('Total parameters: {:.2f}M'.format(total_params / 1e6))
         print("Shape of each clip: ", features[0].shape)
         selected_frames = []
+        if algorithm == "Offline (KMeans)":
+            selected_frames = offline(number_of_clusters, features)
         else:
+            selected_frames = online(features, 400)
         print("Selected frame: ", selected_frames)
     with gr.Row():
         input_video = gr.Video(label="Input Video")
+        algorithm_type = gr.Dropdown(["Offline (KMeans)", "Online (Sum of Squared Difference)"], type="value", label='Algorithm')
     submit = gr.Button("Submit")
     processed_video = gr.Video(label="Summarized Video")
+    def on_submit(uploaded_video, algorithm_type):
         print("Algorithm: ", algorithm_type)
         # Process the video and get the path of the output video
+        output_video_path = make_video(uploaded_video, algorithm=algorithm_type)
         return output_video_path
+    submit.click(on_submit, inputs=[input_video, algorithm_type], outputs=processed_video)
 if __name__ == '__main__':
     demo.queue().launch(share=True)

utils.py CHANGED Viewed

@@ -52,10 +52,10 @@ def to_video(selected_frames, frames, output_path, video_fps):
     video_writer.release()
     print("Completed summarizing the video (wait for a moment to load).")
-def load_model(model_version):
     try:
         DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
-        model = TimesformerModel.from_pretrained(f"facebook/timesformer-base-finetuned-{model_version}")
         processor=VideoMAEImageProcessor.from_pretrained("MCG-NJU/videomae-base")
         return model, processor, DEVICE

     video_writer.release()
     print("Completed summarizing the video (wait for a moment to load).")
+def load_model():
     try:
         DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
+        model = TimesformerModel.from_pretrained(f"facebook/timesformer-base-finetuned-k600")
         processor=VideoMAEImageProcessor.from_pretrained("MCG-NJU/videomae-base")
         return model, processor, DEVICE