import ffmpeg
import numpy as np
import torch

# Side length of the square center crop fed to the model (pixels).
size = 224


def get_video_dim(video_path):
    """Probe *video_path* with ffprobe and return (height, width, frame_rate).

    Raises ValueError if the file contains no video stream.
    """
    probe = ffmpeg.probe(video_path)
    video_stream = next(
        (stream for stream in probe["streams"] if stream["codec_type"] == "video"),
        None,
    )
    if video_stream is None:
        raise ValueError(f"no video stream found in {video_path!r}")
    width = int(video_stream["width"])
    height = int(video_stream["height"])
    # avg_frame_rate is a rational string like "30000/1001".
    num, denom = video_stream["avg_frame_rate"].split("/")
    frame_rate = int(num) / int(denom)
    return height, width, frame_rate


def get_output_dim(h, w):
    """Return the (height, width) to scale to so the shorter side equals `size`.

    Preserves aspect ratio; if `size` is ever set to an explicit (h, w) tuple,
    that tuple is returned unchanged.

    NOTE(review): the original signature was `get_output_dim(self, h, w)` and
    read `self.size`, but it was called as a plain two-argument function —
    a leftover from a class method. Fixed to use the module-level `size`.
    """
    if isinstance(size, tuple) and len(size) == 2:
        return size
    if h >= w:
        return int(h * size / w), size
    return size, int(w * size / h)


def load_video(video_path):
    """Decode *video_path* at 1 fps into a center-cropped float32 tensor.

    Pipeline: probe dimensions -> rescale so the short side is `size` ->
    center-crop to size x size -> decode raw RGB24 frames via an ffmpeg pipe.

    Returns a torch.FloatTensor of shape (num_frames, size, size, 3), RGB,
    values in [0, 255].

    NOTE(review): the original file ran these statements at module level
    against an undefined `video_path`; wrapping them in a function is the fix.
    """
    h, w, _frame_rate = get_video_dim(video_path)
    height, width = get_output_dim(h, w)

    # Center-crop offsets in the *scaled* frame.
    x = int((width - size) / 2.0)
    y = int((height - size) / 2.0)

    cmd = (
        ffmpeg.input(video_path)
        .filter("fps", fps=1)
        .filter("scale", width, height)
        .crop(x, y, size, size)
    )
    out, _ = cmd.output("pipe:", format="rawvideo", pix_fmt="rgb24").run(
        capture_stdout=True, quiet=True
    )

    # Raw RGB24 bytes -> (frames, size, size, 3); astype copies, so the
    # read-only frombuffer view is never mutated.
    video = np.frombuffer(out, np.uint8).reshape([-1, size, size, 3])
    return torch.from_numpy(video.astype("float32"))