import ffmpeg
import numpy as np
import torch

# Side length of the square center crop fed to the model (pixels).
size = 224


def get_video_dim(video_path):
    """Probe *video_path* with ffprobe and return (height, width, frame_rate).

    Raises ValueError if the file contains no video stream.
    """
    probe = ffmpeg.probe(video_path)
    video_stream = next(
        (stream for stream in probe["streams"] if stream["codec_type"] == "video"),
        None,
    )
    if video_stream is None:
        raise ValueError(f"no video stream found in {video_path!r}")
    width = int(video_stream["width"])
    height = int(video_stream["height"])
    # avg_frame_rate is a rational string like "30000/1001".
    num, denom = video_stream["avg_frame_rate"].split("/")
    frame_rate = int(num) / int(denom)
    return height, width, frame_rate


def get_output_dim(h, w):
    """Return the (height, width) to scale to so the shorter side equals `size`.

    Preserves aspect ratio; if `size` is ever set to an explicit (h, w) tuple,
    that tuple is returned unchanged.

    NOTE(review): the original signature was `get_output_dim(self, h, w)` and
    read `self.size`, but it was called as a plain two-argument function —
    a leftover from a class method. Fixed to use the module-level `size`.
    """
    if isinstance(size, tuple) and len(size) == 2:
        return size
    if h >= w:
        return int(h * size / w), size
    return size, int(w * size / h)


def load_video(video_path):
    """Decode *video_path* at 1 fps into a center-cropped float32 tensor.

    Pipeline: probe dimensions -> rescale so the short side is `size` ->
    center-crop to size x size -> decode raw RGB24 frames via an ffmpeg pipe.

    Returns a torch.FloatTensor of shape (num_frames, size, size, 3), RGB,
    values in [0, 255].

    NOTE(review): the original file ran these statements at module level
    against an undefined `video_path`; wrapping them in a function is the fix.
    """
    h, w, _frame_rate = get_video_dim(video_path)
    height, width = get_output_dim(h, w)

    # Center-crop offsets in the *scaled* frame.
    x = int((width - size) / 2.0)
    y = int((height - size) / 2.0)

    cmd = (
        ffmpeg.input(video_path)
        .filter("fps", fps=1)
        .filter("scale", width, height)
        .crop(x, y, size, size)
    )
    out, _ = cmd.output("pipe:", format="rawvideo", pix_fmt="rgb24").run(
        capture_stdout=True, quiet=True
    )

    # Raw RGB24 bytes -> (frames, size, size, 3); astype copies, so the
    # read-only frombuffer view is never mutated.
    video = np.frombuffer(out, np.uint8).reshape([-1, size, size, 3])
    return torch.from_numpy(video.astype("float32"))