LFS dat
- .gitattributes +2 -0
- Dockerfile +54 -0
- LICENSE +21 -0
- README.md +171 -13
- __pycache__/kernel_utils.cpython-310.pyc +0 -0
- app.py +86 -0
- configs/b5.json +28 -0
- configs/b7.json +29 -0
- download_weights.sh +9 -0
- examples/liuujwwgpr.mp4 +3 -0
- examples/nlurbvsozt.mp4 +3 -0
- examples/rfjuhbnlro.mp4 +3 -0
- kernel_utils.py +365 -0
- libs/shape_predictor_68_face_landmarks.dat +3 -0
- training/__init__.py +0 -0
- training/__pycache__/__init__.cpython-310.pyc +0 -0
- training/__pycache__/__init__.cpython-39.pyc +0 -0
- training/__pycache__/losses.cpython-310.pyc +0 -0
- training/__pycache__/losses.cpython-39.pyc +0 -0
- training/datasets/__init__.py +0 -0
- training/datasets/__pycache__/__init__.cpython-310.pyc +0 -0
- training/datasets/__pycache__/classifier_dataset.cpython-310.pyc +0 -0
- training/datasets/__pycache__/validation_set.cpython-310.pyc +0 -0
- training/datasets/classifier_dataset.py +384 -0
- training/datasets/validation_set.py +60 -0
- training/losses.py +28 -0
- training/pipelines/__init__.py +0 -0
- training/pipelines/train_classifier.py +364 -0
- training/tools/__init__.py +0 -0
- training/tools/__pycache__/__init__.cpython-310.pyc +0 -0
- training/tools/__pycache__/config.cpython-310.pyc +0 -0
- training/tools/__pycache__/schedulers.cpython-310.pyc +0 -0
- training/tools/__pycache__/utils.cpython-310.pyc +0 -0
- training/tools/config.py +43 -0
- training/tools/schedulers.py +46 -0
- training/tools/utils.py +121 -0
- training/transforms/__init__.py +0 -0
- training/transforms/__pycache__/__init__.cpython-310.pyc +0 -0
- training/transforms/__pycache__/albu.cpython-310.pyc +0 -0
- training/transforms/albu.py +100 -0
- training/zoo/__init__.py +0 -0
- training/zoo/__pycache__/__init__.cpython-310.pyc +0 -0
- training/zoo/__pycache__/classifiers.cpython-310.pyc +0 -0
- training/zoo/classifiers.py +172 -0
- training/zoo/unet.py +151 -0
- weights/.gitkeep +0 -0
- weights/b7_ns_best.pth +3 -0
.gitattributes
CHANGED
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+*.dat filter=lfs diff=lfs merge=lfs -text
Dockerfile
ADDED
@@ -0,0 +1,54 @@
ARG PYTORCH="1.10.0"
ARG CUDA="11.3"
ARG CUDNN="8"

FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel

ENV TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
ENV CMAKE_PREFIX_PATH="$(dirname $(which conda))/../"

# Setting noninteractive build, setting up tzdata and configuring timezones
ENV DEBIAN_FRONTEND=noninteractive
ENV TZ=Europe/Berlin
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

RUN apt-get update && apt-get install -y libglib2.0-0 libsm6 libxrender-dev libxext6 nano mc glances vim git \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install cython
RUN conda install cython -y && conda clean --all

# Installing APEX
RUN pip install -U pip
RUN git clone https://github.com/NVIDIA/apex
RUN sed -i 's/check_cuda_torch_binary_vs_bare_metal(torch.utils.cpp_extension.CUDA_HOME)/pass/g' apex/setup.py
RUN pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./apex
RUN apt-get update -y
RUN apt-get install build-essential cmake -y
RUN apt-get install libopenblas-dev liblapack-dev -y
RUN apt-get install libx11-dev libgtk-3-dev -y
RUN pip install dlib
RUN pip install facenet-pytorch
RUN pip install albumentations==1.0.0 timm==0.4.12 pytorch_toolbelt tensorboardx
RUN pip install cython jupyter jupyterlab ipykernel matplotlib tqdm pandas

# Download pretrained ImageNet models
RUN apt install wget
RUN wget https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b7_ns-1dbc32de.pth -P /root/.cache/torch/hub/checkpoints/
RUN wget https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b5_ns-6f26d0cf.pth -P /root/.cache/torch/hub/checkpoints/

# Setting the working directory
WORKDIR /workspace

# Copying the required codebase
COPY . /workspace

RUN chmod 777 preprocess_data.sh
RUN chmod 777 train.sh
RUN chmod 777 predict_submission.sh

ENV PYTHONPATH=.

CMD ["/bin/bash"]
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2020 Selim Seferbekov

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
CHANGED
@@ -1,13 +1,171 @@
## DeepFake Detection (DFDC) Solution by @selimsef

## Challenge details:

[Kaggle Challenge Page](https://www.kaggle.com/c/deepfake-detection-challenge)


### Fake detection articles
- [The Deepfake Detection Challenge (DFDC) Preview Dataset](https://arxiv.org/abs/1910.08854)
- [Deep Fake Image Detection Based on Pairwise Learning](https://www.mdpi.com/2076-3417/10/1/370)
- [DeeperForensics-1.0: A Large-Scale Dataset for Real-World Face Forgery Detection](https://arxiv.org/abs/2001.03024)
- [DeepFakes and Beyond: A Survey of Face Manipulation and Fake Detection](https://arxiv.org/abs/2001.00179)
- [Real or Fake? Spoofing State-Of-The-Art Face Synthesis Detection Systems](https://arxiv.org/abs/1911.05351)
- [CNN-generated images are surprisingly easy to spot... for now](https://arxiv.org/abs/1912.11035)
- [FakeSpotter: A Simple yet Robust Baseline for Spotting AI-Synthesized Fake Faces](https://arxiv.org/abs/1909.06122)
- [FakeLocator: Robust Localization of GAN-Based Face Manipulations via Semantic Segmentation Networks with Bells and Whistles](https://arxiv.org/abs/2001.09598)
- [Media Forensics and DeepFakes: an overview](https://arxiv.org/abs/2001.06564)
- [Face X-ray for More General Face Forgery Detection](https://arxiv.org/abs/1912.13458)

## Solution description
In general, the solution is based on a frame-by-frame classification approach. More complex approaches did not work as well on the public leaderboard.

### Face-Detector
The MTCNN detector was chosen due to kernel time limits. The S3FD detector would be preferable as it is more precise and robust, but the open-source PyTorch implementations lack a license.

The input size for the face detector was calculated per video, depending on its resolution (a minimal sketch of the rule is shown after this list):

- 2x scale for videos whose wider side is less than 300 pixels
- no rescale for videos with a wider side between 300 and 1000 pixels
- 0.5x scale for videos with a wider side > 1000 pixels
- 0.33x scale for videos with a wider side > 1900 pixels
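As a reference, a minimal sketch of this scaling rule (the helper name `detector_scale` and its exact thresholds, reproduced from the list above, are illustrative; the actual logic lives in the preprocessing/inference code):

```python
def detector_scale(width: int, height: int) -> float:
    """Pick a resize factor for the face detector based on the wider video side."""
    wider_side = max(width, height)
    if wider_side < 300:
        return 2.0   # upscale small videos so faces are large enough to detect
    if wider_side > 1900:
        return 0.33  # aggressively downscale very large videos
    if wider_side > 1000:
        return 0.5   # downscale full-HD-ish videos
    return 1.0       # 300..1000 px: keep the original resolution
```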

### Input size
As soon as I discovered that EfficientNets significantly outperform other encoders, I used only them in my solution.
Since I started with B4, I decided to use the "native" input size for that network (380x380).
Due to memory constraints I did not increase the input size even for the B7 encoder.

### Margin
When generating crops for training I added 30% of the face crop size on each side and used only this setting during the competition.
See [extract_crops.py](preprocessing/extract_crops.py) for the details.

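For illustration, a minimal sketch of how a detector bounding box could be expanded by such a margin before cropping (a hypothetical helper, not the repository's exact implementation):

```python
import numpy as np

def crop_with_margin(frame: np.ndarray, bbox, margin: float = 0.3) -> np.ndarray:
    """Crop a face from `frame`, padding the bbox by `margin` of its size on every side."""
    xmin, ymin, xmax, ymax = bbox
    w, h = xmax - xmin, ymax - ymin
    pad_w, pad_h = int(w * margin), int(h * margin)
    y0 = max(ymin - pad_h, 0)
    x0 = max(xmin - pad_w, 0)
    return frame[y0:ymax + pad_h, x0:xmax + pad_w]
```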
### Encoders
The winning encoder is the current state-of-the-art model (EfficientNet B7) pretrained on ImageNet with noisy student: [Self-training with Noisy Student improves ImageNet classification](https://arxiv.org/abs/1911.04252)

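A minimal sketch of building such an encoder with `timm` (the version 0.4.12 pinned in the Dockerfile exposes `tf_efficientnet_b7_ns`; the classifier head shown here is an assumption, the real one lives in training/zoo/classifiers.py):

```python
import timm
import torch.nn as nn

class NoisyStudentB7(nn.Module):
    def __init__(self, dropout: float = 0.5):
        super().__init__()
        # EfficientNet-B7 with noisy-student weights, pooled to a feature vector
        self.encoder = timm.create_model("tf_efficientnet_b7_ns", pretrained=True, num_classes=0)
        self.head = nn.Sequential(nn.Dropout(dropout), nn.Linear(self.encoder.num_features, 1))

    def forward(self, x):
        return self.head(self.encoder(x))
```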
### Averaging predictions
I used 32 frames for each video.
For each model output, instead of simple averaging I used the following heuristic, which worked quite well on the public leaderboard (0.25 -> 0.22 solo B5).
```python
import numpy as np

def confident_strategy(pred, t=0.8):
    pred = np.array(pred)
    sz = len(pred)
    fakes = np.count_nonzero(pred > t)
    # 11 frames are detected as fakes with high probability
    if fakes > sz // 2.5 and fakes > 11:
        return np.mean(pred[pred > t])
    elif np.count_nonzero(pred < 0.2) > 0.9 * sz:
        return np.mean(pred[pred < 0.2])
    else:
        return np.mean(pred)
```

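A quick usage illustration of the heuristic above (the per-frame probabilities are made up):

```python
import numpy as np

# 32 per-frame fake probabilities from one video: most frames look confidently fake
frame_preds = np.concatenate([np.full(20, 0.95), np.full(12, 0.4)])
print(confident_strategy(frame_preds))  # -> 0.95, the mean of the confident "fake" frames
```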
### Augmentations

I used heavy augmentations by default.
The [Albumentations](https://github.com/albumentations-team/albumentations) library supports most of the augmentations out of the box; only the IsotropicResize augmentation had to be added.
```python

def create_train_transforms(size=300):
    return Compose([
        ImageCompression(quality_lower=60, quality_upper=100, p=0.5),
        GaussNoise(p=0.1),
        GaussianBlur(blur_limit=3, p=0.05),
        HorizontalFlip(),
        OneOf([
            IsotropicResize(max_side=size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC),
            IsotropicResize(max_side=size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_LINEAR),
            IsotropicResize(max_side=size, interpolation_down=cv2.INTER_LINEAR, interpolation_up=cv2.INTER_LINEAR),
        ], p=1),
        PadIfNeeded(min_height=size, min_width=size, border_mode=cv2.BORDER_CONSTANT),
        OneOf([RandomBrightnessContrast(), FancyPCA(), HueSaturationValue()], p=0.7),
        ToGray(p=0.2),
        ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=10, border_mode=cv2.BORDER_CONSTANT, p=0.5),
    ]
    )
```
In addition to these augmentations I wanted to achieve better generalization with:
- Cutout-like augmentations (dropping artefacts and parts of the face)
- Dropping out part of the image, inspired by [GridMask](https://arxiv.org/abs/2001.04086) and the [Severstal Winning Solution](https://www.kaggle.com/c/severstal-steel-defect-detection/discussion/114254)

![augmentations](images/augmentations.jpg "Dropout augmentations")

## Building docker image
All libraries and the environment are already configured with the Dockerfile. It requires the docker engine https://docs.docker.com/engine/install/ubuntu/ and nvidia-docker https://github.com/NVIDIA/nvidia-docker on your system.

To build a docker image run `docker build -t df .`

## Running docker
`docker run --runtime=nvidia --ipc=host --rm --volume <DATA_ROOT>:/dataset -it df`

## Data preparation

Once the DFDC dataset is downloaded, all the scripts expect `dfdc_train_xxx` folders under the data root directory.

Preprocessing is done in a single script **`preprocess_data.sh`** which requires the dataset directory as its first argument.
It will execute the steps below:

##### 1. Find face bboxes
To extract face bboxes I used the facenet library, basically only MTCNN.
`python preprocessing/detect_original_faces.py --root-dir DATA_ROOT`
This script will detect faces in real videos and store them as jsons in the DATA_ROOT/bboxes directory.

##### 2. Extract crops from videos
To extract image crops I used the bboxes saved before. It will use bounding boxes from the original videos for fake videos as well.
`python preprocessing/extract_crops.py --root-dir DATA_ROOT --crops-dir crops`
This script will extract face crops from videos and save them in the DATA_ROOT/crops directory.

##### 3. Generate landmarks
From the saved crops it is quite fast to process the crops with MTCNN and extract landmarks.
`python preprocessing/generate_landmarks.py --root-dir DATA_ROOT`
This script will extract landmarks and save them in the DATA_ROOT/landmarks directory.

##### 4. Generate diff SSIM masks
`python preprocessing/generate_diffs.py --root-dir DATA_ROOT`
This script will extract SSIM difference masks between real and fake images and save them in the DATA_ROOT/diffs directory.

##### 5. Generate folds
`python preprocessing/generate_folds.py --root-dir DATA_ROOT --out folds.csv`
By default it will use 16 splits so that parts 0-2 form a holdout set, though just 400 videos can be used for validation as well.


## Training

Training 5 B7 models with different seeds is done in the **`train.sh`** script.

During training, checkpoints are saved for every epoch.

## Hardware requirements
Mostly trained on a devbox configuration with 4x Titan V, thanks to Nvidia and the DSB2018 competition where I got these GPUs: https://www.kaggle.com/c/data-science-bowl-2018/

Overall, training requires 4 GPUs with 12GB+ memory.
The batch size needs to be adjusted for standard 1080Ti or 2080Ti graphics cards.

As I computed the fake loss and the real loss separately inside each batch, results might be better with a larger batch size, for example on V100 GPUs.
Even though SyncBN is used, a larger batch on each GPU will lead to less noise, as the DFDC dataset has some fakes where the face detector failed and the face crops are not really fakes.

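As an illustration, a minimal sketch of computing the real and fake losses separately within a batch (a hedged reconstruction; the actual weighting lives in training/losses.py and the training pipeline):

```python
import torch
import torch.nn.functional as F

def separated_bce(logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
    """Average BCE over real and fake samples independently, then combine.

    This keeps a batch dominated by one class from drowning out the other.
    """
    labels = labels.float()
    loss = F.binary_cross_entropy_with_logits(logits, labels, reduction="none")
    real_mask, fake_mask = labels < 0.5, labels >= 0.5
    real_loss = loss[real_mask].mean() if real_mask.any() else logits.new_zeros(())
    fake_loss = loss[fake_mask].mean() if fake_mask.any() else logits.new_zeros(())
    return real_loss + fake_loss
```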
## Plotting losses to select checkpoints

`python plot_loss.py --log-file logs/<log file>`

![loss plot](images/loss_plot.png "Weighted loss")

## Inference

The inference kernel is reproduced with the `predict_folder.py` script.

## Pretrained models
The `download_weights.sh` script will download the trained models to the `weights/` folder. They should be downloaded before building a docker image.

Ensemble inference is already preconfigured with the `predict_submission.sh` bash script. It expects a directory with videos as the first argument and an output csv file as the second argument.

For example `./predict_submission.sh /mnt/datasets/deepfake/test_videos submission.csv`
__pycache__/kernel_utils.cpython-310.pyc
ADDED
Binary file (11.8 kB).
app.py
ADDED
@@ -0,0 +1,86 @@
import argparse
import os
import re
import time

import torch
from kernel_utils import VideoReader, FaceExtractor, confident_strategy, predict_on_video
from training.zoo.classifiers import DeepFakeClassifier

import gradio as gr

def model_fn(model_dir):
    model_path = os.path.join(model_dir, 'b7_ns_best.pth')
    model = DeepFakeClassifier(encoder="tf_efficientnet_b7_ns")  # default: CPU
    checkpoint = torch.load(model_path, map_location="cpu")
    state_dict = checkpoint.get("state_dict", checkpoint)
    model.load_state_dict({re.sub("^module.", "", k): v for k, v in state_dict.items()}, strict=True)
    model.eval()
    del checkpoint
    # models.append(model.half())

    return model

def convert_result(pred, class_names=["Real", "Fake"]):
    # pred is the fake probability, so "Real" gets its complement
    preds = [1 - pred, pred]
    assert len(class_names) == len(preds), "Class / Prediction should have the same length"
    return {n: p for n, p in zip(class_names, preds)}

def predict_fn(model, video, meta):
    start = time.time()
    prediction = predict_on_video(face_extractor=meta["face_extractor"],
                                  video_path=video,
                                  batch_size=meta["fps"],
                                  input_size=meta["input_size"],
                                  models=model,
                                  strategy=meta["strategy"],
                                  apply_compression=False,
                                  device='cpu')

    elapsed_time = round(time.time() - start, 2)

    prediction = convert_result(prediction)

    return prediction, elapsed_time

# Create title, description and article strings
title = "Deepfake Detector (private)"
description = "A video Deepfake Classifier (code: https://github.com/selimsef/dfdc_deepfake_challenge)"

example_list = ["examples/" + str(p) for p in os.listdir("examples/")]

# Environment
model_dir = 'weights'
frames_per_video = 32
video_reader = VideoReader()
video_read_fn = lambda x: video_reader.read_frames(x, num_frames=frames_per_video)
face_extractor = FaceExtractor(video_read_fn)
input_size = 380
strategy = confident_strategy
class_names = ["Real", "Fake"]

meta = {"fps": 32,
        "face_extractor": face_extractor,
        "input_size": input_size,
        "strategy": strategy}

model = model_fn(model_dir)

"""
if __name__ == '__main__':
    video_path = "nlurbvsozt.mp4"
    model = model_fn(model_dir)
    a, b = predict_fn([model], video_path, meta)
    print(a, b)
"""
# Create the Gradio demo. The model and meta are bound via a closure so that
# Gradio only has to supply the uploaded video as input.
demo = gr.Interface(fn=lambda video: predict_fn([model], video, meta),
                    inputs=gr.Video(),
                    outputs=[gr.Label(num_top_classes=2, label="Predictions"),
                             gr.Number(label="Prediction time (s)")],  # predict_fn has two outputs
                    examples=example_list,
                    title=title,
                    description=description)

# Launch the demo!
demo.launch(debug=False)  # Hugging Face Spaces don't need shareable links
configs/b5.json
ADDED
@@ -0,0 +1,28 @@
{
  "network": "DeepFakeClassifier",
  "encoder": "tf_efficientnet_b5_ns",
  "batches_per_epoch": 2500,
  "size": 380,
  "fp16": true,
  "optimizer": {
    "batch_size": 20,
    "type": "SGD",
    "momentum": 0.9,
    "weight_decay": 1e-4,
    "learning_rate": 0.01,
    "nesterov": true,
    "schedule": {
      "type": "poly",
      "mode": "step",
      "epochs": 30,
      "params": {"max_iter": 75100}
    }
  },
  "normalize": {
    "mean": [0.485, 0.456, 0.406],
    "std": [0.229, 0.224, 0.225]
  },
  "losses": {
    "BinaryCrossentropy": 1
  }
}
configs/b7.json
ADDED
@@ -0,0 +1,29 @@
{
  "network": "DeepFakeClassifier",
  "encoder": "tf_efficientnet_b7_ns",
  "batches_per_epoch": 2500,
  "size": 380,
  "fp16": true,
  "optimizer": {
    "batch_size": 4,
    "type": "SGD",
    "momentum": 0.9,
    "weight_decay": 1e-4,
    "learning_rate": 1e-4,
    "nesterov": true,
    "schedule": {
      "type": "poly",
      "mode": "step",
      "epochs": 20,
      "params": {"max_iter": 100500}
    }
  },
  "normalize": {
    "mean": [0.485, 0.456, 0.406],
    "std": [0.229, 0.224, 0.225]
  },
  "losses": {
    "BinaryCrossentropy": 1
  }
}
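For orientation, a minimal sketch of how a config like the one above might be consumed (a hypothetical loader; the repository's own reader lives in training/tools/config.py and training/pipelines/train_classifier.py):

```python
import json
import torch

def build_optimizer(model, config_path="configs/b7.json"):
    """Load a JSON config and build the SGD optimizer it describes."""
    with open(config_path) as f:
        cfg = json.load(f)
    opt = cfg["optimizer"]
    return torch.optim.SGD(model.parameters(),
                           lr=opt["learning_rate"],
                           momentum=opt["momentum"],
                           weight_decay=opt["weight_decay"],
                           nesterov=opt["nesterov"])
```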
download_weights.sh
ADDED
@@ -0,0 +1,9 @@
tag=0.0.1

wget -O weights/final_111_DeepFakeClassifier_tf_efficientnet_b7_ns_0_36 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_111_DeepFakeClassifier_tf_efficientnet_b7_ns_0_36
wget -O weights/final_555_DeepFakeClassifier_tf_efficientnet_b7_ns_0_19 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_555_DeepFakeClassifier_tf_efficientnet_b7_ns_0_19
wget -O weights/final_777_DeepFakeClassifier_tf_efficientnet_b7_ns_0_29 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_777_DeepFakeClassifier_tf_efficientnet_b7_ns_0_29
wget -O weights/final_777_DeepFakeClassifier_tf_efficientnet_b7_ns_0_31 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_777_DeepFakeClassifier_tf_efficientnet_b7_ns_0_31
wget -O weights/final_888_DeepFakeClassifier_tf_efficientnet_b7_ns_0_37 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_888_DeepFakeClassifier_tf_efficientnet_b7_ns_0_37
wget -O weights/final_888_DeepFakeClassifier_tf_efficientnet_b7_ns_0_40 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_888_DeepFakeClassifier_tf_efficientnet_b7_ns_0_40
wget -O weights/final_999_DeepFakeClassifier_tf_efficientnet_b7_ns_0_23 https://github.com/selimsef/dfdc_deepfake_challenge/releases/download/$tag/final_999_DeepFakeClassifier_tf_efficientnet_b7_ns_0_23
examples/liuujwwgpr.mp4
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b3aaefb51aa5720cdabcc68d93da5c6a22573d8da06bdaf5e009c7a370943e85
size 12852441
examples/nlurbvsozt.mp4
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:300b7dea93132b512f35de76572e7fcde666c812b91aec6b189dafa6f100c9b5
size 4486723
examples/rfjuhbnlro.mp4
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b6d0bb841ebe6a8e20cf265b45356a1ea3fed9837025e8d549b2437290d79273
size 16218775
kernel_utils.py
ADDED
@@ -0,0 +1,365 @@
import os

import cv2
import numpy as np
import torch
from PIL import Image
from albumentations.augmentations.functional import image_compression
from facenet_pytorch.models.mtcnn import MTCNN
from concurrent.futures import ThreadPoolExecutor

from torchvision.transforms import Normalize

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
normalize_transform = Normalize(mean, std)


class VideoReader:
    """Helper class for reading one or more frames from a video file."""

    def __init__(self, verbose=True, insets=(0, 0)):
        """Creates a new VideoReader.

        Arguments:
            verbose: whether to print warnings and error messages
            insets: amount to inset the image by, as a percentage of
                (width, height). This lets you "zoom in" to an image
                to remove unimportant content around the borders.
                Useful for face detection, which may not work if the
                faces are too small.
        """
        self.verbose = verbose
        self.insets = insets

    def read_frames(self, path, num_frames, jitter=0, seed=None):
        """Reads frames that are always evenly spaced throughout the video.

        Arguments:
            path: the video file
            num_frames: how many frames to read, -1 means the entire video
                (warning: this will take up a lot of memory!)
            jitter: if not 0, adds small random offsets to the frame indices;
                this is useful so we don't always land on even or odd frames
            seed: random seed for jittering; if you set this to a fixed value,
                you probably want to set it only on the first video
        """
        assert num_frames > 0

        capture = cv2.VideoCapture(path)
        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        if frame_count <= 0: return None

        frame_idxs = np.linspace(0, frame_count - 1, num_frames, endpoint=True, dtype=np.int32)
        if jitter > 0:
            np.random.seed(seed)
            jitter_offsets = np.random.randint(-jitter, jitter, len(frame_idxs))
            frame_idxs = np.clip(frame_idxs + jitter_offsets, 0, frame_count - 1)

        result = self._read_frames_at_indices(path, capture, frame_idxs)
        capture.release()
        return result

    def read_random_frames(self, path, num_frames, seed=None):
        """Picks the frame indices at random.

        Arguments:
            path: the video file
            num_frames: how many frames to read, -1 means the entire video
                (warning: this will take up a lot of memory!)
        """
        assert num_frames > 0
        np.random.seed(seed)

        capture = cv2.VideoCapture(path)
        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        if frame_count <= 0: return None

        frame_idxs = sorted(np.random.choice(np.arange(0, frame_count), num_frames))
        result = self._read_frames_at_indices(path, capture, frame_idxs)

        capture.release()
        return result

    def read_frames_at_indices(self, path, frame_idxs):
        """Reads frames from a video and puts them into a NumPy array.

        Arguments:
            path: the video file
            frame_idxs: a list of frame indices. Important: should be
                sorted from low-to-high! If an index appears multiple
                times, the frame is still read only once.

        Returns:
            - a NumPy array of shape (num_frames, height, width, 3)
            - a list of the frame indices that were read

        Reading stops if loading a frame fails, in which case the first
        dimension returned may actually be less than num_frames.

        Returns None if an exception is thrown for any reason, or if no
        frames were read.
        """
        assert len(frame_idxs) > 0
        capture = cv2.VideoCapture(path)
        result = self._read_frames_at_indices(path, capture, frame_idxs)
        capture.release()
        return result

    def _read_frames_at_indices(self, path, capture, frame_idxs):
        try:
            frames = []
            idxs_read = []
            for frame_idx in range(frame_idxs[0], frame_idxs[-1] + 1):
                # Get the next frame, but don't decode if we're not using it.
                ret = capture.grab()
                if not ret:
                    if self.verbose:
                        print("Error grabbing frame %d from movie %s" % (frame_idx, path))
                    break

                # Need to look at this frame?
                current = len(idxs_read)
                if frame_idx == frame_idxs[current]:
                    ret, frame = capture.retrieve()
                    if not ret or frame is None:
                        if self.verbose:
                            print("Error retrieving frame %d from movie %s" % (frame_idx, path))
                        break

                    frame = self._postprocess_frame(frame)
                    frames.append(frame)
                    idxs_read.append(frame_idx)

            if len(frames) > 0:
                return np.stack(frames), idxs_read
            if self.verbose:
                print("No frames read from movie %s" % path)
            return None
        except:
            if self.verbose:
                print("Exception while reading movie %s" % path)
            return None

    def read_middle_frame(self, path):
        """Reads the frame from the middle of the video."""
        capture = cv2.VideoCapture(path)
        frame_count = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
        result = self._read_frame_at_index(path, capture, frame_count // 2)
        capture.release()
        return result

    def read_frame_at_index(self, path, frame_idx):
        """Reads a single frame from a video.

        If you just want to read a single frame from the video, this is more
        efficient than scanning through the video to find the frame. However,
        for reading multiple frames it's not efficient.

        My guess is that a "streaming" approach is more efficient than a
        "random access" approach because, unless you happen to grab a keyframe,
        the decoder still needs to read all the previous frames in order to
        reconstruct the one you're asking for.

        Returns a NumPy array of shape (1, H, W, 3) and the index of the frame,
        or None if reading failed.
        """
        capture = cv2.VideoCapture(path)
        result = self._read_frame_at_index(path, capture, frame_idx)
        capture.release()
        return result

    def _read_frame_at_index(self, path, capture, frame_idx):
        capture.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
        ret, frame = capture.read()
        if not ret or frame is None:
            if self.verbose:
                print("Error retrieving frame %d from movie %s" % (frame_idx, path))
            return None
        else:
            frame = self._postprocess_frame(frame)
            return np.expand_dims(frame, axis=0), [frame_idx]

    def _postprocess_frame(self, frame):
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        if self.insets[0] > 0:
            W = frame.shape[1]
            p = int(W * self.insets[0])
            frame = frame[:, p:-p, :]

        if self.insets[1] > 0:
            H = frame.shape[1]
            q = int(H * self.insets[1])
            frame = frame[q:-q, :, :]

        return frame


class FaceExtractor:
    def __init__(self, video_read_fn):
        self.video_read_fn = video_read_fn
        self.detector = MTCNN(margin=0, thresholds=[0.7, 0.8, 0.8], device="cuda")

    def process_videos(self, input_dir, filenames, video_idxs):
        videos_read = []
        frames_read = []
        frames = []
        results = []
        for video_idx in video_idxs:
            # Read the full-size frames from this video.
            filename = filenames[video_idx]
            video_path = os.path.join(input_dir, filename)
            result = self.video_read_fn(video_path)
            # Error? Then skip this video.
            if result is None: continue

            videos_read.append(video_idx)

            # Keep track of the original frames (need them later).
            my_frames, my_idxs = result

            frames.append(my_frames)
            frames_read.append(my_idxs)
            for i, frame in enumerate(my_frames):
                h, w = frame.shape[:2]
                img = Image.fromarray(frame.astype(np.uint8))
                img = img.resize(size=[s // 2 for s in img.size])

                batch_boxes, probs = self.detector.detect(img, landmarks=False)

                faces = []
                scores = []
                if batch_boxes is None:
                    continue
                for bbox, score in zip(batch_boxes, probs):
                    if bbox is not None:
                        xmin, ymin, xmax, ymax = [int(b * 2) for b in bbox]
                        w = xmax - xmin
                        h = ymax - ymin
                        p_h = h // 3
                        p_w = w // 3
                        crop = frame[max(ymin - p_h, 0):ymax + p_h, max(xmin - p_w, 0):xmax + p_w]
                        faces.append(crop)
                        scores.append(score)

                frame_dict = {"video_idx": video_idx,
                              "frame_idx": my_idxs[i],
                              "frame_w": w,
                              "frame_h": h,
                              "faces": faces,
                              "scores": scores}
                results.append(frame_dict)

        return results

    def process_video(self, video_path):
        """Convenience method for doing face extraction on a single video."""
        input_dir = os.path.dirname(video_path)
        filenames = [os.path.basename(video_path)]
        return self.process_videos(input_dir, filenames, [0])



def confident_strategy(pred, t=0.8):
    pred = np.array(pred)
    sz = len(pred)
    fakes = np.count_nonzero(pred > t)
    # 11 frames are detected as fakes with high probability
    if fakes > sz // 2.5 and fakes > 11:
        return np.mean(pred[pred > t])
    elif np.count_nonzero(pred < 0.2) > 0.9 * sz:
        return np.mean(pred[pred < 0.2])
    else:
        return np.mean(pred)

strategy = confident_strategy


def put_to_center(img, input_size):
    img = img[:input_size, :input_size]
    image = np.zeros((input_size, input_size, 3), dtype=np.uint8)
    start_w = (input_size - img.shape[1]) // 2
    start_h = (input_size - img.shape[0]) // 2
    image[start_h:start_h + img.shape[0], start_w: start_w + img.shape[1], :] = img
    return image


def isotropically_resize_image(img, size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC):
    h, w = img.shape[:2]
    if max(w, h) == size:
        return img
    if w > h:
        scale = size / w
        h = h * scale
        w = size
    else:
        scale = size / h
        w = w * scale
        h = size
    interpolation = interpolation_up if scale > 1 else interpolation_down
    resized = cv2.resize(img, (int(w), int(h)), interpolation=interpolation)
    return resized


def predict_on_video(face_extractor, video_path, batch_size, input_size, models, strategy=np.mean,
                     apply_compression=False, device='cpu'):
    batch_size *= 4
    try:
        faces = face_extractor.process_video(video_path)
        if len(faces) > 0:
            x = np.zeros((batch_size, input_size, input_size, 3), dtype=np.uint8)
            n = 0
            for frame_data in faces:
                for face in frame_data["faces"]:
                    resized_face = isotropically_resize_image(face, input_size)
                    resized_face = put_to_center(resized_face, input_size)
                    if apply_compression:
                        resized_face = image_compression(resized_face, quality=90, image_type=".jpg")
                    if n + 1 < batch_size:
                        x[n] = resized_face
                        n += 1
                    else:
                        pass
            if n > 0:
                if device == 'cpu':
                    x = torch.tensor(x, device='cpu').float()
                else:
                    x = torch.tensor(x, device="cuda").float()
                # Preprocess the images.
                x = x.permute((0, 3, 1, 2))
                for i in range(len(x)):
                    x[i] = normalize_transform(x[i] / 255.)
                # Make a prediction, then take the average.
                with torch.no_grad():
                    preds = []
                    for model in models:
                        if device == 'cpu':
                            y_pred = model(x[:n])
                        else:
                            y_pred = model(x[:n].half())
                        y_pred = torch.sigmoid(y_pred.squeeze())
                        bpred = y_pred[:n].cpu().numpy()
                        preds.append(strategy(bpred))
                    return np.mean(preds)
    except Exception as e:
        print("Prediction error on video %s: %s" % (video_path, str(e)))

    return 0.5


def predict_on_video_set(face_extractor, videos, input_size, num_workers, test_dir, frames_per_video, models,
                         strategy=np.mean,
                         apply_compression=False):
    def process_file(i):
        filename = videos[i]
        y_pred = predict_on_video(face_extractor=face_extractor, video_path=os.path.join(test_dir, filename),
                                  input_size=input_size,
                                  batch_size=frames_per_video,
                                  models=models, strategy=strategy, apply_compression=apply_compression)
        return y_pred

    with ThreadPoolExecutor(max_workers=num_workers) as ex:
        predictions = ex.map(process_file, range(len(videos)))
    return list(predictions)
libs/shape_predictor_68_face_landmarks.dat
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fbdc2cb80eb9aa7a758672cbfdda32ba6300efe9b6e6c7a299ff7e736b11b92f
size 99693937
training/__init__.py
ADDED
File without changes
training/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (148 Bytes).
training/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (146 Bytes).
training/__pycache__/losses.cpython-310.pyc
ADDED
Binary file (1.54 kB).
training/__pycache__/losses.cpython-39.pyc
ADDED
Binary file (1.53 kB).
training/datasets/__init__.py
ADDED
File without changes
training/datasets/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (157 Bytes).
training/datasets/__pycache__/classifier_dataset.cpython-310.pyc
ADDED
Binary file (10.8 kB).
training/datasets/__pycache__/validation_set.cpython-310.pyc
ADDED
Binary file (4.99 kB).
training/datasets/classifier_dataset.py
ADDED
@@ -0,0 +1,384 @@
import math
import os
import random
import sys
import traceback

import cv2
import numpy as np
import pandas as pd
import skimage.draw
from albumentations import ImageCompression, OneOf, GaussianBlur, Blur
from albumentations.augmentations.functional import image_compression
from albumentations.augmentations.geometric.functional import rot90
from albumentations.pytorch.functional import img_to_tensor
from scipy.ndimage import binary_erosion, binary_dilation
from skimage import measure
from torch.utils.data import Dataset
import dlib

from training.datasets.validation_set import PUBLIC_SET


def prepare_bit_masks(mask):
    h, w = mask.shape
    mid_w = w // 2
    mid_h = w // 2
    masks = []
    ones = np.ones_like(mask)
    ones[:mid_h] = 0
    masks.append(ones)
    ones = np.ones_like(mask)
    ones[mid_h:] = 0
    masks.append(ones)
    ones = np.ones_like(mask)
    ones[:, :mid_w] = 0
    masks.append(ones)
    ones = np.ones_like(mask)
    ones[:, mid_w:] = 0
    masks.append(ones)
    ones = np.ones_like(mask)
    ones[:mid_h, :mid_w] = 0
    ones[mid_h:, mid_w:] = 0
    masks.append(ones)
    ones = np.ones_like(mask)
    ones[:mid_h, mid_w:] = 0
    ones[mid_h:, :mid_w] = 0
    masks.append(ones)
    return masks


detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor('libs/shape_predictor_68_face_landmarks.dat')


def blackout_convex_hull(img):
    try:
        rect = detector(img)[0]
        sp = predictor(img, rect)
        landmarks = np.array([[p.x, p.y] for p in sp.parts()])
        outline = landmarks[[*range(17), *range(26, 16, -1)]]
        Y, X = skimage.draw.polygon(outline[:, 1], outline[:, 0])
        cropped_img = np.zeros(img.shape[:2], dtype=np.uint8)
        cropped_img[Y, X] = 1
        # if random.random() > 0.5:
        #     img[cropped_img == 0] = 0
        #     # leave only face
        #     return img

        y, x = measure.centroid(cropped_img)
        y = int(y)
        x = int(x)
        first = random.random() > 0.5
        if random.random() > 0.5:
            if first:
                cropped_img[:y, :] = 0
            else:
                cropped_img[y:, :] = 0
        else:
            if first:
                cropped_img[:, :x] = 0
            else:
                cropped_img[:, x:] = 0

        img[cropped_img > 0] = 0
    except Exception as e:
        pass


def dist(p1, p2):
    return math.sqrt((p1[0] - p2[0]) ** 2 + (p1[1] - p2[1]) ** 2)


def remove_eyes(image, landmarks):
    image = image.copy()
    (x1, y1), (x2, y2) = landmarks[:2]
    mask = np.zeros_like(image[..., 0])
    line = cv2.line(mask, (x1, y1), (x2, y2), color=(1), thickness=2)
    w = dist((x1, y1), (x2, y2))
    dilation = int(w // 4)
    line = binary_dilation(line, iterations=dilation)
    image[line, :] = 0
    return image


def remove_nose(image, landmarks):
    image = image.copy()
    (x1, y1), (x2, y2) = landmarks[:2]
    x3, y3 = landmarks[2]
    mask = np.zeros_like(image[..., 0])
    x4 = int((x1 + x2) / 2)
    y4 = int((y1 + y2) / 2)
    line = cv2.line(mask, (x3, y3), (x4, y4), color=(1), thickness=2)
    w = dist((x1, y1), (x2, y2))
    dilation = int(w // 4)
    line = binary_dilation(line, iterations=dilation)
    image[line, :] = 0
    return image


def remove_mouth(image, landmarks):
    image = image.copy()
    (x1, y1), (x2, y2) = landmarks[-2:]
    mask = np.zeros_like(image[..., 0])
    line = cv2.line(mask, (x1, y1), (x2, y2), color=(1), thickness=2)
    w = dist((x1, y1), (x2, y2))
    dilation = int(w // 3)
    line = binary_dilation(line, iterations=dilation)
    image[line, :] = 0
    return image


def remove_landmark(image, landmarks):
    if random.random() > 0.5:
        image = remove_eyes(image, landmarks)
    elif random.random() > 0.5:
        image = remove_mouth(image, landmarks)
    elif random.random() > 0.5:
        image = remove_nose(image, landmarks)
    return image


def change_padding(image, part=5):
    h, w = image.shape[:2]
    # original padding was done with 1/3 from each side, too much
    pad_h = int(((3 / 5) * h) / part)
    pad_w = int(((3 / 5) * w) / part)
    image = image[h // 5 - pad_h:-h // 5 + pad_h, w // 5 - pad_w:-w // 5 + pad_w]
    return image


def blackout_random(image, mask, label):
    binary_mask = mask > 0.4 * 255
    h, w = binary_mask.shape[:2]

    tries = 50
    current_try = 1
    while current_try < tries:
        first = random.random() < 0.5
        if random.random() < 0.5:
            pivot = random.randint(h // 2 - h // 5, h // 2 + h // 5)
            bitmap_msk = np.ones_like(binary_mask)
            if first:
                bitmap_msk[:pivot, :] = 0
            else:
                bitmap_msk[pivot:, :] = 0
        else:
            pivot = random.randint(w // 2 - w // 5, w // 2 + w // 5)
            bitmap_msk = np.ones_like(binary_mask)
            if first:
                bitmap_msk[:, :pivot] = 0
            else:
                bitmap_msk[:, pivot:] = 0

        if label < 0.5 and np.count_nonzero(image * np.expand_dims(bitmap_msk, axis=-1)) / 3 > (h * w) / 5 \
                or np.count_nonzero(binary_mask * bitmap_msk) > 40:
            mask *= bitmap_msk
            image *= np.expand_dims(bitmap_msk, axis=-1)
            break
        current_try += 1
    return image


def blend_original(img):
    img = img.copy()
    h, w = img.shape[:2]
    rect = detector(img)
    if len(rect) == 0:
        return img
    else:
        rect = rect[0]
    sp = predictor(img, rect)
    landmarks = np.array([[p.x, p.y] for p in sp.parts()])
    outline = landmarks[[*range(17), *range(26, 16, -1)]]
    Y, X = skimage.draw.polygon(outline[:, 1], outline[:, 0])
    raw_mask = np.zeros(img.shape[:2], dtype=np.uint8)
    raw_mask[Y, X] = 1
    face = img * np.expand_dims(raw_mask, -1)

    # add warping
    h1 = random.randint(h - h // 2, h + h // 2)
    w1 = random.randint(w - w // 2, w + w // 2)
    while abs(h1 - h) < h // 3 and abs(w1 - w) < w // 3:
        h1 = random.randint(h - h // 2, h + h // 2)
        w1 = random.randint(w - w // 2, w + w // 2)
    face = cv2.resize(face, (w1, h1), interpolation=random.choice([cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC]))
    face = cv2.resize(face, (w, h), interpolation=random.choice([cv2.INTER_LINEAR, cv2.INTER_AREA, cv2.INTER_CUBIC]))

    raw_mask = binary_erosion(raw_mask, iterations=random.randint(4, 10))
    img[raw_mask, :] = face[raw_mask, :]
    if random.random() < 0.2:
        img = OneOf([GaussianBlur(), Blur()], p=0.5)(image=img)["image"]
    # image compression
    if random.random() < 0.5:
        img = ImageCompression(quality_lower=40, quality_upper=95)(image=img)["image"]
    return img


class DeepFakeClassifierDataset(Dataset):

    def __init__(self,
                 data_path="/mnt/sota/datasets/deepfake",
                 fold=0,
                 label_smoothing=0.01,
                 padding_part=3,
                 hardcore=True,
                 crops_dir="crops",
                 folds_csv="folds.csv",
                 normalize={"mean": [0.485, 0.456, 0.406],
                            "std": [0.229, 0.224, 0.225]},
                 rotation=False,
                 mode="train",
                 reduce_val=True,
                 oversample_real=True,
                 transforms=None
                 ):
        super().__init__()
        self.data_root = data_path
        self.fold = fold
        self.folds_csv = folds_csv
        self.mode = mode
        self.rotation = rotation
        self.padding_part = padding_part
        self.hardcore = hardcore
        self.crops_dir = crops_dir
        self.label_smoothing = label_smoothing
        self.normalize = normalize
        self.transforms = transforms
        self.df = pd.read_csv(self.folds_csv)
        self.oversample_real = oversample_real
        self.reduce_val = reduce_val

    def __getitem__(self, index: int):

        while True:
            video, img_file, label, ori_video, frame, fold = self.data[index]
            try:
                if self.mode == "train":
                    label = np.clip(label, self.label_smoothing, 1 - self.label_smoothing)
                img_path = os.path.join(self.data_root, self.crops_dir, video, img_file)
                image = cv2.imread(img_path, cv2.IMREAD_COLOR)
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                mask = np.zeros(image.shape[:2], dtype=np.uint8)
                diff_path = os.path.join(self.data_root, "diffs", video, img_file[:-4] + "_diff.png")
                try:
                    msk = cv2.imread(diff_path, cv2.IMREAD_GRAYSCALE)
                    if msk is not None:
                        mask = msk
                except:
                    print("not found mask", diff_path)
                    pass
                if self.mode == "train" and self.hardcore and not self.rotation:
                    landmark_path = os.path.join(self.data_root, "landmarks", ori_video, img_file[:-4] + ".npy")
                    if os.path.exists(landmark_path) and random.random() < 0.7:
                        landmarks = np.load(landmark_path)
                        image = remove_landmark(image, landmarks)
                    elif random.random() < 0.2:
                        blackout_convex_hull(image)
                    elif random.random() < 0.1:
                        binary_mask = mask > 0.4 * 255
                        masks = prepare_bit_masks((binary_mask * 1).astype(np.uint8))
                        tries = 6
                        current_try = 1
                        while current_try < tries:
                            bitmap_msk = random.choice(masks)
                            if label < 0.5 or np.count_nonzero(mask * bitmap_msk) > 20:
                                mask *= bitmap_msk
                                image *= np.expand_dims(bitmap_msk, axis=-1)
                                break
                            current_try += 1
                if self.mode == "train" and self.padding_part > 3:
                    image = change_padding(image, self.padding_part)
                valid_label = np.count_nonzero(mask[mask > 20]) > 32 or label < 0.5
                valid_label = 1 if valid_label else 0
                rotation = 0
                if self.transforms:
                    data = self.transforms(image=image, mask=mask)
                    image = data["image"]
                    mask = data["mask"]
                if self.mode == "train" and self.hardcore and self.rotation:
                    # landmark_path = os.path.join(self.data_root, "landmarks", ori_video, img_file[:-4] + ".npy")
                    dropout = 0.8 if label > 0.5 else 0.6
                    if self.rotation:
                        dropout *= 0.7
                    elif random.random() < dropout:
                        blackout_random(image, mask, label)

                #
                # os.makedirs("../images", exist_ok=True)
                # cv2.imwrite(os.path.join("../images", video+ "_" + str(1 if label > 0.5 else 0) + "_"+img_file), image[...,::-1])

                if self.mode == "train" and self.rotation:
                    rotation = random.randint(0, 3)
                    image = rot90(image, rotation)

                image = img_to_tensor(image, self.normalize)
                return {"image": image, "labels": np.array((label,)), "img_name": os.path.join(video, img_file),
                        "valid": valid_label, "rotations": rotation}
            except Exception as e:
                traceback.print_exc(file=sys.stdout)
                print("Broken image", os.path.join(self.data_root, self.crops_dir, video, img_file))
                index = random.randint(0, len(self.data) - 1)

    def random_blackout_landmark(self, image, mask, landmarks):
        x, y = random.choice(landmarks)
        first = random.random() > 0.5
        # crop half face either vertically or horizontally
        if random.random() > 0.5:
            # width
            if first:
                image[:, :x] = 0
                mask[:, :x] = 0
            else:
                image[:, x:] = 0
                mask[:, x:] = 0
        else:
            # height
            if first:
                image[:y, :] = 0
                mask[:y, :] = 0
            else:
                image[y:, :] = 0
                mask[y:, :] = 0

    def reset(self, epoch, seed):
        self.data = self._prepare_data(epoch, seed)

    def __len__(self) -> int:
        return len(self.data)

    def get_distribution(self):
        return self.n_real, self.n_fake

    def _prepare_data(self, epoch, seed):
        df = self.df
        if self.mode == "train":
            rows = df[df["fold"] != self.fold]
        else:
            rows = df[df["fold"] == self.fold]
        seed = (epoch + 1) * seed
        if self.oversample_real:
            rows = self._oversample(rows, seed)
        if self.mode == "val" and self.reduce_val:
            # every 2nd frame, to speed up validation
            rows = rows[rows["frame"] % 20 == 0]
            # another option is to use public validation set
            # rows = rows[rows["video"].isin(PUBLIC_SET)]

        print(
            "real {} fakes {} mode {}".format(len(rows[rows["label"] == 0]), len(rows[rows["label"] == 1]), self.mode))
        data = rows.values

        self.n_real = len(rows[rows["label"] == 0])
        self.n_fake = len(rows[rows["label"] == 1])
        np.random.seed(seed)
        np.random.shuffle(data)
        return data

    def _oversample(self, rows: pd.DataFrame, seed):
        real = rows[rows["label"] == 0]
        fakes = rows[rows["label"] == 1]
        num_real = real["video"].count()
        if self.mode == "train":
            fakes = fakes.sample(n=num_real, replace=False, random_state=seed)
        return pd.concat([real, fakes])
training/datasets/validation_set.py
ADDED
@@ -0,0 +1,60 @@
PUBLIC_SET = {'tjuihawuqm', 'prwsfljdjo', 'scrbqgpvzz', 'ziipxxchai', 'uubgqnvfdl', 'wclvkepakb', 'xjvxtuakyd',
              'qlvsqdroqo', 'bcbqxhziqz', 'yzuestxcbq', 'hxwtsaydal', 'kqlvggiqee', 'vtunvalyji', 'mohiqoogpb',
              'siebfpwuhu', 'cekwtyxdoo', 'hszwwswewp', 'orekjthsef', 'huvlwkxoxm', 'fmhiujydwo', 'lhvjzhjxdp',
              'ibxfxggtqh', 'bofrwgeyjo', 'rmufsuogzn', 'zbgssotnjm', 'dpevefkefv', 'sufvvwmbha', 'ncoeewrdlo',
              'qhsehzgxqj', 'yxadevzohx', 'aomqqjipcp', 'pcyswtgick', 'wfzjxzhdkj', 'rcjfxxhcal', 'lnjkpdviqb',
              'xmkwsnuzyq', 'ouaowjmigq', 'bkuzquigyt', 'vwxednhlwz', 'mszblrdprw', 'blnmxntbey', 'gccnvdoknm',
              'mkzaekkvej', 'hclsparpth', 'eryjktdexi', 'hfsvqabzfq', 'acazlolrpz', 'yoyhmxtrys', 'rerpivllud',
              'elackxuccp', 'zgbhzkditd', 'vjljdfopjg', 'famlupsgqm', 'nymodlmxni', 'qcbkztamqc', 'qclpbcbgeq',
              'lpkgabskbw', 'mnowxangqx', 'czfqlbcfpa', 'qyyhuvqmyf', 'toinozytsp', 'ztyvglkcsf', 'nplviymzlg',
              'opvqdabdap', 'uxuvkrjhws', 'mxahsihabr', 'cqxxumarvp', 'ptbfnkajyi', 'njzshtfmcw', 'dcqodpzomd',
              'ajiyrjfyzp', 'ywauoonmlr', 'gochxzemmq', 'lpgxwdgnio', 'hnfwagcxdf', 'gfcycflhbo', 'gunamloolc',
              'yhjlnisfel', 'srfefmyjvt', 'evysmtpnrf', 'aktnlyqpah', 'gpsxfxrjrr', 'zfobicuigx', 'mnzabbkpmt',
              'rfjuhbnlro', 'zuwwbbusgl', 'csnkohqxdv', 'bzvzpwrabw', 'yietrwuncf', 'wynotylpnm', 'ekboxwrwuv',
              'rcecrgeotc', 'rklawjhbpv', 'ilqwcbprqa', 'jsysgmycsx', 'sqixhnilfm', 'wnlubukrki', 'nikynwcvuh',
              'sjkfxrlxxs', 'btdxnajogv', 'wjhpisoeaj', 'dyjklprkoc', 'qlqhjcshpk', 'jyfvaequfg', 'dozjwhnedd',
              'owaogcehvc', 'oyqgwjdwaj', 'vvfszaosiv', 'kmcdjxmnoa', 'jiswxuqzyz', 'ddtbarpcgo', 'wqysrieiqu',
              'xcruhaccxc', 'honxqdilvv', 'nxgzmgzkfv', 'cxsvvnxpyz', 'demuhxssgl', 'hzoiotcykp', 'fwykevubzy',
              'tejfudfgpq', 'kvmpmhdxly', 'oojxonbgow', 'vurjckblge', 'oysopgovhu', 'khpipxnsvx', 'pqthmvwonf',
              'fddmkqjwsh', 'pcoxcmtroa', 'cnxccbjlct', 'ggzjfrirjh', 'jquevmhdvc', 'ecumyiowzs', 'esmqxszybs',
              'mllzkpgatp', 'ryxaqpfubf', 'hbufmvbium', 'vdtsbqidjb', 'sjwywglgym', 'qxyrtwozyw', 'upmgtackuf',
              'ucthmsajay', 'zgjosltkie', 'snlyjbnpgw', 'nswtvttxre', 'iznnzjvaxc', 'jhczqfefgw', 'htzbnroagi',
              'pdswwyyntw', 'uvrzaczrbx', 'vbcgoyxsvn', 'hzssdinxec', 'novarhxpbj', 'vizerpsvbz', 'jawgcggquk',
              'iorbtaarte', 'yarpxfqejd', 'vhbbwdflyh', 'rrrfjhugvb', 'fneqiqpqvs', 'jytrvwlewz', 'bfjsthfhbd',
              'rxdoimqble', 'ekelfsnqof', 'uqvxjfpwdo', 'cjkctqqakb', 'tynfsthodx', 'yllztsrwjw', 'bktkwbcawi',
              'wcqvzujamg', 'bcvheslzrq', 'aqrsylrzgi', 'sktpeppbkc', 'mkmgcxaztt', 'etdliwticv', 'hqzwudvhih',
              'swsaoktwgi', 'temjefwaas', 'papagllumt', 'xrtvqhdibb', 'oelqpetgwj', 'ggdpclfcgk', 'imdmhwkkni',
              'lebzjtusnr', 'xhtppuyqdr', 'nxzgekegsp', 'waucvvmtkq', 'rnfcjxynfa', 'adohdulfwb', 'tjywwgftmv',
              'fjrueenjyp', 'oaguiggjyv', 'ytopzxrswu', 'yxvmusxvcz', 'rukyxomwcx', 'qdqdsaiitt', 'mxlipjhmqk',
              'voawxrmqyl', 'kezwvsxxzj', 'oocincvedt', 'qooxnxqqjb', 'mwwploizlj', 'yaxgpxhavq', 'uhakqelqri',
              'bvpeerislp', 'bkcyglmfci', 'jyoxdvxpza', 'gkutjglghz', 'knxltsvzyu', 'ybbrkacebd', 'apvzjkvnwn',
              'ahjnxtiamx', 'hsbljbsgxr', 'fnxgqcvlsd', 'xphdfgmfmz', 'scbdenmaed', 'ywxpquomgt', 'yljecirelf',
              'wcvsqnplsk', 'vmxfwxgdei', 'icbsahlivv', 'yhylappzid', 'irqzdokcws', 'petmyhjclt', 'rmlzgerevr',
              'qarqtkvgby', 'nkhzxomani', 'viteugozpv', 'qhkzlnzruj', 'eisofhptvk', 'gqnaxievjx', 'heiyoojifp',
              'zcxcmneefk', 'wvgviwnwob', 'gcdtglsoqj', 'yqhouqakbx', 'fopjiyxiqd', 'hierggamuo', 'ypbtpunjvm',
              'sjinmmbipg', 'kmqkiihrmj', 'wmoqzxddkb', 'lnhkjhyhvw', 'wixbuuzygv', 'fsdrwikhge', 'sfsayjgzrh',
              'pqdeutauqc', 'frqfsucgao', 'pdufsewrec', 'bfdopzvxbi', 'shnsajrsow', 'rvvpazsffd', 'pxcfrszlgi',
              'itfsvvmslp', 'ayipraspbn', 'prhmixykhr', 'doniqevxeg', 'dvtpwatuja', 'jiavqbrkyk', 'ipkpxvwroe',
              'syxobtuucp', 'syuxttuyhm', 'nwvsbmyndn', 'eqslzbqfea', 'ytddugrwph', 'vokrpfjpeb', 'bdshuoldwx',
              'fmvvmcbdrw', 'bnuwxhfahw', 'gbnzicjyhz', 'txnmkabufs', 'gfdjzwnpyp', 'hweshqpfwe', 'dxgnpnowgk',
              'xugmhbetrw', 'rktrpsdlci', 'nthpnwylxo', 'ihglzxzroo', 'ocgdbrgmtq', 'ruhtnngrqv', 'xljemofssi',
              'zxacihctqp', 'ghnpsltzyn', 'lbigytrrtr', 'ndikguxzek', 'mdfndlljvt', 'lyoslorecs', 'oefukgnvel',
              'zmxeiipnqb', 'cosghhimnd', 'alrtntfxtd', 'eywdmustbb', 'ooafcxxfrs', 'fqgypsunzr', 'hevcclcklc',
              'uhrqlmlclw', 'ipvwtgdlre', 'wcssbghcpc', 'didzujjhtg', 'fjxovgmwnm', 'dmmvuaikkv', 'hitfycdavv',
              'zyufpqvpyu', 'coujjnypba', 'temeqbmzxu', 'apedduehoy', 'iksxzpqxzi', 'kwfdyqofzw', 'aassnaulhq',
              'eyguqfmgzh', 'yiykshcbaz', 'sngjsueuhs', 'okgelildpc', 'ztyuiqrhdk', 'tvhjcfnqtg', 'gfgcwxkbjd',
              'lbfqksftuo', 'kowiwvrjht', 'dkuqbduxev', 'mwnibuujwz', 'sodvtfqbpf', 'hsbwhlolsn', 'qsjiypnjwi',
              'blszgmxkvu', 'ystdtnetgj', 'rfwxcinshk', 'vnlzxqwthl', 'ljouzjaqqe', 'gahgyuwzbu', 'xxzefxwyku',
              'xitgdpzbxv', 'sylnrepacf', 'igpvrfjdzc', 'nxnmkytwze', 'psesikjaxx', 'dvwpvqdflx', 'bjyaxvggle',
              'dpmgoiwhuf', 'wadvzjhwtw', 'kcjvhgvhpt', 'eppyqpgewp', 'tyjpjpglgx', 'cekarydqba', 'dvkdfhrpph',
              'cnpanmywno', 'ljauauuyka', 'hicjuubiau', 'cqhwesrciw', 'dnmowthjcj', 'lujvyveojc', 'wndursivcx',
              'espkiocpxq', 'jsbpkpxwew', 'dsnxgrfdmd', 'hyjqolupxn', 'xdezcezszc', 'axfhbpkdlc', 'qqnlrngaft',
              'coqwgzpbhx', 'ncmpqwmnzb', 'sznkemeqro', 'omphqltjdd', 'uoccaiathd', 'jzmzdispyo', 'pxjkzvqomp',
              'udxqbhgvvx', 'dzkyxbbqkr', 'dtozwcapoa', 'qswlzfgcgj', 'tgawasvbbr', 'lmdyicksrv', 'fzvpbrzssi',
              'dxfdovivlw', 'zzmgnglanj', 'vssmlqoiti', 'vajkicalux', 'ekvwecwltj', 'ylxwcwhjjd', 'keioymnobc',
              'usqqvxcjmg', 'phjvutxpoi', 'nycmyuzpml', 'bwdmzwhdnw', 'fxuxxtryjn', 'orixbcfvdz', 'hefisnapds',
              'fpevfidstw', 'halvwiltfs', 'dzojiwfvba', 'ojsxxkalat', 'esjdyghhog', 'ptbnewtvon', 'hcanfkwivl',
              'yronlutbgm', 'llplvmcvbl', 'yxirnfyijn', 'nwvloufjty', 'rtpbawlmxr', 'aayfryxljh', 'zfrrixsimm',
              'txmnoyiyte'}
training/losses.py
ADDED
@@ -0,0 +1,28 @@
from typing import Any

from pytorch_toolbelt.losses import BinaryFocalLoss
from torch import nn
from torch.nn.modules.loss import BCEWithLogitsLoss


class WeightedLosses(nn.Module):
    def __init__(self, losses, weights):
        super().__init__()
        self.losses = losses
        self.weights = weights

    def forward(self, *input: Any, **kwargs: Any):
        cum_loss = 0
        for loss, w in zip(self.losses, self.weights):
            cum_loss += w * loss.forward(*input, **kwargs)
        return cum_loss


class BinaryCrossentropy(BCEWithLogitsLoss):
    pass


class FocalLoss(BinaryFocalLoss):
    def __init__(self, alpha=None, gamma=3, ignore_index=None, reduction="mean", normalized=False,
                 reduced_threshold=None):
        super().__init__(alpha, gamma, ignore_index, reduction, normalized, reduced_threshold)
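These classes are looked up by name from the "losses" mapping of the training config and combined by WeightedLosses in train_classifier.py below. A minimal usage sketch (the loss choice and weights here are illustrative, not taken from any shipped config):

import torch
from training.losses import WeightedLosses, BinaryCrossentropy, FocalLoss

# hypothetical weighting: plain BCE plus a focal term
criterion = WeightedLosses([BinaryCrossentropy(), FocalLoss(gamma=3)], [1.0, 0.5])
logits = torch.randn(8, 1)
labels = torch.randint(0, 2, (8, 1)).float()
print(criterion(logits, labels))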
training/pipelines/__init__.py
ADDED
File without changes
|
training/pipelines/train_classifier.py
ADDED
@@ -0,0 +1,364 @@
import argparse
import json
import os
from collections import defaultdict

from sklearn.metrics import log_loss
from torch import topk

import sys
print('@@@@@@@@@@@@@@@@@@')
sys.path.append('..')

from training import losses
from training.datasets.classifier_dataset import DeepFakeClassifierDataset
from training.losses import WeightedLosses
from training.tools.config import load_config
from training.tools.utils import create_optimizer, AverageMeter
from training.transforms.albu import IsotropicResize
from training.zoo import classifiers

os.environ["MKL_NUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"

import cv2

cv2.ocl.setUseOpenCL(False)
cv2.setNumThreads(0)
import numpy as np
from albumentations import Compose, RandomBrightnessContrast, \
    HorizontalFlip, FancyPCA, HueSaturationValue, OneOf, ToGray, \
    ShiftScaleRotate, ImageCompression, PadIfNeeded, GaussNoise, GaussianBlur

from apex.parallel import DistributedDataParallel, convert_syncbn_model
from tensorboardX import SummaryWriter

from apex import amp

import torch
from torch.backends import cudnn
from torch.nn import DataParallel
from torch.utils.data import DataLoader
from tqdm import tqdm
import torch.distributed as dist

torch.backends.cudnn.benchmark = True


def create_train_transforms(size=300):
    return Compose([
        ImageCompression(quality_lower=60, quality_upper=100, p=0.5),
        GaussNoise(p=0.1),
        GaussianBlur(blur_limit=3, p=0.05),
        HorizontalFlip(),
        OneOf([
            IsotropicResize(max_side=size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC),
            IsotropicResize(max_side=size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_LINEAR),
            IsotropicResize(max_side=size, interpolation_down=cv2.INTER_LINEAR, interpolation_up=cv2.INTER_LINEAR),
        ], p=1),
        PadIfNeeded(min_height=size, min_width=size, border_mode=cv2.BORDER_CONSTANT),
        OneOf([RandomBrightnessContrast(), FancyPCA(), HueSaturationValue()], p=0.7),
        ToGray(p=0.2),
        ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=10, border_mode=cv2.BORDER_CONSTANT, p=0.5),
    ]
    )


def create_val_transforms(size=300):
    return Compose([
        IsotropicResize(max_side=size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC),
        PadIfNeeded(min_height=size, min_width=size, border_mode=cv2.BORDER_CONSTANT),
    ])


def main():
    parser = argparse.ArgumentParser("PyTorch Xview Pipeline")
    arg = parser.add_argument
    arg('--config', metavar='CONFIG_FILE', help='path to configuration file')
    arg('--workers', type=int, default=6, help='number of cpu threads to use')
    arg('--gpu', type=str, default='0', help='List of GPUs for parallel training, e.g. 0,1,2,3')
    arg('--output-dir', type=str, default='weights/')
    arg('--resume', type=str, default='')
    arg('--fold', type=int, default=0)
    arg('--prefix', type=str, default='classifier_')
    arg('--data-dir', type=str, default="/mnt/sota/datasets/deepfake")
    arg('--folds-csv', type=str, default='folds.csv')
    arg('--crops-dir', type=str, default='crops')
    arg('--label-smoothing', type=float, default=0.01)
    arg('--logdir', type=str, default='logs')
    arg('--zero-score', action='store_true', default=False)
    arg('--from-zero', action='store_true', default=False)
    arg('--distributed', action='store_true', default=False)
    arg('--freeze-epochs', type=int, default=0)
    arg("--local_rank", default=0, type=int)
    arg("--seed", default=777, type=int)
    arg("--padding-part", default=3, type=int)
    arg("--opt-level", default='O1', type=str)
    arg("--test_every", type=int, default=1)
    arg("--no-oversample", action="store_true")
    arg("--no-hardcore", action="store_true")
    arg("--only-changed-frames", action="store_true")

    args = parser.parse_args()
    os.makedirs(args.output_dir, exist_ok=True)
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl', init_method='env://')
    else:
        os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    cudnn.benchmark = True

    conf = load_config(args.config)
    model = classifiers.__dict__[conf['network']](encoder=conf['encoder'])

    model = model.cuda()
    if args.distributed:
        model = convert_syncbn_model(model)
    ohem = conf.get("ohem_samples", None)
    reduction = "mean"
    if ohem:
        reduction = "none"
    loss_fn = []
    weights = []
    for loss_name, weight in conf["losses"].items():
        loss_fn.append(losses.__dict__[loss_name](reduction=reduction).cuda())
        weights.append(weight)
    loss = WeightedLosses(loss_fn, weights)
    loss_functions = {"classifier_loss": loss}
    optimizer, scheduler = create_optimizer(conf['optimizer'], model)
    bce_best = 100
    start_epoch = 0
    batch_size = conf['optimizer']['batch_size']

    data_train = DeepFakeClassifierDataset(mode="train",
                                           oversample_real=not args.no_oversample,
                                           fold=args.fold,
                                           padding_part=args.padding_part,
                                           hardcore=not args.no_hardcore,
                                           crops_dir=args.crops_dir,
                                           data_path=args.data_dir,
                                           label_smoothing=args.label_smoothing,
                                           folds_csv=args.folds_csv,
                                           transforms=create_train_transforms(conf["size"]),
                                           normalize=conf.get("normalize", None))
    data_val = DeepFakeClassifierDataset(mode="val",
                                         fold=args.fold,
                                         padding_part=args.padding_part,
                                         crops_dir=args.crops_dir,
                                         data_path=args.data_dir,
                                         folds_csv=args.folds_csv,
                                         transforms=create_val_transforms(conf["size"]),
                                         normalize=conf.get("normalize", None))
    val_data_loader = DataLoader(data_val, batch_size=batch_size * 2, num_workers=args.workers, shuffle=False,
                                 pin_memory=False)
    os.makedirs(args.logdir, exist_ok=True)
    summary_writer = SummaryWriter(args.logdir + '/' + conf.get("prefix", args.prefix) + conf['encoder'] + "_" + str(args.fold))
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location='cpu')
            state_dict = checkpoint['state_dict']
            state_dict = {k[7:]: w for k, w in state_dict.items()}
            model.load_state_dict(state_dict, strict=False)
            if not args.from_zero:
                start_epoch = checkpoint['epoch']
                if not args.zero_score:
                    bce_best = checkpoint.get('bce_best', 0)
            print("=> loaded checkpoint '{}' (epoch {}, bce_best {})"
                  .format(args.resume, checkpoint['epoch'], checkpoint['bce_best']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    if args.from_zero:
        start_epoch = 0
    current_epoch = start_epoch

    if conf['fp16']:
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level=args.opt_level,
                                          loss_scale='dynamic')

    snapshot_name = "{}{}_{}_{}".format(conf.get("prefix", args.prefix), conf['network'], conf['encoder'], args.fold)

    if args.distributed:
        model = DistributedDataParallel(model, delay_allreduce=True)
    else:
        model = DataParallel(model).cuda()
    data_val.reset(1, args.seed)
    max_epochs = conf['optimizer']['schedule']['epochs']
    for epoch in range(start_epoch, max_epochs):
        data_train.reset(epoch, args.seed)
        train_sampler = None
        if args.distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(data_train)
            train_sampler.set_epoch(epoch)
        if epoch < args.freeze_epochs:
            print("Freezing encoder!!!")
            model.module.encoder.eval()
            for p in model.module.encoder.parameters():
                p.requires_grad = False
        else:
            model.module.encoder.train()
            for p in model.module.encoder.parameters():
                p.requires_grad = True

        train_data_loader = DataLoader(data_train, batch_size=batch_size, num_workers=args.workers,
                                       shuffle=train_sampler is None, sampler=train_sampler, pin_memory=False,
                                       drop_last=True)

        train_epoch(current_epoch, loss_functions, model, optimizer, scheduler, train_data_loader, summary_writer, conf,
                    args.local_rank, args.only_changed_frames)
        model = model.eval()

        if args.local_rank == 0:
            torch.save({
                'epoch': current_epoch + 1,
                'state_dict': model.state_dict(),
                'bce_best': bce_best,
            }, args.output_dir + '/' + snapshot_name + "_last")
            torch.save({
                'epoch': current_epoch + 1,
                'state_dict': model.state_dict(),
                'bce_best': bce_best,
            }, args.output_dir + snapshot_name + "_{}".format(current_epoch))
            if (epoch + 1) % args.test_every == 0:
                bce_best = evaluate_val(args, val_data_loader, bce_best, model,
                                        snapshot_name=snapshot_name,
                                        current_epoch=current_epoch,
                                        summary_writer=summary_writer)
        current_epoch += 1


def evaluate_val(args, data_val, bce_best, model, snapshot_name, current_epoch, summary_writer):
    print("Test phase")
    model = model.eval()

    bce, probs, targets = validate(model, data_loader=data_val)
    if args.local_rank == 0:
        summary_writer.add_scalar('val/bce', float(bce), global_step=current_epoch)
        if bce < bce_best:
            print("Epoch {} improved from {} to {}".format(current_epoch, bce_best, bce))
            if args.output_dir is not None:
                torch.save({
                    'epoch': current_epoch + 1,
                    'state_dict': model.state_dict(),
                    'bce_best': bce,
                }, args.output_dir + snapshot_name + "_best_dice")
            bce_best = bce
            with open("predictions_{}.json".format(args.fold), "w") as f:
                json.dump({"probs": probs, "targets": targets}, f)
        torch.save({
            'epoch': current_epoch + 1,
            'state_dict': model.state_dict(),
            'bce_best': bce_best,
        }, args.output_dir + snapshot_name + "_last")
        print("Epoch: {} bce: {}, bce_best: {}".format(current_epoch, bce, bce_best))
    return bce_best


def validate(net, data_loader, prefix=""):
    probs = defaultdict(list)
    targets = defaultdict(list)

    with torch.no_grad():
        for sample in tqdm(data_loader):
            imgs = sample["image"].cuda()
            img_names = sample["img_name"]
            labels = sample["labels"].cuda().float()
            out = net(imgs)
            labels = labels.cpu().numpy()
            preds = torch.sigmoid(out).cpu().numpy()
            for i in range(out.shape[0]):
                video, img_id = img_names[i].split("/")
                probs[video].append(preds[i].tolist())
                targets[video].append(labels[i].tolist())
    data_x = []
    data_y = []
    for vid, score in probs.items():
        score = np.array(score)
        lbl = targets[vid]

        score = np.mean(score)
        lbl = np.mean(lbl)
        data_x.append(score)
        data_y.append(lbl)
    y = np.array(data_y)
    x = np.array(data_x)
    fake_idx = y > 0.1
    real_idx = y < 0.1
    fake_loss = log_loss(y[fake_idx], x[fake_idx], labels=[0, 1])
    real_loss = log_loss(y[real_idx], x[real_idx], labels=[0, 1])
    print("{}fake_loss".format(prefix), fake_loss)
    print("{}real_loss".format(prefix), real_loss)

    return (fake_loss + real_loss) / 2, probs, targets


def train_epoch(current_epoch, loss_functions, model, optimizer, scheduler, train_data_loader, summary_writer, conf,
                local_rank, only_valid):
    losses = AverageMeter()
    fake_losses = AverageMeter()
    real_losses = AverageMeter()
    max_iters = conf["batches_per_epoch"]
    print("training epoch {}".format(current_epoch))
    model.train()
    pbar = tqdm(enumerate(train_data_loader), total=max_iters, desc="Epoch {}".format(current_epoch), ncols=0)
    if conf["optimizer"]["schedule"]["mode"] == "epoch":
        scheduler.step(current_epoch)
    for i, sample in pbar:
        imgs = sample["image"].cuda()
        labels = sample["labels"].cuda().float()
        out_labels = model(imgs)
        if only_valid:
            valid_idx = sample["valid"].cuda().float() > 0
            out_labels = out_labels[valid_idx]
            labels = labels[valid_idx]
            if labels.size(0) == 0:
                continue

        fake_loss = 0
        real_loss = 0
        fake_idx = labels > 0.5
        real_idx = labels <= 0.5

        ohem = conf.get("ohem_samples", None)
        if torch.sum(fake_idx * 1) > 0:
            fake_loss = loss_functions["classifier_loss"](out_labels[fake_idx], labels[fake_idx])
        if torch.sum(real_idx * 1) > 0:
            real_loss = loss_functions["classifier_loss"](out_labels[real_idx], labels[real_idx])
        if ohem:
            fake_loss = topk(fake_loss, k=min(ohem, fake_loss.size(0)), sorted=False)[0].mean()
            real_loss = topk(real_loss, k=min(ohem, real_loss.size(0)), sorted=False)[0].mean()

        loss = (fake_loss + real_loss) / 2
        losses.update(loss.item(), imgs.size(0))
        fake_losses.update(0 if fake_loss == 0 else fake_loss.item(), imgs.size(0))
        real_losses.update(0 if real_loss == 0 else real_loss.item(), imgs.size(0))

        optimizer.zero_grad()
        pbar.set_postfix({"lr": float(scheduler.get_lr()[-1]), "epoch": current_epoch, "loss": losses.avg,
                          "fake_loss": fake_losses.avg, "real_loss": real_losses.avg})

        if conf['fp16']:
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            loss.backward()
        torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), 1)
        optimizer.step()
        torch.cuda.synchronize()
        if conf["optimizer"]["schedule"]["mode"] in ("step", "poly"):
            scheduler.step(i + current_epoch * max_iters)
        if i == max_iters - 1:
            break
    pbar.close()
    if local_rank == 0:
        for idx, param_group in enumerate(optimizer.param_groups):
            lr = param_group['lr']
            summary_writer.add_scalar('group{}/lr'.format(idx), float(lr), global_step=current_epoch)
        summary_writer.add_scalar('train/loss', float(losses.avg), global_step=current_epoch)


if __name__ == '__main__':
    main()
training/tools/__init__.py
ADDED
File without changes
|
training/tools/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (154 Bytes). View file
|
|
training/tools/__pycache__/config.cpython-310.pyc
ADDED
Binary file (1.06 kB). View file
|
|
training/tools/__pycache__/schedulers.cpython-310.pyc
ADDED
Binary file (3.01 kB). View file
|
|
training/tools/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (3.65 kB). View file
|
|
training/tools/config.py
ADDED
@@ -0,0 +1,43 @@
import json

DEFAULTS = {
    "network": "dpn",
    "encoder": "dpn92",
    "model_params": {},
    "optimizer": {
        "batch_size": 32,
        "type": "SGD",  # supported: SGD, Adam
        "momentum": 0.9,
        "weight_decay": 0,
        "clip": 1.,
        "learning_rate": 0.1,
        "classifier_lr": -1,
        "nesterov": True,
        "schedule": {
            "type": "constant",  # supported: constant, step, multistep, exponential, linear, poly
            "mode": "epoch",  # supported: epoch, step
            "epochs": 10,
            "params": {}
        }
    },
    "normalize": {
        "mean": [0.485, 0.456, 0.406],
        "std": [0.229, 0.224, 0.225]
    }
}


def _merge(src, dst):
    for k, v in src.items():
        if k in dst:
            if isinstance(v, dict):
                _merge(src[k], dst[k])
        else:
            dst[k] = v


def load_config(config_file, defaults=DEFAULTS):
    with open(config_file, "r") as fd:
        config = json.load(fd)
    _merge(defaults, config)
    return config
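load_config() fills any key missing from the JSON file with these DEFAULTS, so a config only needs to supply its overrides plus the extra keys read by train_classifier.py (network, encoder, size, fp16, losses, batches_per_epoch). A hypothetical minimal config as a sketch; the values below are illustrative and do not reproduce any shipped config file:

{
  "network": "DeepFakeClassifier",
  "encoder": "tf_efficientnet_b7_ns",
  "size": 380,
  "fp16": true,
  "batches_per_epoch": 2500,
  "losses": {"BinaryCrossentropy": 1},
  "optimizer": {
    "batch_size": 12,
    "learning_rate": 0.01,
    "schedule": {"type": "poly", "mode": "step", "epochs": 40, "params": {"max_iter": 100000}}
  }
}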
training/tools/schedulers.py
ADDED
@@ -0,0 +1,46 @@
from bisect import bisect_right

from torch.optim.lr_scheduler import _LRScheduler


class LRStepScheduler(_LRScheduler):
    def __init__(self, optimizer, steps, last_epoch=-1):
        self.lr_steps = steps
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        pos = max(bisect_right([x for x, y in self.lr_steps], self.last_epoch) - 1, 0)
        return [self.lr_steps[pos][1] if self.lr_steps[pos][0] <= self.last_epoch else base_lr for base_lr in self.base_lrs]


class PolyLR(_LRScheduler):
    """Sets the learning rate of each parameter group according to poly learning rate policy
    """
    def __init__(self, optimizer, max_iter=90000, power=0.9, last_epoch=-1):
        self.max_iter = max_iter
        self.power = power
        super(PolyLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        self.last_epoch = (self.last_epoch + 1) % self.max_iter
        return [base_lr * ((1 - float(self.last_epoch) / self.max_iter) ** (self.power)) for base_lr in self.base_lrs]


class ExponentialLRScheduler(_LRScheduler):
    """Decays the learning rate of each parameter group by gamma every epoch.
    When last_epoch=-1, sets initial lr as lr.

    Args:
        optimizer (Optimizer): Wrapped optimizer.
        gamma (float): Multiplicative factor of learning rate decay.
        last_epoch (int): The index of last epoch. Default: -1.
    """

    def __init__(self, optimizer, gamma, last_epoch=-1):
        self.gamma = gamma
        super(ExponentialLRScheduler, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        if self.last_epoch <= 0:
            return self.base_lrs
        return [base_lr * self.gamma**self.last_epoch for base_lr in self.base_lrs]
training/tools/utils.py
ADDED
@@ -0,0 +1,121 @@
import cv2
from apex.optimizers import FusedAdam, FusedSGD
from timm.optim import AdamW
from torch import optim
from torch.optim import lr_scheduler
from torch.optim.rmsprop import RMSprop
from torch.optim.adamw import AdamW
from torch.optim.lr_scheduler import MultiStepLR, CyclicLR

from training.tools.schedulers import ExponentialLRScheduler, PolyLR, LRStepScheduler

cv2.ocl.setUseOpenCL(False)
cv2.setNumThreads(0)


class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count


def create_optimizer(optimizer_config, model, master_params=None):
    """Creates optimizer and schedule from configuration

    Parameters
    ----------
    optimizer_config : dict
        Dictionary containing the configuration options for the optimizer.
    model : Model
        The network model.

    Returns
    -------
    optimizer : Optimizer
        The optimizer.
    scheduler : LRScheduler
        The learning rate scheduler.
    """
    if optimizer_config.get("classifier_lr", -1) != -1:
        # Separate classifier parameters from all others
        net_params = []
        classifier_params = []
        for k, v in model.named_parameters():
            if not v.requires_grad:
                continue
            if k.find("encoder") != -1:
                net_params.append(v)
            else:
                classifier_params.append(v)
        params = [
            {"params": net_params},
            {"params": classifier_params, "lr": optimizer_config["classifier_lr"]},
        ]
    else:
        if master_params:
            params = master_params
        else:
            params = model.parameters()

    if optimizer_config["type"] == "SGD":
        optimizer = optim.SGD(params,
                              lr=optimizer_config["learning_rate"],
                              momentum=optimizer_config["momentum"],
                              weight_decay=optimizer_config["weight_decay"],
                              nesterov=optimizer_config["nesterov"])
    elif optimizer_config["type"] == "FusedSGD":
        optimizer = FusedSGD(params,
                             lr=optimizer_config["learning_rate"],
                             momentum=optimizer_config["momentum"],
                             weight_decay=optimizer_config["weight_decay"],
                             nesterov=optimizer_config["nesterov"])
    elif optimizer_config["type"] == "Adam":
        optimizer = optim.Adam(params,
                               lr=optimizer_config["learning_rate"],
                               weight_decay=optimizer_config["weight_decay"])
    elif optimizer_config["type"] == "FusedAdam":
        optimizer = FusedAdam(params,
                              lr=optimizer_config["learning_rate"],
                              weight_decay=optimizer_config["weight_decay"])
    elif optimizer_config["type"] == "AdamW":
        optimizer = AdamW(params,
                          lr=optimizer_config["learning_rate"],
                          weight_decay=optimizer_config["weight_decay"])
    elif optimizer_config["type"] == "RmsProp":
        optimizer = RMSprop(params,
                            lr=optimizer_config["learning_rate"],
                            weight_decay=optimizer_config["weight_decay"])
    else:
        raise KeyError("unrecognized optimizer {}".format(optimizer_config["type"]))

    if optimizer_config["schedule"]["type"] == "step":
        scheduler = LRStepScheduler(optimizer, **optimizer_config["schedule"]["params"])
    elif optimizer_config["schedule"]["type"] == "clr":
        scheduler = CyclicLR(optimizer, **optimizer_config["schedule"]["params"])
    elif optimizer_config["schedule"]["type"] == "multistep":
        scheduler = MultiStepLR(optimizer, **optimizer_config["schedule"]["params"])
    elif optimizer_config["schedule"]["type"] == "exponential":
        scheduler = ExponentialLRScheduler(optimizer, **optimizer_config["schedule"]["params"])
    elif optimizer_config["schedule"]["type"] == "poly":
        scheduler = PolyLR(optimizer, **optimizer_config["schedule"]["params"])
    elif optimizer_config["schedule"]["type"] == "constant":
        scheduler = lr_scheduler.LambdaLR(optimizer, lambda epoch: 1.0)
    elif optimizer_config["schedule"]["type"] == "linear":
        def linear_lr(it):
            return it * optimizer_config["schedule"]["params"]["alpha"] + optimizer_config["schedule"]["params"]["beta"]

        scheduler = lr_scheduler.LambdaLR(optimizer, linear_lr)

    return optimizer, scheduler
training/transforms/__init__.py
ADDED
File without changes
|
training/transforms/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (159 Bytes). View file
|
|
training/transforms/__pycache__/albu.cpython-310.pyc
ADDED
Binary file (4.36 kB). View file
|
|
training/transforms/albu.py
ADDED
@@ -0,0 +1,100 @@
import random

import cv2
import numpy as np
from albumentations import DualTransform, ImageOnlyTransform
from albumentations.augmentations.crops.functional import crop
# from albumentations.augmentations.functional import crop


def isotropically_resize_image(img, size, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC):
    h, w = img.shape[:2]
    if max(w, h) == size:
        return img
    if w > h:
        scale = size / w
        h = h * scale
        w = size
    else:
        scale = size / h
        w = w * scale
        h = size
    interpolation = interpolation_up if scale > 1 else interpolation_down
    resized = cv2.resize(img, (int(w), int(h)), interpolation=interpolation)
    return resized


class IsotropicResize(DualTransform):
    def __init__(self, max_side, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC,
                 always_apply=False, p=1):
        super(IsotropicResize, self).__init__(always_apply, p)
        self.max_side = max_side
        self.interpolation_down = interpolation_down
        self.interpolation_up = interpolation_up

    def apply(self, img, interpolation_down=cv2.INTER_AREA, interpolation_up=cv2.INTER_CUBIC, **params):
        return isotropically_resize_image(img, size=self.max_side, interpolation_down=interpolation_down,
                                          interpolation_up=interpolation_up)

    def apply_to_mask(self, img, **params):
        return self.apply(img, interpolation_down=cv2.INTER_NEAREST, interpolation_up=cv2.INTER_NEAREST, **params)

    def get_transform_init_args_names(self):
        return ("max_side", "interpolation_down", "interpolation_up")


class Resize4xAndBack(ImageOnlyTransform):
    def __init__(self, always_apply=False, p=0.5):
        super(Resize4xAndBack, self).__init__(always_apply, p)

    def apply(self, img, **params):
        h, w = img.shape[:2]
        scale = random.choice([2, 4])
        img = cv2.resize(img, (w // scale, h // scale), interpolation=cv2.INTER_AREA)
        img = cv2.resize(img, (w, h),
                         interpolation=random.choice([cv2.INTER_CUBIC, cv2.INTER_LINEAR, cv2.INTER_NEAREST]))
        return img


class RandomSizedCropNonEmptyMaskIfExists(DualTransform):

    def __init__(self, min_max_height, w2h_ratio=[0.7, 1.3], always_apply=False, p=0.5):
        super(RandomSizedCropNonEmptyMaskIfExists, self).__init__(always_apply, p)

        self.min_max_height = min_max_height
        self.w2h_ratio = w2h_ratio

    def apply(self, img, x_min=0, x_max=0, y_min=0, y_max=0, **params):
        cropped = crop(img, x_min, y_min, x_max, y_max)
        return cropped

    @property
    def targets_as_params(self):
        return ["mask"]

    def get_params_dependent_on_targets(self, params):
        mask = params["mask"]
        mask_height, mask_width = mask.shape[:2]
        crop_height = int(mask_height * random.uniform(self.min_max_height[0], self.min_max_height[1]))
        w2h_ratio = random.uniform(*self.w2h_ratio)
        crop_width = min(int(crop_height * w2h_ratio), mask_width - 1)
        if mask.sum() == 0:
            x_min = random.randint(0, mask_width - crop_width + 1)
            y_min = random.randint(0, mask_height - crop_height + 1)
        else:
            mask = mask.sum(axis=-1) if mask.ndim == 3 else mask
            non_zero_yx = np.argwhere(mask)
            y, x = random.choice(non_zero_yx)
            x_min = x - random.randint(0, crop_width - 1)
            y_min = y - random.randint(0, crop_height - 1)
            x_min = np.clip(x_min, 0, mask_width - crop_width)
            y_min = np.clip(y_min, 0, mask_height - crop_height)

        x_max = x_min + crop_height
        y_max = y_min + crop_width
        y_max = min(mask_height, y_max)
        x_max = min(mask_width, x_max)
        return {"x_min": x_min, "x_max": x_max, "y_min": y_min, "y_max": y_max}

    def get_transform_init_args_names(self):
        return "min_max_height", "height", "width", "w2h_ratio"
training/zoo/__init__.py
ADDED
File without changes
|
training/zoo/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (152 Bytes). View file
|
|
training/zoo/__pycache__/classifiers.cpython-310.pyc
ADDED
Binary file (5.55 kB). View file
|
|
training/zoo/classifiers.py
ADDED
@@ -0,0 +1,172 @@
from functools import partial

import numpy as np
import torch
from timm.models.efficientnet import tf_efficientnet_b4_ns, tf_efficientnet_b3_ns, \
    tf_efficientnet_b5_ns, tf_efficientnet_b2_ns, tf_efficientnet_b6_ns, tf_efficientnet_b7_ns
from torch import nn
from torch.nn.modules.dropout import Dropout
from torch.nn.modules.linear import Linear
from torch.nn.modules.pooling import AdaptiveAvgPool2d

encoder_params = {
    "tf_efficientnet_b3_ns": {
        "features": 1536,
        "init_op": partial(tf_efficientnet_b3_ns, pretrained=True, drop_path_rate=0.2)
    },
    "tf_efficientnet_b2_ns": {
        "features": 1408,
        "init_op": partial(tf_efficientnet_b2_ns, pretrained=False, drop_path_rate=0.2)
    },
    "tf_efficientnet_b4_ns": {
        "features": 1792,
        "init_op": partial(tf_efficientnet_b4_ns, pretrained=True, drop_path_rate=0.5)
    },
    "tf_efficientnet_b5_ns": {
        "features": 2048,
        "init_op": partial(tf_efficientnet_b5_ns, pretrained=True, drop_path_rate=0.2)
    },
    "tf_efficientnet_b4_ns_03d": {
        "features": 1792,
        "init_op": partial(tf_efficientnet_b4_ns, pretrained=True, drop_path_rate=0.3)
    },
    "tf_efficientnet_b5_ns_03d": {
        "features": 2048,
        "init_op": partial(tf_efficientnet_b5_ns, pretrained=True, drop_path_rate=0.3)
    },
    "tf_efficientnet_b5_ns_04d": {
        "features": 2048,
        "init_op": partial(tf_efficientnet_b5_ns, pretrained=True, drop_path_rate=0.4)
    },
    "tf_efficientnet_b6_ns": {
        "features": 2304,
        "init_op": partial(tf_efficientnet_b6_ns, pretrained=True, drop_path_rate=0.2)
    },
    "tf_efficientnet_b7_ns": {
        "features": 2560,
        "init_op": partial(tf_efficientnet_b7_ns, pretrained=True, drop_path_rate=0.2)
    },
    "tf_efficientnet_b6_ns_04d": {
        "features": 2304,
        "init_op": partial(tf_efficientnet_b6_ns, pretrained=True, drop_path_rate=0.4)
    },
}


def setup_srm_weights(input_channels: int = 3) -> torch.Tensor:
    """Creates the SRM kernels for noise analysis."""
    # note: values taken from Zhou et al., "Learning Rich Features for Image Manipulation Detection", CVPR2018
    srm_kernel = torch.from_numpy(np.array([
        [  # srm 1/2 horiz
            [0., 0., 0., 0., 0.],  # noqa: E241,E201
            [0., 0., 0., 0., 0.],  # noqa: E241,E201
            [0., 1., -2., 1., 0.],  # noqa: E241,E201
            [0., 0., 0., 0., 0.],  # noqa: E241,E201
            [0., 0., 0., 0., 0.],  # noqa: E241,E201
        ], [  # srm 1/4
            [0., 0., 0., 0., 0.],  # noqa: E241,E201
            [0., -1., 2., -1., 0.],  # noqa: E241,E201
            [0., 2., -4., 2., 0.],  # noqa: E241,E201
            [0., -1., 2., -1., 0.],  # noqa: E241,E201
            [0., 0., 0., 0., 0.],  # noqa: E241,E201
        ], [  # srm 1/12
            [-1., 2., -2., 2., -1.],  # noqa: E241,E201
            [2., -6., 8., -6., 2.],  # noqa: E241,E201
            [-2., 8., -12., 8., -2.],  # noqa: E241,E201
            [2., -6., 8., -6., 2.],  # noqa: E241,E201
            [-1., 2., -2., 2., -1.],  # noqa: E241,E201
        ]
    ])).float()
    srm_kernel[0] /= 2
    srm_kernel[1] /= 4
    srm_kernel[2] /= 12
    return srm_kernel.view(3, 1, 5, 5).repeat(1, input_channels, 1, 1)


def setup_srm_layer(input_channels: int = 3) -> torch.nn.Module:
    """Creates a SRM convolution layer for noise analysis."""
    weights = setup_srm_weights(input_channels)
    conv = torch.nn.Conv2d(input_channels, out_channels=3, kernel_size=5, stride=1, padding=2, bias=False)
    with torch.no_grad():
        conv.weight = torch.nn.Parameter(weights, requires_grad=False)
    return conv


class DeepFakeClassifierSRM(nn.Module):
    def __init__(self, encoder, dropout_rate=0.5) -> None:
        super().__init__()
        self.encoder = encoder_params[encoder]["init_op"]()
        self.avg_pool = AdaptiveAvgPool2d((1, 1))
        self.srm_conv = setup_srm_layer(3)
        self.dropout = Dropout(dropout_rate)
        self.fc = Linear(encoder_params[encoder]["features"], 1)

    def forward(self, x):
        noise = self.srm_conv(x)
        x = self.encoder.forward_features(noise)
        x = self.avg_pool(x).flatten(1)
        x = self.dropout(x)
        x = self.fc(x)
        return x


class GlobalWeightedAvgPool2d(nn.Module):
    """
    Global Weighted Average Pooling from paper "Global Weighted Average
    Pooling Bridges Pixel-level Localization and Image-level Classification"
    """

    def __init__(self, features: int, flatten=False):
        super().__init__()
        self.conv = nn.Conv2d(features, 1, kernel_size=1, bias=True)
        self.flatten = flatten

    def fscore(self, x):
        m = self.conv(x)
        m = m.sigmoid().exp()
        return m

    def norm(self, x: torch.Tensor):
        return x / x.sum(dim=[2, 3], keepdim=True)

    def forward(self, x):
        input_x = x
        x = self.fscore(x)
        x = self.norm(x)
        x = x * input_x
        x = x.sum(dim=[2, 3], keepdim=not self.flatten)
        return x


class DeepFakeClassifier(nn.Module):
    def __init__(self, encoder, dropout_rate=0.0) -> None:
        super().__init__()
        self.encoder = encoder_params[encoder]["init_op"]()
        self.avg_pool = AdaptiveAvgPool2d((1, 1))
        self.dropout = Dropout(dropout_rate)
        self.fc = Linear(encoder_params[encoder]["features"], 1)

    def forward(self, x):
        x = self.encoder.forward_features(x)
        x = self.avg_pool(x).flatten(1)
        x = self.dropout(x)
        x = self.fc(x)
        return x


class DeepFakeClassifierGWAP(nn.Module):
    def __init__(self, encoder, dropout_rate=0.5) -> None:
        super().__init__()
        self.encoder = encoder_params[encoder]["init_op"]()
        self.avg_pool = GlobalWeightedAvgPool2d(encoder_params[encoder]["features"])
        self.dropout = Dropout(dropout_rate)
        self.fc = Linear(encoder_params[encoder]["features"], 1)

    def forward(self, x):
        x = self.encoder.forward_features(x)
        x = self.avg_pool(x).flatten(1)
        x = self.dropout(x)
        x = self.fc(x)
        return x
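A quick smoke test of the plain classifier, in the spirit of the __main__ block in unet.py below. The encoder name comes from encoder_params above; the input size is illustrative (the head is fully convolutional with adaptive pooling), and instantiating with pretrained=True assumes timm can download the encoder weights:

import torch
from training.zoo.classifiers import DeepFakeClassifier

model = DeepFakeClassifier(encoder="tf_efficientnet_b7_ns").eval()
with torch.no_grad():
    dummy = torch.rand(2, 3, 380, 380)       # batch of RGB face crops
    print(torch.sigmoid(model(dummy)))       # per-crop fake probability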
training/zoo/unet.py
ADDED
@@ -0,0 +1,151 @@
from functools import partial

import torch
from timm.models.efficientnet import tf_efficientnet_b3_ns, tf_efficientnet_b5_ns
from torch import nn
from torch.nn import Dropout2d, Conv2d
from torch.nn.modules.dropout import Dropout
from torch.nn.modules.linear import Linear
from torch.nn.modules.pooling import AdaptiveAvgPool2d
from torch.nn.modules.upsampling import UpsamplingBilinear2d

encoder_params = {
    "tf_efficientnet_b3_ns": {
        "features": 1536,
        "filters": [40, 32, 48, 136, 1536],
        "decoder_filters": [64, 128, 256, 256],
        "init_op": partial(tf_efficientnet_b3_ns, pretrained=True, drop_path_rate=0.2)
    },
    "tf_efficientnet_b5_ns": {
        "features": 2048,
        "filters": [48, 40, 64, 176, 2048],
        "decoder_filters": [64, 128, 256, 256],
        "init_op": partial(tf_efficientnet_b5_ns, pretrained=True, drop_path_rate=0.2)
    },
}


class DecoderBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.layer = nn.Sequential(
            nn.Upsample(scale_factor=2),
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        return self.layer(x)


class ConcatBottleneck(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.seq = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.ReLU(inplace=True)
        )

    def forward(self, dec, enc):
        x = torch.cat([dec, enc], dim=1)
        return self.seq(x)


class Decoder(nn.Module):
    def __init__(self, decoder_filters, filters, upsample_filters=None,
                 decoder_block=DecoderBlock, bottleneck=ConcatBottleneck, dropout=0):
        super().__init__()
        self.decoder_filters = decoder_filters
        self.filters = filters
        self.decoder_block = decoder_block
        self.decoder_stages = nn.ModuleList([self._get_decoder(idx) for idx in range(0, len(decoder_filters))])
        self.bottlenecks = nn.ModuleList([bottleneck(self.filters[-i - 2] + f, f)
                                          for i, f in enumerate(reversed(decoder_filters))])
        self.dropout = Dropout2d(dropout) if dropout > 0 else None
        self.last_block = None
        if upsample_filters:
            self.last_block = decoder_block(decoder_filters[0], out_channels=upsample_filters)
        else:
            self.last_block = UpsamplingBilinear2d(scale_factor=2)

    def forward(self, encoder_results: list):
        x = encoder_results[0]
        bottlenecks = self.bottlenecks
        for idx, bottleneck in enumerate(bottlenecks):
            rev_idx = - (idx + 1)
            x = self.decoder_stages[rev_idx](x)
            x = bottleneck(x, encoder_results[-rev_idx])
        if self.last_block:
            x = self.last_block(x)
        if self.dropout:
            x = self.dropout(x)
        return x

    def _get_decoder(self, layer):
        idx = layer + 1
        if idx == len(self.decoder_filters):
            in_channels = self.filters[idx]
        else:
            in_channels = self.decoder_filters[idx]
        return self.decoder_block(in_channels, self.decoder_filters[max(layer, 0)])


def _initialize_weights(module):
    for m in module.modules():
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d) or isinstance(m, nn.Linear):
            m.weight.data = nn.init.kaiming_normal_(m.weight.data)
            if m.bias is not None:
                m.bias.data.zero_()
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()


class EfficientUnetClassifier(nn.Module):
    def __init__(self, encoder, dropout_rate=0.5) -> None:
        super().__init__()
        self.decoder = Decoder(decoder_filters=encoder_params[encoder]["decoder_filters"],
                               filters=encoder_params[encoder]["filters"])
        self.avg_pool = AdaptiveAvgPool2d((1, 1))
        self.dropout = Dropout(dropout_rate)
        self.fc = Linear(encoder_params[encoder]["features"], 1)
        self.final = Conv2d(encoder_params[encoder]["decoder_filters"][0], out_channels=1, kernel_size=1, bias=False)
        _initialize_weights(self)
        self.encoder = encoder_params[encoder]["init_op"]()

    def get_encoder_features(self, x):
        encoder_results = []
        x = self.encoder.conv_stem(x)
        x = self.encoder.bn1(x)
        x = self.encoder.act1(x)
        encoder_results.append(x)
        x = self.encoder.blocks[:2](x)
        encoder_results.append(x)
        x = self.encoder.blocks[2:3](x)
        encoder_results.append(x)
        x = self.encoder.blocks[3:5](x)
        encoder_results.append(x)
        x = self.encoder.blocks[5:](x)
        x = self.encoder.conv_head(x)
        x = self.encoder.bn2(x)
        x = self.encoder.act2(x)
        encoder_results.append(x)
        encoder_results = list(reversed(encoder_results))
        return encoder_results

    def forward(self, x):
        encoder_results = self.get_encoder_features(x)
        seg = self.final(self.decoder(encoder_results))
        x = encoder_results[0]
        x = self.avg_pool(x).flatten(1)
        x = self.dropout(x)
        x = self.fc(x)
        return x, seg


if __name__ == '__main__':
    model = EfficientUnetClassifier("tf_efficientnet_b5_ns")
    model.eval()
    with torch.no_grad():
        input = torch.rand(4, 3, 224, 224)
        print(model(input))
weights/.gitkeep
ADDED
File without changes
|
weights/b7_ns_best.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9db77ab9318863e2f8ab287c8eb83c2232584b82dc2fb41f1d614ddd7900cccb
size 266910617