Spaces:

H-Liu1997
/

TANGO

Running on L40S

File size: 7,073 Bytes

31f2f28

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1e90f25",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Step 1: Install dependency.\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install ffmpeg-python\n",
    "\n",
    "# Step 2: Clone the Wav2Lip repository.\n",
    "# Skipped when the directory already exists so the cell can be re-run safely.\n",
    "import os\n",
    "\n",
    "if not os.path.isdir(\"Wav2Lip\"):\n",
    "    !git clone https://github.com/justinjohn0306/Wav2Lip\n",
    "\n",
    "# Step 3: Download pretrained model\n",
    "import requests\n",
    "\n",
    "\n",
    "def download_file(url, destination, chunk_size=1 << 20):\n",
    "    \"\"\"Stream `url` into the file `destination`.\n",
    "\n",
    "    The body is written in chunks to a temporary `.part` file that is renamed\n",
    "    only after the transfer finished, so a failed or interrupted download never\n",
    "    leaves a truncated file (or an HTML error page) under the final name.\n",
    "\n",
    "    Raises:\n",
    "        requests.HTTPError: if the server answers with an error status.\n",
    "    \"\"\"\n",
    "    os.makedirs(os.path.dirname(destination) or \".\", exist_ok=True)\n",
    "    partial = destination + \".part\"\n",
    "    with requests.get(url, stream=True, timeout=60) as response:\n",
    "        response.raise_for_status()\n",
    "        with open(partial, \"wb\") as f:\n",
    "            for chunk in response.iter_content(chunk_size=chunk_size):\n",
    "                f.write(chunk)\n",
    "    os.replace(partial, destination)\n",
    "\n",
    "\n",
    "download_file(\n",
    "    \"https://iiitaphyd-my.sharepoint.com/personal/radrabha_m_research_iiit_ac_in/_layouts/15/download.aspx?share=EdjI7bZlgApMqsVoEUUXpLsBxqXbn5z8VTmoxp55YNDcIA\",\n",
    "    \"Wav2Lip/checkpoints/wav2lip_gan.pth\",\n",
    ")\n",
    "\n",
    "# Step 4: Install the required dependencies for Wav2Lip\n",
    "%pip install -r Wav2Lip/requirements.txt\n",
    "%pip install pyaudio\n",
    "\n",
    "# Step 5: Download pretrained model for face detection\n",
    "download_file(\n",
    "    \"https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth\",\n",
    "    \"Wav2Lip/face_detection/detection/sfd/s3fd.pth\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8e86c988",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import subprocess\n",
    "import sys\n",
    "from base64 import b64encode\n",
    "from urllib import parse as urlparse\n",
    "\n",
    "from IPython.display import HTML\n",
    "\n",
    "# Step 1: Install yt-dlp into the interpreter that runs this kernel\n",
    "subprocess.run([sys.executable, '-m', 'pip', 'install', 'yt-dlp'], check=True)\n",
    "\n",
    "# Step 2: Define YouTube URL and Video ID\n",
    "YOUTUBE_URL = 'https://www.youtube.com/watch?v=vAnWYLTdvfY'\n",
    "url_data = urlparse.urlparse(YOUTUBE_URL)\n",
    "query = urlparse.parse_qs(url_data.query)\n",
    "YOUTUBE_ID = query[\"v\"][0]\n",
    "\n",
    "# Remove files left over from a previous run. The raw download must go too:\n",
    "# yt-dlp skips the download when youtube.mp4 already exists, which would\n",
    "# silently reuse the old video after YOUTUBE_URL was changed.\n",
    "for stale_file in ('youtube.mp4', 'input_vid.mp4'):\n",
    "    if os.path.isfile(stale_file):\n",
    "        os.remove(stale_file)\n",
    "\n",
    "# Trim video (start, end) seconds\n",
    "start = 35\n",
    "end = 62\n",
    "interval = end - start\n",
    "\n",
    "# Step 3: Download and trim the YouTube video.\n",
    "# check=True turns a failed download/cut into an immediate error instead of a\n",
    "# confusing missing-file failure further down.\n",
    "subprocess.run([sys.executable, '-m', 'yt_dlp', '-f', 'bestvideo[ext=mp4]', '--output', \"youtube.%(ext)s\", f'https://www.youtube.com/watch?v={YOUTUBE_ID}'], check=True)\n",
    "\n",
    "# Cut the video using FFmpeg\n",
    "subprocess.run(['ffmpeg', '-y', '-i', 'youtube.mp4', '-ss', str(start), '-t', str(interval), '-async', '1', 'input_vid.mp4'], check=True)\n",
    "\n",
    "\n",
    "# Display video.\n",
    "def show_video(path):\n",
    "    \"\"\"Return an HTML <video> element with the file at `path` embedded as a base64 data URL.\"\"\"\n",
    "    with open(path, 'rb') as video_file:\n",
    "        mp4 = video_file.read()\n",
    "    data_url = \"data:video/mp4;base64,\" + b64encode(mp4).decode()\n",
    "    return HTML(f\"\"\"<video width=600 controls><source src=\"{data_url}\"></video>\"\"\")\n",
    "\n",
    "\n",
    "# Preview the trimmed video\n",
    "show_video('input_vid.mp4')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7da8e818",
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "import os\n",
    "from IPython.display import Audio, display\n",
    "\n",
    "upload_method = 'Path'  # Change this to 'Record' or 'Path'\n",
    "\n",
    "# Audio file consumed by the inference cell\n",
    "OUTPUT_AUDIO = 'input_audio.wav'\n",
    "\n",
    "\n",
    "def remove_previous_audio():\n",
    "    \"\"\"Delete the input audio left over from a previous run, if present.\"\"\"\n",
    "    if os.path.isfile(OUTPUT_AUDIO):\n",
    "        os.remove(OUTPUT_AUDIO)\n",
    "\n",
    "\n",
    "def display_audio():\n",
    "    \"\"\"Show an inline player for the prepared input audio.\"\"\"\n",
    "    display(Audio(OUTPUT_AUDIO))\n",
    "\n",
    "\n",
    "if upload_method == 'Record':\n",
    "    import pyaudio\n",
    "    import wave\n",
    "\n",
    "    CHUNK = 1024\n",
    "    FORMAT = pyaudio.paInt16\n",
    "    CHANNELS = 1\n",
    "    RATE = 16000\n",
    "    RECORD_SECONDS = 5\n",
    "    WAVE_OUTPUT_FILENAME = OUTPUT_AUDIO\n",
    "\n",
    "    remove_previous_audio()\n",
    "\n",
    "    p = pyaudio.PyAudio()\n",
    "    try:\n",
    "        sample_width = p.get_sample_size(FORMAT)\n",
    "        stream = p.open(format=FORMAT,\n",
    "                        channels=CHANNELS,\n",
    "                        rate=RATE,\n",
    "                        input=True,\n",
    "                        frames_per_buffer=CHUNK)\n",
    "        try:\n",
    "            print(\"Recording...\")\n",
    "            frames = [stream.read(CHUNK) for _ in range(int(RATE / CHUNK * RECORD_SECONDS))]\n",
    "            print(\"Finished recording.\")\n",
    "        finally:\n",
    "            # Release the input device even when reading fails part-way\n",
    "            stream.stop_stream()\n",
    "            stream.close()\n",
    "    finally:\n",
    "        p.terminate()\n",
    "\n",
    "    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:\n",
    "        wf.setnchannels(CHANNELS)\n",
    "        wf.setsampwidth(sample_width)\n",
    "        wf.setframerate(RATE)\n",
    "        wf.writeframes(b''.join(frames))\n",
    "\n",
    "    display_audio()\n",
    "\n",
    "elif upload_method == 'Path':\n",
    "    import librosa\n",
    "    import soundfile as sf\n",
    "\n",
    "    # Add the full path to your audio (placeholder - edit for your machine)\n",
    "    PATH_TO_YOUR_AUDIO = 'C:/Users/justi/OneDrive/Desktop/wav2lip/Wav2Lip/input_audio.wav'\n",
    "\n",
    "    # Load the audio at its native sampling rate (sr=None disables resampling).\n",
    "    # This happens BEFORE the old output is removed, so a source path that\n",
    "    # resolves to input_audio.wav itself is read rather than deleted.\n",
    "    audio, sr = librosa.load(PATH_TO_YOUR_AUDIO, sr=None)\n",
    "\n",
    "    remove_previous_audio()\n",
    "\n",
    "    # Save the audio as WAV with the same sampling rate\n",
    "    sf.write(OUTPUT_AUDIO, audio, sr, format='wav')\n",
    "\n",
    "    display_audio()\n",
    "\n",
    "else:\n",
    "    remove_previous_audio()\n",
    "    raise ValueError(f\"upload_method must be 'Record' or 'Path', got {upload_method!r}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "63289945",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Define the parameters for the Wav2Lip model\n",
    "pad_top = 0\n",
    "pad_bottom = 10\n",
    "pad_left = 0\n",
    "pad_right = 0\n",
    "rescaleFactor = 1\n",
    "nosmooth = False\n",
    "\n",
    "# Set the path to the Wav2Lip model and input files.\n",
    "# The checkpoint path is relative to the Wav2Lip directory (the command below\n",
    "# runs after `cd Wav2Lip`). The input files were written to the notebook's\n",
    "# working directory, so they are passed as absolute paths; bare relative names\n",
    "# would be looked up inside Wav2Lip/ and not be found.\n",
    "checkpoint_path = \"checkpoints/wav2lip_gan.pth\"\n",
    "input_face = os.path.abspath(\"input_vid.mp4\")\n",
    "input_audio = os.path.abspath(\"input_audio.wav\")\n",
    "result_path = 'Wav2Lip/results/result_voice.mp4'\n",
    "nosmooth_flag = \"--nosmooth\" if nosmooth else \"\"\n",
    "\n",
    "# Remove the result of a previous run so a failed inference is not masked by\n",
    "# an old output video.\n",
    "if os.path.isfile(result_path):\n",
    "    os.remove(result_path)\n",
    "\n",
    "# Run the Wav2Lip model (paths are quoted so directories with spaces work)\n",
    "!cd Wav2Lip && python inference.py --checkpoint_path \"{checkpoint_path}\" --face \"{input_face}\" --audio \"{input_audio}\" --pads {pad_top} {pad_bottom} {pad_left} {pad_right} --resize_factor {rescaleFactor} {nosmooth_flag}\n",
    "\n",
    "# Preview the output video\n",
    "print(\"Final Video Preview\")\n",
    "print(\"Find the output video at\", result_path)\n",
    "show_video(result_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3fbafa56",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}