import os
import platform
import shutil
import subprocess
import sys
import tarfile
import urllib.request

# Reinstall llama-cpp-python from source with CUDA support, installing the
# CUDA toolkit first when no nvcc compiler can be found.
#
# Every external command is run with check=True so that a failed step raises
# subprocess.CalledProcessError instead of being silently ignored.

# Drive pip through the running interpreter so the package is removed from and
# installed into this environment, not whichever "pip" happens to be on PATH.
pip_command = [sys.executable, "-m", "pip"]

# Uninstall the existing llama-cpp-python package
subprocess.run(pip_command + ["uninstall", "-y", "llama-cpp-python"], check=True)

# Check if CUDA compiler is available
cuda_path = os.environ.get("CUDA_PATH", "/usr/local/cuda")
# On Windows the compiler binary carries an .exe suffix.
nvcc_name = "nvcc.exe" if platform.system() == "Windows" else "nvcc"
nvcc_path = os.path.join(cuda_path, "bin", nvcc_name)

if not os.path.exists(nvcc_path):
    # The toolkit may live outside CUDA_PATH (e.g. a distribution package);
    # accept any nvcc on PATH before resorting to a multi-gigabyte download.
    nvcc_on_path = shutil.which("nvcc")
    if nvcc_on_path:
        nvcc_path = nvcc_on_path
        cuda_path = os.path.dirname(os.path.dirname(nvcc_on_path))

if not os.path.exists(nvcc_path):
    # CUDA compiler not found, download and install CUDA toolkit
    print("CUDA compiler not found, downloading and installing CUDA toolkit...")

    # Determine the appropriate CUDA toolkit URL based on the platform
    system = platform.system()
    machine = platform.machine()
    if system == "Linux" and machine == "x86_64":
        cuda_url = "https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run"
    else:
        raise ValueError(f"Unsupported platform: {system} {machine}")

    # Download the CUDA toolkit installer (urlretrieve stores it in a temp file)
    cuda_installer, _ = urllib.request.urlretrieve(cuda_url)

    try:
        # Install the CUDA toolkit; an argument list avoids shell quoting
        # problems with the temporary file path.
        subprocess.run(
            ["sh", cuda_installer, "--silent", "--toolkit", "--override"],
            check=True,
        )
    finally:
        # Remove the downloaded installer whether or not the install succeeded.
        urllib.request.urlcleanup()

    # Update the CUDA path and compiler path
    cuda_path = "/usr/local/cuda"
    nvcc_path = os.path.join(cuda_path, "bin", "nvcc")
    if not os.path.exists(nvcc_path):
        raise RuntimeError(
            f"CUDA toolkit installer finished but {nvcc_path} was not found"
        )

# Tell CMake which CUDA compiler to use. CMake reads the CUDACXX environment
# variable for this; CMAKE_CUDA_COMPILER is only honoured as a cache variable,
# but it is still exported for anything else that inspects it.
os.environ["CMAKE_CUDA_COMPILER"] = nvcc_path
os.environ["CUDACXX"] = nvcc_path

# pip 23.1 removed --install-option, so the CMake flag is passed through the
# CMAKE_ARGS environment variable that the llama-cpp-python build reads.
# Any CMAKE_ARGS already set by the caller are preserved.
existing_cmake_args = os.environ.get("CMAKE_ARGS", "")
os.environ["CMAKE_ARGS"] = f"{existing_cmake_args} -DLLAMA_CUDA=on".strip()

# Install llama-cpp-python with CUDA support. --no-cache-dir stops pip from
# reusing a previously built (possibly CPU-only) wheel from its cache.
install_command = pip_command + ["install", "--no-cache-dir", "llama-cpp-python"]
subprocess.run(install_command, check=True)