"""Reinstall llama-cpp-python with CUDA support.

Steps, in order:

1. Uninstall any existing ``llama-cpp-python`` package.
2. Look for ``nvcc`` under ``$CUDA_PATH`` (default ``/usr/local/cuda``).
   If it is missing, download and run the CUDA 11.8 toolkit installer
   (Linux x86_64 only; other platforms raise ``ValueError``).
3. Run ``pip install llama-cpp-python`` with the CUDA CMake flag passed
   through the ``CMAKE_ARGS`` environment variable.
"""

import os
import platform
import subprocess
import sys
import tarfile
import urllib.request

PACKAGE_NAME = "llama-cpp-python"
DEFAULT_CUDA_PATH = "/usr/local/cuda"

# NOTE(review): the name of the CUDA switch has varied across
# llama-cpp-python / llama.cpp releases -- confirm it matches the version
# being installed.
CUDA_CMAKE_FLAG = "-DLLAMA_CUDA=on"

# CUDA toolkit installers keyed by (platform.system(), platform.machine()).
CUDA_INSTALLER_URLS = {
    ("Linux", "x86_64"): (
        "https://developer.download.nvidia.com/compute/cuda/11.8.0/"
        "local_installers/cuda_11.8.0_520.61.05_linux.run"
    ),
}


def cuda_installer_url(system, machine):
    """Return the CUDA toolkit installer URL for the given platform.

    Args:
        system: Value of ``platform.system()``, e.g. ``"Linux"``.
        machine: Value of ``platform.machine()``, e.g. ``"x86_64"``.

    Raises:
        ValueError: If no installer is known for the platform.
    """
    try:
        return CUDA_INSTALLER_URLS[(system, machine)]
    except KeyError:
        raise ValueError(f"Unsupported platform: {system} {machine}") from None


def build_env(nvcc_path, base_env=None):
    """Return a copy of the environment prepared for a CUDA build.

    Args:
        nvcc_path: Path to the ``nvcc`` compiler.
        base_env: Mapping to start from; defaults to ``os.environ``.

    Returns:
        A new ``dict``; the mapping passed in is not modified.
    """
    env = dict(os.environ if base_env is None else base_env)
    # Kept because the original script exported this name.
    env["CMAKE_CUDA_COMPILER"] = nvcc_path
    # CUDACXX is the environment variable CMake consults to pick the CUDA
    # compiler; CMAKE_CUDA_COMPILER is only honoured as a -D cache entry.
    env["CUDACXX"] = nvcc_path
    # Append rather than overwrite so flags the caller already set survive.
    existing_args = env.get("CMAKE_ARGS", "")
    env["CMAKE_ARGS"] = f"{existing_args} {CUDA_CMAKE_FLAG}".strip()
    return env


def pip_install_command():
    """Return the argv list that installs llama-cpp-python via pip.

    The CMake flags are supplied through ``CMAKE_ARGS`` in the environment
    (see ``build_env``), not via pip's removed ``--install-option``.
    """
    # sys.executable -m pip targets the interpreter running this script;
    # --no-cache-dir stops pip from reusing a previously built wheel.
    return [sys.executable, "-m", "pip", "install", "--no-cache-dir", PACKAGE_NAME]


def install_cuda_toolkit():
    """Download and run the CUDA toolkit installer for this platform.

    Raises:
        ValueError: If the current platform has no known installer.
        subprocess.CalledProcessError: If the installer exits non-zero.
    """
    cuda_url = cuda_installer_url(platform.system(), platform.machine())
    cuda_installer, _ = urllib.request.urlretrieve(cuda_url)
    try:
        subprocess.run(
            ["sh", cuda_installer, "--silent", "--toolkit", "--override"],
            check=True,
        )
    finally:
        # Remove the temporary file urlretrieve() created for the download.
        urllib.request.urlcleanup()


def main():
    """Run the reinstall and return pip's exit code for the install step.

    Raises:
        ValueError: If CUDA must be installed on an unsupported platform.
        RuntimeError: If ``nvcc`` is still missing after the toolkit install.
    """
    # Best effort: a failed uninstall should not block the reinstall.
    subprocess.run(
        [sys.executable, "-m", "pip", "uninstall", "-y", PACKAGE_NAME],
        check=False,
    )

    cuda_path = os.environ.get("CUDA_PATH", DEFAULT_CUDA_PATH)
    nvcc_path = os.path.join(cuda_path, "bin", "nvcc")

    if not os.path.exists(nvcc_path):
        print("CUDA compiler not found, downloading and installing CUDA toolkit...")
        install_cuda_toolkit()

        # After a toolkit install, look in the default location rather than
        # whatever $CUDA_PATH pointed at.
        cuda_path = DEFAULT_CUDA_PATH
        nvcc_path = os.path.join(cuda_path, "bin", "nvcc")
        if not os.path.exists(nvcc_path):
            raise RuntimeError(
                f"CUDA toolkit installation finished but nvcc was not found at {nvcc_path}"
            )

    result = subprocess.run(
        pip_install_command(),
        env=build_env(nvcc_path),
        check=False,
    )
    return result.returncode


if __name__ == "__main__":
    sys.exit(main())