My script:

# Define the device (GPU or CPU)

# Prefer CUDA whenever PyTorch can see a GPU; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Function to print GPU memory usage

def print_memory_usage(description="Memory status"):
    """Print a snapshot of CUDA device 0 memory usage under a heading.

    Args:
        description: Heading line printed before the memory figures.

    On a CUDA machine this prints the device's total memory, the memory
    currently allocated, and the memory reserved (cached) by PyTorch, each
    expressed in GB (bytes / 1e9). When CUDA is not available a short notice
    is printed instead, because the device queries below cannot succeed
    without a GPU.
    """
    print(description)

    # Guard first: querying device 0 on a CPU-only machine raises.
    if not torch.cuda.is_available():
        print("No GPU available, using CPU.")
        return

    total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"Total memory: {total_bytes / 1e9} GB")
    print(f"Used memory: {torch.cuda.memory_allocated(0) / 1e9} GB")
    print(f"Cached memory: {torch.cuda.memory_reserved(0) / 1e9} GB")

# Function to print GPU information

def print_gpu_info():
    """Print the name and total memory of CUDA device 0, or a CPU notice.

    When CUDA is available, prints the device name and its total memory in
    GB (bytes / 1e9, two decimals). Otherwise prints a single line saying
    that the CPU is being used. Returns nothing.
    """
    # Guard clause keeps the GPU path flat and avoids touching torch.cuda
    # device queries when no GPU is present.
    if not torch.cuda.is_available():
        print("No GPU available, using CPU.")
        return

    gpu_name = torch.cuda.get_device_name(0)
    total_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"Using GPU: {gpu_name}")
    print(f"Total GPU memory: {total_memory:.2f} GB")

# Function to estimate the required memory

def estimate_memory_requirements(model, batch_size, seq_length):
    """Estimate and print a rough memory footprint for one training step.

    The estimate is the sum of three terms:
      * parameters: element count times the element size of each parameter's
        own dtype (so fp16/bf16 weights count 2 bytes, float32 counts 4);
      * gradients: the same size as the parameter, counted only for
        parameters with ``requires_grad`` set;
      * data: ``batch_size * seq_length * 4`` bytes (4 bytes per element).

    Activations and optimizer state are NOT included, so real usage will be
    higher than this figure.

    Args:
        model: Object exposing ``parameters()`` that yields tensors
            (e.g. a ``torch.nn.Module``).
        batch_size: Number of sequences per batch.
        seq_length: Number of elements per sequence.

    Returns:
        The estimated total in bytes (also printed in GB, bytes / 1e9).
    """
    param_size = 0
    grad_size = 0
    for p in model.parameters():
        n_bytes = p.numel() * p.element_size()
        param_size += n_bytes
        # Frozen parameters never get a .grad buffer allocated.
        if p.requires_grad:
            grad_size += n_bytes

    # Input/output data, assumed to take 4 bytes per element.
    data_size = batch_size * seq_length * 4

    total_memory = param_size + data_size + grad_size
    print(f"Estimated memory requirements: {total_memory / 1e9} GB")
    return total_memory

# Print GPU information

# Module-level call: emits the device report to stdout as soon as the script runs.
print_gpu_info()

# Model and data configuration

# Data-shape settings.
batch_size = 8  # Adjust as needed.
seq_length = 512  # Maximum sequence length.

# Hub identifier of the checkpoint and the tokenizer loaded for it.
model_name = "amazon/MistralLite"
tokenizer = AutoTokenizer.from_pretrained(model_name)

amazon/MistralLite

Error during model loading: CUDA error: out of memory