Colab TPU runtime, but the model runs on the CPU only.
The Colab TPU runtime is used only for its large RAM.
It takes about 70 GB of RAM.
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu

!pip uninstall -y tensorflow && pip install tensorflow-cpu

!pip install bitsandbytes[cpu]

import torch
from transformers import pipeline

# Select the compute device: everything in this script runs on the CPU.
DEVICE = torch.device("cpu")

# Load the text-generation pipeline on the CPU with caching enabled.
# NOTE(review): max_length counts the prompt tokens as well as the generated
# ones; max_new_tokens may be the better knob -- confirm before changing.
# NOTE(review): float16 kernels on CPU can be missing or slow depending on the
# installed PyTorch build; bfloat16 may be a safer choice -- confirm.
pipe = pipeline(
    "text-generation",
    model="unsloth/Qwen2.5-32B-Instruct",
    trust_remote_code=True,
    device=DEVICE,
    use_cache=True,  # enable caching to improve performance
    max_length=100,  # cap the maximum length to reduce memory usage
    torch_dtype=torch.float16,  # 16-bit floating point, if supported
)

# Chat message to send to the model.
# Each message carries only "role" and "content". cache_position is an
# internal generation argument, not a message field, so it is not set here.
messages = [
    {"role": "user", "content": "Who are you?"},
]

# Generate the reply and print it.
try:
    output = pipe(messages)
except RuntimeError as e:
    # The message below is Arabic for "An error occurred: ...".
    print(f"حدث خطأ: {e}")
else:
    print(output)

unsloth/Qwen2.5-32B-Instruct

Run on CPU

تعيين المعالج (CPU)

تحميل الـ pipeline مع تمكين التخزين المؤقت واستخدام cache_position

الرسالة التي تريد إرسالها للنموذج مع إضافة cache_position

توليد الرد