|
import os |
|
from typing import Dict, Tuple, Union, Optional |
|
|
|
from torch.nn import Module |
|
from transformers import AutoModel |
|
|
|
|
|
def auto_configure_device_map(num_gpus: int, num_trans_layers: int = 28) -> Dict[str, int]:
    """Build a module-name -> GPU-index map that spreads transformer layers over GPUs.

    ``transformer.word_embeddings``, ``transformer.final_layernorm`` and
    ``lm_head`` are always placed on GPU 0. The ``transformer.layers.{i}``
    entries are then assigned to GPUs in ascending order, moving to the next
    GPU once the current one has reached its share of the slot budget.

    The budget is ``num_trans_layers + 2`` slots split evenly across
    ``num_gpus``; GPU 0 starts with two slots already consumed to account for
    the modules pinned there.
    NOTE(review): how the three pinned modules map onto those two slots is not
    evident from this code -- confirm against the model definition.

    Args:
        num_gpus: Number of GPUs to distribute over. Must be >= 1.
        num_trans_layers: Number of ``transformer.layers.{i}`` entries to
            place. Defaults to 28, the value previously hard-coded here.

    Returns:
        A dict mapping module names to integer GPU indices.

    Raises:
        ValueError: If ``num_gpus`` < 1 or ``num_trans_layers`` < 0.
    """
    # Validate explicitly: the original relied on a ZeroDivisionError for 0 and
    # on an ``assert`` (stripped under ``python -O``) for negative counts.
    if num_gpus < 1:
        raise ValueError(f"num_gpus must be a positive integer, got {num_gpus!r}")
    if num_trans_layers < 0:
        raise ValueError(
            f"num_trans_layers must be non-negative, got {num_trans_layers!r}"
        )

    # Slots charged to GPU 0 up front for the non-layer modules pinned there.
    reserved_slots = 2
    # Derived from the layer count instead of a separate magic number (was 30),
    # so the two values cannot drift apart.
    per_gpu_layers = (num_trans_layers + reserved_slots) / num_gpus

    device_map = {
        'transformer.word_embeddings': 0,
        'transformer.final_layernorm': 0,
        'lm_head': 0,
    }

    used = reserved_slots
    gpu_target = 0
    for i in range(num_trans_layers):
        if used >= per_gpu_layers:
            # Current GPU has reached its share; continue on the next one.
            gpu_target += 1
            used = 0
            # Should be unreachable for validated inputs; kept as a real
            # exception rather than an ``assert`` so it survives ``-O``.
            if gpu_target >= num_gpus:
                raise RuntimeError(
                    f"device map overflow: layer {i} would land on GPU "
                    f"{gpu_target} but only {num_gpus} GPU(s) are available"
                )
        device_map[f'transformer.layers.{i}'] = gpu_target
        used += 1

    return device_map
|
|
|
|
|
def load_model_on_gpus(checkpoint_path: Union[str, os.PathLike], num_gpus: int = 2,
                       device_map: Optional[Dict[str, int]] = None, **kwargs) -> Module:
    """Load a half-precision model and place it on one or several GPUs.

    Args:
        checkpoint_path: Passed straight to ``AutoModel.from_pretrained``.
        num_gpus: Number of GPUs to use when no ``device_map`` is supplied.
        device_map: Optional explicit module-name -> GPU-index mapping. When
            given, the model is always dispatched with it, regardless of
            ``num_gpus``.
        **kwargs: Forwarded unchanged to ``AutoModel.from_pretrained``.

    Returns:
        The loaded model, either moved to the default CUDA device (fewer than
        two GPUs and no explicit map) or dispatched across devices through
        ``accelerate.dispatch_model``.
    """
    # NOTE(review): ``trust_remote_code=True`` is always passed; per the
    # transformers docs this allows code shipped with the checkpoint to run,
    # so only point this at checkpoints you trust.

    # Single-device path: no explicit map and fewer than two GPUs requested.
    if num_gpus < 2 and device_map is None:
        return AutoModel.from_pretrained(
            checkpoint_path, trust_remote_code=True, **kwargs
        ).half().cuda()

    # accelerate is imported lazily so it is only required on the multi-GPU path.
    from accelerate import dispatch_model

    half_model = AutoModel.from_pretrained(
        checkpoint_path, trust_remote_code=True, **kwargs
    ).half()

    # Fall back to the automatically computed layout when the caller gave none.
    placement = auto_configure_device_map(num_gpus) if device_map is None else device_map

    return dispatch_model(half_model, device_map=placement)
|
|
|
|
|
|