get_sync_debug_mode, init_dump, current_blas_handle, is_bf16_supported, utilization, finalize_dump, set_dump, get_npu_overflow_flag, clear_npu_overflow_flag, mem_get_info) from .streams import Stream, Event from .mstx import mstx from .npu_config import * # noqa: F403 from ...
index is not None else 'npu' kwargs[device_arg] = torch.device(device_info) elif type(device) == int: kwargs[device_arg] = f'npu:{device}' elif type(device) == dict: kwargs[device_arg] = _replace_cuda_to_npu_in_dict(device) def _replace_cuda_to_npu_in_list...
if low_cpu_mem_usage: if device_map is not None: # The max memory utils require PyTorch >= 1.10 to have torch.cuda.mem_get_info. require_version_core("torch>=1.10") if is_deepspeed_zero3_enabled(): raise ValueError( "DeepSpeed Zero-3 is not compatible with `low_cpu_mem_usage=True...
# ip信息也都改为了上面设置的设备ip信息# LinearAllreduce类也替换,并import了torch_npudeepspeed --num_gpus=8 benchmark/pd_separate/run_prompt.py --model_path=/root/download/torchair/npu_tuned_model/llm/llama/meta-llama/Llama-2-70b-hf 我怀疑是我的网络配置有问题,因为实际上ping不通对应的GPU节...
get_sync_debug_mode, init_dump, current_blas_handle, is_bf16_supported, utilization, finalize_dump, set_dump, get_npu_overflow_flag, clear_npu_overflow_flag, mem_get_info) from .streams import Stream, Event from .mstx import mstx from .npu_config import * # noqa: F403 from ...
npu._lazy_init() return torch_npu._C._npu_getDeviceProperties(device_id) def mem_get_info(device=None): if device is None: device = torch_npu.npu.current_device() device_id = _get_device_index(device) if device_id < 0 or device_id >= device_count(): ...
npu._lazy_init() return torch_npu._C._npu_getDeviceProperties(device_id) def mem_get_info(device=None): if device is None: device = torch_npu.npu.current_device() device_id = _get_device_index(device) if device_id < 0 or device_id >= device_count(): raise Assertion...
npu._lazy_init() return torch_npu._C._npu_getDeviceProperties(device_id) def mem_get_info(device=None): if device is None: device = torch_npu.npu.current_device() device_id = _get_device_index(device) if device_id < 0 or device_id >= device_count(): ...
index is not None else 'npu' kwargs[device_arg] = torch.device(device_info) elif type(device) == int: kwargs[device_arg] = f'npu:{device}' elif type(device) == dict: kwargs[device_arg] = _replace_cuda_to_npu_in_dict(device) def _replace_cuda_to_npu_in_list...
replace('cuda', 'npu') elif type(device) == torch.device and 'cuda' in device.type: device_info = 'npu:{}'.format(device.index) if device.index is not None else 'npu' kwargs[device_arg] = torch.device(device_info) elif type(device) == int: kwargs[device_arg]...