[1].requires_grad = True part_input = None inputs = inputs[0] if len(inputs) == 1 else inputs self.pipe_buffers['inputs'][buffer_id] = inputs # inputs has no gradient because it is from a cloned tensor outputs = super().forward(inputs) # Reset activation checkpointing ...
azure bin csrc deepspeed elasticity launcher module_inject ops pipe profiling runtime activation_checkpointing comm compression fp16 pipe zero __init__.py config.py config_utils.py constants.py csr_tensor.py dataloader.py engine.py lr_schedules.py ...
inference launcher model_implementations module_inject moe monitor nebula ops pipe profiling runtime activation_checkpointing checkpoint_engine comm compression data_pipeline fp16 pipe swap_tensor zero __init__.py bf16_optimizer.py config.py
moe monitor nebula nvme ops pipe profiling runtime activation_checkpointing checkpoint_engine comm compression data_pipeline domino fp16 pipe swap_tensor tensor_parallel zero __init__.py base_optimizer.py bf16_optimizer.py compiler.py config.py ...