def set_data_parallel_rank(rank):
    """Set the data parallel rank.

    Stores ``rank`` in the module-level ``_MPU_DATA_PARALLEL_RANK`` global,
    replacing whatever value was there before. Nothing is returned.

    Args:
        rank: Value to store. No validation is performed here.
            NOTE(review): presumably ``get_data_parallel_rank`` reads this
            global back as an override -- its body is not visible in this
            excerpt, so confirm before relying on that.
    """
    # ``global`` is required so the assignment rebinds the module-level name
    # instead of creating a function-local variable.
    global _MPU_DATA_PARALLEL_RANK
    _MPU_DATA_PARALLEL_RANK = rank