use_mcore_models: True use_legacy_models: False spec: null micro_batch_size: 2 global_batch_size: 1281 change: 0 additions & 1 deletion 1 examples/gpt3/train_gpt3_175b_distributed.sh Original file line numberDiff line numberDiff line change @@ -49,7 +49,6 @@ TRAINING_ARGS=( -...
if args.use_mcore_models:if args.use_legacy_models: model = megatron.legacy.model.GPTModel( config, num_tokentypes=0, parallel_output=False, pre_process=pre_process, post_process=post_process ) else: if args.spec is None: if args.transformer_impl == 'local':...