jax.sharding.PartitionSpec()) count_pspec = jax.sharding.PartitionSpec() return ShampooState( return ShampooState( # pytype: disable=wrong-arg-types # numpy-scalars count=count_pspec, stats=ShardedShampooStats(global_stats, local_stats)) def sharded_init_shape_and_dtype_fn(params): @@ -166...