(stddev=initializer_range) def gelu(x): """Gaussian Error Linear unit.""" cdf = 0.5 * (1.0 + tf.math.erf(x / tf.math.sqrt(2.0))) return x * cdf def gelu_new(x): """Smoother gaussian Error Linear Unit.""" cdf = 0.5 * (1.0 + tf.tanh((np.sqrt(2 / np.pi) * (x +...