/// Applies the erf-based GELU formula to `x`:
/// `x * (erf(x / SQRT_2) + 1) / 2`.
///
/// Assuming `SQRT_2` is the square root of two (it is defined outside this
/// function; confirm), this is the exact, non-approximated form
/// `x * Phi(x)`, where `Phi` is the standard normal CDF.
///
/// Takes the tensor by value and returns a new tensor.
fn gelu_custom(x: Tensor) -> Tensor {
    // Keep a handle on the raw input: the division below consumes `x`.
    let input = x.clone();

    // erf(x / SQRT_2) + 1, i.e. twice the normal CDF under the assumption above.
    let erf_plus_one = (x / SQRT_2).erf() + 1;

    // Multiply first, then halve, matching the original operation order.
    let doubled = input * erf_plus_one;
    doubled / 2
}