llm.c/dev/cuda/softmax_forward.cu at master · Mu-L/llm.c...
softmax_forward_cpu(float* out, const float* inp, int N, int C) { // inp is (N, C) // out is (N, C), each row of inp will get softmaxed for (int i = 0; i < N; i++) { const float* inp_row = inp + i * C; float* out_row = out + i * C; float maxval ...