/**
 * Multiplies two 4x4 matrices held as 16-element float arrays and stores the
 * product in this matrix's buffer.
 *
 * Treating every buffer as four consecutive groups of four floats, the value
 * written to {@code buffer[4*i + j]} is the sum over {@code k = 0..3} of
 * {@code left[4*i + k] * right[4*k + j]}, accumulated left to right.
 *
 * Either argument (or both) may be this matrix: every input element is read
 * before the output slot that shares its index is written.
 *
 * @param leftM  matrix supplying the {@code left[4*i + k]} factors
 * @param rightM matrix supplying the {@code right[4*k + j]} factors
 */
public void mul4f(Matrix4fArray leftM, Matrix4fArray rightM) {
    final float[] a = leftM.buffer;
    final float[] b = rightM.buffer;
    final float[] out = this.buffer;

    // Snapshot the first three groups of the right operand before any store,
    // so that writing into 'out' cannot disturb them when 'b' and 'out' are
    // the same array.
    final float b00 = b[0];
    final float b01 = b[1];
    final float b02 = b[2];
    final float b03 = b[3];
    final float b10 = b[4];
    final float b11 = b[5];
    final float b12 = b[6];
    final float b13 = b[7];
    final float b20 = b[8];
    final float b21 = b[9];
    final float b22 = b[10];
    final float b23 = b[11];

    for (int base = 0; base < 16; base += 4) {
        // Load the whole group of the left operand before overwriting the
        // same four slots of the output.
        final float a0 = a[base];
        final float a1 = a[base + 1];
        final float a2 = a[base + 2];
        final float a3 = a[base + 3];

        // b[12..15] are read in place: they are only overwritten during the
        // final iteration, and each one is read by the very expression whose
        // result replaces it.
        out[base] = a0 * b00 + a1 * b10 + a2 * b20 + a3 * b[12];
        out[base + 1] = a0 * b01 + a1 * b11 + a2 * b21 + a3 * b[13];
        out[base + 2] = a0 * b02 + a1 * b12 + a2 * b22 + a3 * b[14];
        out[base + 3] = a0 * b03 + a1 * b13 + a2 * b23 + a3 * b[15];
    }
}