Hello,
My shader codes:
Vertex Shader:
// Vertex shader: transforms each vertex by the per-instance ViewMatrix entry,
// forwards texture coordinates, transformed position, a velocity term and the
// transformed ATTR2 vector to the pixel shader, and projects the position.

layout(std140) uniform _Global
{
    layout(row_major) mat4 ProjMatrix;
    vec3 CamAngVel;
    layout(row_major) mat4x3 CamMatrix;
    vec4 ClipPlane;
    mediump float AllowBackFlip;
    float TesselationDensity;
    vec2 GrassRangeMulAdd;
    mediump vec4 BendFactor;
};

// One mat4x3 per instance; row_major is applied to the member, not the block.
layout(std140) uniform _ObjMatrix
{
    layout(row_major) mat4x3 ViewMatrix[256];
};

layout(std140) uniform _ObjVel
{
    vec3 ObjVel[256];
};

layout(std140) uniform _Mesh
{
    float VtxHeightmap;
    float VtxSkinning;
    mediump vec4 Highlight;
    mediump vec3 ObjAngVel;
};

layout(location = 0)  in vec4 ATTR0;
layout(location = 1)  in mediump vec3 ATTR1;
layout(location = 2)  in mediump vec3 ATTR2;
layout(location = 3)  in mediump vec4 ATTR3;
layout(location = 4)  in vec2 ATTR4;
layout(location = 5)  in vec2 ATTR5;
layout(location = 6)  in vec2 ATTR6;
layout(location = 7)  in mediump float ATTR7;
layout(location = 8)  in vec4 ATTR8;
layout(location = 9)  in mediump vec4 ATTR9;
layout(location = 10) in mediump vec4 ATTR10;
layout(location = 11) in mediump vec4 ATTR11;

out vec2 IO0;
out vec3 IO1;
out vec3 IO2;
out mediump vec3 IO3;

// Declared but never read or written by main().
vec2 _82;

void main()
{
    // Index of this instance's entry in ViewMatrix[] and ObjVel[].
    uint inst = uint(gl_InstanceID);

    // ATTR0.xyz transformed by the instance matrix (column 3 is added as-is).
    vec3 pos = (ViewMatrix[inst][0] * ATTR0.x)
             + ((ViewMatrix[inst][1] * ATTR0.y)
             + ((ViewMatrix[inst][2] * ATTR0.z) + ViewMatrix[inst][3]));

    mediump vec3 angTerm = cross(ATTR0.xyz, ObjAngVel);

    IO0 = vec2(ATTR4.x, ATTR4.y);
    IO1 = pos;

    // Instance velocity minus the rotated angular term, plus the camera term.
    IO2 = (ObjVel[inst]
           - ((ViewMatrix[inst][0] * angTerm.x)
           + ((ViewMatrix[inst][1] * angTerm.y)
           + (ViewMatrix[inst][2] * angTerm.z))))
        + cross(pos, CamAngVel);

    // ATTR2 rotated by the 3x3 part of the instance matrix (no column 3).
    IO3 = (ViewMatrix[inst][0] * ATTR2.x)
        + ((ViewMatrix[inst][1] * ATTR2.y)
        + (ViewMatrix[inst][2] * ATTR2.z));

    // Projection uses only the ProjMatrix components referenced below.
    float px = pos.x;
    float pz = pos.z;
    gl_Position = vec4((px * ProjMatrix[0u].x) + (pz * ProjMatrix[2u].x),
                       pos.y * ProjMatrix[1u].y,
                       (pz * ProjMatrix[2u].z) + ProjMatrix[3u].z,
                       (pz * ProjMatrix[2u].w) + ProjMatrix[3u].w);
}
Pixel Shader:
// Pixel shader: samples the Col and Nrm textures and writes three render
// targets (RT0, RT1, RT2) from the interpolated vertex outputs.

precision mediump float;
precision highp int;

struct MaterialClass
{
    vec4 _color;
    vec4 _ambient_specular;
    vec4 _sss_glow_rough_bump;
    highp vec4 _texscale_detscale_detpower_reflect;
};

struct DeferredSolidOutput
{
    vec4 out0;
    vec4 out1;
    vec4 out2;
};

layout(std140) uniform _Global
{
    layout(row_major) highp mat4 ProjMatrix;
    highp vec3 CamAngVel;
    layout(row_major) highp mat4x3 CamMatrix;
    highp vec4 ClipPlane;
    float AllowBackFlip;
    highp float TesselationDensity;
    highp vec2 GrassRangeMulAdd;
    vec4 BendFactor;
};

layout(std140) uniform _Mesh
{
    highp float VtxHeightmap;
    highp float VtxSkinning;
    vec4 Highlight;
    vec3 ObjAngVel;
};

layout(std140) uniform _Material
{
    MaterialClass Material;
};

uniform highp sampler2D Nrm;
uniform highp sampler2D Col;

in highp vec2 IO0;
in highp vec3 IO1;
in highp vec3 IO2;
in vec3 IO3;

layout(location = 0) out vec4 RT0;
layout(location = 1) out vec4 RT1;
layout(location = 2) out vec4 RT2;

void main()
{
    vec4 nrmSample = texture(Nrm, IO0);
    vec3 unitNrm   = normalize(IO3);
    vec4 colSample = texture(Col, IO0);

    // Material colour times the Col sample, offset by the highlight colour.
    vec3 tinted = (Material._color.xyz * colSample.xyz) + Highlight.xyz;

    // Back faces have the normalized IO3 scaled by AllowBackFlip.
    vec3 facedNrm = gl_FrontFacing ? unitNrm : (unitNrm * AllowBackFlip);

    // Both vectors are scaled by 0.5 and biased by 0.5 before being stored.
    vec3 storedNrm = (facedNrm * 0.5) + vec3(0.5);
    vec3 storedVel = ((IO2 / vec3(IO1.z)) * 0.5) + vec3(0.5);

    DeferredSolidOutput result = DeferredSolidOutput(
        vec4(tinted.x, tinted.y, tinted.z,
             Material._sss_glow_rough_bump.y * nrmSample.w),
        vec4(storedNrm.x, storedNrm.y, storedNrm.z,
             Material._ambient_specular.w * nrmSample.z),
        vec4(storedVel.x, storedVel.y, storedVel.z, 0.0));

    RT0 = result.out0;
    RT1 = result.out1;
    RT2 = result.out2;
}
I'm facing serious problems with the handling of mat4x3 on ARM Mali GPUs.
The above code works fine on Desktop OpenGL (GeForce) and Apple iOS GL ES (iPad mini 2), but it doesn't work when running on 2 Android devices:
- Samsung Galaxy Note 4 (ARM Mali-T760)
- Huawei Mate 20 X (ARM Mali-G76)
layout(std140)uniform _ObjMatrix{layout(row_major)mat4x3 ViewMatrix[256];};
For this UBO, using the code:
GLint size=0; glGetActiveUniformBlockiv(prog, i, GL_UNIFORM_BLOCK_DATA_SIZE, &size);
On Desktop and Apple I'm getting the expected size of 3*Vec4*256 elements (total size 12288 bytes) = OK
On ARM Mali I'm getting 4*Vec4*256 elements (total size 16384 bytes) = Not OK
Which suggests that Mali is using mat4x4 instead of mat4x3
Then I check GLint offset =-1; glGetActiveUniformsiv(prog, 1, &uni, GL_UNIFORM_OFFSET , &offset ); GLint array_stride=-1; glGetActiveUniformsiv(prog, 1, &uni, GL_UNIFORM_ARRAY_STRIDE , & array_stride); GLint matrix_stride=-1; glGetActiveUniformsiv(prog, 1, &uni, GL_UNIFORM_MATRIX_STRIDE, &matrix_stride);
on all platforms I'm getting correct GL_UNIFORM_ARRAY_STRIDE of 48 and GL_UNIFORM_MATRIX_STRIDE of 16.
However this doesn't work on Mali!
When I set the UBO data:
glBindBuffer (GL_UNIFORM_BUFFER, buffer.buffer); glBufferSubData(GL_UNIFORM_BUFFER, 0, buffer.size, data);
just for 2 matrices:
first matrix: byte offset = 0, size = 3 * Vec4
second matrix: byte offset = 3 * Vec4 (48), size = 3 * Vec4
then rendering doesn't work correctly on ARM Mali.
If I treat them as mat4x4:
first matrix: byte offset = 0, size = 4 * Vec4
second matrix: byte offset = 4 * Vec4 (64), size = 4 * Vec4
then rendering starts to work OK.
So although I'm requesting mat4x3 (3xVec4), I'm actually getting a bigger UBO that uses mat4x4 (4xVec4) = fail, and the driver reports an array_stride of 48 for mat4x3 instead of the 64 it actually allocated for mat4x4 = fail.
Is there an easy workaround for this problem without having to wait for a driver update?
Our compiler team have confirmed that this is a bug. The proposed workaround is to apply the row_major layout to the block rather than individual elements, e.g.:
// Workaround: put row_major on the uniform block itself instead of on the
// individual mat4x3 member, so the member inherits the block-level layout.
layout(std140, row_major) uniform _ObjMatrix {
    mat4x3 ViewMatrix[256];
};
Let us know if this solves your problem.
Cheers,
Pete
Awesome! Thank you very much for your quick assistance.
Confirming this workaround solved the problem.
Great, glad it worked for you.
Thanks for reporting it in the first place - bug reports are definitely appreciated =)