Hello,
My shader codes:
Vertex Shader:
// Vertex shader (quoted as-is: minified onto a single line).
// Uniform blocks, all std140: _Global, _ObjMatrix (row_major mat4x3 ViewMatrix[256]), _ObjVel (vec3 ObjVel[256]), _Mesh.
// Inputs ATTR0..ATTR11 sit at locations 0..11; main() reads only ATTR0, ATTR2 and ATTR4.
// main(), indexing ViewMatrix and ObjVel by gl_InstanceID (written i below):
//   _103 = columns 0..2 of ViewMatrix[i] weighted by ATTR0.xyz, plus column 3
//   IO0  = ATTR4.xy
//   IO1  = _103
//   IO2  = ObjVel[i] - (columns 0..2 of ViewMatrix[i] weighted by cross(ATTR0.xyz, ObjAngVel)) + cross(_103, CamAngVel)
//   IO3  = columns 0..2 of ViewMatrix[i] weighted by ATTR2.xyz (column 3 is not added)
//   gl_Position is assembled from individual ProjMatrix elements: [0].x, [2].x, [1].y, [2].z, [3].z, [2].w, [3].w
// The file-scope `vec2 _82` is declared but never referenced in main().
layout(std140)uniform _Global{layout(row_major)mat4 ProjMatrix;vec3 CamAngVel;layout(row_major)mat4x3 CamMatrix;vec4 ClipPlane;mediump float AllowBackFlip;float TesselationDensity;vec2 GrassRangeMulAdd;mediump vec4 BendFactor;};layout(std140)uniform _ObjMatrix{layout(row_major)mat4x3 ViewMatrix[256];};layout(std140)uniform _ObjVel{vec3 ObjVel[256];};layout(std140)uniform _Mesh{float VtxHeightmap;float VtxSkinning;mediump vec4 Highlight;mediump vec3 ObjAngVel;};layout(location=0)in vec4 ATTR0;layout(location=1)in mediump vec3 ATTR1;layout(location=2)in mediump vec3 ATTR2;layout(location=3)in mediump vec4 ATTR3;layout(location=4)in vec2 ATTR4;layout(location=5)in vec2 ATTR5;layout(location=6)in vec2 ATTR6;layout(location=7)in mediump float ATTR7;layout(location=8)in vec4 ATTR8;layout(location=9)in mediump vec4 ATTR9;layout(location=10)in mediump vec4 ATTR10;layout(location=11)in mediump vec4 ATTR11;out vec2 IO0;out vec3 IO1;out vec3 IO2;out mediump vec3 IO3;vec2 _82;void main(){vec3 _103=(ViewMatrix[uint(gl_InstanceID)][0]*ATTR0.x)+((ViewMatrix[uint(gl_InstanceID)][1]*ATTR0.y)+((ViewMatrix[uint(gl_InstanceID)][2]*ATTR0.z)+ViewMatrix[uint(gl_InstanceID)][3]));mediump vec3 _36=cross(ATTR0.xyz,ObjAngVel);float _120=_103.x;float _124=_103.z;IO0=vec2(ATTR4.x,ATTR4.y);IO1=_103;IO2=(ObjVel[uint(gl_InstanceID)]-((ViewMatrix[uint(gl_InstanceID)][0]*_36.x)+((ViewMatrix[uint(gl_InstanceID)][1]*_36.y)+(ViewMatrix[uint(gl_InstanceID)][2]*_36.z))))+cross(_103,CamAngVel);IO3=(ViewMatrix[uint(gl_InstanceID)][0]*ATTR2.x)+((ViewMatrix[uint(gl_InstanceID)][1]*ATTR2.y)+(ViewMatrix[uint(gl_InstanceID)][2]*ATTR2.z));gl_Position=vec4((_120*ProjMatrix[0u].x)+(_124*ProjMatrix[2u].x),_103.y*ProjMatrix[1u].y,(_124*ProjMatrix[2u].z)+ProjMatrix[3u].z,(_124*ProjMatrix[2u].w)+ProjMatrix[3u].w);}
Pixel Shader:
// Fragment ("pixel") shader (quoted as-is: minified onto a single line).
// Default precisions: mediump float, highp int.
// Declares the std140 blocks _Global, _Mesh and _Material (MaterialClass Material), plus samplers Nrm and Col.
// main():
//   _31 = texture(Nrm, IO0); _38 = texture(Col, IO0); _36 = normalize(IO3)
//   _60 = _36 * AllowBackFlip when !gl_FrontFacing, otherwise _36
//   RT0 = (Material._color.xyz * _38.xyz + Highlight.xyz, Material._sss_glow_rough_bump.y * _31.w)
//   RT1 = (_60 * 0.5 + 0.5, Material._ambient_specular.w * _31.z)
//   RT2 = ((IO2 / IO1.z) * 0.5 + 0.5, 0.0)
// Each param_var_output.outN.w is read while building the vec4 before it has been assigned,
// but that component is overwritten by the very next statement.
precision mediump float;precision highp int;struct MaterialClass{vec4 _color;vec4 _ambient_specular;vec4 _sss_glow_rough_bump;highp vec4 _texscale_detscale_detpower_reflect;};struct DeferredSolidOutput{vec4 out0;vec4 out1;vec4 out2;};layout(std140)uniform _Global{layout(row_major)highp mat4 ProjMatrix;highp vec3 CamAngVel;layout(row_major)highp mat4x3 CamMatrix;highp vec4 ClipPlane;float AllowBackFlip;highp float TesselationDensity;highp vec2 GrassRangeMulAdd;vec4 BendFactor;};layout(std140)uniform _Mesh{highp float VtxHeightmap;highp float VtxSkinning;vec4 Highlight;vec3 ObjAngVel;};layout(std140)uniform _Material{MaterialClass Material;};uniform highp sampler2D Nrm;uniform highp sampler2D Col;in highp vec2 IO0;in highp vec3 IO1;in highp vec3 IO2;in vec3 IO3;layout(location=0)out vec4 RT0;layout(location=1)out vec4 RT1;layout(location=2)out vec4 RT2;void main(){vec4 _31=texture(Nrm,IO0);vec3 _36=normalize(IO3);vec4 _38=texture(Col,IO0);vec3 _43=(Material._color.xyz*_38.xyz)+Highlight.xyz;vec3 _60;if(!gl_FrontFacing){_60=_36*AllowBackFlip;}else{_60=_36;}DeferredSolidOutput param_var_output;param_var_output.out0=vec4(_43.x,_43.y,_43.z,param_var_output.out0.w);param_var_output.out0.w=Material._sss_glow_rough_bump.y*_31.w;vec3 _53=(_60*0.5)+vec3(0.5);param_var_output.out1=vec4(_53.x,_53.y,_53.z,param_var_output.out1.w);param_var_output.out1.w=Material._ambient_specular.w*_31.z;vec3 _59=((IO2/vec3(IO1.z))*0.5)+vec3(0.5);param_var_output.out2=vec4(_59.x,_59.y,_59.z,param_var_output.out2.w);param_var_output.out2.w=0.0;RT0=param_var_output.out0;RT1=param_var_output.out1;RT2=param_var_output.out2;}
I'm facing serious problems with the handling of mat4x3 on ARM Mali GPUs.
The above code works fine on Desktop OpenGL (GeForce) and Apple iOS GL ES (iPad mini 2), but it doesn't work when running on 2 Android devices:
- Samsung Galaxy Note 4 (ARM Mali-T760)
- Huawei Mate 20 X (ARM Mali-G76)
For this UBO:
layout(std140)uniform _ObjMatrix{layout(row_major)mat4x3 ViewMatrix[256];};
I query the block size using this code:
GLint size=0; glGetActiveUniformBlockiv(prog, i, GL_UNIFORM_BLOCK_DATA_SIZE, &size);
On Desktop and Apple I'm getting the expected size of 3*Vec4*256 elements (total size 12288 bytes) = OK
On ARM Mali I'm getting 4*Vec4*256 elements (total size 16384 bytes) = Not OK
This suggests that Mali is using mat4x4 instead of mat4x3.
Then I check the offset and strides:
GLint offset =-1; glGetActiveUniformsiv(prog, 1, &uni, GL_UNIFORM_OFFSET , &offset ); GLint array_stride=-1; glGetActiveUniformsiv(prog, 1, &uni, GL_UNIFORM_ARRAY_STRIDE , & array_stride); GLint matrix_stride=-1; glGetActiveUniformsiv(prog, 1, &uni, GL_UNIFORM_MATRIX_STRIDE, &matrix_stride);
On all platforms I'm getting the correct GL_UNIFORM_ARRAY_STRIDE of 48 and GL_UNIFORM_MATRIX_STRIDE of 16.
However, uploading data with these strides doesn't work on Mali!
When I set the UBO data:
glBindBuffer (GL_UNIFORM_BUFFER, buffer.buffer); glBufferSubData(GL_UNIFORM_BUFFER, 0, buffer.size, data);
just for 2 matrices:
- first matrix: byte offset = 0, size = 3 * Vec4
- second matrix: byte offset = 3 * Vec4 (48), size = 3 * Vec4
then rendering doesn't work correctly on ARM Mali.
If I treat them as mat4x4:
- first matrix: byte offset = 0, size = 4 * Vec4
- second matrix: byte offset = 4 * Vec4 (64), size = 4 * Vec4
then rendering starts to work OK.
So even though I'm requesting mat4x3 (3xVec4), I'm actually getting a bigger UBO that uses mat4x4 (4xVec4) = fail,
and it reports an array_stride of 48 for mat4x3 instead of the 64 it actually allocated for mat4x4 = fail.
Is there an easy workaround for this problem without having to wait for a driver update?
Typically, vertex buffers and textures are uploaded to the GPU only once (except dynamic vertices for UI).
However the UBO for view matrices is updated non-stop during rendering for all objects.
I'm trying to see if I can make it work by using "Vec4 ViewMatrix[256*3];" instead of "layout(row_major)mat4x3 ViewMatrix[256];"