Serious problems with handling of mat4x3

Hello,

My shader codes:


Vertex Shader:


// Vertex-stage interface: uniform blocks, vertex attributes and stage outputs.
// All uniform blocks use the std140 layout; matrices are declared row_major.

// Global block. Of its members, main() in this stage reads only
// ProjMatrix and CamAngVel.
layout(std140)uniform _Global
{
layout(row_major)mat4 ProjMatrix;
vec3 CamAngVel;
// mat4x3 in GLSL is 4 columns x 3 rows (each column is a vec3).
layout(row_major)mat4x3 CamMatrix;
vec4 ClipPlane;
mediump float AllowBackFlip;
float TesselationDensity;
vec2 GrassRangeMulAdd;
mediump vec4 BendFactor;
};
// Per-instance matrices, indexed by gl_InstanceID in main().
// This is the block whose reported size differs between drivers.
// Under the std140 rules a row_major mat4x3 is stored as 3 rows, each padded
// to a vec4 (matrix stride 16, array stride 48), so 256 elements are expected
// to occupy 12288 bytes.
layout(std140)uniform _ObjMatrix
{
layout(row_major)mat4x3 ViewMatrix[256];
};
// Per-instance vec3 values, indexed by gl_InstanceID in main().
layout(std140)uniform _ObjVel
{
vec3 ObjVel[256];
};
// Of this block, main() in this stage reads only ObjAngVel.
layout(std140)uniform _Mesh
{
float VtxHeightmap;
float VtxSkinning;
mediump vec4 Highlight;
mediump vec3 ObjAngVel;
};
// Vertex attributes. main() reads only ATTR0, ATTR2 and ATTR4; the other
// attributes are declared but not referenced.
layout(location=0)in vec4 ATTR0;
layout(location=1)in mediump vec3 ATTR1;
layout(location=2)in mediump vec3 ATTR2;
layout(location=3)in mediump vec4 ATTR3;
layout(location=4)in vec2 ATTR4;
layout(location=5)in vec2 ATTR5;
layout(location=6)in vec2 ATTR6;
layout(location=7)in mediump float ATTR7;
layout(location=8)in vec4 ATTR8;
layout(location=9)in mediump vec4 ATTR9;
layout(location=10)in mediump vec4 ATTR10;
layout(location=11)in mediump vec4 ATTR11;
// Outputs consumed by the fragment stage under the same names.
out vec2 IO0;
out vec3 IO1;
out vec3 IO2;
out mediump vec3 IO3;
// Global that main() never references (presumably a code-generator leftover).
vec2 _82;
// Vertex entry point. Transforms ATTR0/ATTR2 by the per-instance ViewMatrix,
// writes the four IO outputs and computes gl_Position from selected
// ProjMatrix entries. ViewMatrix[i][c] selects column c (a vec3) of the
// mat4x3 for instance i.
void main()
{
// Affine transform of ATTR0.xyz: columns 0..2 weighted by x/y/z, plus column 3.
vec3 _103=(ViewMatrix[uint(gl_InstanceID)][0]*ATTR0.x)+((ViewMatrix[uint(gl_InstanceID)][1]*ATTR0.y)+((ViewMatrix[uint(gl_InstanceID)][2]*ATTR0.z)+ViewMatrix[uint(gl_InstanceID)][3]));
mediump vec3 _36=cross(ATTR0.xyz,ObjAngVel);
float _120=_103.x;
float _124=_103.z;
IO0=vec2(ATTR4.x,ATTR4.y);
IO1=_103;
// ObjVel minus _36 transformed by columns 0..2 only (column 3 is not added),
// plus cross(_103, CamAngVel).
IO2=(ObjVel[uint(gl_InstanceID)]-((ViewMatrix[uint(gl_InstanceID)][0]*_36.x)+((ViewMatrix[uint(gl_InstanceID)][1]*_36.y)+(ViewMatrix[uint(gl_InstanceID)][2]*_36.z))))+cross(_103,CamAngVel);
// ATTR2 transformed by columns 0..2 only (column 3 is not added).
IO3=(ViewMatrix[uint(gl_InstanceID)][0]*ATTR2.x)+((ViewMatrix[uint(gl_InstanceID)][1]*ATTR2.y)+(ViewMatrix[uint(gl_InstanceID)][2]*ATTR2.z));
// Hand-expanded projection: only ProjMatrix[0].x, [1].y, [2].x, [2].z, [2].w,
// [3].z and [3].w are used; all other entries are ignored.
gl_Position=vec4((_120*ProjMatrix[0u].x)+(_124*ProjMatrix[2u].x),_103.y*ProjMatrix[1u].y,(_124*ProjMatrix[2u].z)+ProjMatrix[3u].z,(_124*ProjMatrix[2u].w)+ProjMatrix[3u].w);
}

Pixel Shader:


// Fragment-stage interface. Default float precision is mediump, so members
// that must match the vertex stage's default precision are marked highp
// explicitly.
precision mediump float;
precision highp int;
// Material parameters; main() reads _color.xyz, _ambient_specular.w and
// _sss_glow_rough_bump.y.
struct MaterialClass
{
vec4 _color;
vec4 _ambient_specular;
vec4 _sss_glow_rough_bump;
highp vec4 _texscale_detscale_detpower_reflect;
};
// Staging struct for the three render-target values written by main().
struct DeferredSolidOutput
{
vec4 out0;
vec4 out1;
vec4 out2;
};
// Same member list as the vertex stage's _Global block. Of its members,
// main() in this stage reads only AllowBackFlip.
layout(std140)uniform _Global
{
layout(row_major)highp mat4 ProjMatrix;
highp vec3 CamAngVel;
layout(row_major)highp mat4x3 CamMatrix;
highp vec4 ClipPlane;
float AllowBackFlip;
highp float TesselationDensity;
highp vec2 GrassRangeMulAdd;
vec4 BendFactor;
};
// Of this block, main() in this stage reads only Highlight.
layout(std140)uniform _Mesh
{
highp float VtxHeightmap;
highp float VtxSkinning;
vec4 Highlight;
vec3 ObjAngVel;
};
layout(std140)uniform _Material
{
MaterialClass Material;
};
// Both textures are sampled at IO0 in main().
uniform highp sampler2D Nrm;
uniform highp sampler2D Col;
// Inputs matching the vertex stage outputs IO0..IO3.
in highp vec2 IO0;
in highp vec3 IO1;
in highp vec3 IO2;
in vec3 IO3;
// Three colour outputs (multiple render targets).
layout(location=0)out vec4 RT0;
layout(location=1)out vec4 RT1;
layout(location=2)out vec4 RT2;
// Fragment entry point. Samples Nrm and Col at IO0 and fills the three
// render targets RT0..RT2.
void main()
{
vec4 _31=texture(Nrm,IO0);
vec3 _36=normalize(IO3);
vec4 _38=texture(Col,IO0);
// Material colour times sampled colour, plus Highlight.
vec3 _43=(Material._color.xyz*_38.xyz)+Highlight.xyz;
// On back-facing fragments the normalized IO3 is scaled by AllowBackFlip;
// front-facing fragments use it unchanged.
vec3 _60;
if(!gl_FrontFacing)
{
_60=_36*AllowBackFlip;
}
else
{
_60=_36;
}
DeferredSolidOutput param_var_output;
// Each vec4(...) below copies the still-unwritten .w of the same member as a
// placeholder; the following line overwrites that .w with the real value.
param_var_output.out0=vec4(_43.x,_43.y,_43.z,param_var_output.out0.w);
param_var_output.out0.w=Material._sss_glow_rough_bump.y*_31.w;
// Remap _60 with x*0.5+0.5.
vec3 _53=(_60*0.5)+vec3(0.5);
param_var_output.out1=vec4(_53.x,_53.y,_53.z,param_var_output.out1.w);
param_var_output.out1.w=Material._ambient_specular.w*_31.z;
// IO2 divided by IO1.z, then remapped with x*0.5+0.5.
vec3 _59=((IO2/vec3(IO1.z))*0.5)+vec3(0.5);
param_var_output.out2=vec4(_59.x,_59.y,_59.z,param_var_output.out2.w);
param_var_output.out2.w=0.0;
RT0=param_var_output.out0;
RT1=param_var_output.out1;
RT2=param_var_output.out2;
}

I'm facing serious problems with the handling of mat4x3 on ARM Mali GPUs.

The above code works fine on desktop OpenGL (GeForce) and on Apple iOS GL ES (iPad mini 2), but when running on these 2 Android devices:
Samsung Galaxy Note 4 (ARM Mali-T760)
Huawei Mate 20 X (ARM Mali-G76)
It doesn't work.

layout(std140)uniform _ObjMatrix
{
layout(row_major)mat4x3 ViewMatrix[256];
};
For this UBO, I query the block data size using this code:
GLint size=0; glGetActiveUniformBlockiv(prog, i, GL_UNIFORM_BLOCK_DATA_SIZE, &size);
On desktop and Apple I'm getting the expected size of 3 * Vec4 (48 bytes) * 256 elements = 12288 bytes total = OK
On ARM Mali I'm getting 4 * Vec4 (64 bytes) * 256 elements = 16384 bytes total = Not OK

This suggests that Mali is using mat4x4 instead of mat4x3.

Then I check
GLint offset =-1; glGetActiveUniformsiv(prog, 1, &uni, GL_UNIFORM_OFFSET , &offset );
GLint array_stride=-1; glGetActiveUniformsiv(prog, 1, &uni, GL_UNIFORM_ARRAY_STRIDE , & array_stride);
GLint matrix_stride=-1; glGetActiveUniformsiv(prog, 1, &uni, GL_UNIFORM_MATRIX_STRIDE, &matrix_stride);


On all platforms I'm getting the correct GL_UNIFORM_ARRAY_STRIDE of 48 and GL_UNIFORM_MATRIX_STRIDE of 16.

However this doesn't work on Mali!

When I set the UBO data:


glBindBuffer (GL_UNIFORM_BUFFER, buffer.buffer);
glBufferSubData(GL_UNIFORM_BUFFER, 0, buffer.size, data);

for just 2 matrices, laid out as mat4x3:
first matrix: byte offset = 0, size = 3 * Vec4
second matrix: byte offset = 3 * Vec4 (48), size = 3 * Vec4
then rendering doesn't work correctly on ARM Mali.

If I instead treat them as mat4x4:
first matrix: byte offset = 0, size = 4 * Vec4
second matrix: byte offset = 4 * Vec4 (64), size = 4 * Vec4
then rendering starts to work OK.

So despite requesting mat4x3 (3 x Vec4), I'm actually getting a bigger UBO that uses mat4x4 (4 x Vec4) = fail
and the driver reports an array_stride of 48 (for mat4x3) instead of the 64 it actually allocated (for mat4x4) = fail

Is there an easy workaround for this problem without having to wait for a driver update?

More questions in this forum