Hello Arm,
Recently I used Malioc (the Mali Offline Compiler) to analyze a UE4 Niagara particle system shader on ES 3.1, and I am confused about how vertex attributes affect the vertex shader's LS (load/store) cost.
The vertex shader used for Niagara sprite particles has too many texelFetch calls, which makes it T (texture) bound and causes stack spilling on Mali-G71. The VS code is here:
#version 310 es
// Vertex shader for a sprite particle, machine-translated from HLSL (the
// commented-out HLSL statements are kept next to the GLSL they produced).
//
// Per-particle data is not read from vertex attributes; the only attribute is
// in_ATTRIBUTE0 (a vec2 used as the quad corner UV). Everything else is
// pulled from buffer textures with texelFetch:
//   * vs2 (isamplerBuffer) optionally remaps gl_InstanceID to a particle index.
//   * vs0 / vs1 (samplerBuffer) hold the particle components, one scalar per
//     texel. Component c of particle p lives at texel (c * u25 + p).
// Each attribute has an integer offset in vc4_i[]:
//   * offset == -1      -> attribute is absent, use the uniform default;
//   * bit 31 set        -> read from vs0, otherwise read from vs1;
//   * low 31 bits       -> index of the attribute's first component.
// When every attribute is present this is 20 texelFetch calls per invocation
// (1 index remap + 3 + 1 + 2 + 3 + 1 + 3 + 3 + 2 + 1).
#ifdef GL_EXT_texture_buffer
#extension GL_EXT_texture_buffer : enable
#endif
#define HLSLCC_DX11ClipSpace 1
// end extensions

// Flips y and remaps z to (z + w) / 2. Not called anywhere in this shader.
void compiler_internal_AdjustInputSemantic(inout vec4 TempVariable)
{
#if HLSLCC_DX11ClipSpace
	TempVariable.y = -TempVariable.y;
	TempVariable.z = ( TempVariable.z + TempVariable.w ) / 2.0;
#endif
}

// Flips y and remaps z to 2 * z - w; applied to the clip-space position just
// before it is written to gl_Position.
void compiler_internal_AdjustOutputSemantic(inout vec4 Src)
{
#if HLSLCC_DX11ClipSpace
	Src.y = -Src.y;
	Src.z = ( 2.0 * Src.z ) - Src.w;
#endif
}

// Inverts the facing flag. Not called anywhere in this shader.
bool compiler_internal_AdjustIsFrontFacing(bool isFrontFacing)
{
#if HLSLCC_DX11ClipSpace
	return !isFrontFacing;
#else
	return isFrontFacing;
#endif
}

// Packed uniform arrays; the meaning of each slot is only visible through how
// main() uses it.
uniform vec4 vc0_h[13];
uniform vec4 vc2_h[5];
uniform uvec4 vc3_u[4];
uniform uvec4 vc4_u[1];
uniform ivec4 vc4_i[10];
uniform vec4 vc4_h[16];
uniform vec4 vc1_h[6];
// Buffer textures: vs2 = index remap table, vs0 / vs1 = particle components.
uniform highp isamplerBuffer vs2;
uniform highp samplerBuffer vs0;
uniform highp samplerBuffer vs1;
layout(location=0) in vec2 in_ATTRIBUTE0;
layout(location=0) out vec4 var_TEXCOORD10;
layout(location=1) out vec4 var_TEXCOORD11;
layout(location=2) out mediump vec4 var_TEXCOORD7;
layout(location=3) out vec4 var_TEXCOORD8;
layout(location=4) out float var_OUTCLIPDIST;

void main()
{
	// Copy the uniforms used below into locals.
	// f0, f1, f2, v3, v4, v15: defaults used when the matching offset is -1.
	float f0; f0 = vc4_h[13].x;
	float f1; f1 = vc4_h[12].x;
	float f2; f2 = vc4_h[11].x;
	vec3 v3; v3.xyz = vc4_h[10].xyz;
	vec2 v4; v4.xy = vc4_h[9].xy;
	// i5..i14: per-attribute offsets (see the header for the encoding).
	// i7 is different: it is only compared against 1 to enable the SubUV fraction.
	int i5; i5 = vc4_i[9].x;
	int i6; i6 = vc4_i[8].x;
	int i7; i7 = vc4_i[7].x;
	int i8; i8 = vc4_i[6].x;
	int i9; i9 = vc4_i[5].x;
	int i10; i10 = vc4_i[4].x;
	int i11; i11 = vc4_i[3].x;
	int i12; i12 = vc4_i[2].x;
	int i13; i13 = vc4_i[1].x;
	int i14; i14 = vc4_i[0].x;
	vec2 v15; v15.xy = vc4_h[7].xy;
	float f16; f16 = vc4_h[6].x;
	float f17; f17 = vc4_h[5].x;
	float f18; f18 = vc4_h[4].x;
	float f19; f19 = vc4_h[3].x;
	vec3 v20; v20.xyz = vc4_h[2].xyz;
	// u21: non-zero -> transform fetched vectors by the vc2_h matrix rows.
	// u22: base of the index remap table in vs2, or 0xFFFFFFFF for "no remap".
	// u23 / u24: mode selectors for the tangent computation below.
	// u25: stride (in texels) between consecutive components in vs0 / vs1.
	uint u21; u21 = vc4_u[0].x;
	uint u22; u22 = vc3_u[3].x;
	uint u23; u23 = vc3_u[2].x;
	uint u24; u24 = vc3_u[1].x;
	uint u25; u25 = vc3_u[0].x;
	vec3 v26; v26.xyz = vc0_h[11].xyz;
	vec3 v27; v27.xyz = vc0_h[10].xyz;
	vec3 v28; v28.xyz = vc0_h[9].xyz;
	vec3 v29; v29.xyz = vc0_h[8].xyz;
	vec3 v30; v30.xyz = vc0_h[7].xyz;
	vec3 v31; v31.xyz = vc0_h[6].xyz;
	vec3 v32; v32.xyz = vc0_h[5].xyz;
	vec3 v33; v33.xyz = vc0_h[4].xyz;
	//FVertexFactoryIntermediates VFIntermediates = GetVertexFactoryIntermediates(Input);
	uint u34; u34 = uint(gl_InstanceID);
	vec4 v35;
	mediump vec4 v36;
	vec4 v37;
	vec4 v38;
	mat3 m39;
	vec3 v40;
	vec3 v41;
	uint u42;
	vec3 v43; v43.xyz = vec3(0.000000e+00,0.000000e+00,0.000000e+00);
	// u42 is the particle index used by every fetch below.
	u42 = u34;
	// Need SortedIndices reassign
	if ((u22!=4294967295u))
	{
		u42 = uint(int(texelFetch(vs2,int((u22+u34))).x));
	}
	// Fetch Position
	vec3 v44; v44.xyz = vc4_h[8].xyz;
	vec3 v45;
	if ((i14==-1))
	{
		v45.xyz = v44;
	}
	else
	{
		vec3 v46;
		int i47; i47 = (i14&2147483647);
		if (bool((uint(i14)&2147483648u)))
		{
			vec3 v48;
			v48.x = texelFetch(vs0,int(((uint(i47)*u25)+u42))).x;
			v48.y = texelFetch(vs0,int(((uint((i47+1))*u25)+u42))).x;
			v48.z = texelFetch(vs0,int(((uint((i47+2))*u25)+u42))).x;
			v46.xyz = v48;
		}
		else
		{
			vec3 v49;
			v49.x = texelFetch(vs1,int(((uint(i47)*u25)+u42))).x;
			v49.y = texelFetch(vs1,int(((uint((i47+1))*u25)+u42))).x;
			v49.z = texelFetch(vs1,int(((uint((i47+2))*u25)+u42))).x;
			v46.xyz = v49;
		}
		v45.xyz = v46;
	}
	// Position Local to World
	vec3 v50;
	if (bool(u21))
	{
		v50.xyz = ((((vc2_h[0].xyz*v45.xxx)+(vc2_h[1].xyz*v45.yyy))+(vc2_h[2].xyz*v45.zzz))+vc2_h[3].xyz);
	}
	else
	{
		v50.xyz = v45;
	}
	// Fetch Rotation
	float f51;
	float f52;
	if ((i12==-1))
	{
		f52 = f2;
	}
	else
	{
		float f53;
		int i54; i54 = (i12&2147483647);
		if (bool((uint(i12)&2147483648u)))
		{
			f53 = texelFetch(vs0,int(((uint(i54)*u25)+u42))).x;
		}
		else
		{
			f53 = texelFetch(vs1,int(((uint(i54)*u25)+u42))).x;
		}
		f52 = f53;
	}
	// Degrees to radians.
	f51 = ((f52/1.800000e+02)*3.141593e+00);
	// Fetch Size
	vec2 v55;
	if ((i11==-1))
	{
		v55.xy = v4;
	}
	else
	{
		vec2 v56;
		int i57; i57 = (i11&2147483647);
		if (bool((uint(i11)&2147483648u)))
		{
			vec2 v58;
			v58.x = texelFetch(vs0,int(((uint(i57)*u25)+u42))).x;
			v58.y = texelFetch(vs0,int(((uint((i57+1))*u25)+u42))).x;
			v56.xy = v58;
		}
		else
		{
			vec2 v59;
			v59.x = texelFetch(vs1,int(((uint(i57)*u25)+u42))).x;
			v59.y = texelFetch(vs1,int(((uint((i57+1))*u25)+u42))).x;
			v56.xy = v59;
		}
		v55.xy = v56;
	}
	// Fetch Velocity
	vec3 v60;
	if ((i13==-1))
	{
		v60.xyz = v3;
	}
	else
	{
		vec3 v61;
		int i62; i62 = (i13&2147483647);
		if (bool((uint(i13)&2147483648u)))
		{
			vec3 v63;
			v63.x = texelFetch(vs0,int(((uint(i62)*u25)+u42))).x;
			v63.y = texelFetch(vs0,int(((uint((i62+1))*u25)+u42))).x;
			v63.z = texelFetch(vs0,int(((uint((i62+2))*u25)+u42))).x;
			v61.xyz = v63;
		}
		else
		{
			vec3 v64;
			v64.x = texelFetch(vs1,int(((uint(i62)*u25)+u42))).x;
			v64.y = texelFetch(vs1,int(((uint((i62+1))*u25)+u42))).x;
			v64.z = texelFetch(vs1,int(((uint((i62+2))*u25)+u42))).x;
			v61.xyz = v64;
		}
		v60.xyz = v61;
	}
	// Velocity LocalToWorld
	// Direction transform: matrix rows scaled per axis by vc2_h[4], no translation.
	vec3 v65;
	if (bool(u21))
	{
		v65.xyz = ((((vc2_h[4].xxx*vc2_h[0].xyz)*v60.xxx)+((vc2_h[4].yyy*vc2_h[1].xyz)*v60.yyy))+((vc2_h[4].zzz*vc2_h[2].xyz)*v60.zzz));
	}
	else
	{
		v65.xyz = v60;
	}
	v41.xyz = v65;
	//Fetch SubImageIndex
	float f66;
	if ((i10==-1))
	{
		f66 = f0;
	}
	else
	{
		float f67;
		int i68; i68 = (i10&2147483647);
		if (bool((uint(i10)&2147483648u)))
		{
			f67 = texelFetch(vs0,int(((uint(i68)*u25)+u42))).x;
		}
		else
		{
			f67 = texelFetch(vs1,int(((uint(i68)*u25)+u42))).x;
		}
		f66 = f67;
	}
	// Fetch CustomFacing
	vec3 v69; v69.xyz = vc4_h[14].xyz;
	vec3 v70;
	if ((i9==-1))
	{
		v70.xyz = v69;
	}
	else
	{
		vec3 v71;
		int i72; i72 = (i9&2147483647);
		if (bool((uint(i9)&2147483648u)))
		{
			vec3 v73;
			v73.x = texelFetch(vs0,int(((uint(i72)*u25)+u42))).x;
			v73.y = texelFetch(vs0,int(((uint((i72+1))*u25)+u42))).x;
			v73.z = texelFetch(vs0,int(((uint((i72+2))*u25)+u42))).x;
			v71.xyz = v73;
		}
		else
		{
			vec3 v74;
			v74.x = texelFetch(vs1,int(((uint(i72)*u25)+u42))).x;
			v74.y = texelFetch(vs1,int(((uint((i72+1))*u25)+u42))).x;
			v74.z = texelFetch(vs1,int(((uint((i72+2))*u25)+u42))).x;
			v71.xyz = v74;
		}
		v70.xyz = v71;
	}
	// CustomFacing LocalToWorld
	vec3 v75;
	if (bool(u21))
	{
		v75.xyz = ((((vc2_h[4].xxx*vc2_h[0].xyz)*v70.xxx)+((vc2_h[4].yyy*vc2_h[1].xyz)*v70.yyy))+((vc2_h[4].zzz*vc2_h[2].xyz)*v70.zzz));
	}
	else
	{
		v75.xyz = v70;
	}
	//SafeNormal CustomFacing
	// The length is clamped to at least 1e-6 so the divide cannot be by zero.
	float f76; f76 = length(v75);
	vec3 v77; v77.xyz = (v75/vec3(max(f76,1.000000e-06)));
	// Fetch CustomAlignment
	vec3 v78; v78.xyz = vc4_h[15].xyz;
	vec3 v79;
	if ((i8==-1))
	{
		v79.xyz = v78;
	}
	else
	{
		vec3 v80;
		int i81; i81 = (i8&2147483647);
		if (bool((uint(i8)&2147483648u)))
		{
			vec3 v82;
			v82.x = texelFetch(vs0,int(((uint(i81)*u25)+u42))).x;
			v82.y = texelFetch(vs0,int(((uint((i81+1))*u25)+u42))).x;
			v82.z = texelFetch(vs0,int(((uint((i81+2))*u25)+u42))).x;
			v80.xyz = v82;
		}
		else
		{
			vec3 v83;
			v83.x = texelFetch(vs1,int(((uint(i81)*u25)+u42))).x;
			v83.y = texelFetch(vs1,int(((uint((i81+1))*u25)+u42))).x;
			v83.z = texelFetch(vs1,int(((uint((i81+2))*u25)+u42))).x;
			v80.xyz = v83;
		}
		v79.xyz = v80;
	}
	// CustomAlignment LocalToWorld
	vec3 v84;
	if (bool(u21))
	{
		v84.xyz = ((((vc2_h[4].xxx*vc2_h[0].xyz)*v79.xxx)+((vc2_h[4].yyy*vc2_h[1].xyz)*v79.yyy))+((vc2_h[4].zzz*vc2_h[2].xyz)*v79.zzz));
	}
	else
	{
		v84.xyz = v79;
	}
	// CustomAlignment SafeNormal
	float f85; f85 = length(v84);
	vec3 v86; v86.xyz = (v84/vec3(max(f85,1.000000e-06)));
	// Fetch PivotOffset
	vec2 v87;
	if ((i5==-1))
	{
		v87.xy = v15;
	}
	else
	{
		vec2 v88;
		int i89; i89 = (i5&2147483647);
		if (bool((uint(i5)&2147483648u)))
		{
			vec2 v90;
			v90.x = texelFetch(vs0,int(((uint(i89)*u25)+u42))).x;
			v90.y = texelFetch(vs0,int(((uint((i89+1))*u25)+u42))).x;
			v88.xy = v90;
		}
		else
		{
			vec2 v91;
			v91.x = texelFetch(vs1,int(((uint(i89)*u25)+u42))).x;
			v91.y = texelFetch(vs1,int(((uint((i89+1))*u25)+u42))).x;
			v88.xy = v91;
		}
		v87.xy = v88;
	}
	// SafeNormalize(ResolvedView.WorldCameraOrigin - ParticlePosition)
	vec3 v92; v92.xyz = (v28+(-v50));
	float f93; f93 = length(v92);
	vec3 v94; v94.xyz = (v92/vec3(max(f93,1.000000e-06)));
	// Fetch NiagaraCameraOffset
	float f95;
	if ((i6==-1))
	{
		f95 = f1;
	}
	else
	{
		float f96;
		int i97; i97 = (i6&2147483647);
		if (bool((uint(i6)&2147483648u)))
		{
			f96 = texelFetch(vs0,int(((uint(i97)*u25)+u42))).x;
		}
		else
		{
			f96 = texelFetch(vs1,int(((uint(i97)*u25)+u42))).x;
		}
		f95 = f96;
	}
	//CameraOffset
	v40.xyz = (v94*vec3(f95));
	// Intermediates.SizeRotSubImage = float4(ParticleSize.x, ParticleSize.y, ParticleRotation, SubImageIndex);
	vec4 v98;
	v98.x = v55.x;
	v98.y = v55.y;
	v98.z = f51;
	v98.w = f66;
	//const float3 ParticleDirection = SafeNormalizeWithFallback(ParticleVelocity, float3(0,0,1));
	// v101 may divide by zero, but it is only selected when f99 > 1e-6.
	float f99; f99 = length(v41);
	vec3 v100;
	vec3 v101; v101.xyz = (v65/vec3(f99));
	v100.xyz = ((f99>1.000000e-06))?(v101):(vec3(0.000000e+00,0.000000e+00,1.000000e+00));
	// GetTangents(Input, ParticlePosition, ParticleRotation, CustomFacing, CustomAlignment, ParticleDirection, Right, Up);
	// Every candidate basis is computed unconditionally; the u23 / u24 branches
	// further down only pick which pair ends up in v103 / v102.
	vec3 v102;
	vec3 v103;
	vec3 v104; v104.xyz = mix(v31,v29,vec3(f19));
	vec3 v105; v105.xyz = mix(v32,v30,vec3(f19));
	vec3 v106; v106.xyz = (-v105);
	vec3 v107; v107.xyz = (-v33);
	vec3 v108; v108.xyz = (v28+(-v50));
	float f109; f109 = length(v108);
	vec3 v110; v110.xyz = (v108/vec3(max(f109,1.000000e-06)));
	vec3 v111; v111.xyz = ((u23==2u))?(v77):(v110);
	vec3 v112; v112.xyz = mix(v100,v86,vec3(float((u24==2u))));
	vec3 v113; v113.xyz = cross(v111,v112);
	float f114; f114 = length(v113);
	vec3 v115; v115.xyz = (v113/vec3(max(f114,1.000000e-06)));
	vec3 v116; v116.xyz = mix((-v112),cross(v111,v115),vec3(float((u23==2u))));
	vec3 v117; v117.xyz = cross(v107,v105);
	float f118; f118 = length(v117);
	vec3 v119; v119.xyz = (v117/vec3(max(f118,1.000000e-06)));
	vec3 v120; v120.xyz = cross(v107,v119);
	vec3 v121; v121.xyz = cross(v111,v105);
	float f122; f122 = length(v121);
	vec3 v123; v123.xyz = (v121/vec3(max(f122,1.000000e-06)));
	vec3 v124; v124.xyz = cross(v111,v123);
	vec3 v125; v125.xyz = cross(v110,vec3(0.000000e+00,0.000000e+00,1.000000e+00));
	float f126; f126 = length(v125);
	vec3 v127; v127.xyz = (v125/vec3(max(f126,1.000000e-06)));
	vec3 v128; v128.xyz = cross(v110,v127);
	if ((u24==0u))
	{
		if ((u23==4u))
		{
			// Blend between the camera-position basis and the view basis by a
			// clamped function of the squared distance to the camera.
			float f129; f129 = clamp(((dot(v108,v108)*v20.y)+(-v20.z)),0.000000e+00,1.000000e+00);
			vec3 v130; v130.xyz = cross(v110,vec3(0.000000e+00,0.000000e+00,1.000000e+00));
			v103.xyz = mix(v130,v104,vec3(f129));
			v102.xyz = mix(cross(v110,v130),v106,vec3(f129));
		}
		else
		{
			if ((u23==0u))
			{
				v103.xyz = v104;
				v102.xyz = v106;
			}
			else
			{
				if ((u23==1u))
				{
					v103.xyz = v119;
					v102.xyz = v120;
				}
				else
				{
					if ((u23==2u))
					{
						v103.xyz = v123;
						v102.xyz = v124;
					}
					else
					{
						v103.xyz = v127;
						v102.xyz = v128;
					}
				}
			}
		}
	}
	else
	{
		v103.xyz = v115;
		v102.xyz = v116;
	}
	// Rotate the selected basis by the particle rotation (scaled and biased
	// by f18 / f17).
	float f131; f131 = ((f51*f18)+f17);
	float f132;
	float f133; f133 = cos(f131);
	f132 = sin(f131);
	vec3 v134; v134.xyz = ((v102*vec3(f132))+(v103*vec3(f133)));
	vec3 v135; v135.xyz = ((v102*vec3(f133))+(-(v103*vec3(f132))));
	//const float2 Size = abs(Intermediates.SizeRotSubImage.xy);
	// Vertex position
	//const float2x3 Tangents = float2x3(Intermediates.TangentRight, Intermediates.TangentUp);
	//const float3 VertexOffset = CameraOffset + mul(Size * (UVForPosition - PivotOffset), Tangents);
	//Intermediates.VertexWorldPosition = ParticlePosition + VertexOffset;
	vec2 v136; v136.xy = (abs(v98.xy)*(in_ATTRIBUTE0+(-v87)));
	v43.xyz = (v50+(v40+((v135*v136.yyy)+(v134*v136.xxx))));
	// SubUV.
	float f137; f137 = fract(f66);
	float f138; f138 = ((i7==1))?(f137):(0.000000e+00);
	// CalcTangentBasis
	// f16 selects how the third basis vector is built (< 0.5, < 1.5, otherwise).
	mat3 m139;
	m139[0].xyz = v134;
	m139[1].xyz = v135;
	if ((f16<5.000000e-01))
	{
		m139[2].xyz = normalize(cross(m139[0],m139[1]));
	}
	else
	{
		if ((f16<1.500000e+00))
		{
			m139[2].xyz = normalize((v50+(-vc4_h[0].xyz)));
		}
		else
		{
			m139[2].xyz = normalize((v50+(-(vc4_h[0].xyz+(vec3(dot(vc4_h[1].xyz,(v50+(-vc4_h[0].xyz))))*vc4_h[1].xyz)))));
		}
	}
	m39 = m139;
	// float4 WorldPositionExcludingWPO = VertexFactoryGetWorldPosition(Input, VFIntermediates);
	vec4 v140;
	v140.w = 1.000000e+00;
	v140.xyz = (v43+v26);
	// half3 WorldPositionOffset = GetMaterialWorldPositionOffset(VertexParameters);
	// WorldPosition.xyz += WorldPositionOffset;
	// The offset is a constant zero vector in this shader.
	v38.xyzw = v140;
	v38.xyz = (v140.xyz+vec3(0.000000e+00,0.000000e+00,0.000000e+00));
	// Output.Position = mul(RasterizedWorldPosition, ResolvedView.TranslatedWorldToClip);
	vec4 v141; v141.xyzw = (vc0_h[3]+((vc0_h[2]*v38.zzzz)+((vc0_h[1]*v38.yyyy)+(vc0_h[0]*v38.xxxx))));
	// #if USE_PS_CLIP_PLANE
	// Output.BasePassInterpolants.OutClipDistance = dot(ResolvedView.GlobalClippingPlane, float4(WorldPosition.xyz - ResolvedView.PreViewTranslation.xyz, 1));
	// #endif
	v37.xyzw = v38;
	vec4 v142;
	v142.w = 1.000000e+00;
	v142.xyz = (v38.xyz+(-v26));
	float f143; f143 = dot(vc0_h[12],v142);
	// CalculateHeightFog
	vec3 v144; v144.xyz = (v38.xyz+(-v27));
	mediump float h145;
	mediump vec3 v146;
	float f147;
	float f148;
	float f149;
	float f150;
	vec3 v151;
	mediump float h152; h152 = vc1_h[2].w;
	float f153; f153 = min(v28.z,vc1_h[0].z);
	vec3 v154;
	v154.xy = v28.xy;
	v154.z = f153;
	v151.xyz = v144;
	v151.z = (v144.z+(v28.z+(-f153)));
	// f156 = 1 / |v151|, f157 = |v151|, v158 = v151 normalized.
	float f155; f155 = dot(v151,v151);
	float f156; f156 = inversesqrt(f155);
	float f157; f157 = (f155*f156);
	mediump vec3 v158; v158.xyz = (v151*vec3(f156));
	f150 = vc1_h[0].x;
	f149 = vc1_h[1].x;
	f148 = f157;
	f147 = v151.z;
	float f159; f159 = max(0.000000e+00,vc1_h[0].w);
	if ((f159>0.000000e+00))
	{
		float f160; f160 = (f159*f156);
		float f161; f161 = (f160*v151.z);
		float f162; f162 = (v154.z+f161);
		f148 = ((1.000000e+00+(-f160))*f157);
		f147 = (v151.z+(-f161));
		f150 = (vc1_h[3].x*exp2((-max(-1.270000e+02,(vc1_h[0].y*(f162+(-vc1_h[3].y)))))));
		f149 = (vc1_h[1].z*exp2((-max(-1.270000e+02,(vc1_h[1].y*(f162+(-vc1_h[1].w)))))));
	}
	// (1 - exp2(-x)) / x is 0/0 at x = 0, so for |x| <= 0.01 the two-term
	// series ln(2) - (ln(2)^2 / 2) * x is selected instead. The same pattern
	// is evaluated twice, once per falloff coefficient.
	float f163;
	float f164; f164 = max(-1.270000e+02,(vc1_h[0].y*f147));
	float f165; f165 = ((1.000000e+00+(-exp2((-f164))))/f164);
	float f166; f166 = (6.931472e-01+(-(2.402265e-01*f164)));
	float f167; f167 = abs(f164);
	float f168; f168 = ((f167>1.000000e-02))?(f165):(f166);
	f163 = (f150*f168);
	float f169; f169 = max(-1.270000e+02,(vc1_h[1].y*f147));
	float f170; f170 = ((1.000000e+00+(-exp2((-f169))))/f169);
	float f171; f171 = (6.931472e-01+(-(2.402265e-01*f169)));
	float f172; f172 = abs(f169);
	float f173; f173 = ((f172>1.000000e-02))?(f170):(f171);
	float f174; f174 = (f163+(f149*f173));
	float f175; f175 = (f174*f148);
	mediump vec3 v176; v176.xyz = vc1_h[2].xyz;
	v146.xyz = vec3(0.000000e+00,0.000000e+00,0.000000e+00);
	if ((vc1_h[4].w>=0.000000e+00))
	{
		v146.xyz = ((vc1_h[5].xyz*vec3(pow(clamp(dot(v158,vc1_h[4].xyz),0.000000e+00,1.000000e+00),vc1_h[5].w)))*vec3((1.000000e+00+(-clamp(exp2((-(f174*max((f148+(-vc1_h[4].w)),0.000000e+00)))),0.000000e+00,1.000000e+00)))));
	}
	h145 = max(clamp(exp2((-f175)),0.000000e+00,1.000000e+00),h152);
	// Beyond the distance in vc1_h[3].w (when it is positive) the fog term is
	// forced to "no fog": factor 1, colour 0.
	if (((vc1_h[3].w>0.000000e+00)&&(f157>vc1_h[3].w)))
	{
		h145 = 1.000000e+00;
		v146.xyz = vec3(0.000000e+00,0.000000e+00,0.000000e+00);
	}
	mediump vec4 v177;
	v177.xyz = ((v176*vec3((1.000000e+00+(-h145))))+v146);
	v177.w = h145;
	v36.xyzw = v177;
	// Output.FactoryInterpolants = VertexFactoryGetInterpolants(Input, VFIntermediates, VertexParameters);
	vec4 v178; v178.xyzw = vec4(0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00);
	v178.xyz = m139[0];
	v178.w = f138;
	float f179; f179 = determinant(m39);
	vec4 v180;
	v180.xyz = m139[2];
	v180.w = sign(f179);
	// Output.BasePassInterpolants.PixelPosition.w = Output.Position.w;
	v37.w = v141.w;
	v35.xyzw = v141;
	var_TEXCOORD10.xyzw = v178; // TANGENTTOWORLD0
	var_TEXCOORD11.xyzw = v180; // TANGENTTOWORLD2
	var_TEXCOORD7.xyzw = v36; // Fog
	var_TEXCOORD8.xyzw = v37; // PixelPosition
	var_OUTCLIPDIST = f143; // OutClipDistance
	compiler_internal_AdjustOutputSemantic(v35);
	gl_Position.xyzw = v35;
}
Malioc report:
Mali Offline Compiler v7.5.0 (Build 30e352)
Copyright 2007-2022 Arm Limited, all rights reserved

Configuration
=============

Hardware: Mali-G71 r0p1
Architecture: Bifrost
Driver: r36p0-00rel0
Shader type: OpenGL ES Vertex

Main shader
===========

Position variant
----------------

Work registers: 64
Uniform registers: 128
Stack spilling: 16 bytes
16-bit arithmetic: 0%

                                A      LS       T    Bound
Total instruction cycles:   28.50   16.00   37.00        T
Shortest path cycles:        9.67   11.00    0.00       LS
Longest path cycles:        20.50    8.00   19.00        A

A = Arithmetic, LS = Load/Store, T = Texture

Varying variant
---------------

Work registers: 62
Uniform registers: 128
Stack spilling: 32 bytes
16-bit arithmetic: 0%

                                A      LS       T    Bound
Total instruction cycles:   41.67   33.00   39.00        A
Shortest path cycles:       15.93   27.00    0.00       LS
Longest path cycles:        31.67   24.00   20.00        A

A = Arithmetic, LS = Load/Store, T = Texture

Shader properties
=================

Has uniform computation: true
So I made a little test in which the particle data is read from generic vertex buffer attributes instead of via VTF (vertex texture fetch). The modified code is here:
#version 310 es #ifdef GL_EXT_texture_buffer #extension GL_EXT_texture_buffer : enable #endif #define HLSLCC_DX11ClipSpace 1 // end extensions void compiler_internal_AdjustInputSemantic(inout vec4 TempVariable) { #if HLSLCC_DX11ClipSpace TempVariable.y = -TempVariable.y; TempVariable.z = ( TempVariable.z + TempVariable.w ) / 2.0; #endif } void compiler_internal_AdjustOutputSemantic(inout vec4 Src) { #if HLSLCC_DX11ClipSpace Src.y = -Src.y; Src.z = ( 2.0 * Src.z ) - Src.w; #endif } bool compiler_internal_AdjustIsFrontFacing(bool isFrontFacing) { #if HLSLCC_DX11ClipSpace return !isFrontFacing; #else return isFrontFacing; #endif } uniform vec4 vc0_h[13]; uniform vec4 vc2_h[5]; uniform uvec4 vc3_u[4]; uniform uvec4 vc4_u[1]; uniform ivec4 vc4_i[10]; uniform vec4 vc4_h[16]; uniform vec4 vc1_h[6]; //uniform highp isamplerBuffer vs2; //uniform highp samplerBuffer vs0; //uniform highp samplerBuffer vs1; layout(location=0) in vec2 in_ATTRIBUTE0; //UV layout(location=1) in vec4 in_ATTRIBUTE1; //Position layout(location=2) in vec4 in_ATTRIBUTE2; //Size + Rot + SubImage layout(location=3) in vec4 in_ATTRIBUTE3; //Velocity + CameraOffset layout(location=4) in vec4 in_ATTRIBUTE4; //CustomFacing layout(location=5) in vec4 in_ATTRIBUTE5; //CustomAligment layout(location=6) in vec4 in_ATTRIBUTE6; //PivotOffset + UVScale layout(location=0) out vec4 var_TEXCOORD10; layout(location=1) out vec4 var_TEXCOORD11; layout(location=2) out mediump vec4 var_TEXCOORD7; layout(location=3) out vec4 var_TEXCOORD8; layout(location=4) out float var_OUTCLIPDIST; void main() { float f0; f0 = vc4_h[13].x; float f1; f1 = vc4_h[12].x; float f2; f2 = vc4_h[11].x; vec3 v3; v3.xyz = vc4_h[10].xyz; vec2 v4; v4.xy = vc4_h[9].xy; int i5; i5 = vc4_i[9].x; int i6; i6 = vc4_i[8].x; int i7; i7 = vc4_i[7].x; int i8; i8 = vc4_i[6].x; int i9; i9 = vc4_i[5].x; int i10; i10 = vc4_i[4].x; int i11; i11 = vc4_i[3].x; int i12; i12 = vc4_i[2].x; int i13; i13 = vc4_i[1].x; int i14; i14 = vc4_i[0].x; vec2 v15; v15.xy = 
vc4_h[7].xy; float f16; f16 = vc4_h[6].x; float f17; f17 = vc4_h[5].x; float f18; f18 = vc4_h[4].x; float f19; f19 = vc4_h[3].x; vec3 v20; v20.xyz = vc4_h[2].xyz; uint u21; u21 = vc4_u[0].x; uint u22; u22 = vc3_u[3].x; uint u23; u23 = vc3_u[2].x; uint u24; u24 = vc3_u[1].x; uint u25; u25 = vc3_u[0].x; vec3 v26; v26.xyz = vc0_h[11].xyz; vec3 v27; v27.xyz = vc0_h[10].xyz; vec3 v28; v28.xyz = vc0_h[9].xyz; vec3 v29; v29.xyz = vc0_h[8].xyz; vec3 v30; v30.xyz = vc0_h[7].xyz; vec3 v31; v31.xyz = vc0_h[6].xyz; vec3 v32; v32.xyz = vc0_h[5].xyz; vec3 v33; v33.xyz = vc0_h[4].xyz; //FVertexFactoryIntermediates VFIntermediates = GetVertexFactoryIntermediates(Input); uint u34; u34 = uint(gl_InstanceID); vec4 v35; mediump vec4 v36; vec4 v37; vec4 v38; mat3 m39; vec3 v40; vec3 v41; uint u42; vec3 v43; v43.xyz = vec3(0.000000e+00,0.000000e+00,0.000000e+00); u42 = u34; // no need SortedIndices reassign //if ((u22!=4294967295u)) //{ // u42 = uint(0); //uint(int(texelFetch(vs2,int((u22+u34))).x)); //} vec3 v44; v44.xyz = vc4_h[8].xyz; // Fetch Position vec3 v45; if ((i14==-1)) { v45.xyz = v44; } else { // /* // vec3 v46; // int i47; // i47 = (i14&2147483647); // if (bool((uint(i14)&2147483648u))) // { // vec3 v48; // v48.x = texelFetch(vs0,int(((uint(i47)*u25)+u42))).x; // v48.y = texelFetch(vs0,int(((uint((i47+1))*u25)+u42))).x; // v48.z = texelFetch(vs0,int(((uint((i47+2))*u25)+u42))).x; // v46.xyz = v48; // } // else // { // vec3 v49; // v49.x = texelFetch(vs1,int(((uint(i47)*u25)+u42))).x; // v49.y = texelFetch(vs1,int(((uint((i47+1))*u25)+u42))).x; // v49.z = texelFetch(vs1,int(((uint((i47+2))*u25)+u42))).x; // v46.xyz = v49; // } // v45.xyz = v46; // */ v45 = in_ATTRIBUTE1.xyz; } // Position Local to World vec3 v50; if (bool(u21)) { v50.xyz = ((((vc2_h[0].xyz*v45.xxx)+(vc2_h[1].xyz*v45.yyy))+(vc2_h[2].xyz*v45.zzz))+vc2_h[3].xyz); } else { v50.xyz = v45; } // Fetch Rotation float f51; float f52; if ((i12==-1)) { f52 = f2; } else { /* float f53; int i54; i54 = (i12&2147483647); 
if (bool((uint(i12)&2147483648u))) { f53 = texelFetch(vs0,int(((uint(i54)*u25)+u42))).x; } else { f53 = texelFetch(vs1,int(((uint(i54)*u25)+u42))).x; } f52 = f53; */ f52 = in_ATTRIBUTE2.z; } f51 = ((f52/1.800000e+02)*3.141593e+00); // Fetch Size vec2 v55; if ((i11==-1)) { v55.xy = v4; } else { /* vec2 v56; int i57; i57 = (i11&2147483647); if (bool((uint(i11)&2147483648u))) { vec2 v58; v58.x = texelFetch(vs0,int(((uint(i57)*u25)+u42))).x; v58.y = texelFetch(vs0,int(((uint((i57+1))*u25)+u42))).x; v56.xy = v58; } else { vec2 v59; v59.x = texelFetch(vs1,int(((uint(i57)*u25)+u42))).x; v59.y = texelFetch(vs1,int(((uint((i57+1))*u25)+u42))).x; v56.xy = v59; } v55.xy = v56; */ v55 = in_ATTRIBUTE2.xy; } // Fetch Velocity vec3 v60; if ((i13==-1)) { v60.xyz = v3; } else { /* vec3 v61; int i62; i62 = (i13&2147483647); if (bool((uint(i13)&2147483648u))) { vec3 v63; v63.x = texelFetch(vs0,int(((uint(i62)*u25)+u42))).x; v63.y = texelFetch(vs0,int(((uint((i62+1))*u25)+u42))).x; v63.z = texelFetch(vs0,int(((uint((i62+2))*u25)+u42))).x; v61.xyz = v63; } else { vec3 v64; v64.x = texelFetch(vs1,int(((uint(i62)*u25)+u42))).x; v64.y = texelFetch(vs1,int(((uint((i62+1))*u25)+u42))).x; v64.z = texelFetch(vs1,int(((uint((i62+2))*u25)+u42))).x; v61.xyz = v64; } v60.xyz = v61; */ v60.xyz = in_ATTRIBUTE3.xyz; } // Velocity LocalToWorld vec3 v65; if (bool(u21)) { v65.xyz = ((((vc2_h[4].xxx*vc2_h[0].xyz)*v60.xxx)+((vc2_h[4].yyy*vc2_h[1].xyz)*v60.yyy))+((vc2_h[4].zzz*vc2_h[2].xyz)*v60.zzz)); } else { v65.xyz = v60; } v41.xyz = v65; //Fetch SubImageIndex float f66; if ((i10==-1)) { f66 = f0; } else { /* float f67; int i68; i68 = (i10&2147483647); if (bool((uint(i10)&2147483648u))) { f67 = texelFetch(vs0,int(((uint(i68)*u25)+u42))).x; } else { f67 = texelFetch(vs1,int(((uint(i68)*u25)+u42))).x; } f66 = f67; */ f66 = in_ATTRIBUTE2.w; } // Fetch CustomFacing vec3 v69; v69.xyz = vc4_h[14].xyz; vec3 v70; if ((i9==-1)) { v70.xyz = v69; } else { /* vec3 v71; int i72; i72 = (i9&2147483647); if 
(bool((uint(i9)&2147483648u))) { vec3 v73; v73.x = texelFetch(vs0,int(((uint(i72)*u25)+u42))).x; v73.y = texelFetch(vs0,int(((uint((i72+1))*u25)+u42))).x; v73.z = texelFetch(vs0,int(((uint((i72+2))*u25)+u42))).x; v71.xyz = v73; } else { vec3 v74; v74.x = texelFetch(vs1,int(((uint(i72)*u25)+u42))).x; v74.y = texelFetch(vs1,int(((uint((i72+1))*u25)+u42))).x; v74.z = texelFetch(vs1,int(((uint((i72+2))*u25)+u42))).x; v71.xyz = v74; } v70.xyz = v71; */ v70 = in_ATTRIBUTE4.xyz; } // CustomFacing LocalToWorld vec3 v75; if (bool(u21)) { v75.xyz = ((((vc2_h[4].xxx*vc2_h[0].xyz)*v70.xxx)+((vc2_h[4].yyy*vc2_h[1].xyz)*v70.yyy))+((vc2_h[4].zzz*vc2_h[2].xyz)*v70.zzz)); } else { v75.xyz = v70; } //SafeNormal CustomFacing float f76; f76 = length(v75); vec3 v77; v77.xyz = (v75/vec3(max(f76,1.000000e-06))); // Fetch CustomAlignment vec3 v78; v78.xyz = vc4_h[15].xyz; vec3 v79; if ((i8==-1)) { v79.xyz = v78; } else { /* vec3 v80; int i81; i81 = (i8&2147483647); if (bool((uint(i8)&2147483648u))) { vec3 v82; v82.x = texelFetch(vs0,int(((uint(i81)*u25)+u42))).x; v82.y = texelFetch(vs0,int(((uint((i81+1))*u25)+u42))).x; v82.z = texelFetch(vs0,int(((uint((i81+2))*u25)+u42))).x; v80.xyz = v82; } else { vec3 v83; v83.x = texelFetch(vs1,int(((uint(i81)*u25)+u42))).x; v83.y = texelFetch(vs1,int(((uint((i81+1))*u25)+u42))).x; v83.z = texelFetch(vs1,int(((uint((i81+2))*u25)+u42))).x; v80.xyz = v83; } v79.xyz = v80; */ v79.xyz = in_ATTRIBUTE5.xyz; } // CustomAlignment LocalToWorld vec3 v84; if (bool(u21)) { v84.xyz = ((((vc2_h[4].xxx*vc2_h[0].xyz)*v79.xxx)+((vc2_h[4].yyy*vc2_h[1].xyz)*v79.yyy))+((vc2_h[4].zzz*vc2_h[2].xyz)*v79.zzz)); } else { v84.xyz = v79; } // CustomAlignment SafeNormal float f85; f85 = length(v84); vec3 v86; v86.xyz = (v84/vec3(max(f85,1.000000e-06))); // Fetch PivotOffset vec2 v87; if ((i5==-1)) { v87.xy = v15; } else { /* vec2 v88; int i89; i89 = (i5&2147483647); if (bool((uint(i5)&2147483648u))) { vec2 v90; v90.x = texelFetch(vs0,int(((uint(i89)*u25)+u42))).x; v90.y = 
texelFetch(vs0,int(((uint((i89+1))*u25)+u42))).x; v88.xy = v90; } else { vec2 v91; v91.x = texelFetch(vs1,int(((uint(i89)*u25)+u42))).x; v91.y = texelFetch(vs1,int(((uint((i89+1))*u25)+u42))).x; v88.xy = v91; } v87.xy = v88; */ v87.xy = in_ATTRIBUTE6.xy; } // SafeNormalize(ResolvedView.WorldCameraOrigin - ParticlePosition) vec3 v92; v92.xyz = (v28+(-v50)); float f93; f93 = length(v92); vec3 v94; v94.xyz = (v92/vec3(max(f93,1.000000e-06))); // Fetch NiagaraCameraOffset float f95; if ((i6==-1)) { f95 = f1; } else { /* float f96; int i97; i97 = (i6&2147483647); if (bool((uint(i6)&2147483648u))) { f96 = texelFetch(vs0,int(((uint(i97)*u25)+u42))).x; } else { f96 = texelFetch(vs1,int(((uint(i97)*u25)+u42))).x; } f95 = f96; */ f95 = in_ATTRIBUTE3.w; } //CameraOffset v40.xyz = (v94*vec3(f95)); // Intermediates.SizeRotSubImage = float4(ParticleSize.x, ParticleSize.y, ParticleRotation, SubImageIndex); vec4 v98; v98.x = v55.x; v98.y = v55.y; v98.z = f51; v98.w = f66; //const float3 ParticleDirection = SafeNormalizeWithFallback(ParticleVelocity, float3(0,0,1)); float f99; f99 = length(v41); vec3 v100; vec3 v101; v101.xyz = (v65/vec3(f99)); v100.xyz = ((f99>1.000000e-06))?(v101):(vec3(0.000000e+00,0.000000e+00,1.000000e+00)); // GetTangents(Input, ParticlePosition, ParticleRotation, CustomFacing, CustomAlignment, ParticleDirection, Right, Up); vec3 v102; vec3 v103; vec3 v104; v104.xyz = mix(v31,v29,vec3(f19)); vec3 v105; v105.xyz = mix(v32,v30,vec3(f19)); vec3 v106; v106.xyz = (-v105); vec3 v107; v107.xyz = (-v33); vec3 v108; v108.xyz = (v28+(-v50)); float f109; f109 = length(v108); vec3 v110; v110.xyz = (v108/vec3(max(f109,1.000000e-06))); vec3 v111; v111.xyz = ((u23==2u))?(v77):(v110); vec3 v112; v112.xyz = mix(v100,v86,vec3(float((u24==2u)))); vec3 v113; v113.xyz = cross(v111,v112); float f114; f114 = length(v113); vec3 v115; v115.xyz = (v113/vec3(max(f114,1.000000e-06))); vec3 v116; v116.xyz = mix((-v112),cross(v111,v115),vec3(float((u23==2u)))); vec3 v117; v117.xyz = 
cross(v107,v105); float f118; f118 = length(v117); vec3 v119; v119.xyz = (v117/vec3(max(f118,1.000000e-06))); vec3 v120; v120.xyz = cross(v107,v119); vec3 v121; v121.xyz = cross(v111,v105); float f122; f122 = length(v121); vec3 v123; v123.xyz = (v121/vec3(max(f122,1.000000e-06))); vec3 v124; v124.xyz = cross(v111,v123); vec3 v125; v125.xyz = cross(v110,vec3(0.000000e+00,0.000000e+00,1.000000e+00)); float f126; f126 = length(v125); vec3 v127; v127.xyz = (v125/vec3(max(f126,1.000000e-06))); vec3 v128; v128.xyz = cross(v110,v127); if ((u24==0u)) { if ((u23==4u)) { float f129; f129 = clamp(((dot(v108,v108)*v20.y)+(-v20.z)),0.000000e+00,1.000000e+00); vec3 v130; v130.xyz = cross(v110,vec3(0.000000e+00,0.000000e+00,1.000000e+00)); v103.xyz = mix(v130,v104,vec3(f129)); v102.xyz = mix(cross(v110,v130),v106,vec3(f129)); } else { if ((u23==0u)) { v103.xyz = v104; v102.xyz = v106; } else { if ((u23==1u)) { v103.xyz = v119; v102.xyz = v120; } else { if ((u23==2u)) { v103.xyz = v123; v102.xyz = v124; } else { v103.xyz = v127; v102.xyz = v128; } } } } } else { v103.xyz = v115; v102.xyz = v116; } float f131; f131 = ((f51*f18)+f17); float f132; float f133; f133 = cos(f131); f132 = sin(f131); vec3 v134; v134.xyz = ((v102*vec3(f132))+(v103*vec3(f133))); vec3 v135; v135.xyz = ((v102*vec3(f133))+(-(v103*vec3(f132)))); //const float2 Size = abs(Intermediates.SizeRotSubImage.xy); // Vertex position //const float2x3 Tangents = float2x3(Intermediates.TangentRight, Intermediates.TangentUp); //const float3 VertexOffset = CameraOffset + mul(Size * (UVForPosition - PivotOffset), Tangents); //Intermediates.VertexWorldPosition = ParticlePosition + VertexOffset; vec2 v136; v136.xy = (abs(v98.xy)*(in_ATTRIBUTE0+(-v87))); v43.xyz = (v50+(v40+((v135*v136.yyy)+(v134*v136.xxx)))); // SubUV. 
float f137; f137 = fract(f66); float f138; f138 = ((i7==1))?(f137):(0.000000e+00); // CalcTangentBasis mat3 m139; m139[0].xyz = v134; m139[1].xyz = v135; if ((f16<5.000000e-01)) { m139[2].xyz = normalize(cross(m139[0],m139[1])); } else { if ((f16<1.500000e+00)) { m139[2].xyz = normalize((v50+(-vc4_h[0].xyz))); } else { m139[2].xyz = normalize((v50+(-(vc4_h[0].xyz+(vec3(dot(vc4_h[1].xyz,(v50+(-vc4_h[0].xyz))))*vc4_h[1].xyz))))); } } m39 = m139; // float4 WorldPositionExcludingWPO = VertexFactoryGetWorldPosition(Input, VFIntermediates); vec4 v140; v140.w = 1.000000e+00; v140.xyz = (v43+v26); v38.xyzw = v140; // half3 WorldPositionOffset = GetMaterialWorldPositionOffset(VertexParameters); // WorldPosition.xyz += WorldPositionOffset; v38.xyz = (v140.xyz+vec3(0.000000e+00,0.000000e+00,0.000000e+00)); // Output.Position = mul(RasterizedWorldPosition, ResolvedView.TranslatedWorldToClip); vec4 v141; v141.xyzw = (vc0_h[3]+((vc0_h[2]*v38.zzzz)+((vc0_h[1]*v38.yyyy)+(vc0_h[0]*v38.xxxx)))); // #if USE_PS_CLIP_PLANE // Output.BasePassInterpolants.OutClipDistance = dot(ResolvedView.GlobalClippingPlane, float4(WorldPosition.xyz - ResolvedView.PreViewTranslation.xyz, 1)); // #endif v37.xyzw = v38; vec4 v142; v142.w = 1.000000e+00; v142.xyz = (v38.xyz+(-v26)); float f143; f143 = dot(vc0_h[12],v142); // CalculateHeightFog vec3 v144; v144.xyz = (v38.xyz+(-v27)); //(vec3(0,0,0)+(-v27)); //(v38.xyz+(-v27)); mediump float h145; mediump vec3 v146; float f147; float f148; float f149; float f150; vec3 v151; mediump float h152; h152 = vc1_h[2].w; float f153; f153 = min(v28.z,vc1_h[0].z); vec3 v154; v154.xy = v28.xy; v154.z = f153; v151.xyz = v144; v151.z = (v144.z+(v28.z+(-f153))); float f155; f155 = dot(v151,v151); float f156; f156 = inversesqrt(f155); float f157; f157 = (f155*f156); mediump vec3 v158; v158.xyz = (v151*vec3(f156)); f150 = vc1_h[0].x; f149 = vc1_h[1].x; f148 = f157; f147 = v151.z; float f159; f159 = max(0.000000e+00,vc1_h[0].w); if ((f159>0.000000e+00)) { float f160; f160 = 
(f159*f156); float f161; f161 = (f160*v151.z); float f162; f162 = (v154.z+f161); f148 = ((1.000000e+00+(-f160))*f157); f147 = (v151.z+(-f161)); f150 = (vc1_h[3].x*exp2((-max(-1.270000e+02,(vc1_h[0].y*(f162+(-vc1_h[3].y))))))); f149 = (vc1_h[1].z*exp2((-max(-1.270000e+02,(vc1_h[1].y*(f162+(-vc1_h[1].w))))))); } float f163; float f164; f164 = max(-1.270000e+02,(vc1_h[0].y*f147)); float f165; f165 = ((1.000000e+00+(-exp2((-f164))))/f164); float f166; f166 = (6.931472e-01+(-(2.402265e-01*f164))); float f167; f167 = abs(f164); float f168; f168 = ((f167>1.000000e-02))?(f165):(f166); f163 = (f150*f168); float f169; f169 = max(-1.270000e+02,(vc1_h[1].y*f147)); float f170; f170 = ((1.000000e+00+(-exp2((-f169))))/f169); float f171; f171 = (6.931472e-01+(-(2.402265e-01*f169))); float f172; f172 = abs(f169); float f173; f173 = ((f172>1.000000e-02))?(f170):(f171); float f174; f174 = (f163+(f149*f173)); float f175; f175 = (f174*f148); mediump vec3 v176; v176.xyz = vc1_h[2].xyz; v146.xyz = vec3(0.000000e+00,0.000000e+00,0.000000e+00); if ((vc1_h[4].w>=0.000000e+00)) { v146.xyz = ((vc1_h[5].xyz*vec3(pow(clamp(dot(v158,vc1_h[4].xyz),0.000000e+00,1.000000e+00),vc1_h[5].w)))*vec3((1.000000e+00+(-clamp(exp2((-(f174*max((f148+(-vc1_h[4].w)),0.000000e+00)))),0.000000e+00,1.000000e+00))))); } h145 = max(clamp(exp2((-f175)),0.000000e+00,1.000000e+00),h152); if (((vc1_h[3].w>0.000000e+00)&&(f157>vc1_h[3].w))) { h145 = 1.000000e+00; v146.xyz = vec3(0.000000e+00,0.000000e+00,0.000000e+00); } mediump vec4 v177; v177.xyz = ((v176*vec3((1.000000e+00+(-h145))))+v146); v177.w = h145; v36.xyzw = v177; // Output.FactoryInterpolants = VertexFactoryGetInterpolants(Input, VFIntermediates, VertexParameters); vec4 v178; v178.xyzw = vec4(0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00); v178.xyz = m139[0]; v178.w = f138; float f179; f179 = determinant(m39); vec4 v180; v180.xyz = m139[2]; v180.w = sign(f179); // Output.BasePassInterpolants.PixelPosition.w = Output.Position.w; v37.w = v141.w; v35.xyzw 
= v141; var_TEXCOORD10.xyzw = v178; // TANGENTTOWORLD0 var_TEXCOORD11.xyzw = v180; // TANGENTTOWORLD2 var_TEXCOORD7.xyzw = v36; // Fog var_TEXCOORD8.xyzw = v37; // PixelPosition var_OUTCLIPDIST = f143; // OutCLipDistance compiler_internal_AdjustOutputSemantic(v35); gl_Position.xyzw = v35; }
Malioc reports that this version is better than the VTF (vertex texture fetch) version:
Mali Offline Compiler v7.5.0 (Build 30e352) Copyright 2007-2022 Arm Limited, all rights reserved Configuration ============= Hardware: Mali-G71 r0p1 Architecture: Bifrost Driver: r36p0-00rel0 Shader type: OpenGL ES Vertex Main shader =========== Position variant ---------------- Work registers: 50 Uniform registers: 128 Stack spilling: false 16-bit arithmetic: 0% A LS T Bound Total instruction cycles: 20.83 14.00 0.00 A Shortest path cycles: 8.00 13.00 0.00 LS Longest path cycles: 15.50 12.00 0.00 A A = Arithmetic, LS = Load/Store, T = Texture Varying variant --------------- Work registers: 55 Uniform registers: 128 Stack spilling: false 16-bit arithmetic: 0% A LS T Bound Total instruction cycles: 34.00 27.00 0.00 A Shortest path cycles: 14.33 26.00 0.00 LS Longest path cycles: 26.83 25.00 0.00 A A = Arithmetic, LS = Load/Store, T = Texture Shader properties ================= Has uniform computation: true
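But the VS has only 7 `layout(location=N) in` attributes, and the LS cost still seems larger than expected. So I did more tests, and found that when I delete the height fog calculation (the CalculateHeightFog section, commented out in the shader below), the LS cycles drop: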
// Niagara sprite vertex shader (GLSL ES 3.10), second test variant from this post.
// - Per-particle data (position, size/rotation/sub-image, velocity/camera offset, custom facing,
//   custom alignment, pivot offset) is read from vertex attributes in_ATTRIBUTE1..in_ATTRIBUTE6;
//   the samplerBuffer declarations (vs0, vs1, vs2) and every texelFetch path are commented out.
// - The whole CalculateHeightFog section is commented out and the fog varying (var_TEXCOORD7)
//   is written with vec4(0,0,0,0) instead.
// NOTE(review): the line breaks of the original shader were lost when it was pasted. Each `//`
// comment below presumably ended at its own line break in the real source, so the code that
// follows a `//` on the same line here is live code, not part of the comment - confirm against
// the original file before compiling this text.
#version 310 es #ifdef GL_EXT_texture_buffer #extension GL_EXT_texture_buffer : enable #endif #define HLSLCC_DX11ClipSpace 1 // end extensions void compiler_internal_AdjustInputSemantic(inout vec4 TempVariable) { #if HLSLCC_DX11ClipSpace TempVariable.y = -TempVariable.y; TempVariable.z = ( TempVariable.z + TempVariable.w ) / 2.0; #endif } void compiler_internal_AdjustOutputSemantic(inout vec4 Src) { #if HLSLCC_DX11ClipSpace Src.y = -Src.y; Src.z = ( 2.0 * Src.z ) - Src.w; #endif } bool compiler_internal_AdjustIsFrontFacing(bool isFrontFacing) { #if HLSLCC_DX11ClipSpace return !isFrontFacing; #else return isFrontFacing; #endif } uniform vec4 vc0_h[13]; uniform vec4 vc2_h[5]; uniform uvec4 vc3_u[4]; uniform uvec4 vc4_u[1]; uniform ivec4 vc4_i[10]; uniform vec4 vc4_h[16]; uniform vec4 vc1_h[6]; //uniform highp isamplerBuffer vs2; //uniform highp samplerBuffer vs0; //uniform highp samplerBuffer vs1; layout(location=0) in vec2 in_ATTRIBUTE0; //UV layout(location=1) in vec4 in_ATTRIBUTE1; //Position layout(location=2) in vec4 in_ATTRIBUTE2; //Size + Rot + SubImage layout(location=3) in vec4 in_ATTRIBUTE3; //Velocity + CameraOffset layout(location=4) in vec4 in_ATTRIBUTE4; //CustomFacing layout(location=5) in vec4 in_ATTRIBUTE5; //CustomAligment layout(location=6) in vec4 in_ATTRIBUTE6; //PviotOffset + UVScale layout(location=0) out vec4 var_TEXCOORD10; layout(location=1) out vec4 var_TEXCOORD11; layout(location=2) out mediump vec4 var_TEXCOORD7; layout(location=3) out vec4 var_TEXCOORD8; layout(location=4) out float var_OUTCLIPDIST; void main() { float f0; f0 = vc4_h[13].x; float f1; f1 = vc4_h[12].x; float f2; f2 = vc4_h[11].x; vec3 v3; v3.xyz = vc4_h[10].xyz; vec2 v4; v4.xy = vc4_h[9].xy; int i5; i5 = vc4_i[9].x; int i6; i6 = vc4_i[8].x; int i7; i7 = vc4_i[7].x; int i8; i8 = vc4_i[6].x; int i9; i9 = vc4_i[5].x; int i10; i10 = vc4_i[4].x; int i11; i11 = vc4_i[3].x; int i12; i12 = vc4_i[2].x; int i13; i13 = vc4_i[1].x; int i14; i14 = vc4_i[0].x; vec2 v15; v15.xy = 
vc4_h[7].xy; float f16; f16 = vc4_h[6].x; float f17; f17 = vc4_h[5].x; float f18; f18 = vc4_h[4].x; float f19; f19 = vc4_h[3].x; vec3 v20; v20.xyz = vc4_h[2].xyz; uint u21; u21 = vc4_u[0].x; uint u22; u22 = vc3_u[3].x; uint u23; u23 = vc3_u[2].x; uint u24; u24 = vc3_u[1].x; uint u25; u25 = vc3_u[0].x; vec3 v26; v26.xyz = vc0_h[11].xyz; vec3 v27; v27.xyz = vc0_h[10].xyz; vec3 v28; v28.xyz = vc0_h[9].xyz; vec3 v29; v29.xyz = vc0_h[8].xyz; vec3 v30; v30.xyz = vc0_h[7].xyz; vec3 v31; v31.xyz = vc0_h[6].xyz; vec3 v32; v32.xyz = vc0_h[5].xyz; vec3 v33; v33.xyz = vc0_h[4].xyz; //FVertexFactoryIntermediates VFIntermediates = GetVertexFactoryIntermediates(Input); uint u34; u34 = uint(gl_InstanceID); vec4 v35; mediump vec4 v36; vec4 v37; vec4 v38; mat3 m39; vec3 v40; vec3 v41; uint u42; vec3 v43; v43.xyz = vec3(0.000000e+00,0.000000e+00,0.000000e+00); u42 = u34; // no need SortedIndices reassign //if ((u22!=4294967295u)) //{ // u42 = uint(0); //uint(int(texelFetch(vs2,int((u22+u34))).x)); //} vec3 v44; v44.xyz = vc4_h[8].xyz; // Fetch Position vec3 v45; if ((i14==-1)) { v45.xyz = v44; } else { // /* // vec3 v46; // int i47; // i47 = (i14&2147483647); // if (bool((uint(i14)&2147483648u))) // { // vec3 v48; // v48.x = texelFetch(vs0,int(((uint(i47)*u25)+u42))).x; // v48.y = texelFetch(vs0,int(((uint((i47+1))*u25)+u42))).x; // v48.z = texelFetch(vs0,int(((uint((i47+2))*u25)+u42))).x; // v46.xyz = v48; // } // else // { // vec3 v49; // v49.x = texelFetch(vs1,int(((uint(i47)*u25)+u42))).x; // v49.y = texelFetch(vs1,int(((uint((i47+1))*u25)+u42))).x; // v49.z = texelFetch(vs1,int(((uint((i47+2))*u25)+u42))).x; // v46.xyz = v49; // } // v45.xyz = v46; // */ v45 = in_ATTRIBUTE1.xyz; } // Position Local to World vec3 v50; if (bool(u21)) { v50.xyz = ((((vc2_h[0].xyz*v45.xxx)+(vc2_h[1].xyz*v45.yyy))+(vc2_h[2].xyz*v45.zzz))+vc2_h[3].xyz); } else { v50.xyz = v45; } // Fetch Rotation float f51; float f52; if ((i12==-1)) { f52 = f2; } else { /* float f53; int i54; i54 = (i12&2147483647); 
if (bool((uint(i12)&2147483648u))) { f53 = texelFetch(vs0,int(((uint(i54)*u25)+u42))).x; } else { f53 = texelFetch(vs1,int(((uint(i54)*u25)+u42))).x; } f52 = f53; */ f52 = in_ATTRIBUTE2.z; } f51 = ((f52/1.800000e+02)*3.141593e+00); // Fetch Size vec2 v55; if ((i11==-1)) { v55.xy = v4; } else { /* vec2 v56; int i57; i57 = (i11&2147483647); if (bool((uint(i11)&2147483648u))) { vec2 v58; v58.x = texelFetch(vs0,int(((uint(i57)*u25)+u42))).x; v58.y = texelFetch(vs0,int(((uint((i57+1))*u25)+u42))).x; v56.xy = v58; } else { vec2 v59; v59.x = texelFetch(vs1,int(((uint(i57)*u25)+u42))).x; v59.y = texelFetch(vs1,int(((uint((i57+1))*u25)+u42))).x; v56.xy = v59; } v55.xy = v56; */ v55 = in_ATTRIBUTE2.xy; } // Fetch Velocity vec3 v60; if ((i13==-1)) { v60.xyz = v3; } else { /* vec3 v61; int i62; i62 = (i13&2147483647); if (bool((uint(i13)&2147483648u))) { vec3 v63; v63.x = texelFetch(vs0,int(((uint(i62)*u25)+u42))).x; v63.y = texelFetch(vs0,int(((uint((i62+1))*u25)+u42))).x; v63.z = texelFetch(vs0,int(((uint((i62+2))*u25)+u42))).x; v61.xyz = v63; } else { vec3 v64; v64.x = texelFetch(vs1,int(((uint(i62)*u25)+u42))).x; v64.y = texelFetch(vs1,int(((uint((i62+1))*u25)+u42))).x; v64.z = texelFetch(vs1,int(((uint((i62+2))*u25)+u42))).x; v61.xyz = v64; } v60.xyz = v61; */ v60.xyz = in_ATTRIBUTE3.xyz; } // Velocity LocalToWorld vec3 v65; if (bool(u21)) { v65.xyz = ((((vc2_h[4].xxx*vc2_h[0].xyz)*v60.xxx)+((vc2_h[4].yyy*vc2_h[1].xyz)*v60.yyy))+((vc2_h[4].zzz*vc2_h[2].xyz)*v60.zzz)); } else { v65.xyz = v60; } v41.xyz = v65; //Fetch SubImageIndex float f66; if ((i10==-1)) { f66 = f0; } else { /* float f67; int i68; i68 = (i10&2147483647); if (bool((uint(i10)&2147483648u))) { f67 = texelFetch(vs0,int(((uint(i68)*u25)+u42))).x; } else { f67 = texelFetch(vs1,int(((uint(i68)*u25)+u42))).x; } f66 = f67; */ f66 = in_ATTRIBUTE2.w; } // Fetch CustomFacing vec3 v69; v69.xyz = vc4_h[14].xyz; vec3 v70; if ((i9==-1)) { v70.xyz = v69; } else { /* vec3 v71; int i72; i72 = (i9&2147483647); if 
(bool((uint(i9)&2147483648u))) { vec3 v73; v73.x = texelFetch(vs0,int(((uint(i72)*u25)+u42))).x; v73.y = texelFetch(vs0,int(((uint((i72+1))*u25)+u42))).x; v73.z = texelFetch(vs0,int(((uint((i72+2))*u25)+u42))).x; v71.xyz = v73; } else { vec3 v74; v74.x = texelFetch(vs1,int(((uint(i72)*u25)+u42))).x; v74.y = texelFetch(vs1,int(((uint((i72+1))*u25)+u42))).x; v74.z = texelFetch(vs1,int(((uint((i72+2))*u25)+u42))).x; v71.xyz = v74; } v70.xyz = v71; */ v70 = in_ATTRIBUTE4.xyz; } // CustomFacing LocalToWorld vec3 v75; if (bool(u21)) { v75.xyz = ((((vc2_h[4].xxx*vc2_h[0].xyz)*v70.xxx)+((vc2_h[4].yyy*vc2_h[1].xyz)*v70.yyy))+((vc2_h[4].zzz*vc2_h[2].xyz)*v70.zzz)); } else { v75.xyz = v70; } //SafeNormal CustomFacing float f76; f76 = length(v75); vec3 v77; v77.xyz = (v75/vec3(max(f76,1.000000e-06))); // Fetch CustomAlignment vec3 v78; v78.xyz = vc4_h[15].xyz; vec3 v79; if ((i8==-1)) { v79.xyz = v78; } else { /* vec3 v80; int i81; i81 = (i8&2147483647); if (bool((uint(i8)&2147483648u))) { vec3 v82; v82.x = texelFetch(vs0,int(((uint(i81)*u25)+u42))).x; v82.y = texelFetch(vs0,int(((uint((i81+1))*u25)+u42))).x; v82.z = texelFetch(vs0,int(((uint((i81+2))*u25)+u42))).x; v80.xyz = v82; } else { vec3 v83; v83.x = texelFetch(vs1,int(((uint(i81)*u25)+u42))).x; v83.y = texelFetch(vs1,int(((uint((i81+1))*u25)+u42))).x; v83.z = texelFetch(vs1,int(((uint((i81+2))*u25)+u42))).x; v80.xyz = v83; } v79.xyz = v80; */ v79.xyz = in_ATTRIBUTE5.xyz; } // CustomAlignment LocalToWorld vec3 v84; if (bool(u21)) { v84.xyz = ((((vc2_h[4].xxx*vc2_h[0].xyz)*v79.xxx)+((vc2_h[4].yyy*vc2_h[1].xyz)*v79.yyy))+((vc2_h[4].zzz*vc2_h[2].xyz)*v79.zzz)); } else { v84.xyz = v79; } // CustomAlignment SafeNormal float f85; f85 = length(v84); vec3 v86; v86.xyz = (v84/vec3(max(f85,1.000000e-06))); // Fetch PivotOffset vec2 v87; if ((i5==-1)) { v87.xy = v15; } else { /* vec2 v88; int i89; i89 = (i5&2147483647); if (bool((uint(i5)&2147483648u))) { vec2 v90; v90.x = texelFetch(vs0,int(((uint(i89)*u25)+u42))).x; v90.y = 
texelFetch(vs0,int(((uint((i89+1))*u25)+u42))).x; v88.xy = v90; } else { vec2 v91; v91.x = texelFetch(vs1,int(((uint(i89)*u25)+u42))).x; v91.y = texelFetch(vs1,int(((uint((i89+1))*u25)+u42))).x; v88.xy = v91; } v87.xy = v88; */ v87.xy = in_ATTRIBUTE6.xy; } // SafeNormalize(ResolvedView.WorldCameraOrigin - ParticlePosition) vec3 v92; v92.xyz = (v28+(-v50)); float f93; f93 = length(v92); vec3 v94; v94.xyz = (v92/vec3(max(f93,1.000000e-06))); // Fetch NiagaraCameraOffset float f95; if ((i6==-1)) { f95 = f1; } else { /* float f96; int i97; i97 = (i6&2147483647); if (bool((uint(i6)&2147483648u))) { f96 = texelFetch(vs0,int(((uint(i97)*u25)+u42))).x; } else { f96 = texelFetch(vs1,int(((uint(i97)*u25)+u42))).x; } f95 = f96; */ f95 = in_ATTRIBUTE3.w; } //CameraOffset v40.xyz = (v94*vec3(f95)); // Intermediates.SizeRotSubImage = float4(ParticleSize.x, ParticleSize.y, ParticleRotation, SubImageIndex); vec4 v98; v98.x = v55.x; v98.y = v55.y; v98.z = f51; v98.w = f66; //const float3 ParticleDirection = SafeNormalizeWithFallback(ParticleVelocity, float3(0,0,1)); float f99; f99 = length(v41); vec3 v100; vec3 v101; v101.xyz = (v65/vec3(f99)); v100.xyz = ((f99>1.000000e-06))?(v101):(vec3(0.000000e+00,0.000000e+00,1.000000e+00)); // GetTangents(Input, ParticlePosition, ParticleRotation, CustomFacing, CustomAlignment, ParticleDirection, Right, Up); vec3 v102; vec3 v103; vec3 v104; v104.xyz = mix(v31,v29,vec3(f19)); vec3 v105; v105.xyz = mix(v32,v30,vec3(f19)); vec3 v106; v106.xyz = (-v105); vec3 v107; v107.xyz = (-v33); vec3 v108; v108.xyz = (v28+(-v50)); float f109; f109 = length(v108); vec3 v110; v110.xyz = (v108/vec3(max(f109,1.000000e-06))); vec3 v111; v111.xyz = ((u23==2u))?(v77):(v110); vec3 v112; v112.xyz = mix(v100,v86,vec3(float((u24==2u)))); vec3 v113; v113.xyz = cross(v111,v112); float f114; f114 = length(v113); vec3 v115; v115.xyz = (v113/vec3(max(f114,1.000000e-06))); vec3 v116; v116.xyz = mix((-v112),cross(v111,v115),vec3(float((u23==2u)))); vec3 v117; v117.xyz = 
cross(v107,v105); float f118; f118 = length(v117); vec3 v119; v119.xyz = (v117/vec3(max(f118,1.000000e-06))); vec3 v120; v120.xyz = cross(v107,v119); vec3 v121; v121.xyz = cross(v111,v105); float f122; f122 = length(v121); vec3 v123; v123.xyz = (v121/vec3(max(f122,1.000000e-06))); vec3 v124; v124.xyz = cross(v111,v123); vec3 v125; v125.xyz = cross(v110,vec3(0.000000e+00,0.000000e+00,1.000000e+00)); float f126; f126 = length(v125); vec3 v127; v127.xyz = (v125/vec3(max(f126,1.000000e-06))); vec3 v128; v128.xyz = cross(v110,v127); if ((u24==0u)) { if ((u23==4u)) { float f129; f129 = clamp(((dot(v108,v108)*v20.y)+(-v20.z)),0.000000e+00,1.000000e+00); vec3 v130; v130.xyz = cross(v110,vec3(0.000000e+00,0.000000e+00,1.000000e+00)); v103.xyz = mix(v130,v104,vec3(f129)); v102.xyz = mix(cross(v110,v130),v106,vec3(f129)); } else { if ((u23==0u)) { v103.xyz = v104; v102.xyz = v106; } else { if ((u23==1u)) { v103.xyz = v119; v102.xyz = v120; } else { if ((u23==2u)) { v103.xyz = v123; v102.xyz = v124; } else { v103.xyz = v127; v102.xyz = v128; } } } } } else { v103.xyz = v115; v102.xyz = v116; } float f131; f131 = ((f51*f18)+f17); float f132; float f133; f133 = cos(f131); f132 = sin(f131); vec3 v134; v134.xyz = ((v102*vec3(f132))+(v103*vec3(f133))); vec3 v135; v135.xyz = ((v102*vec3(f133))+(-(v103*vec3(f132)))); //const float2 Size = abs(Intermediates.SizeRotSubImage.xy); // Vertex position //const float2x3 Tangents = float2x3(Intermediates.TangentRight, Intermediates.TangentUp); //const float3 VertexOffset = CameraOffset + mul(Size * (UVForPosition - PivotOffset), Tangents); //Intermediates.VertexWorldPosition = ParticlePosition + VertexOffset; vec2 v136; v136.xy = (abs(v98.xy)*(in_ATTRIBUTE0+(-v87))); v43.xyz = (v50+(v40+((v135*v136.yyy)+(v134*v136.xxx)))); // SubUV. 
float f137; f137 = fract(f66); float f138; f138 = ((i7==1))?(f137):(0.000000e+00); // CalcTangentBasis mat3 m139; m139[0].xyz = v134; m139[1].xyz = v135; if ((f16<5.000000e-01)) { m139[2].xyz = normalize(cross(m139[0],m139[1])); } else { if ((f16<1.500000e+00)) { m139[2].xyz = normalize((v50+(-vc4_h[0].xyz))); } else { m139[2].xyz = normalize((v50+(-(vc4_h[0].xyz+(vec3(dot(vc4_h[1].xyz,(v50+(-vc4_h[0].xyz))))*vc4_h[1].xyz))))); } } m39 = m139; // float4 WorldPositionExcludingWPO = VertexFactoryGetWorldPosition(Input, VFIntermediates); vec4 v140; v140.w = 1.000000e+00; v140.xyz = (v43+v26); v38.xyzw = v140; // half3 WorldPositionOffset = GetMaterialWorldPositionOffset(VertexParameters); // WorldPosition.xyz += WorldPositionOffset; v38.xyz = (v140.xyz+vec3(0.000000e+00,0.000000e+00,0.000000e+00)); // Output.Position = mul(RasterizedWorldPosition, ResolvedView.TranslatedWorldToClip); vec4 v141; v141.xyzw = (vc0_h[3]+((vc0_h[2]*v38.zzzz)+((vc0_h[1]*v38.yyyy)+(vc0_h[0]*v38.xxxx)))); // #if USE_PS_CLIP_PLANE // Output.BasePassInterpolants.OutClipDistance = dot(ResolvedView.GlobalClippingPlane, float4(WorldPosition.xyz - ResolvedView.PreViewTranslation.xyz, 1)); // #endif v37.xyzw = v38; vec4 v142; v142.w = 1.000000e+00; v142.xyz = (v38.xyz+(-v26)); float f143; f143 = dot(vc0_h[12],v142); // // CalculateHeightFog // vec3 v144; // v144.xyz = (v38.xyz+(-v27)); //(vec3(0,0,0)+(-v27)); //(v38.xyz+(-v27)); // mediump float h145; // mediump vec3 v146; // float f147; // float f148; // float f149; // float f150; // vec3 v151; // mediump float h152; // h152 = vc1_h[2].w; // float f153; // f153 = min(v28.z,vc1_h[0].z); // vec3 v154; // v154.xy = v28.xy; // v154.z = f153; // v151.xyz = v144; // v151.z = (v144.z+(v28.z+(-f153))); // float f155; // f155 = dot(v151,v151); // float f156; // f156 = inversesqrt(f155); // float f157; // f157 = (f155*f156); // mediump vec3 v158; // v158.xyz = (v151*vec3(f156)); // f150 = vc1_h[0].x; // f149 = vc1_h[1].x; // f148 = f157; // f147 = v151.z; // 
float f159; // f159 = max(0.000000e+00,vc1_h[0].w); // if ((f159>0.000000e+00)) // { // float f160; // f160 = (f159*f156); // float f161; // f161 = (f160*v151.z); // float f162; // f162 = (v154.z+f161); // f148 = ((1.000000e+00+(-f160))*f157); // f147 = (v151.z+(-f161)); // f150 = (vc1_h[3].x*exp2((-max(-1.270000e+02,(vc1_h[0].y*(f162+(-vc1_h[3].y))))))); // f149 = (vc1_h[1].z*exp2((-max(-1.270000e+02,(vc1_h[1].y*(f162+(-vc1_h[1].w))))))); // } // float f163; // float f164; // f164 = max(-1.270000e+02,(vc1_h[0].y*f147)); // float f165; // f165 = ((1.000000e+00+(-exp2((-f164))))/f164); // float f166; // f166 = (6.931472e-01+(-(2.402265e-01*f164))); // float f167; // f167 = abs(f164); // float f168; // f168 = ((f167>1.000000e-02))?(f165):(f166); // f163 = (f150*f168); // float f169; // f169 = max(-1.270000e+02,(vc1_h[1].y*f147)); // float f170; // f170 = ((1.000000e+00+(-exp2((-f169))))/f169); // float f171; // f171 = (6.931472e-01+(-(2.402265e-01*f169))); // float f172; // f172 = abs(f169); // float f173; // f173 = ((f172>1.000000e-02))?(f170):(f171); // float f174; // f174 = (f163+(f149*f173)); // float f175; // f175 = (f174*f148); // mediump vec3 v176; // v176.xyz = vc1_h[2].xyz; // v146.xyz = vec3(0.000000e+00,0.000000e+00,0.000000e+00); // if ((vc1_h[4].w>=0.000000e+00)) // { // v146.xyz = ((vc1_h[5].xyz*vec3(pow(clamp(dot(v158,vc1_h[4].xyz),0.000000e+00,1.000000e+00),vc1_h[5].w)))*vec3((1.000000e+00+(-clamp(exp2((-(f174*max((f148+(-vc1_h[4].w)),0.000000e+00)))),0.000000e+00,1.000000e+00))))); // } // h145 = max(clamp(exp2((-f175)),0.000000e+00,1.000000e+00),h152); // if (((vc1_h[3].w>0.000000e+00)&&(f157>vc1_h[3].w))) // { // h145 = 1.000000e+00; // v146.xyz = vec3(0.000000e+00,0.000000e+00,0.000000e+00); // } // mediump vec4 v177; // v177.xyz = ((v176*vec3((1.000000e+00+(-h145))))+v146); // v177.w = h145; // v36.xyzw = v177; v36.xyzw = vec4(0,0,0,0); // Output.FactoryInterpolants = VertexFactoryGetInterpolants(Input, VFIntermediates, VertexParameters); vec4 
v178; v178.xyzw = vec4(0.000000e+00,0.000000e+00,0.000000e+00,0.000000e+00); v178.xyz = m139[0]; v178.w = f138; float f179; f179 = determinant(m39); vec4 v180; v180.xyz = m139[2]; v180.w = sign(f179); // Output.BasePassInterpolants.PixelPosition.w = Output.Position.w; v37.w = v141.w; v35.xyzw = v141; var_TEXCOORD10.xyzw = v178; // TANGENTTOWORLD0 var_TEXCOORD11.xyzw = v180; // TANGENTTOWORLD2 var_TEXCOORD7.xyzw = v36; // Fog var_TEXCOORD8.xyzw = v37; // PixelPosition var_OUTCLIPDIST = f143; // OutCLipDistance compiler_internal_AdjustOutputSemantic(v35); gl_Position.xyzw = v35; }
Hardware: Mali-G71 r0p1 Architecture: Bifrost Driver: r36p0-00rel0 Shader type: OpenGL ES Vertex Main shader =========== Position variant ---------------- Work registers: 47 Uniform registers: 128 Stack spilling: false 16-bit arithmetic: 0% A LS T Bound Total instruction cycles: 21.33 7.00 0.00 A Shortest path cycles: 8.50 7.00 0.00 A Longest path cycles: 15.93 7.00 0.00 A A = Arithmetic, LS = Load/Store, T = Texture Varying variant --------------- Work registers: 50 Uniform registers: 128 Stack spilling: false 16-bit arithmetic: 0% A LS T Bound Total instruction cycles: 25.00 17.00 0.00 A Shortest path cycles: 10.17 17.00 0.00 LS Longest path cycles: 18.27 17.00 0.00 A A = Arithmetic, LS = Load/Store, T = Texture Shader properties ================= Has uniform computation: true
So, I just want to ask
1. Why does the height fog calculation affect the LS cycles, even in the position variant?
2. "if ... else ..." branching does not seem to reduce vertex-buffer (attribute) loading, but it can branch around texelFetch() calls. Is it possible to move some optional data, such as "custom facing" and "custom alignment", into a texture buffer (TexBuffer) in order to optimize the shortest path?
The compiler team have reproduced this and have agreed that this is not optimal, but it is how the current compiler is designed to work. We've raised a ticket to improve this in a future release, but I don't have any committed schedule I can share.
Kind regards, Pete