Hello guys, I am new to ML/AI. As a start, I am trying to run a convolution layer on an M4 using CMSIS-NN function calls and comparing the output with hand-calculated outputs for the same inputs and kernel.
I am using the arm_convolve_HWC_q7_basic CMSIS-NN function for my convolution layer test. With one input channel I can run the layer successfully, and the outputs of the CMSIS-NN call match my hand-calculated outputs exactly.
The tough part is that when I run the same test with more than one input channel, the outputs of arm_convolve_HWC_q7_basic no longer match my hand-calculated outputs.
Please refer to my attached example reference and my code below and help me out with the missing solution thanks in advance.FYI... I took the same inputs, kernels as shown in the example reference just to make life easy i.e to compare outputs easily.#include <stdint.h> #include <stdlib.h> #include "CMSIS_5/CMSIS/NN/Include/arm_nn_math_types.h" #include "CMSIS_5/CMSIS/NN/Include/arm_nnfunctions.h" #define CONV_WT_M4 {1, 1, 1, -1, 0, 0, 1, 1, 1, 0, 0, -1, -1, 1, -1, 0, 1, 1, -1, 0, 0, 0, 1, 0, -1, -1, 0} const int8_t in_data[75] = { 1, 1, 0, 1, 1, 1, 1, 1, 2, 1, 1, 2, 0, 0, 0, 0, 2, 1, 1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 2, 2, 2, 0, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 0, 1, 1, 0, 0, 2, 1, 0, 1, 2, 0, 2, 1, 1, 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 0, 0}; #define CONV_BIAS_M4 {1} const int8_t expected_out_data[9] = { 6, 4, 3, 5, 5, 6, 1, -3, 2}; #define CONV_IN_DIM_M4 5 #define CONV_IN_CH_M4 3 #define CONV_KER_DIM_M4 3 #define CONV_PAD_M4 1 #define CONV_STRIDE_M4 2 #define CONV_OUT_CH_M4 1 #define CONV_OUT_DIM_M4 3 #define CONV_BIAS_LSHIFT_M4 0 #define CONV_OUT_RSHIFT_M4 0 static const q7_t conv2_wt[CONV_IN_CH_M4*CONV_KER_DIM_M4*CONV_KER_DIM_M4*CONV_OUT_CH_M4] = CONV_WT_M4; static const q7_t conv2_bias[CONV_OUT_CH_M4] = CONV_BIAS_M4; q7_t output_data[CONV_OUT_DIM_M4]; q15_t col_buffer[5000]; q7_t scratch_buffer[27200]; void main(void) { q7_t* buffer1 = scratch_buffer; arm_convolve_HWC_q7_basic((q7_t*)in_data, CONV_IN_DIM_M4, CONV_IN_CH_M4, conv2_wt, CONV_OUT_CH_M4, CONV_KER_DIM_M4, CONV_PAD_M4, CONV_STRIDE_M4, conv2_bias, CONV_BIAS_LSHIFT_M4, CONV_OUT_RSHIFT_M4, buffer1, CONV_OUT_DIM_M4, (q15_t*)col_buffer, NULL); printf("INPUTS\n"); for (int i=0; i< CONV_IN_CH_M4; i++) { printf("channel %d\n",i); for (int j=0; j< CONV_IN_DIM_M4; j++) { for (int k=0; k< CONV_IN_DIM_M4; k++) { printf("%4d ",in_data[i * CONV_IN_DIM_M4 * CONV_IN_DIM_M4 + j * CONV_IN_DIM_M4 + k]); } printf("\n"); } } printf("WEIGHTS\n"); for (int i=0; i< CONV_OUT_CH_M4*CONV_IN_CH_M4; i++) { 
printf("channel %d\n",i); for (int j=0; j< CONV_KER_DIM_M4; j++) { for (int k=0; k< CONV_KER_DIM_M4; k++) { printf("%4d ",conv2_wt[i * CONV_KER_DIM_M4 * CONV_KER_DIM_M4 + j * CONV_KER_DIM_M4 + k]); } printf("\n"); } } printf("Expected OUTPUTS\n"); for (int i=0; i< CONV_OUT_CH_M4; i++) { printf("channel %d\n",i); for (int j=0; j< CONV_OUT_DIM_M4; j++) { for (int k=0; k< CONV_OUT_DIM_M4; k++) { printf("%4d ",expected_out_data[i * CONV_OUT_DIM_M4 * CONV_OUT_DIM_M4 + j * CONV_OUT_DIM_M4 + k]); } printf("\n"); } } printf("\nM4 OUTPUTS\n"); for (int i=0; i< CONV_OUT_CH_M4; i++) { printf("channel %d\n",i); for (int j=0; j< CONV_OUT_DIM_M4; j++) { for (int k=0; k< CONV_OUT_DIM_M4; k++) { printf("%4d ",buffer1[i * CONV_OUT_DIM_M4 * CONV_OUT_DIM_M4 + j * CONV_OUT_DIM_M4 + k]); } printf("\n"); } } if(!memcmp(expected_out_data,buffer1,CONV_OUT_DIM_M4*CONV_OUT_DIM_M4*CONV_OUT_CH_M4)) { printf("Passed\n"); } else { printf("Failed\n"); } }
#include <stdint.h> #include <stdlib.h> #include "CMSIS_5/CMSIS/NN/Include/arm_nn_math_types.h" #include "CMSIS_5/CMSIS/NN/Include/arm_nnfunctions.h" #define CONV_WT_M4 {1, 1, 1, -1, 0, 0, 1, 1, 1, 0, 0, -1, -1, 1, -1, 0, 1, 1, -1, 0, 0, 0, 1, 0, -1, -1, 0} const int8_t in_data[75] = { 1, 1, 0, 1, 1, 1, 1, 1, 2, 1, 1, 2, 0, 0, 0, 0, 2, 1, 1, 2, 1, 2, 1, 1, 0, 2, 2, 1, 0, 2, 2, 2, 0, 1, 1, 1, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 0, 1, 1, 0, 0, 2, 1, 0, 1, 2, 0, 2, 1, 1, 1, 0, 2, 2, 1, 1, 1, 1, 1, 1, 2, 0, 0, 0}; #define CONV_BIAS_M4 {1} const int8_t expected_out_data[9] = { 6, 4, 3, 5, 5, 6, 1, -3, 2}; #define CONV_IN_DIM_M4 5 #define CONV_IN_CH_M4 3 #define CONV_KER_DIM_M4 3 #define CONV_PAD_M4 1 #define CONV_STRIDE_M4 2 #define CONV_OUT_CH_M4 1 #define CONV_OUT_DIM_M4 3 #define CONV_BIAS_LSHIFT_M4 0 #define CONV_OUT_RSHIFT_M4 0 static const q7_t conv2_wt[CONV_IN_CH_M4*CONV_KER_DIM_M4*CONV_KER_DIM_M4*CONV_OUT_CH_M4] = CONV_WT_M4; static const q7_t conv2_bias[CONV_OUT_CH_M4] = CONV_BIAS_M4; q7_t output_data[CONV_OUT_DIM_M4]; q15_t col_buffer[5000]; q7_t scratch_buffer[27200]; void main(void) { q7_t* buffer1 = scratch_buffer; arm_convolve_HWC_q7_basic((q7_t*)in_data, CONV_IN_DIM_M4, CONV_IN_CH_M4, conv2_wt, CONV_OUT_CH_M4, CONV_KER_DIM_M4, CONV_PAD_M4, CONV_STRIDE_M4, conv2_bias, CONV_BIAS_LSHIFT_M4, CONV_OUT_RSHIFT_M4, buffer1, CONV_OUT_DIM_M4, (q15_t*)col_buffer, NULL); printf("INPUTS\n"); for (int i=0; i< CONV_IN_CH_M4; i++) { printf("channel %d\n",i); for (int j=0; j< CONV_IN_DIM_M4; j++) { for (int k=0; k< CONV_IN_DIM_M4; k++) { printf("%4d ",in_data[i * CONV_IN_DIM_M4 * CONV_IN_DIM_M4 + j * CONV_IN_DIM_M4 + k]); } printf("\n"); } } printf("WEIGHTS\n"); for (int i=0; i< CONV_OUT_CH_M4*CONV_IN_CH_M4; i++) { printf("channel %d\n",i); for (int j=0; j< CONV_KER_DIM_M4; j++) { for (int k=0; k< CONV_KER_DIM_M4; k++) { printf("%4d ",conv2_wt[i * CONV_KER_DIM_M4 * CONV_KER_DIM_M4 + j * CONV_KER_DIM_M4 + k]); } printf("\n"); } } printf("Expected OUTPUTS\n"); for (int i=0; i< 
CONV_OUT_CH_M4; i++) { printf("channel %d\n",i); for (int j=0; j< CONV_OUT_DIM_M4; j++) { for (int k=0; k< CONV_OUT_DIM_M4; k++) { printf("%4d ",expected_out_data[i * CONV_OUT_DIM_M4 * CONV_OUT_DIM_M4 + j * CONV_OUT_DIM_M4 + k]); } printf("\n"); } } printf("\nM4 OUTPUTS\n"); for (int i=0; i< CONV_OUT_CH_M4; i++) { printf("channel %d\n",i); for (int j=0; j< CONV_OUT_DIM_M4; j++) { for (int k=0; k< CONV_OUT_DIM_M4; k++) { printf("%4d ",buffer1[i * CONV_OUT_DIM_M4 * CONV_OUT_DIM_M4 + j * CONV_OUT_DIM_M4 + k]); } printf("\n"); } } if(!memcmp(expected_out_data,buffer1,CONV_OUT_DIM_M4*CONV_OUT_DIM_M4*CONV_OUT_CH_M4)) { printf("Passed\n"); } else { printf("Failed\n"); } }
bob_tyson Thanks for the question and welcome to ML!
Since the code for generating the reference data is missing, I am unable to say where things go wrong. Are the data formats (HWC vs. CHW) of the input and filter the same as those used by arm_convolve_HWC_q7_basic?
The function that you are trying to use is no longer supported and is part of the legacy API. You can find information about that in https://github.com/ARM-software/CMSIS_5/tree/develop/CMSIS/NN#legacy-vs-tfl-micro-compliant-apis
The APIs that are active are the ones compatible with TensorFlow Lite for Microcontrollers. You can use the example in https://github.com/ARM-software/CMSIS_5/tree/develop/CMSIS/NN/Examples to get started on that.
I hope that helps and please do not hesitate with any follow ups.
Thanks felix, I am now able to test the conv layer and the output matches the hand calculation. To answer your question, I am using the same data format (HWC, as described in developer.arm.com/.../Compare-the-ML-framework-and-CMSIS-NN-data-layouts).