CMSIS DSP arm_fir_decimate function usage

I want to decimate a signal with a decimation factor M=4

I used arm_fir_decimate_q15 function for the same but am unable to get correct results.

Following is my code

#include <stdio.h>
#include "arm_math.h"

#define DECIMATOR_BLOCK_SIZE 32
#define DECIMATOR_NO_OF_TAPS_FIR 4
#define DECIMATION_FACTOR 4

int main(void)
{
    arm_fir_decimate_instance_q15 decimate_inst;
    q15_t m_decimator_state[DECIMATOR_NO_OF_TAPS_FIR + DECIMATOR_BLOCK_SIZE - 1];
    const q15_t decimator_coeffs[DECIMATOR_NO_OF_TAPS_FIR] = {0, 0, 0, 1}; // In time reverse order
	
    int16_t src_buff_1[DECIMATOR_BLOCK_SIZE] = {859, -292, 356, -510, -835, -134, 921, -932, 868, -315, -933, 230, -513, -834, -567, 525, -87, 351, 754, 26, -132, 517, -726, -71, -749, 169, 792, -229, -500, -397, 717, 108};
	int16_t src_buff_2[DECIMATOR_BLOCK_SIZE] = {-35, -719, 264, 664, -426, 708, -670, -56, 287, 768, -704, 405, 316, 744, 862, -44, -895, 40, 540, -31, -257, -941, 795, -617, -712, 449, -853, -854, 470, 5, -194, 882};
	volatile int16_t dst_buff[DECIMATOR_BLOCK_SIZE / DECIMATION_FACTOR] = { 0 };

    arm_status ret_status;

	ret_status = arm_fir_decimate_init_q15(&decimate_inst,
										   DECIMATOR_NO_OF_TAPS_FIR,
										   DECIMATION_FACTOR,
										   &decimator_coeffs[0],
										   &m_decimator_state[0],
										   DECIMATOR_BLOCK_SIZE);
	if (ret_status != ARM_MATH_SUCCESS)
	{
		printf("---- arm_fir_decimate_init_q15 failed ----\n");
	}

	arm_fir_decimate_q15(&decimate_inst,
							 (q15_t*)src_buff_1,
							 (q15_t*)dst_buff,
							 DECIMATOR_BLOCK_SIZE);

	return 0;
}

Output

{0, -1, 0, -1, -1, -1, -1, -1}

What could be going wrong?