Hi @ all,
I have a 16 byte permutation mask uint8_t[16] and a 16 byte data array uint32_t[4]. I want to "shuffle" this data array using vtbx.
This is my code so far:
#include <arm_neon.h> #include <iostream> int main() { uint32_t* data32 = new uint32_t[4]; data32[0] = 2570; data32[1] = 655370; data32[2] = 168430090; data32[3] = 10; //load structure uint32x4_t data32Vec = vld1q_u32(data32); uint8_t* sMask = new uint8_t[16]; sMask[0] = 2; sMask[1] = 3; sMask[2] = 5; sMask[3] = 6; sMask[4] = 7; sMask[5] = 8; sMask[6] = 9; sMask[7] = 10; sMask[8] = 11; sMask[9] = 15; sMask[10] = 42; sMask[11] = 42; sMask[12] = 42; sMask[13] = 42; sMask[14] = 42; sMask[15] = 42; uint8x16_t shuffleMask = vld1q_u8(sMask); uint8x8x2_t dataVec8x8x2; dataVec8x8x2.val[0] = vget_low_u8(vreinterpretq_u8_u32(data32Vec)); dataVec8x8x2.val[1] = vget_high_u8(vreinterpretq_u8_u32(data32Vec)); uint8_t* comprData1 = new uint8_t[8]; uint8_t* comprData2 = new uint8_t[8]; uint8x8_t target1, target2; target1 = vtbx2_u8(target1, dataVec8x8x2, vget_low_u8(shuffleMask)); target2 = vtbx2_u8(target2, dataVec8x8x2, vget_high_u8(shuffleMask)); vst1_u8(comprData1, target1); vst1_u8(comprData2, target2); for(int i = 0; i < 8; ++i) { std::cout << (unsigned)comprData1[i] << " " << (unsigned)comprData2[i] <<std::endl; } delete[] comprData1; delete[] comprData2; delete[] sMask; delete[] data32; return 0; }
The output looks like the following:
0 10 0 0 0 162 #<-- this value [162] changes from everytime i run the code 10 190 0 0 10 0 10 0 10 0
But it should look like that:
10 10 10 10 10 0 0 0 10 0 10 0 10 0 10 0
I think it has something to do with endianness, but just don't see the problem. Does anyone has a hint?
Sincerely