I wrote a test program that calls a vector function from ArmPL directly. However, elements 4-7 of the output don't look right to me. Is there anything wrong with the program?
Regards,
#include <math.h> #include <stdio.h> #include <stdlib.h> #include <arm_sve.h> #include <armpl.h> svfloat32_t _ZGVsMxv_expf(svfloat32_t a, svbool_t pg); int main() { int width = 16; float* src_data = (float *)malloc((width) * sizeof(float)); float* dst_data = (float *)malloc((width) * sizeof(float)); // Fill src_data with small values [-100, 100] float min = -10.f; float max = 10.f; for(int w = 0; w < width; w++) { double r = (double)random()/RAND_MAX; src_data[w] = min + (max - min) * r; } float *src, *dst; // SVE svbool_t pred; svfloat32_t sva, svc; int i = 0; src = src_data; dst = dst_data; for(int w = 0; w < width; w+=svcntw()) { pred = svwhilelt_b32(w, width); sva = svld1(pred, src+w); svc = _ZGVsMxv_expf(sva, pred); svst1(pred, dst+w, svc); } for(int w = 0; w < width; w++) { printf("%d %f %f %f\n", w, src_data[w], expf(src_data[w]), dst_data[w]); } return 0; }
compile -
gcc-10.2 -O3 -march=armv8.4-a+sve -o test_sve test_expf_sve.c -lamath -lm
result -
0 6.803754 901.224426 901.224365 1 -2.112341 0.120954 0.120954 2 5.661984 287.719025 287.718994 3 5.968801 391.036346 391.036377 4 8.232947 3762.907959 1.000000 5 -6.048973 0.002360 1.000000 6 -3.295545 0.037048 1.000000 7 5.364592 213.704041 1.000000 8 -4.444506 0.011743 0.011743 9 1.079399 2.942911 2.942911 10 -0.452059 0.636317 0.636317 11 2.577419 13.163115 13.163114 12 -2.704311 0.066916 0.066916 13 0.268018 1.307371 1.307371 14 9.044595 8472.617188 8472.617188 15 8.323901 4121.206055 4121.206055
Do you have instructions on how to update binutils to 2.32.1 on Ubuntu 18.04?
I have tried loading binutils/11.2.0 instead of acfl, and that also solves this problem.
I believe your best option is to load the module binutils/11.2.0, like you just did.
Sorry, I have no particular instructions on how to update binutils on Ubuntu, but I imagine apt will do the job. Otherwise, you could install it from source, but then again I would probably just use the binutils module mentioned above instead.