The NVIDIA libcudacxx project verifies both CPU and GPU results to check that computations are repeatable on either processor. We've determined that there is a rounding error in the implicitly linked libamath.so, specifically when testing cbrtf: the result differs from the expected value by one unit in the last place (see the output below). I did not find other exponent-related cmath functions to have the same issue.
I am unable to attach C++ files, so the code is pasted below. Sorry if there are any formatting issues.
/**********************************************************************************************
FAILING CASE
Compiled with:
$ /home/coder/armclang/24.10/arm-linux-compiler-24.10.1_Ubuntu-22.04/bin/armclang++ test.cpp \
-std=c++20 -O3 -nostdlib -L../armclang/24.10/arm-linux-compiler-24.10.1_Ubuntu-22.04/lib -lc -lamath -lgcc
$ ldd a.out
linux-vdso.so.1 (0x00007daa87514000)
libc.so.6 => /lib/aarch64-linux-gnu/libc.so.6 (0x00007daa86ae0000)
/lib/ld-linux-aarch64.so.1 (0x00007daa874c0000)
libamath.so => not found
$ LD_LIBRARY_PATH=/home/coder/armclang/24.10/arm-linux-compiler-24.10.1_Ubuntu-22.04/lib ./a.out
0X40000000 (expected)
0X40000001 (result)
********************************************************************************************
PASSING CASE
-std=c++20 -O3 -nostdlib -L../armclang/24.10/arm-linux-compiler-24.10.1_Ubuntu-22.04/lib -lc -lm -lgcc
linux-vdso.so.1 (0x0000717b8130b000)
libc.so.6 => /lib/aarch64-linux-gnu/libc.so.6 (0x0000717b808d0000)
libm.so.6 => /lib/aarch64-linux-gnu/libm.so.6 (0x0000717b80830000)
/lib/ld-linux-aarch64.so.1 (0x0000717b812b0000)
$ ./a.out
0X40000000 (result)
**********************************************************************************************/
#include <cmath>
#include <stdio.h>
#include <stdlib.h>
// Forward declaration so _start can call main() before it is defined.
int main();

/**
 * Hand-written process entry point.
 *
 * The build commands in the header comment link with -nostdlib, so the C
 * runtime's startup object (which normally provides _start) is not linked in;
 * this definition takes its place. `extern "C"` keeps the symbol name
 * unmangled so it is emitted as plain `_start`.
 *
 * Runs main() and then terminates the process through exit(), forwarding
 * main()'s return value as the exit status instead of a hard-coded 0, so a
 * failing reproducer is visible to the invoking shell.
 */
extern "C" void _start() {
  // _start must not return normally: as the entry point it has no caller to
  // return to, so the process is ended explicitly via exit().
  exit(main());
}
int main() {
using T = float;
volatile float val = 64.0f / 8.0f;
auto result = (float)cbrtf(val);
auto expected = (float)T(2);
auto print4 = [](const char* v) {
printf("%#.2hhX%.2hhX%.2hhX%.2hhX\n", v[3], v[2], v[1], v[0]);
};
print4((const char*)&expected);
print4((const char*)&result);
return 0;