I'm trying to use OpenCL printf on an XU4 with the r9p0 OpenCL drivers. These report as OpenCL version 1.2, so my understanding is that printf should be supported without the need to use extensions. Is this the case?
The following code works on OSX but not on the XU4:
#include <stdio.h>
#ifdef __APPLE__
#include <OpenCL/opencl.h>
#else
#include <CL/cl.h>
#endif
const char *hello = "\n" \
"__kernel void hello(void) \n" \
"{ \n" \
" printf(\"Hello world\\n\"); \n" \
"} \n" \
"\n";
/*
 * Builds the `hello` kernel for the first GPU device, prints the platform
 * details, runs a single work-item and waits for it to complete.
 *
 * Returns 0 on success and 1 as soon as a required OpenCL call fails.
 * Previously every status code and handle was ignored, so a failure either
 * went unnoticed or fed a NULL/uninitialised handle into the next call.
 *
 * The OpenCL objects are not released explicitly; the program exits right
 * after use (unchanged from the original listing).
 */
int main(int argc, char** argv)
{
    /* Platform queries, each paired with the label printed in front of it. */
    const char *const info_label[] = {
        "Vendor", "Name", "Profile", "Version", "Extensions"
    };
    const unsigned int info_param[] = {
        CL_PLATFORM_VENDOR, CL_PLATFORM_NAME, CL_PLATFORM_PROFILE,
        CL_PLATFORM_VERSION, CL_PLATFORM_EXTENSIONS
    };
    size_t global = 1;              /* a single work-item */
    size_t i;
    char s[1024];
    cl_device_id device_id = NULL;  /* stays NULL if no device is returned */

    (void) argc;
    (void) argv;

    /* OpenCL status codes are compared against 0, the value of CL_SUCCESS. */
    if (clGetDeviceIDs(NULL, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL) != 0 || device_id == NULL) {
        fprintf(stderr, "clGetDeviceIDs failed: no GPU device available\n");
        return 1;
    }

    cl_context context = clCreateContext(NULL, 1, &device_id, NULL, NULL, NULL);
    if (context == NULL) {
        fprintf(stderr, "clCreateContext failed\n");
        return 1;
    }

    cl_command_queue queue = clCreateCommandQueue(context, device_id, 0, NULL);
    if (queue == NULL) {
        fprintf(stderr, "clCreateCommandQueue failed\n");
        return 1;
    }

    cl_program program = clCreateProgramWithSource(context, 1, (const char **) &hello, NULL, NULL);
    if (program == NULL) {
        fprintf(stderr, "clCreateProgramWithSource failed\n");
        return 1;
    }

    if (clBuildProgram(program, 0, NULL, NULL, NULL, NULL) != 0) {
        fprintf(stderr, "clBuildProgram failed\n");
        return 1;
    }

    cl_kernel kernel = clCreateKernel(program, "hello", NULL);
    if (kernel == NULL) {
        fprintf(stderr, "clCreateKernel failed\n");
        return 1;
    }

    /*
     * The platform details are informational only: a failed query is
     * reported and skipped instead of printing the unwritten buffer.
     */
    for (i = 0; i < sizeof info_param / sizeof info_param[0]; i++) {
        if (clGetPlatformInfo(NULL, info_param[i], sizeof s, s, NULL) != 0) {
            fprintf(stderr, "clGetPlatformInfo(%s) failed\n", info_label[i]);
            continue;
        }
        printf("%s: %s\n", info_label[i], s);
    }

    if (clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, NULL, 0, NULL, NULL) != 0) {
        fprintf(stderr, "clEnqueueNDRangeKernel failed\n");
        return 1;
    }

    /* Block until the kernel has run before announcing completion. */
    if (clFinish(queue) != 0) {
        fprintf(stderr, "clFinish failed\n");
        return 1;
    }

    printf("Finished\n");
    return 0;
}
On OSX:
Simons-iMac:src simonj (master)$ gcc test_printf.c -framework OpenCL
Simons-iMac:src simonj (master)$ ./a.out
Vendor: Apple
Name: Apple
Profile: FULL_PROFILE
Version: OpenCL 1.2 (Feb 7 2016 15:43:50)
Extensions: cl_APPLE_SetMemObjectDestructor cl_APPLE_ContextLoggingFunctions cl_APPLE_clut cl_APPLE_query_kernel_names cl_APPLE_gl_sharing cl_khr_gl_event
Hello world
Finished
On XU4:
simonj@xu4:~/new_world/xps/src$ gcc test_printf.c -lmali
simonj@xu4:~/new_world/xps/src$ ./a.out
Vendor: ARM
Name: ARM Platform
Version: OpenCL 1.2 v1.r9p0-05rel0.816303d14b549c8bed2bad5983436ff4
Extensions: cl_khr_global_int32_base_atomics cl_khr_global_int32_extended_atomics cl_khr_local_int32_base_atomics cl_khr_local_int32_extended_atomics cl_khr_byte_addressable_store cl_khr_3d_image_writes cl_khr_fp64 cl_khr_int64_base_atomics cl_khr_int64_extended_atomics cl_khr_fp16 cl_khr_gl_sharing cl_khr_icd cl_khr_egl_event cl_khr_egl_image cl_arm_core_id cl_arm_printf cl_arm_thread_limit_hint cl_arm_non_uniform_work_group_size cl_arm_import_memory
Kind regards
Simon
Hi Anthony,
Thanks for the fast response. I've modified the test code, and it now produces the expected result on both platforms; the code is below.
Kind regards,
/*
 * Context-property keys passed to clCreateContext in the properties list
 * below. NOTE(review): the values presumably mirror ARM's cl_arm_printf
 * extension (CL/cl_ext.h) - confirm against the driver's header.
 */
/* Key whose value is the host callback that receives device printf output. */
#define CL_PRINTF_CALLBACK_ARM 0x40B0
/* Key whose value is the printf buffer size (presumably in bytes - confirm). */
#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1
/*
 * Host-side callback that forwards device printf output to stdout.
 *
 * buffer    - output text; only the first `len` bytes are printed, so it
 *             does not need to be NUL-terminated.
 * len       - number of bytes of `buffer` to print.
 * complete  - unused here.
 * user_data - unused here.
 */
void printf_callback( const char *buffer, size_t len, size_t complete, void *user_data )
{
    (void) complete;
    (void) user_data;
    printf( "%.*s", (int)len, buffer );
}
/* Look the platform up explicitly so it can be named in the context properties. */
cl_platform_id platform_id;
clGetPlatformIDs(1, &platform_id, NULL);
/* Query the device on that platform; a NULL platform is implementation-defined. */
cl_device_id device_id;
clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 1, &device_id, NULL);
/*
 * Zero-terminated list of (key, value) pairs for clCreateContext. The two
 * *_ARM entries register printf_callback and a 0x100000 printf buffer; they
 * are compiled out on Apple platforms.
 */
cl_context_properties properties[] = {
#ifndef __APPLE__
    CL_PRINTF_CALLBACK_ARM, (cl_context_properties) printf_callback,
    CL_PRINTF_BUFFERSIZE_ARM, (cl_context_properties) 0x100000,
#endif
    CL_CONTEXT_PLATFORM, (cl_context_properties) platform_id,
    0 };
cl_context context = clCreateContext(properties, 1, &device_id, NULL, NULL, NULL);