00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012 #include "test_compute.h"
00013 #ifdef USE_CL
00015
00016
00017
00018
00019 bool NBodySim::Init()
00020 {
00021
00022 dT = 0.01f;
00023 espSqr = 300.0;
00024 initPos = NULL;
00025 initVel = NULL;
00026 vel = NULL;
00027 total_flops = 0;
00028
00029 start = end = 0;
00030 kernel_time = flops = total_flops = 0.0;
00031
00032 numBodies = BODIES;
00033
00034
00035 if(numBodies < groupSize)
00036 numBodies = groupSize;
00037 numBodies = (numBodies / groupSize) * groupSize;
00038
00039
00040 initPos = new cl_float[4 * numBodies];
00041 initVel = new cl_float[4 * numBodies];
00042
00043 #if defined (_WIN32)
00044 pos = (cl_float*)_aligned_malloc(numBodies * sizeof(cl_float4), 16);
00045 #else
00046 pos = (cl_float*)memalign(16, numBodies * sizeof(cl_float4));
00047 #endif
00048 if(pos == NULL)
00049 {
00050 ShowMessage("OpenCL: Failed to allocate host memory. (pos)", false);
00051 return false;
00052 }
00053
00054 #if defined (_WIN32)
00055 vel = (cl_float*)_aligned_malloc(numBodies * sizeof(cl_float4), 16);
00056 #else
00057 vel = (cl_float*)memalign(16, numBodies * sizeof(cl_float4));
00058 #endif
00059 if(vel == NULL)
00060 {
00061 ShowMessage("OpenCL: Failed to allocate host memory. (vel)", false);
00062 return false;
00063 }
00064
00065
00066 for(unsigned i = 0; i < numBodies; ++i)
00067 {
00068
00069 int shift, mass;
00070 if(i < numBodies/2)
00071 {
00072 mass = 500;
00073 shift = 0;
00074 }
00075 else
00076 {
00077 mass = 1000;
00078 shift = -40;
00079 }
00080
00081 int index = 4 * i;
00082
00083 for(int j = 0; j < 3; ++j)
00084 initPos[index + j] = float(rand()%50 + shift);
00085
00086 initPos[index + 3] = float(rand()%mass);
00087
00088 for(int j = 0; j < 3; ++j)
00089 initVel[index + j] = 0.0f;
00090
00091 initVel[3] = 0.0f;
00092 }
00093
00094 memcpy(pos, initPos, 4 * numBodies * sizeof(cl_float));
00095 memcpy(vel, initVel, 4 * numBodies * sizeof(cl_float));
00096
00097
00098 if(!InitCL())
00099 return false;
00100
00101
00102 updatedPos = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, numBodies * sizeof(cl_float4), pos, &status);
00103 assert(status == CL_SUCCESS);
00104
00105
00106 updatedVel = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, numBodies * sizeof(cl_float4), vel, &status);
00107 assert(status == CL_SUCCESS);
00108
00109 return true;
00110 }
00111
00112
00113
00114
00115 bool NBodySim::SetupKernelArgs()
00116 {
00117 cl_int status;
00118
00119 status = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&updatedPos);
00120 assert(status == CL_SUCCESS);
00121
00122
00123 status = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&updatedVel);
00124 assert(status == CL_SUCCESS);
00125
00126
00127 status = clSetKernelArg(kernel, 2, sizeof(cl_int), (void *)&numBodies);
00128 assert(status == CL_SUCCESS);
00129
00130
00131 status = clSetKernelArg(kernel, 3, sizeof(cl_float), (void *)&dT);
00132 assert(status == CL_SUCCESS);
00133
00134
00135 status = clSetKernelArg(kernel, 4, sizeof(cl_float), (void *)&espSqr);
00136 assert(status == CL_SUCCESS);
00137
00138
00139 status = clSetKernelArg(kernel, 5, groupSize * 4 * sizeof(float), NULL);
00140 assert(status == CL_SUCCESS);
00141
00142 return true;
00143 }
00144
00145
00146
00147
00148 bool NBodySim::Run()
00149 {
00150 cl_event events[2];
00151
00152
00153 size_t threads = (size_t)numBodies;
00154 status = clEnqueueNDRangeKernel(commandQueue, kernel, 1, NULL, &threads, &groupSize, 0, NULL, &events[0]);
00155 if(status != CL_SUCCESS)
00156 return false;
00157
00158 clFinish(commandQueue);
00159
00160
00161
00162 status = clWaitForEvents(1, &events[0]);
00163
00164 clGetEventProfilingInfo(events[0], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL);
00165 clGetEventProfilingInfo(events[0], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL);
00166 kernel_time = (end - start) * 1.0e-6f;
00167 flops = ( BODIES * BODIES * 21 * 1000.0f/kernel_time)/1000000000.0f;
00168 total_flops += flops;
00169 clReleaseEvent(events[0]);
00170
00171
00172 status = clEnqueueReadBuffer(commandQueue, updatedPos, CL_TRUE, 0, numBodies* sizeof(cl_float4), pos, 0, NULL, &events[1]);
00173
00174
00175 status = clWaitForEvents(1, &events[1]);
00176 clReleaseEvent(events[1]);
00177
00178 return true;
00179 }
00180
00181
00182
00183
00184 bool NBodySim::Destroy()
00185 {
00186 cl_int status = TCompute::Destroy();
00187 status |= clReleaseMemObject(updatedPos);
00188 status |= clReleaseMemObject(updatedVel);
00189
00190
00191 if(initPos)
00192 delete [] initPos;
00193
00194 if(initVel)
00195 delete [] initVel;
00196
00197 if(pos)
00198 {
00199 #if defined (_WIN32)
00200
00201 free(pos);
00202 #else
00203 free(pos);
00204 #endif
00205 }
00206 if(vel)
00207 {
00208 #if defined (_WIN32)
00209
00210 free(vel);
00211 #else
00212 free(vel);
00213 #endif
00214 }
00215
00216 if(status == CL_SUCCESS)
00217 return true;
00218 else
00219 return false;
00220 }
00221
00222 #endif