首页 > 代码库 > 使用常量内存来处理光线跟踪

使用常量内存来处理光线跟踪

 项目打包下载

  /*
   * Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
   *
   * NVIDIA Corporation and its licensors retain all intellectual property and
   * proprietary rights in and to this software and related documentation.
   * Any use, reproduction, disclosure, or distribution of this software
   * and related documentation without an express license agreement from
   * NVIDIA Corporation is strictly prohibited.
   *
   * Please refer to the applicable NVIDIA end user license agreement (EULA)
   * associated with this source code for terms and conditions that govern
   * your use of this NVIDIA software.
   *
   */

// Forward slash works on every platform; a backslash in a header name is
// not portable.
#include <GL/glut.h>
#include "cuda.h"
#include "../common/book.h"
#include "../common/cpu_bitmap.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

// Width and height of the rendered image, in pixels.
#define DIM 1024

// Pseudo-random value in [0, x]. The argument is parenthesized so that
// expressions such as rnd(a + b) expand correctly.
#define rnd( x ) ((x) * rand() / RAND_MAX)

// Sentinel used as "no hit" depth (returned negated by Sphere::hit).
#define INF     2e10f

// Number of spheres in the scene.
#define SPHERES 20

// A colored sphere. The scene is viewed along the z axis, so a ray is fully
// described by its (ox, oy) pixel-space origin.
struct Sphere {
    float   r, b, g;    // color components
    float   radius;
    float   x, y, z;    // center

    // Tests the ray starting at (ox, oy) and travelling along z against
    // this sphere.
    //
    // On a hit, stores dz / radius in *n (1 at the center of the sphere's
    // disc, approaching 0 towards its rim; used as a shading factor) and
    // returns the z coordinate of the hit point. On a miss, returns -INF
    // and leaves *n untouched.
    __device__ float hit(float ox, float oy, float *n) {
        float dx = ox - x;
        float dy = oy - y;
        float r2 = radius * radius;
        if (dx * dx + dy * dy < r2) {
            float dz = sqrtf(r2 - dx * dx - dy * dy);
            *n = dz / sqrtf(r2);
            return dz + z;
        }
        return -INF;
    }
};

// Scene description, kept in constant memory; filled from the host with
// cudaMemcpyToSymbol before the kernel is launched.
__constant__ Sphere s[SPHERES];

// Renders the scene into ptr, a DIM x DIM RGBA image (4 bytes per pixel).
//
// Launch layout: a 2D grid of 2D blocks covering at least DIM x DIM threads,
// one thread per pixel. Threads that fall outside the image do nothing.
// Uses no shared memory.
__global__ void kernel(unsigned char *ptr) {
    // map from threadIdx/blockIdx to pixel position
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;

    // Guard against grids that overshoot the image.
    if (x >= DIM || y >= DIM) {
        return;
    }

    // The row stride is the image width, independent of the launch shape.
    int offset = x + y * DIM;

    // Shift the origin to the center of the image.
    float   ox = (float)(x - DIM / 2);
    float   oy = (float)(y - DIM / 2);

    float   r = 0, g = 0, b = 0;
    float   maxz = -INF;

    // Keep the color of the sphere whose hit point has the largest z.
    for (int i = 0; i < SPHERES; i++) {
        float   n;
        float   t = s[i].hit(ox, oy, &n);
        if (t > maxz) {
            float fscale = n;
            r = s[i].r * fscale;
            g = s[i].g * fscale;
            b = s[i].b * fscale;
            maxz = t;
        }
    }

    ptr[offset * 4 + 0] = (int)(r * 255);
    ptr[offset * 4 + 1] = (int)(g * 255);
    ptr[offset * 4 + 2] = (int)(b * 255);
    ptr[offset * 4 + 3] = 255;
}

// globals needed by the update routine
struct DataBlock {
    unsigned char   *dev_bitmap;
};

// Builds a random scene, renders it on the GPU, prints the elapsed time
// measured with CUDA events, and displays the resulting bitmap.
int main(void) {
    DataBlock   data;
    // Do not hand an indeterminate pointer to CPUBitmap.
    data.dev_bitmap = NULL;

    // capture the start time
    cudaEvent_t     start, stop;
    HANDLE_ERROR(cudaEventCreate(&start));
    HANDLE_ERROR(cudaEventCreate(&stop));
    HANDLE_ERROR(cudaEventRecord(start, 0));

    CPUBitmap bitmap(DIM, DIM, &data);
    unsigned char   *dev_bitmap;

    // allocate memory on the GPU for the output bitmap
    HANDLE_ERROR(cudaMalloc((void**)&dev_bitmap,
        bitmap.image_size()));

    // Initialize the scene in a small host-side array (no heap allocation
    // needed for SPHERES elements), then copy it to constant memory.
    Sphere temp_s[SPHERES];
    for (int i = 0; i < SPHERES; i++) {
        temp_s[i].r = rnd(1.0f);
        temp_s[i].g = rnd(1.0f);
        temp_s[i].b = rnd(1.0f);
        temp_s[i].x = rnd(1000.0f) - 500;
        temp_s[i].y = rnd(1000.0f) - 500;
        temp_s[i].z = rnd(1000.0f) - 500;
        temp_s[i].radius = rnd(100.0f) + 20;
    }

    // Store the SPHERES sphere objects in constant memory; constant-memory
    // symbols are written with cudaMemcpyToSymbol.
    HANDLE_ERROR(cudaMemcpyToSymbol(s, temp_s,
        sizeof(Sphere) * SPHERES));

    // generate a bitmap from our sphere data
    dim3    threads(16, 16);
    // Round up so the grid always covers the whole image.
    dim3    grids((DIM + threads.x - 1) / threads.x,
                  (DIM + threads.y - 1) / threads.y);
    kernel<<<grids, threads>>>(dev_bitmap);
    // A launch does not return a status; surface bad-configuration errors
    // here instead of at some later, unrelated call.
    HANDLE_ERROR(cudaGetLastError());

    // copy our bitmap back from the GPU for display
    HANDLE_ERROR(cudaMemcpy(bitmap.get_ptr(), dev_bitmap,
        bitmap.image_size(),
        cudaMemcpyDeviceToHost));

    // get stop time, and display the timing results
    HANDLE_ERROR(cudaEventRecord(stop, 0));
    HANDLE_ERROR(cudaEventSynchronize(stop));
    float   elapsedTime;
    HANDLE_ERROR(cudaEventElapsedTime(&elapsedTime,
        start, stop));
    printf("Time to generate:  %3.1f ms\n", elapsedTime);

    HANDLE_ERROR(cudaEventDestroy(start));
    HANDLE_ERROR(cudaEventDestroy(stop));

    HANDLE_ERROR(cudaFree(dev_bitmap));

    // display
    bitmap.display_and_exit();
}

 结果如下所示:

使用常量内存来处理光线跟踪