首页 > 代码库 > cuda中当元素个数超过线程个数时的处理案例
cuda中当元素个数超过线程个数时的处理案例
项目打包下载
当向量元素超过线程个数时的情况
向量元素个数 (33 * 1024 = 33792) 是线程总数 (128 * 128 = 16384) 的约 2.06 倍，因此每个线程需要循环处理 2 到 3 个元素
/*
 * Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
 *
 * NVIDIA Corporation and its licensors retain all intellectual property and
 * proprietary rights in and to this software and related documentation.
 * Any use, reproduction, disclosure, or distribution of this software
 * and related documentation without an express license agreement from
 * NVIDIA Corporation is strictly prohibited.
 *
 * Please refer to the applicable NVIDIA end user license agreement (EULA)
 * associated with this source code for terms and conditions that govern
 * your use of this NVIDIA software.
 *
 */


#include <stdio.h>
#include <stdlib.h>

#include "../common/book.h"
#include "cuda.h"
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

// Number of vector elements; deliberately larger than the number of threads
// launched in main() (128 blocks * 128 threads = 16384).
#define N (33 * 1024)

/*
 * Element-wise vector addition: c[i] = a[i] + b[i] for every i in [0, N).
 *
 * Launch layout: 1-D grid of 1-D blocks, any size. Each thread starts at its
 * global index and advances by the total number of launched threads
 * (blockDim.x * gridDim.x) until it runs past N, so the kernel covers all N
 * elements even when N exceeds the thread count.
 *
 * a, b : input arrays of at least N ints (read only).
 * c    : output array of at least N ints.
 * No shared memory is used.
 */
__global__ void add(const int *a, const int *b, int *c) {
    // Total number of threads in the grid; loop-invariant, so compute it once.
    const int stride = blockDim.x * gridDim.x;

    for (int tid = threadIdx.x + blockIdx.x * blockDim.x; tid < N; tid += stride) {
        c[tid] = a[tid] + b[tid];
    }
}

/*
 * Host driver: fills two N-element vectors, adds them with the add kernel,
 * copies the result back and verifies it against a CPU computation.
 *
 * Returns 0 when every element matches, EXIT_FAILURE when host allocation
 * fails or at least one element is wrong.
 *
 * NOTE(review): HANDLE_ERROR is not defined in this file; it is expected to
 * come from ../common/book.h -- confirm its failure behaviour there.
 */
int main(void) {
    const size_t bytes = N * sizeof(int);
    int *a, *b, *c;
    int *dev_a, *dev_b, *dev_c;

    // allocate the memory on the CPU
    a = (int*)malloc(bytes);
    b = (int*)malloc(bytes);
    c = (int*)malloc(bytes);
    if (a == NULL || b == NULL || c == NULL) {
        fprintf(stderr, "Host memory allocation failed\n");
        // free(NULL) is a no-op, so releasing all three is safe.
        free(a);
        free(b);
        free(c);
        return EXIT_FAILURE;
    }

    // allocate the memory on the GPU
    HANDLE_ERROR(cudaMalloc((void**)&dev_a, bytes));
    HANDLE_ERROR(cudaMalloc((void**)&dev_b, bytes));
    HANDLE_ERROR(cudaMalloc((void**)&dev_c, bytes));

    // fill the arrays 'a' and 'b' on the CPU
    for (int i = 0; i < N; i++) {
        a[i] = i;
        b[i] = 2 * i;
    }

    // copy the arrays 'a' and 'b' to the GPU
    HANDLE_ERROR(cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice));
    HANDLE_ERROR(cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice));

    /*
     * Case where the vector has more elements than there are threads:
     * (33 * 1024) elements / (128 * 128) threads is roughly 2.06, so each
     * thread processes two or three elements inside the kernel's loop.
     */
    add<<<128, 128>>>(dev_a, dev_b, dev_c);
    // A kernel launch returns no status itself; query it explicitly so an
    // invalid launch configuration is reported instead of silently ignored.
    HANDLE_ERROR(cudaGetLastError());

    // copy the array 'c' back from the GPU to the CPU
    // (this blocking copy also waits for the kernel to finish)
    HANDLE_ERROR(cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost));

    // verify that the GPU did the work we requested
    bool success = true;
    for (int i = 0; i < N; i++) {
        if ((a[i] + b[i]) != c[i]) {
            printf("Error: %d + %d != %d\n", a[i], b[i], c[i]);
            success = false;
        }
    }
    if (success) printf("We did it!\n");

    // free the memory we allocated on the GPU
    HANDLE_ERROR(cudaFree(dev_a));
    HANDLE_ERROR(cudaFree(dev_b));
    HANDLE_ERROR(cudaFree(dev_c));

    // free the memory we allocated on the CPU
    free(a);
    free(b);
    free(c);

    // Propagate the verification result to the caller/shell.
    return success ? 0 : EXIT_FAILURE;
}
cuda中当元素个数超过线程个数时的处理案例
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。