首页 > 代码库 > 项目优化之:GPU编程
项目优化之:GPU编程
1GPU编程,依赖于显卡
2GPU编程依赖于OpenGL和DirectX
3CPU的特点是:频率比较快,GPU的特点是寄存器非常非常的多。
4如果电脑是Windows 7,没法直接调试GPU。Windows 8可以直接调试
5用VS2013新建一个项目,命名:GPU
6调试GPU的方式是VS中的:打断点→运行项目→调试→窗口→GPU线程(通过这种方式实现调试GPU项目)
8.修改项目属性:右击项目→属性→配置属性→常规,修改调试器类型为仅GPU
修改Amp默认加速器(Amp Default Accelerator)时,可以选择运行时默认(Use C++ AMP runtime default)的方式,也可以使用软件加速器(WARP software accelerator)的方式,截图
9.代码:
#include <iostream>
#include <amp.h> // header required for C++ AMP (GPU) programming
using namespace concurrency;
intmain()
{
intv[11] = {‘G‘,‘d‘,‘k‘,‘k‘,‘n‘, 31,‘v‘,‘n‘,‘q‘,‘k‘,‘c‘ };
array_view<int>av(11,v);//array_view是GPU计算结构,av存储到GPU显存
//=表示直接操作AV
//(index<1> idx)操作每一个元素
//restrict(amp)定位GPU执行
parallel_for_each(av.extent, [=](index<1>idx)restrict(amp)
{
av[idx] += 1;//加完后变成了hello world
});
for (unsignedinti = 0;i < 11;i++)
{
std::cout << static_cast<char>(av[i]);
}
std::cin.get();
return 0;
}
10.CPU,GPU单值计算效率测试
案例:
#include <cstdio> // printf, puts, getchar
#include <iostream>
#include <amp.h>
// NOTE(review): Microsoft documents Windows.h as the header to include for
// QueryPerformanceCounter/QueryPerformanceFrequency - confirm that WinBase.h
// on its own is sufficient with this toolchain.
#include <WinBase.h>
#define COUNT 100000
float nickName_GPU[COUNT];
float nickName_CPU[COUNT];
// GPUs have the advantage for parallel computation.
// restrict(amp) limits this function to accelerator (GPU) code.
//
// Returns the sum 0 + 1 + ... + (num - 1) as a double; the result is 0 when
// num <= 0 because the loop body never runs.
double rungpu(int num) restrict(amp)
{
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }
    return temp;
}
// CPUs have the advantage for single-value (single-point) computation.
// restrict(cpu) limits this function to ordinary CPU code.
//
// Returns the sum 0 + 1 + ... + (num - 1) as a double; the result is 0 when
// num <= 0 because the loop body never runs.
double runcpu(int num) restrict(cpu)
{
    // This operates on a single value.
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }
    return temp;
}
// restrict(amp, cpu) allows this function to be used from both accelerator
// (GPU) code and ordinary CPU code.
//
// Returns the sum 0 + 1 + ... + (num - 1) as a double; the result is 0 when
// num <= 0 because the loop body never runs.
double runcpugpu(int num) restrict(amp, cpu)
{
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }
    return temp;
}
//测试单值计算的运行效率
intmain()
{
LARGE_INTEGERfreq;
LARGE_INTEGERstrt;
LARGE_INTEGERed;
QueryPerformanceFrequency(&freq);
QueryPerformanceCounter(&strt);
doubledx[1] = { 0.0 };
double db = 0.0;
concurrency::array_view<double>myview(1,dx);
parallel_for_each(myview.extent,
[=](concurrency::index<1>idx)restrict(amp)
{
myview[idx] += rungpu(1000000);
});
myview.synchronize();//显式等待GPU计算完成并将数据打回内存
printf("%f\n",dx[0]);
QueryPerformanceCounter(&ed);
printf("GPU耗时: %d 毫秒\r\n", (ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart);
QueryPerformanceCounter(&strt);
printf("%f\n",runcpu(1000000));
QueryPerformanceCounter(&ed);
printf("CPU耗时: %d 毫秒\r\n", (ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart);
puts("测试结束");
getchar();
return 0;
}
运行结果:
案例2:
#include <cstdio> // printf, puts, getchar
#include <iostream>
#include <amp.h>
// NOTE(review): Microsoft documents Windows.h as the header to include for
// QueryPerformanceCounter/QueryPerformanceFrequency - confirm that WinBase.h
// on its own is sufficient with this toolchain.
#include <WinBase.h>
#define COUNT 3000
float nickName_GPU[COUNT];
float nickName_CPU[COUNT];
// GPUs have the advantage for parallel computation.
// restrict(amp) limits this function to accelerator (GPU) code.
//
// Returns the sum 0 + 1 + ... + (num - 1) as a double; the result is 0 when
// num <= 0 because the loop body never runs.
double rungpu(int num) restrict(amp)
{
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }
    return temp;
}
// CPUs have the advantage for single-value (single-point) computation.
// restrict(cpu) limits this function to ordinary CPU code.
//
// Returns the sum 0 + 1 + ... + (num - 1) as a double; the result is 0 when
// num <= 0 because the loop body never runs.
double runcpu(int num) restrict(cpu)
{
    // This operates on a single value.
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }
    return temp;
}
// restrict(amp, cpu) allows this function to be used from both accelerator
// (GPU) code and ordinary CPU code.
//
// Returns the sum 0 + 1 + ... + (num - 1) as a double; the result is 0 when
// num <= 0 because the loop body never runs.
double runcpugpu(int num) restrict(amp, cpu)
{
    double temp = 0;
    for (int i = 0; i < num; i++)
    {
        temp += i;
    }
    return temp;
}
// Benchmarks a data-parallel workload: every element of a COUNT-element float
// array is updated COUNT / 10 times with x = (x + 0.1f) / 2.3f, first in an
// accelerator kernel over nickName_GPU and then in a plain CPU loop over
// nickName_CPU. The elapsed milliseconds of each run are printed and the two
// result arrays are compared.
int main()
{
    LARGE_INTEGER freq;
    LARGE_INTEGER strt;
    LARGE_INTEGER ed;
    QueryPerformanceFrequency(&freq);

    // ---- GPU measurement ----
    QueryPerformanceCounter(&strt);
    concurrency::array_view<float> myView(COUNT, nickName_GPU); // hand the data to video memory
    concurrency::parallel_for_each(myView.extent, [=](concurrency::index<1> idx) restrict(amp)
    {
        for (int i = 0; i < COUNT / 10; i++)
        {
            myView[idx] = (myView[idx] + 0.1f) / 2.3f;
        }
    });
    myView.synchronize(); // explicitly wait for the GPU to finish and copy the data back to host memory
    QueryPerformanceCounter(&ed);
    // QuadPart is a 64-bit integer, so it must be printed with %lld; passing
    // it to %d is undefined behaviour and can print a garbage duration.
    printf("GPU耗时: %lld 毫秒\r\n",
        static_cast<long long>((ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart));

    // ---- CPU measurement ----
    QueryPerformanceCounter(&strt);
    for (int idx = 0; idx < COUNT; idx++)
    {
        for (int i = 0; i < COUNT / 10; i++)
        {
            nickName_CPU[idx] = (nickName_CPU[idx] + 0.1f) / 2.3f;
        }
    }
    QueryPerformanceCounter(&ed);
    printf("CPU耗时: %lld 毫秒\r\n",
        static_cast<long long>((ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart));

    // Compare with a small tolerance instead of exact equality: accelerator
    // float arithmetic is not guaranteed to round identically to the CPU's,
    // so bit-exact comparison can report a mismatch for correct results.
    const float kTolerance = 1e-6f;
    for (int idx = 0; idx < COUNT; idx++)
    {
        const float diff = nickName_CPU[idx] - nickName_GPU[idx];
        // Written as a negated "within range" test so that a NaN difference
        // is still reported as a mismatch.
        if (!(diff <= kTolerance && diff >= -kTolerance))
        {
            puts("CPU和GPU的计算结果不相符!");
            getchar();
            return 0;
        }
    }

    puts("测试结束");
    getchar(); // keep the console window open until a key is pressed
    return 0;
}
运行结果:
项目优化之:GPU编程