首页 > 代码库 > cuBLAS sample-code
cuBLAS sample-code
cublasSscal
//Example 1. Application Using C and CUBLAS: 1-based indexing#include <stdlib.h>#include <math.h>#include <cuda_runtime.h>#include "cublas_v2.h"#include <stdio.h>#define M 6#define N 5#define IDX2F(i,j,ld) ((((j)-1)*(ld))+((i)-1))static __inline__ void modify (cublasHandle_t handle, float*m, int ldm, int n, int p, int q, float alpha, float beta){ cublasSscal (handle, n-p+1, &alpha, &m[IDX2F(p,q,ldm)], ldm); cublasSscal (handle, ldm-p+1, &beta, &m[IDX2F(p,q,ldm)], 1);}int main (void){ cudaError_t cudaStat; cublasStatus_t stat; cublasHandle_t handle; int i, j; float* devPtrA; float* a = 0; a = (float*)malloc (M * N * sizeof(*a)); if(!a) { printf("host memory allocation failed"); return EXIT_FAILURE; } for(j = 1; j <= N; j++) { for(i = 1; i <= M; i++) { a[IDX2F(i,j,M)] = (float)((i-1) * M + j); printf("%7.0f",a[IDX2F(i,j,M)]); }printf("\n"); }printf("\n"); cudaStat = cudaMalloc ((void**)&devPtrA, M*N*sizeof(*a)); if(cudaStat != cudaSuccess) { printf ("device memory allocation failed"); return EXIT_FAILURE; } stat = cublasCreate(&handle); if(stat != CUBLAS_STATUS_SUCCESS) { printf ("CUBLAS initialization failed\n"); return EXIT_FAILURE; } stat = cublasSetMatrix (M, N, sizeof(*a), a, M, devPtrA, M); if(stat != CUBLAS_STATUS_SUCCESS) { printf ("data download failed"); cudaFree (devPtrA); cublasDestroy(handle); return EXIT_FAILURE; } modify (handle, devPtrA, M, N, 2, 3, 16.0f, 12.0f); stat = cublasGetMatrix (M, N, sizeof(*a), devPtrA, M, a, M); if(stat != CUBLAS_STATUS_SUCCESS) { printf("data upload failed"); cudaFree (devPtrA); cublasDestroy(handle); return EXIT_FAILURE; } cudaFree (devPtrA); cublasDestroy(handle); for(j = 1; j <= N; j++) { for(i = 1; i <= M; i++) { printf ("%7.0f", a[IDX2F(i,j,M)]); } printf ("\n"); } free(a); return EXIT_SUCCESS;}
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任。若内容有误或涉及侵权,可进行投诉:投诉/举报。工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。