Visual C++ - Separate the CUDA host code into a .cpp file
main.cpp
#include<iostream> #include "cuda.h" using namespace std; void cuda_calculation(); int main() { cuda_calculation(); return 0; }
cu.h
// Host-callable wrapper around the square_array kernel; defined in cu.cu.
// The caller in cuda.cpp passes, in order: the number of blocks, the
// threads per block, the device array pointer and the element count.
//   c - array pointer forwarded to the kernel
//   d - element count forwarded to the kernel
void call(int a, int b, float* c, int d);
cuda.cpp
#include <stdio.h> #include <cuda.h> #include "cu.h" void cuda_calculation() { float *a_h, *a_d; // pointer host & device arrays const int n = 10; // number of elements in arrays size_t size = n * sizeof(float); a_h = (float *)malloc(size); // allocate array on host cudamalloc((void **) &a_d, size); // allocate array on device // initialize host array , copy cuda device (int i=0; i<n; i++) a_h[i] = (float)i; cudamemcpy(a_d, a_h, size, cudamemcpyhosttodevice); // calculation on device: int block_size = 4; int n_blocks = n/block_size + (n%block_size == 0 ? 0:1); void call(n_blocks, block_size,&a_d, n); /*square_array <<< n_blocks, block_size >>> (a_d, n);*/ // retrieve result device , store in host array cudamemcpy(a_h, a_d, sizeof(float)*n, cudamemcpydevicetohost); // print results (int i=0; i<n; i++) printf("%d %f\n", i, a_h[i]); // cleanup free(a_h); cudafree(a_d); }
cu.cu
#include <stdio.h>
#include "cu.h"
#include <cuda.h>

// Kernel that executes on the CUDA device: squares a[idx] in place.
//
// Expects a 1D grid of 1D blocks; each thread handles at most one element,
// so gridDim.x * blockDim.x must be >= n for every element to be processed.
//   a - array to square in place; must be a device pointer because it is
//       dereferenced on the device
//   n - number of elements in a
__global__ void square_array(float *a, int n)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    // The grid may be larger than n; surplus threads do nothing.
    if (idx < n)
        a[idx] = a[idx] * a[idx];
}

// Host-callable wrapper that launches square_array.
//   a - number of blocks in the grid
//   b - number of threads per block
//   c - device pointer to the array
//   d - number of elements in the array
// The launch is asynchronous; an invalid launch configuration is reported
// on stderr right after the launch.
void call(int a, int b, float* c, int d)
{
    square_array<<<a, b>>>(c, d);

    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess)
        fprintf(stderr, "square_array launch failed: %s\n",
                cudaGetErrorString(status));
}
I tried to separate the kernel code from the host code, keeping the host code in a .cpp file, but the following error occurs:
error: 'cudaMemcpy': identifier not found, and the other CUDA-related identifiers are not recognized either. How can I use CUDA-related identifiers in a .cpp file and call kernel functions from it?
There are a few errors: void cuda_calculation();
needs to be visible to main.cpp through a header file (cu.h).
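Also, make sure you compile .cu files with nvcc, not as standard C++ files. Use the CUDA compilation rules to make this process easy (they are installed by default as part of the CUDA Toolkit). In the .cpp file, include <cuda_runtime.h> rather than only <cuda.h>: <cuda.h> is the driver API header, while runtime functions such as cudaMalloc and cudaMemcpy are declared by <cuda_runtime.h>, which is why they are reported as "identifier not found".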
Comments
Post a Comment