## ----setup, include = FALSE---------------------------------------------------
# Chunk defaults for this document: merge source and output into a single
# block, and prefix every line of printed output with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
library("gpuMagic")

## -----------------------------------------------------------------------------
# OpenCL source for an element-wise vector addition kernel.
# Each work-item handles the element at its own global id.
src <- "
kernel void vecAdd(global double* A, global double* B, global double* res){
uint id=get_global_id(0);
res[id]=A[id]+B[id];
}
"

# List the available devices, then select device 1.
getDeviceList()
setDevice(1)

# Host-side input data: two random vectors of length n.
n <- 1000
A <- runif(n)
B <- runif(n)

# Copy the inputs to the device.
# (`type` and `device` are optional arguments.)
A_dev <- gpuMatrix(A, type = "double", device = 1)
B_dev <- gpuMatrix(B, type = "double", device = 1)

# Allocate an empty n x 1 device matrix that will receive the result.
res_dev <- gpuEmptMatrix(row = n, col = 1, type = "double", device = 1)

# Execute the kernel with one global thread per element.
# The call has no return value; the result is written into res_dev.
.kernel(
  src = src,
  kernel = "vecAdd",
  parms = list(A_dev, B_dev, res_dev),
  .device = 1,
  .globalThreadNum = n
)

# Bring the result back from the device and flatten it to a plain vector.
res_dev <- download(res_dev)
res <- as.vector(res_dev)

# Compare against the host computation: show the range of the error.
range(res - A - B)

## -----------------------------------------------------------------------------
# Same kernel, but the argument types are declared with the auto macros.
# The first argument uses auto1, the second auto2, and so on;
# gAuto is short for "global auto".
src <- "
kernel void vecAdd(gAuto1* A, gAuto2* B, gAuto3* res){
uint id=get_global_id(0);
res[id]=A[id]+B[id];
}
"

# Copy the inputs to the device.
# Note that A and B are sent as float this time.
A_dev <- gpuMatrix(A, type = "float", device = 1)
B_dev <- gpuMatrix(B, type = "float", device = 1)

# Allocate an empty n x 1 float device matrix that will receive the result.
res_dev <- gpuEmptMatrix(row = n, col = 1, type = "float", device = 1)

# Execute the kernel with one global thread per element.
# The call has no return value; the result is written into res_dev.
.kernel(
  src = src,
  kernel = "vecAdd",
  parms = list(A_dev, B_dev, res_dev),
  .device = 1,
  .globalThreadNum = n
)

# Bring the result back from the device and flatten it to a plain vector.
res_dev <- download(res_dev)
res <- as.vector(res_dev)

# Compare against the host computation: show the range of the error.
range(res - A - B)

