; Import data section of the PE image. The flags on the section line mark it
; as import data that is both readable and writeable.
section '.idata' import data readable writeable

  ; Library table: each pair is <library label>,<DLL file name string>.
  ; The label on the left is what the `import` lists refer to (e.g. `cuBlas`
  ; and `cuda` in this file); the string on the right is the module name the
  ; table records for that library.
  ; Three entries carry a relative `dlls\` prefix in the module name.
  ; NOTE(review): confirm the Windows loader resolves these relative paths
  ;   from the location you expect at start-up.
  ; NOTE(review): `cudart` and `cuDNN1` are presumably consumed by the import
  ;   lists in importCudart.txt / import_cuDNN.txt -- verify against those files.
  library kernel32,'KERNEL32.DLL',\
          user32,'USER32.DLL',\
          cudart,'dlls\cudart64_12.dll',\
          msvcrt,'MSVCRT.DLL',\
          cuda,'NVCUDA.DLL',\
          cuBlas,'dlls\cublas64_12.dll',\
          cuDNN1,'dlls\cudnn64_9.dll'


  include 'fasmAPI\kernel32.inc'
  include 'fasmAPI\user32.inc'

  ; cuBLAS entry points, resolved from the DLL registered under the `cuBlas`
  ; library label. Each pair is <label used by the program>,<exported name>.
  ; The cuBLAS v2 headers expose cublasCreate and cublasSetStream through
  ; `#define`s onto their `_v2` symbols, so the short labels are kept for the
  ; call sites while the export strings name the `_v2` functions.
  ; cublasSetMathMode has no `_v2` variant and is imported under its own name.
  import cuBlas,\
         cublasCreate,'cublasCreate_v2',\
         cublasSetMathMode,'cublasSetMathMode',\
         cublasSetStream,'cublasSetStream_v2'


  include 'importCudart.txt'
  include 'import_cuDNN.txt'

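; Stream creation exists in both CUDA APIs; which DLL exports it depends on the API:
;   cuStreamCreate,&stream, 0   ; driver API, exported by NVCUDA.DLL
;   cudaStreamCreate,stream     ; runtime API, exported by the cudart DLL
;                               ; (this file loads dlls\cudart64_12.dll; the note originally said cudart64_101.dll)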

  ; Imports bound to the `cuda` library label (registered as NVCUDA.DLL in the
  ; library table of this file). Each pair is
  ; <label used by the program>,<exported name looked up in the DLL>.
  ;
  ; NOTE(review): cudaMallocAsync and cudaMemcpyAsync are CUDA *runtime* API
  ;   names, which are normally exported by the cudart DLL rather than by
  ;   NVCUDA.DLL (the driver API counterparts are cuMemAllocAsync and
  ;   cuMemcpyAsync, with different signatures). If these two labels are
  ;   referenced anywhere, the loader will presumably fail to resolve them --
  ;   confirm, and move them to the cudart import list if so.
  ; NOTE(review): cuda.h maps cuCtxCreate, cuMemAlloc, cuMemcpyHtoD,
  ;   cuMemcpyDtoH, cuMemFree and cuCtxDestroy onto `_v2` exports; the
  ;   unsuffixed strings used here select the legacy entry points. Verify this
  ;   is intended before changing it, since the call sites are not visible
  ;   from this file.
  ; NOTE(review): cuParamSetSize, cuParamSetv, cuFuncSetBlockShape and
  ;   cuLaunchGrid belong to the deprecated execution-control API -- confirm
  ;   the installed driver still exports them.
  import cuda,\
         cuInit,'cuInit',\
         cuDeviceGet,'cuDeviceGet',\
         cuCtxCreate,'cuCtxCreate',\
         cuMemAlloc,'cuMemAlloc',\
         cuModuleLoadData,'cuModuleLoadData',\
         cuModuleGetFunction,'cuModuleGetFunction',\
         cuMemcpyHtoD,'cuMemcpyHtoD',\
         cuParamSetSize,'cuParamSetSize',\
         cuParamSetv,'cuParamSetv',\
         cuFuncSetBlockShape,'cuFuncSetBlockShape',\
         cuCtxSynchronize,'cuCtxSynchronize',\
         cuMemcpyDtoH,'cuMemcpyDtoH',\
         cuMemFree,'cuMemFree',\
         cuCtxDestroy,'cuCtxDestroy',\
         cudaMallocAsync,'cudaMallocAsync',\
         cudaMemcpyAsync,'cudaMemcpyAsync',\
         cuLaunchGrid,'cuLaunchGrid'



; C runtime import: binds the label `sprintf` to the export named 'sprintf'
; of the library registered under the `msvcrt` label.
import msvcrt, sprintf,'sprintf'
