flat assembler
Message board for the users of flat assembler.

Index > Projects and Ideas > Create ai layers automatically.

Author
Thread Post new topic Reply to topic
Roman



Joined: 21 Apr 2012
Posts: 1866
Roman 04 Feb 2025, 11:47
Today AI is becoming popular and ubiquitous.
We need to create our own AI.
It would be very interesting to create AI layers automatically for different tasks.

I found many AI models written in Python, but none in fasm or masm.
Post 04 Feb 2025, 11:47
View user's profile Send private message Reply with quote
Roman



Joined: 21 Apr 2012
Posts: 1866
Roman 05 Feb 2025, 16:48
Post 05 Feb 2025, 16:48
View user's profile Send private message Reply with quote
Roman



Joined: 21 Apr 2012
Posts: 1866
Roman 05 Feb 2025, 18:15
Post 05 Feb 2025, 18:15
View user's profile Send private message Reply with quote
Roman



Joined: 21 Apr 2012
Posts: 1866
Roman 07 Feb 2025, 14:45
Has anyone tried to install cuDNN 8.8?
I tried to download it, but access is denied on the official Nvidia site.
Post 07 Feb 2025, 14:45
View user's profile Send private message Reply with quote
bitRAKE



Joined: 21 Jul 2003
Posts: 4116
Location: vpcmpistri
bitRAKE 07 Feb 2025, 15:06
If this URL is blocked, then perhaps bounce through a redirector?
https://developer.download.nvidia.com/compute/cudnn/redist/

_________________
¯\(°_o)/¯ “languages are not safe - uses can be” Bjarne Stroustrup
Post 07 Feb 2025, 15:06
View user's profile Send private message Visit poster's website Reply with quote
Roman



Joined: 21 Apr 2012
Posts: 1866
Roman 07 Feb 2025, 17:57
cuDNN 9.7.1
8 DLLs, 1 gigabyte! They are 64-bit; I did not find 32-bit versions.

I found these cuDNN DLLs, but they are not from the official site.
https://huggingface.co/MonsterMMORPG/94_CUDA_Fix/tree/main
https://www.opendll.com/index.php?file-download=cudnn64_8.dll&arch=64bit&version=6.14.11.6050
Post 07 Feb 2025, 17:57
View user's profile Send private message Reply with quote
Roman



Joined: 21 Apr 2012
Posts: 1866
Roman 08 Feb 2025, 04:47
Code:
; Output format and entry point: PE64 GUI 5.0 image, execution begins at `start`.
format PE64 GUI 5.0
entry start
;https://github.com/ggerganov/whisper.cpp/blob/master/src/whisper-mel-cuda.cu
include 'win64a.inc'

; cuDNN enumeration values passed to the cudnn* calls in this program.
CUDNN_STATUS_SUCCESS = 0
CUDNN_POOLING_MAX = 0
CUDNN_NOT_PROPAGATE_NAN = 0
CUDNN_TENSOR_NCHW = 0
; Element type given to both cudnnSetTensor4dDescriptor calls.
; 0 is the same value as CUDNN_DATA_FLOAT defined just below.
CUDNN_DTYPE = 0
; Data type codes. Values 5, 7 and 8 are kept only as comments
; (marked deprecated) so the numbering gaps are visible.
CUDNN_DATA_FLOAT                         = 0
CUDNN_DATA_DOUBLE                        = 1
CUDNN_DATA_HALF                          = 2
CUDNN_DATA_INT8                          = 3
CUDNN_DATA_INT32                         = 4
;CUDNN_DATA_INT8x4 CUDNN_DEPRECATED_ENUM  = 5
CUDNN_DATA_UINT8                         = 6
;CUDNN_DATA_UINT8x4 CUDNN_DEPRECATED_ENUM = 7
;CUDNN_DATA_INT8x32 CUDNN_DEPRECATED_ENUM = 8
CUDNN_DATA_BFLOAT16                      = 9
CUDNN_DATA_INT64                         = 10
CUDNN_DATA_BOOLEAN                       = 11
CUDNN_DATA_FP8_E4M3                      = 12
CUDNN_DATA_FP8_E5M2                      = 13
CUDNN_DATA_FAST_FLOAT_FOR_FP8            = 14
CUDNN_DATA_FP8_E8M0                      = 15
CUDNN_DATA_FP4_E2M1                      = 16

; Copy-direction values passed as the last argument of cudaMemcpy.
; Only HostToDevice and DeviceToHost are used by this program.
cudaMemcpyHostToHost = 0
cudaMemcpyHostToDevice = 1
cudaMemcpyDeviceToHost = 2
cudaMemcpyDeviceToDevice = 3
cudaMemcpyDefault = 4
; ifcuError - place directly after a cudnn* call.
; Tests the status left in eax: zero falls through, any other value calls
; GetcuError and then continues with the following code (it does not abort).
; Modifies the flags.
; The skip target is a macro-local `..` label instead of an anonymous @@,
; so expanding this macro between an @@ label and a @b/@f reference
; elsewhere no longer retargets that reference, and the `..` prefix keeps
; the label from changing the scope of surrounding .local labels.
macro ifcuError
{
        local   ..status_ok
        test    eax,eax
        jz      ..status_ok
        call    GetcuError
    ..status_ok:
}
section '.text' code readable executable
  ; GetcuError - show the description of a cuDNN status code in a message box.
  ; In:  eax = status value returned by the failing cudnn* call
  ;      (ifcuError calls here with that value still in eax).
  ; Out: returns to the caller once the box is dismissed; registers that the
  ;      called APIs may change are not preserved.
  ; Fix: cudnnGetErrorString takes the status code as its argument and
  ; returns a pointer to the description text. The earlier code passed the
  ; address of the Message buffer as the status and then displayed that
  ; buffer, which the call never fills.
  ; NOTE(review): assumes rsp is suitably aligned for the invoke calls when
  ; entered through `call` - confirm against the proc prologue actually emitted.
  proc  GetcuError
        invoke  cudnnGetErrorString,rax         ; status code in, text pointer out (rax)
        invoke  MessageBox,0,rax,'cu error:',0  ; display the returned text
        ret
  endp

  ; Program entry point (named by `entry start`).
  ; Sequence: create a cuDNN handle, a pooling descriptor (max pooling,
  ; 3x3 window, padding 0, stride 1) and two 4-D tensor descriptors
  ; (2x2x10x10 in, 2x2x8x8 out); copy `input` to the GPU; run
  ; cudnnPoolingForward and show the first 32 results; run
  ; cudnnPoolingBackward and show the first 32 results; release everything
  ; and exit. Each cudnn* status is checked with ifcuError, which reports
  ; the error and then lets execution continue. The return values of the
  ; cuda* runtime calls are not checked.
  start:
        ; NOTE(review): presumably here to bring rsp to 16-byte alignment
        ; for the invoke calls - confirm.
        push  rbp
        ; NOTE(review): loads the address of label `enn`; nothing reads eax
        ; before the next call - looks like a debugging leftover.
        mov eax,enn


        ; cudnnCreate receives the address of cuDNNHndl and stores the handle there.
        invoke  cudnnCreate,cuDNNHndl
        ifcuError

        ; The result of this call is not used and the status check below is
        ; deliberately commented out.
        invoke  cudnnGetCudartVersion
        ;ifcuError

        invoke  cudnnCreatePoolingDescriptor,pooling_desc
        ifcuError

        ; Arguments after the NaN option: 3,3 / 0,0 / 1,1
        ; (presumably window h,w / padding h,w / stride h,w - confirm against the cuDNN reference).
        invoke  cudnnSetPooling2dDescriptor,[pooling_desc],CUDNN_POOLING_MAX,\
                CUDNN_NOT_PROPAGATE_NAN,3,3,0,0,1,1
        ifcuError

        ; Input tensor descriptor: NCHW layout, CUDNN_DTYPE elements, dims 2,2,10,10.
        invoke  cudnnCreateTensorDescriptor,in_desc
        ifcuError

        invoke  cudnnSetTensor4dDescriptor,[in_desc],CUDNN_TENSOR_NCHW,\
                CUDNN_DTYPE,2,2,10,10
        ifcuError

        ; Output tensor descriptor: same layout and type, dims 2,2,8,8.
        invoke   cudnnCreateTensorDescriptor,out_desc
        ifcuError

        invoke  cudnnSetTensor4dDescriptor,[out_desc],CUDNN_TENSOR_NCHW,\
                CUDNN_DTYPE,2,2,8,8
        ifcuError

; GPU memory allocation. Sizes are element count * 4 bytes per element.
        IN_DATA_BYTES  = 2*2*10*10*4
        OUT_DATA_BYTES = 2*2*8*8*4
        invoke cudaMalloc,in_data,IN_DATA_BYTES
        invoke cudaMalloc,out_data,OUT_DATA_BYTES

        ; Upload the host array `input` and zero the output buffer.
        invoke cudaMemcpy,[in_data],input,IN_DATA_BYTES,cudaMemcpyHostToDevice
        invoke cudaMemset,[out_data],0,OUT_DATA_BYTES

        ; Forward pooling: in_data (in_desc) -> out_data (out_desc),
        ; with scaling factors read from `alpha` and `beta`.
        invoke  cudnnPoolingForward,[cuDNNHndl],[pooling_desc],alpha,[in_desc],[in_data],\
                beta,[out_desc],[out_data]
        ifcuError

; Copy the forward result back into the host buffer `result`.
        invoke  cudaMemcpy,result,[out_data],OUT_DATA_BYTES,cudaMemcpyDeviceToHost

        ; printFlts takes the number of floats to show in ebx.
        mov     ebx,32
        call    printFlts

; Backward pass. `enn` is referenced only by the `mov eax,enn` near the top.
enn:
        invoke cudaMalloc,in_grad,IN_DATA_BYTES

        invoke cudaMemset,[in_grad],0,IN_DATA_BYTES

        ; out_desc/out_data are passed for both the 4th-5th and the 6th-7th
        ; arguments, so the forward output is also used as the incoming
        ; gradient. The result is written to in_grad.
        invoke cudnnPoolingBackward,[cuDNNHndl],[pooling_desc],alpha,[out_desc],[out_data],\
               [out_desc],[out_data],[in_desc],[in_data],beta,[in_desc],[in_grad]
        ifcuError

        ; Copy the whole input gradient to `result`; only the first 32 values are shown.
        invoke  cudaMemcpy,result,[in_grad],IN_DATA_BYTES,cudaMemcpyDeviceToHost

        mov     ebx,32
        call    printFlts
; Cleanup: free GPU buffers, destroy descriptors and the handle, then exit.
        invoke  cudaFree,[in_data]
        invoke  cudaFree,[in_grad]
        invoke  cudaFree,[out_data]


        invoke  cudnnDestroyTensorDescriptor,[in_desc]
        invoke  cudnnDestroyTensorDescriptor,[out_desc]
        invoke  cudnnDestroyPoolingDescriptor,[pooling_desc]

        invoke  cudnnDestroy,[cuDNNHndl]

        ; Final notice, then terminate the process; control never returns from here.
        invoke  MessageBox,0,'cuDNN init !','Exit',0
        invoke  ExitProcess,0

; nxtLine ptr - store one carriage-return byte (13) at [ptr], then advance
; ptr by one. `ptr` must be a register usable both as an address and as
; the operand of inc.
macro nxtLine ptr
{
        mov     byte [ptr],13
        inc     ptr
}

; printFlts - format floats from `result` as text in Temp and show them in
; a message box titled 'out:'.
; In:  ebx = number of 32-bit floats to print, read from the start of `result`.
; Out: nothing. ebx, esi, edi, xmm1 and any registers changed by the called
;      APIs are modified.
; Each value is written with MessageFormat and followed by one CR byte.
; There is no bounds check against the size of Temp, so the caller must keep
; ebx small enough for the formatted text to fit.
; Fixes relative to the earlier version:
;  - The text is now explicitly zero-terminated after the last entry. The CR
;    stored by nxtLine overwrites sprintf's terminator, so a later call whose
;    output was shorter than an earlier one displayed stale text left in Temp.
;  - ebx = 0 now shows an empty box instead of wrapping the counter.
; NOTE(review): assumes rsp is suitably aligned for the invoke calls when
; entered through `call` - confirm against the proc prologue actually emitted.
proc printFlts
        mov     edi,result              ; edi = next float to read
        mov     esi,Temp                ; esi = next free byte of the text buffer
        ;mov ebx,24
        test    ebx,ebx
        jz      .show                   ; nothing to format
.up:
        cvtss2sd xmm1,[edi]             ; widen the float to a double
        movq    rax,xmm1                ; pass the double's bits as an integer argument
                                        ; (presumably because sprintf is variadic - confirm)
        invoke  sprintf,rsi,MessageFormat,rax
        add     esi,eax                 ; eax = characters written; step past them
        nxtLine esi                     ; replace the terminator with CR and advance
        add     edi,4
        dec     ebx
        jnz     .up
.show:
        mov     byte [esi],0            ; terminate the text after the last entry
        invoke  MessageBox,0,Temp,'out:',0
        ret
endp



; Initialized data: format string, pooling scale factors, and the slots
; that receive GPU pointers and cuDNN handles/descriptors.
section '.data' data readable writeable
        ; sprintf format used by printFlts for each value.
        MessageFormat db '%1.6f;;',0
        ; Scaling factors passed by address to cudnnPoolingForward/Backward.
        alpha             dd 1.0
        beta              dd 0          ; integer 0 has the same bit pattern as 0.0
        ; Device pointers written by cudaMalloc.
        in_data           dq 0
        out_data          dq 0
        ; Handle and descriptors written by the cudnnCreate* calls.
        cuDNNHndl         dq 0
        pooling_desc      dq 0
        in_desc           dq 0
        out_desc          dq 0
        ; Device pointer for the backward-pass result, written by cudaMalloc.
        in_grad           dq 0
        ; input.txt must define the label `input` followed by 400 dword
        ; floats (IN_DATA_BYTES = 1600 bytes are copied from it),
        ; e.g.  input dd 400 dup(2.0)
        include 'input.txt'

; Uninitialized buffers.
section '.bss' readable writeable

  ; 50*600 = 30000 bytes reserved for message text.
  Message rb 50*600
  ; 40000 dwords: host-side destination of the device-to-host copies and
  ; the array printFlts reads from.
  result  rd 40000
  ; 6400-byte text buffer that printFlts fills and displays.
  Temp    rb 6400

section '.idata' import data readable writeable

  library kernel32,'KERNEL32.DLL',\
          user32,'USER32.DLL',\
          cudart,'cudart64_101.dll',\
          msvcrt,'MSVCRT.DLL',\
          cuDNN1,'cudnn64_9.dll' 
          

  include 'api\kernel32.inc'
  include 'api\user32.inc'
  
  import cudart,\
          cudaMalloc,'cudaMalloc',\
          cudaMemcpy,'cudaMemcpy',\
          cudaMemset,'cudaMemset',\
          cudaFree,'cudaFree',\
          cudaStreamCreate,'cudaStreamCreate'

  import cuDNN1,\
         cudnnTransformFilter,'cudnnTransformFilter',\
         cudnnSoftmaxForward,'cudnnSoftmaxForward',\
         cudnnGetRNNForwardTrainingAlgorithmMaxCount,'cudnnGetRNNForwardTrainingAlgorithmMaxCount',\
         cudnnSetFilter4dDescriptor,'cudnnSetFilter4dDescriptor',\
         cudnnGetFilter4dDescriptor,'cudnnGetFilter4dDescriptor',\
         cudnnSetFilterNdDescriptor,'cudnnSetFilterNdDescriptor',\
         cudnnGetFilterNdDescriptor,'cudnnGetFilterNdDescriptor',\
         cudnnSetTensor4dDescriptorEx,'cudnnSetTensor4dDescriptorEx',\
         cudnnSetTensorNdDescriptor,'cudnnSetTensorNdDescriptor',\
         cudnnGetCudartVersion,'cudnnGetCudartVersion',\
         cudnnDestroyPoolingDescriptor,'cudnnDestroyPoolingDescriptor',\
         cudnnDestroyTensorDescriptor,'cudnnDestroyTensorDescriptor',\
         cudnnPoolingBackward,'cudnnPoolingBackward',\
         cudnnPoolingForward,'cudnnPoolingForward',\
         cudnnSetTensor4dDescriptor,'cudnnSetTensor4dDescriptor',\
         cudnnCreateTensorDescriptor,'cudnnCreateTensorDescriptor',\
         cudnnSetPooling2dDescriptor,'cudnnSetPooling2dDescriptor',\
         cudnnCreatePoolingDescriptor,'cudnnCreatePoolingDescriptor',\
         cudnnGetErrorString,'cudnnGetErrorString',\
         cudnnCreate,'cudnnCreate',\
         cudnnDestroy,'cudnnDestroy'



import msvcrt,\
        sprintf,'sprintf'

    
Post 08 Feb 2025, 04:47
View user's profile Send private message Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  


< Last Thread | Next Thread >
Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum


Copyright © 1999-2025, Tomasz Grysztar. Also on GitHub, YouTube.

Website powered by rwasa.