flat assembler
Message board for the users of flat assembler.

Index > Tutorials and Examples > CRC-32

Author
Thread Post new topic Reply to topic
Ali.Z



Joined: 08 Jan 2018
Posts: 718
Ali.Z 10 Jul 2020, 00:39
here is a stripped out version of my CRC-32, do not be confused with other CRC-32 variants; this is a plain CRC-32.

i tried to write some comments to clear out things, and used different names for labels and data to make it simper to read.

Code:
  include 'win32a.inc'
  format PE GUI 4.0

section '.text' code readable executable

    mov                 ebx,crc32_table ; base address
    xor                 ecx,ecx ; index used to update table entries
begin:
    mov                 eax,ecx
    and                 edx,0 ; inner loop counter
inner_loop:
    cmp                 dl,8
    jz                  update_table
    test                al,1 ; check for odd number
    jz                  shr_and_loop_back
    shr                 eax,1
    xor                 eax,0EDB88320h ; reversed (lil endian)
    inc                 dl
    jmp                 inner_loop
shr_and_loop_back:
    shr                 eax,1
    inc                 dl
    jmp                 inner_loop
update_table:
    mov                 [ebx+ecx*4],eax
    inc                 ecx
    or                  ch,ch
    jz                  begin
finish: ; as of here, we have full table ... the rest of the code is clear i think
    xor                 edx,edx
    and                 eax,edx
    not                 edx
    mov                 ecx,[input_length]
    mov                 esi,input_data
calc_crc32_checksum:
    lodsb
    xor                 al,dl
    movzx               edi,al
    mov                 eax,[ebx+edi*4]
    shr                 edx,8
    xor                 edx,eax
    loop                calc_crc32_checksum
    not                 edx
    push                10h
    push                crc32_checksum
    push                edx
    call                [itoa]
    add                 esp,0Ch
    push                0
    push                eax
    push                0
    push                0
    call                [MessageBox]
    sub                 eax,eax
    ret
    int3

section '.data' data readable writeable

  crc32_table rd 400h
  input_length dd 4
  input_data db 'fasm',0
  crc32_checksum rb 9


section '.idata' import data readable

  library user32,'user32.dll',\
          ntdll,'ntdll.dll'

  import user32,\
         MessageBox,'MessageBoxA'

  import ntdll,\
         itoa,'_itoa'

section '.reloc' fixups data readable discardable
    

_________________
Asm For Wise Humans
Post 10 Jul 2020, 00:39
View user's profile Send private message Reply with quote
bzt



Joined: 09 Nov 2018
Posts: 77
bzt 22 Dec 2020, 14:50
Here's how I do it (CRC-32 with Castagnoli polynom and pre-calculated lookup table):
Code:
; --- CRC-32c ---
            ; IN: esi = buffer, ecx = length
            ; OUT: edx = crc
crc32_calc: xor         edx, edx
            xor         eax, eax
            xor         ebx, ebx
.next:      lodsb
            mov         bl, dl
            xor         bl, al
            mov         eax, edx
            shr         edx, 8
            xor         edx, dword [crclkp+4*ebx]
            dec         ecx
            jnz         .next
            ret

crclkp:     dd          000000000h, 0F26B8303h, 0E13B70F7h, 01350F3F4h, 0C79A971Fh, 035F1141Ch, 026A1E7E8h, 0D4CA64EBh
            dd          08AD958CFh, 078B2DBCCh, 06BE22838h, 09989AB3Bh, 04D43CFD0h, 0BF284CD3h, 0AC78BF27h, 05E133C24h
            dd          0105EC76Fh, 0E235446Ch, 0F165B798h, 0030E349Bh, 0D7C45070h, 025AFD373h, 036FF2087h, 0C494A384h
            dd          09A879FA0h, 068EC1CA3h, 07BBCEF57h, 089D76C54h, 05D1D08BFh, 0AF768BBCh, 0BC267848h, 04E4DFB4Bh
            dd          020BD8EDEh, 0D2D60DDDh, 0C186FE29h, 033ED7D2Ah, 0E72719C1h, 0154C9AC2h, 0061C6936h, 0F477EA35h
            dd          0AA64D611h, 0580F5512h, 04B5FA6E6h, 0B93425E5h, 06DFE410Eh, 09F95C20Dh, 08CC531F9h, 07EAEB2FAh
            dd          030E349B1h, 0C288CAB2h, 0D1D83946h, 023B3BA45h, 0F779DEAEh, 005125DADh, 01642AE59h, 0E4292D5Ah
            dd          0BA3A117Eh, 04851927Dh, 05B016189h, 0A96AE28Ah, 07DA08661h, 08FCB0562h, 09C9BF696h, 06EF07595h
            dd          0417B1DBCh, 0B3109EBFh, 0A0406D4Bh, 0522BEE48h, 086E18AA3h, 0748A09A0h, 067DAFA54h, 095B17957h
            dd          0CBA24573h, 039C9C670h, 02A993584h, 0D8F2B687h, 00C38D26Ch, 0FE53516Fh, 0ED03A29Bh, 01F682198h
            dd          05125DAD3h, 0A34E59D0h, 0B01EAA24h, 042752927h, 096BF4DCCh, 064D4CECFh, 077843D3Bh, 085EFBE38h
            dd          0DBFC821Ch, 02997011Fh, 03AC7F2EBh, 0C8AC71E8h, 01C661503h, 0EE0D9600h, 0FD5D65F4h, 00F36E6F7h
            dd          061C69362h, 093AD1061h, 080FDE395h, 072966096h, 0A65C047Dh, 05437877Eh, 04767748Ah, 0B50CF789h
            dd          0EB1FCBADh, 0197448AEh, 00A24BB5Ah, 0F84F3859h, 02C855CB2h, 0DEEEDFB1h, 0CDBE2C45h, 03FD5AF46h
            dd          07198540Dh, 083F3D70Eh, 090A324FAh, 062C8A7F9h, 0B602C312h, 044694011h, 05739B3E5h, 0A55230E6h
            dd          0FB410CC2h, 0092A8FC1h, 01A7A7C35h, 0E811FF36h, 03CDB9BDDh, 0CEB018DEh, 0DDE0EB2Ah, 02F8B6829h
            dd          082F63B78h, 0709DB87Bh, 063CD4B8Fh, 091A6C88Ch, 0456CAC67h, 0B7072F64h, 0A457DC90h, 0563C5F93h
            dd          0082F63B7h, 0FA44E0B4h, 0E9141340h, 01B7F9043h, 0CFB5F4A8h, 03DDE77ABh, 02E8E845Fh, 0DCE5075Ch
            dd          092A8FC17h, 060C37F14h, 073938CE0h, 081F80FE3h, 055326B08h, 0A759E80Bh, 0B4091BFFh, 0466298FCh
            dd          01871A4D8h, 0EA1A27DBh, 0F94AD42Fh, 00B21572Ch, 0DFEB33C7h, 02D80B0C4h, 03ED04330h, 0CCBBC033h
            dd          0A24BB5A6h, 0502036A5h, 04370C551h, 0B11B4652h, 065D122B9h, 097BAA1BAh, 084EA524Eh, 07681D14Dh
            dd          02892ED69h, 0DAF96E6Ah, 0C9A99D9Eh, 03BC21E9Dh, 0EF087A76h, 01D63F975h, 00E330A81h, 0FC588982h
            dd          0B21572C9h, 0407EF1CAh, 0532E023Eh, 0A145813Dh, 0758FE5D6h, 087E466D5h, 094B49521h, 066DF1622h
            dd          038CC2A06h, 0CAA7A905h, 0D9F75AF1h, 02B9CD9F2h, 0FF56BD19h, 00D3D3E1Ah, 01E6DCDEEh, 0EC064EEDh
            dd          0C38D26C4h, 031E6A5C7h, 022B65633h, 0D0DDD530h, 00417B1DBh, 0F67C32D8h, 0E52CC12Ch, 01747422Fh
            dd          049547E0Bh, 0BB3FFD08h, 0A86F0EFCh, 05A048DFFh, 08ECEE914h, 07CA56A17h, 06FF599E3h, 09D9E1AE0h
            dd          0D3D3E1ABh, 021B862A8h, 032E8915Ch, 0C083125Fh, 0144976B4h, 0E622F5B7h, 0F5720643h, 007198540h
            dd          0590AB964h, 0AB613A67h, 0B831C993h, 04A5A4A90h, 09E902E7Bh, 06CFBAD78h, 07FAB5E8Ch, 08DC0DD8Fh
            dd          0E330A81Ah, 0115B2B19h, 0020BD8EDh, 0F0605BEEh, 024AA3F05h, 0D6C1BC06h, 0C5914FF2h, 037FACCF1h
            dd          069E9F0D5h, 09B8273D6h, 088D28022h, 07AB90321h, 0AE7367CAh, 05C18E4C9h, 04F48173Dh, 0BD23943Eh
            dd          0F36E6F75h, 00105EC76h, 012551F82h, 0E03E9C81h, 034F4F86Ah, 0C69F7B69h, 0D5CF889Dh, 027A40B9Eh
            dd          079B737BAh, 08BDCB4B9h, 0988C474Dh, 06AE7C44Eh, 0BE2DA0A5h, 04C4623A6h, 05F16D052h, 0AD7D5351h
    

Note: this is the CRC that might have hardware implementation on Intel CPUs. Used mostly in networking. For the "standard" CRC, also called ANSI CRC (used by GPT, png and gzip for example) you'll have to XOR the checksum before and after the loop with 0FFFFFFFFh and the lookup table is different of course.

Cheers,
bzt
Post 22 Dec 2020, 14:50
View user's profile Send private message Reply with quote
macomics



Joined: 26 Jan 2021
Posts: 957
Location: Russia
macomics 21 Aug 2021, 19:59
Or you can do it like this.
Code:
; IN EAX=Current CRC32 xor new_byte
; IN EBX=HASH code (0EDB88320h)
CharCRC32:
 repeat 8
  shr eax, 1
  sbb edx, edx
  and edx, ebx
  xor eax, eax
 end repeat
 retn
    


Code:
; IN EBX = HASH code (0EDB88320h)
; IN EDI = TABLE buffer
CRC32Table:
 xor ecx, ecx
 @@:
  mov eax, ecx
  call CharCRC32
  mov [edi + ecx * 4], eax
  inc cl
  jnz @b
 retn
    


Code:
; IN EBX = TABLE buffer
; IN EAX = Prev CRC32 value (start 0)
; IN ESI = Buffer
; IN ECX = Length
; OUT EAX = CRC32
 CRC32ByTable:
 mov edx, eax
 not edx
 @@:
  movzx eax, dl
  xor eax, byte [esi]
  shr edx, 8
  inc esi
  xor edx, [ebx + eax * 4]
 loop @b
 not edx
 mov eax, edx
 retn
    


Code:
; IN EBX = HASH code (0EDB88320h)
; IN EAX = Prev CRC32 value (start 0)
; IN ESI = Buffer
; IN ECX = Length
; OUT EAX = CRC32
CRC32OnTheFly:
 not eax
 @@:
  xor al, byte [esi]
  call CharCRC32
  inc esi
 loop @b
 not eax
 retn
    

I typed in the browser - sorry for the typos
Post 21 Aug 2021, 19:59
View user's profile Send private message Reply with quote
sylware



Joined: 23 Oct 2020
Posts: 440
Location: Marseille/France
sylware 22 Aug 2021, 13:33
It would be interesting to get some zlib crc32 implementations. There is a sse accelerated implementation one in linux (modulo the xor 0xffffffff stuff).
I am looking for an AVX2 (256bits) accelerated one, if it has some meaning (on ymm regs instead of xmm regs), I plan to have a look at zlib-ng, but it uses abominations which are the gcc intrinsics.
Post 22 Aug 2021, 13:33
View user's profile Send private message Reply with quote
macomics



Joined: 26 Jan 2021
Posts: 957
Location: Russia
macomics 22 Aug 2021, 16:40
On processors with SSE4.(2), there is a built-in CRC32 command (See Intel SDM Vol 2A 3-329)
Post 22 Aug 2021, 16:40
View user's profile Send private message Reply with quote
sylware



Joined: 23 Oct 2020
Posts: 440
Location: Marseille/France
sylware 22 Aug 2021, 17:51
internet says intel "crc32" is actually crc32c which is not zlib crc32. I recall a post from adler himself.
Post 22 Aug 2021, 17:51
View user's profile Send private message Reply with quote
macomics



Joined: 26 Jan 2021
Posts: 957
Location: Russia
macomics 22 Aug 2021, 21:43
sylware wrote:
internet says intel "crc32" is actually crc32c which is not zlib crc32. I recall a post from adler himself.
They differ only by a polynomial. If there is no need for compatibility with other zlib implementations, then CRC32 is fully suitable with the polynomial proposed by Intel. Although they could make the command more flexible Sad
Post 22 Aug 2021, 21:43
View user's profile Send private message Reply with quote
sylware



Joined: 23 Oct 2020
Posts: 440
Location: Marseille/France
sylware 22 Aug 2021, 23:15
that's why I was asking about an AVX2 implementation of zlib crc32.
Post 22 Aug 2021, 23:15
View user's profile Send private message Reply with quote
bzt



Joined: 09 Nov 2018
Posts: 77
bzt 01 Sep 2021, 15:47
macomics wrote:
sylware wrote:
internet says intel "crc32" is actually crc32c which is not zlib crc32. I recall a post from adler himself.
They differ only by a polynomial.
Nope, not just in the polynomial (aka. lookup table), there are additional operations required. To modify my previous CRC-32c example to make it ANSI CRC, you'd need
Code:
; --- CRC-32 (ANSI, like in zlib) ---
            ; IN: esi = buffer, ecx = length
            ; OUT: edx = crc
crc32_calc: xor         edx, edx
            xor         eax, eax
            xor         ebx, ebx
            dec         edx                  ; <- crc value starts with 0FFFFFFFFh and not 0
.next:      lodsb
            mov         bl, dl
            xor         bl, al
            mov         eax, edx
            shr         edx, 8
            xor         edx, dword [crclkp+4*ebx]
            dec         ecx
            jnz         .next
            xor         edx, 0FFFFFFFFh      ; <- you must also invert bits after the calculation
            ret

; ...and the lookup table is entirely different of course
crclkp:
    db  000000000h, 077073096h, 0ee0e612ch, 0990951bah, 0076dc419h, 0706af48fh, 0e963a535h, 09e6495a3h, 00edb8832h
    db  079dcb8a4h, 0e0d5e91eh, 097d2d988h, 009b64c2bh, 07eb17cbdh, 0e7b82d07h, 090bf1d91h, 01db71064h, 06ab020f2h
    db  0f3b97148h, 084be41deh, 01adad47dh, 06ddde4ebh, 0f4d4b551h, 083d385c7h, 0136c9856h, 0646ba8c0h, 0fd62f97ah
    db  08a65c9ech, 014015c4fh, 063066cd9h, 0fa0f3d63h, 08d080df5h, 03b6e20c8h, 04c69105eh, 0d56041e4h, 0a2677172h
    db  03c03e4d1h, 04b04d447h, 0d20d85fdh, 0a50ab56bh, 035b5a8fah, 042b2986ch, 0dbbbc9d6h, 0acbcf940h, 032d86ce3h
    db  045df5c75h, 0dcd60dcfh, 0abd13d59h, 026d930ach, 051de003ah, 0c8d75180h, 0bfd06116h, 021b4f4b5h, 056b3c423h
    db  0cfba9599h, 0b8bda50fh, 02802b89eh, 05f058808h, 0c60cd9b2h, 0b10be924h, 02f6f7c87h, 058684c11h, 0c1611dabh
    db  0b6662d3dh, 076dc4190h, 001db7106h, 098d220bch, 0efd5102ah, 071b18589h, 006b6b51fh, 09fbfe4a5h, 0e8b8d433h
    db  07807c9a2h, 00f00f934h, 09609a88eh, 0e10e9818h, 07f6a0dbbh, 0086d3d2dh, 091646c97h, 0e6635c01h, 06b6b51f4h
    db  01c6c6162h, 0856530d8h, 0f262004eh, 06c0695edh, 01b01a57bh, 08208f4c1h, 0f50fc457h, 065b0d9c6h, 012b7e950h
    db  08bbeb8eah, 0fcb9887ch, 062dd1ddfh, 015da2d49h, 08cd37cf3h, 0fbd44c65h, 04db26158h, 03ab551ceh, 0a3bc0074h
    db  0d4bb30e2h, 04adfa541h, 03dd895d7h, 0a4d1c46dh, 0d3d6f4fbh, 04369e96ah, 0346ed9fch, 0ad678846h, 0da60b8d0h
    db  044042d73h, 033031de5h, 0aa0a4c5fh, 0dd0d7cc9h, 05005713ch, 0270241aah, 0be0b1010h, 0c90c2086h, 05768b525h
    db  0206f85b3h, 0b966d409h, 0ce61e49fh, 05edef90eh, 029d9c998h, 0b0d09822h, 0c7d7a8b4h, 059b33d17h, 02eb40d81h
    db  0b7bd5c3bh, 0c0ba6cadh, 0edb88320h, 09abfb3b6h, 003b6e20ch, 074b1d29ah, 0ead54739h, 09dd277afh, 004db2615h
    db  073dc1683h, 0e3630b12h, 094643b84h, 00d6d6a3eh, 07a6a5aa8h, 0e40ecf0bh, 09309ff9dh, 00a00ae27h, 07d079eb1h
    db  0f00f9344h, 08708a3d2h, 01e01f268h, 06906c2feh, 0f762575dh, 0806567cbh, 0196c3671h, 06e6b06e7h, 0fed41b76h
    db  089d32be0h, 010da7a5ah, 067dd4acch, 0f9b9df6fh, 08ebeeff9h, 017b7be43h, 060b08ed5h, 0d6d6a3e8h, 0a1d1937eh
    db  038d8c2c4h, 04fdff252h, 0d1bb67f1h, 0a6bc5767h, 03fb506ddh, 048b2364bh, 0d80d2bdah, 0af0a1b4ch, 036034af6h
    db  041047a60h, 0df60efc3h, 0a867df55h, 0316e8eefh, 04669be79h, 0cb61b38ch, 0bc66831ah, 0256fd2a0h, 05268e236h
    db  0cc0c7795h, 0bb0b4703h, 0220216b9h, 05505262fh, 0c5ba3bbeh, 0b2bd0b28h, 02bb45a92h, 05cb36a04h, 0c2d7ffa7h
    db  0b5d0cf31h, 02cd99e8bh, 05bdeae1dh, 09b64c2b0h, 0ec63f226h, 0756aa39ch, 0026d930ah, 09c0906a9h, 0eb0e363fh
    db  072076785h, 005005713h, 095bf4a82h, 0e2b87a14h, 07bb12baeh, 00cb61b38h, 092d28e9bh, 0e5d5be0dh, 07cdcefb7h
    db  00bdbdf21h, 086d3d2d4h, 0f1d4e242h, 068ddb3f8h, 01fda836eh, 081be16cdh, 0f6b9265bh, 06fb077e1h, 018b74777h
    db  088085ae6h, 0ff0f6a70h, 066063bcah, 011010b5ch, 08f659effh, 0f862ae69h, 0616bffd3h, 0166ccf45h, 0a00ae278h
    db  0d70dd2eeh, 04e048354h, 03903b3c2h, 0a7672661h, 0d06016f7h, 04969474dh, 03e6e77dbh, 0aed16a4ah, 0d9d65adch
    db  040df0b66h, 037d83bf0h, 0a9bcae53h, 0debb9ec5h, 047b2cf7fh, 030b5ffe9h, 0bdbdf21ch, 0cabac28ah, 053b39330h
    db  024b4a3a6h, 0bad03605h, 0cdd70693h, 054de5729h, 023d967bfh, 0b3667a2eh, 0c4614ab8h, 05d681b02h, 02a6f2b94h
    db  0b40bbe37h, 0c30c8ea1h, 05a05df1bh, 02d02ef8dh
    

sylware wrote:
that's why I was asking about an AVX2 implementation of zlib crc32.
I'm sorry, there's no AVX accelerated Open Source ANSI CRC implementation that I know of. But using my code as skeleton it shouldn't be hard to apply the same operations on SIMD registers.
Otherwise use "gcc -S zlib/crc32_simd.c" to convert the intrinsics into Assembly, but remember it's going to be AT&T syntax, not fasm's (source and destination arguments swapped).

Cheers,
bzt
Post 01 Sep 2021, 15:47
View user's profile Send private message Reply with quote
bitRAKE



Joined: 21 Jul 2003
Posts: 4043
Location: vpcmpistri
bitRAKE 01 Sep 2021, 20:06
Very Happy

https://gcc.godbolt.org/z/edobc3GYY
(I think hand coded asm could be much smaller.)

The Paper is a good example of Barrett reduction method - widely applicable in GF(2).
Post 01 Sep 2021, 20:06
View user's profile Send private message Visit poster's website Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  


< Last Thread | Next Thread >
Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum


Copyright © 1999-2024, Tomasz Grysztar. Also on GitHub, YouTube.

Website powered by rwasa.