flat assembler
Message board for the users of flat assembler.

Index > Projects and Ideas > approximate textual comparison (algo)

Author
Thread Post new topic Reply to topic
ProMiNick



Joined: 24 Mar 2012
Posts: 820
Location: Russian Federation, Sochi
ProMiNick 11 Jul 2025, 14:08
preambula approximate comparison present for comparison numbers but absent in convinient form for comparison text.

tester wrapper, not demo of working algorithm, but algo result viewer:
Code:
format PE GUI 4.0
entry start

include 'win32a.inc'

section '.text' code readable executable

  start: PURE_ASM
        mov     esi,str1
        mov     edi,str2
        invoke  LenMinusCountExactOrderOnes
        ;mov     eax,5483345 ;test i2a
        ;mov     eax,[debug1]
        ;mov     eax,[debug2]
        ;mov     eax,[eax]
        xor     ecx,ecx
        mov     ebx,10
      @@:
        xor     edx,edx
        div     ebx
        add     dl,"0"
        mov     [ecx+_buffer+253],dl
        dec     ecx
        test    eax,eax
        jnz     @B
        add     ecx,_buffer+254
        invoke MessageBox,NULL,ecx,NULL,MB_ICONERROR+MB_OK
        invoke  ExitProcess,0
flush_locals

section '.data' data readable writeable
  str1 TCHAR '1234567904',0
  str2 TCHAR '3461735785481',0
  _buffer TCHAR[255]

section '.idata' import data readable writeable

  library kernel32,'KERNEL32.DLL',\
          user32,'USER32.DLL',\
          strhelp,'STRHELP.DLL'

  include 'api\kernel32.inc'
  include 'api\user32.inc'

  import strhelp,\
         LenMinusCountExactOrderOnes,'LenMinusCountExactOrderOnes',\
         debug1,'debug1',\
         debug2,'debug2'    
algo itself wrapped in dll with all possible comments
Code:
format PE GUI 4.0 DLL
entry DllEntryPoint

include 'win32a.inc'

section '.text' code readable executable

DllEntryPoint: RTL_C hinstDLL,fdwReason,lpvReserved
        mov     eax,TRUE
        stack_cleanup callee
flush_locals

GetStrLength: PURE_ASM
; string:                   [in]        edi
; string length:            [out]       ecx,zero-ender is counted
;                           [unchanged] ebx, edx, esi, edi, ebp, esp
        or      ecx, -1
        xor     eax, eax
        cld
        repne scasb
        not     ecx
        sub     edi,ecx
        stack_cleanup caller
flush_locals

twice_recurce_for_each: PURE_ASM
; string1:                  [in]        esi
; string2:                  [in]        edi
;                           [in]        ecx=0, that assumed and never tested, no effect for validity
; ordermatchedcount:        [out]       ecx, length of longest pattern with symbol order exact for both strings
;                           [unchanged] ebx, edx, ebp, esp

;if not string1 end
        mov     al,[esi]
        test    al,al
        je      .locret
;do loop
      @@:
;if not string2 end
        mov     ah,[edi]
        test    ah,ah
        je      .locret
;do compare chars
        cmp     al,ah
        jz      @F
        inc     edi
        jmp     @B
;until they not match
      @@:

;matched symbol operated in two ways:
;1) skip it - assume that longest pattern will be reached without it
;2) count it - assume that longest pattern will be reached with it

;common for both cases
        inc     esi             ;current string1 symbol is handled no matter it counted or skiped, go to next one

;case 1:
                                ;skipping of string1 symbol makes current string2 symbol unhandled
                                ;so it is still valid for further recursion matching
                                ;only needed preservation of string1 & string2 because their states trashed in recursion
        push    ecx
        push    esi
        push    edi
        call    twice_recurce_for_each
        pop     edi
        pop     esi

        xchg    ecx,[esp]       ; store result of case 1 calculation

;case 2:
        inc     edi             ;current string2 symbol is handled as matched to string1 symbol, go to next one for recursion matching
        call    twice_recurce_for_each
        inc     ecx             ;apply that matched symbol to recursion result

;find length of longest pattern within results of case 1(stack) & case 2(ecx)
        sub     ecx,[esp]       ; [esp]=MAX([esp],ecx)
        js      @F              ;
        add     [esp],ecx       ;
      @@:                       ;
        pop     ecx
        inc     [debug2]
;exit
      .locret:
        inc     [debug1]
        stack_cleanup caller
flush_locals

LenMinusCountExactOrderOnes: PURE_ASM
; string1:                  [in]        esi
; string2:                  [in]        edi
; ordermatchedcount:        [out]       ecx, length of longest pattern with symbol order exact for both strings
;                           [unchanged] ebx, edx, ebp, esp

;find length of string no matter which of them
        call    GetStrLength
        push    ecx             ; store that length

;if strings equal there is no unmatched symbols, so result is 0 such symbols
        push    esi
        push    edi
        repe cmpsb
        pop     edi
        pop     esi
        jne     .untrivial
        pop     ecx
        ;xor    eax,eax         ;correct value already there: GetStrLength make it. nothing change it
        jmp     .locret

;else find length of other string and than ...
      .untrivial:
        xchg    esi,edi
        call    GetStrLength
;find length of longest string within string2(stack) & string1(ecx)
        sub     ecx,[esp]       ; [esp]=MAX([esp],ecx)
        js      @F              ;
        add     [esp],ecx       ;
      @@:                       ;
        dec     dword[esp]      ;because zero-ender is counted, cut it off
;at here in stack longest length of string1 & string2 without zero-ender symbol

;calculate to ecx the length of longest pattern with symbol order exact for both strings
        xor     ecx,ecx
        call    twice_recurce_for_each
;and substract it from longest length of strings
        ;sub     [esp],ecx       ;comment both lines(no sub & no mov) - see length of longest string - calculation valid
        mov     [esp],ecx       ;(mov) - see only length of matched pattern,(sub & no mov) - see their difference
;correct value in stack, pop it
        pop     eax
      .locret:
        stack_cleanup caller
flush_locals

section '.data' data readable writeable

        debug1 dd 0
        debug2 dd 0

section '.edata' export data readable

  export 'STRHELP.DLL',\
         LenMinusCountExactOrderOnes,'LenMinusCountExactOrderOnes',\
         debug1,'debug1',\
         debug2,'debug2'

section '.reloc' fixups data readable discardable

  if $=$$
    dd 0,8              ; if there are no fixups, generate dummy entry
  end if    
largest matched pattern of 2 input strings '1234567904' & '3461735785481' is '34574' & '34674' (interpret them as patterns ")and length of maximum of it is 5, then largest length of input is length of '3461735785481' is 13. mismatch count is 13-5=8, algo works wrong & displays 14. for cases when srings equal algo successfully display 0 unmatched symblos.
To adapt code compilable on fasm original "PURE_ASM fix", "stack_cleanup fix", "caller fix 0", "callee fix 12", "flush_locals fix", "macro RTL_C arg& {}".

Error somewhere in twice_recurce_for_each. all the rest tested successfully.

_________________
I don`t like to refer by "you" to one person.
My soul requires acronim "thou" instead.
Post 11 Jul 2025, 14:08
View user's profile Send private message Send e-mail Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  


< Last Thread | Next Thread >
Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum


Copyright © 1999-2025, Tomasz Grysztar. Also on GitHub, YouTube.

Website powered by rwasa.