flat assembler
Message board for the users of flat assembler.

Index > Windows > [String algorithm]How to do string wildcard search/replaces?

Author
Thread Post new topic Reply to topic
wht36



Joined: 18 Sep 2005
Posts: 106
wht36
Hi,

Does anybody have a good idea/suggestion/algorithm/source for a string search and replace function that can accept wildcards (should at least be able to accept any combinations of the ? and * wildcards) ?

Definitions of ? and * (just my version Smile
? - any single letter
* - any string of zero or more letters

Below is a version I made long ago for NASM

Code:
;Call @Match to search in ES:EDI source for DS:ESI search string.
; EBX = end of source string. ECX = end of search string
; Search string = extended search string with each character having 4 bytes:
; Byte  1 exact character to match
;       2 if ? then ignore one character
;         if * then ignore zero or more characters
;         if non-zero character, then treat as alternate character to match
;       3 undefined
;       4 undefined

; CF clear if success. CF set if error
; EAX ?, ESI,EDI := where match stopped

; * = *.* / H*LO* = HELLO.8 / *HEL* = HELLO
; Algorithm:
; Load a char from source
; - if it's not a wildcard, do a normal comparison, and quit if fails
; - if it's '?' then move src & dest to next char and continue test
; - if it's '*', then keep loading a char from source 'til a non-wildcard,
;   then scan up to the full length of destination and quit if still no match.
;   If there is a match, continue the rest of the test and see if the test is
;   successful, if not, go get the next match, and do another test. This is
;   done until test is successful or there are no more matches and no more '*'

@Good_match
        cmp     esi,ecx
        jnb     @Match.ret      ;If the end of search string, can quit now
                                ; with CF clear (JNB = JNC)
@Match:
        lodsd                   ; load a char, SI:=next char
        cmp     ah,'*'
        je      .select_all?    ; if wild card '*' go chk if end of source

.scan_char                      ; Not '*' so match char
        scasb                   ; check for a match & point DI to next char
        je      @Good_match     ; if char of src = destination
        cmp     ah,'?'          ; if ? then okay to skip a char
        je      @Match
        mov     al,ah           ; if match failed, see if alternate char
        or      al,al
        jz      .err
        dec     edi             ; EDI => where match failed
        scasb
        je      @Good_match
.err    stc
.ret    ret                     ; quit

.skip   inc     edi             ; ? = skip one char of source string

.select_all?
        cmp     esi,ecx         ; if '*' = end of search string
        jnb     .all            ; then select remaining source string

.new_match
        ;begin with '*' already

        ;process any * or ? that may come after the first '*'
        lodsd                   ; load next char
        cmp     ah,'?'
        je      .skip           ; if '?' then skip a char
        cmp     ah,'*'
        je      .new_match      ; if '*' then ignore it

        ;scan for the non-wildcard char
.scan   cmp     edi,ebx
        jnb     .err
        scasb                   ; search whole string
        je      .test
        or      ah,ah
        jz      .scan
        dec     edi             ; EDI => where match failed
        xchg    al,ah           ; if match failed, see if alternate char
        scasb
        jne     .scan

.test
        push    edi
        push    esi             ; store current stats
        call    @Good_match     ; start new test, and also checking
                                ; if only has one non-wildcard char
        mov     eax,edi
        pop     esi
        pop     edi             ; restore stats
        lea     esi,[esi-4]     ; reload same char (last lodsd has inc SI)
                                ; does not change CF
        jc      .new_match      ;If previous test failed, try another test
        mov     edi,eax         ;If previous test was successful, then
        ret                     ; use previous test's EDI

.all    mov     edi,ebx
        ret
    


I'm sure there are better algorithms then this old recursive thing.... Smile
Post 22 Jul 2006, 12:20
View user's profile Send private message Reply with quote
Vasilev Vjacheslav



Joined: 11 Aug 2004
Posts: 392
Vasilev Vjacheslav
strange, but i also working on this problem and yesterday i wrote such algorithm Smile of course author right belongs to me

Code:
  szFind        db "DEST_STRING"
  szFindMask    db 0,1,0,0,0,0,0,0,0,0,0 ; 1 - skip searching byte
  szReplace     db "ABCD_EFGHJK"
  szReplaceMask db 0,1,1,1,1,1,1,1,1,1,0 ; 1 - skip writing byte
  hPatSize dd 11

  ; backward byte pattern search/replace algorithm by mask

  proc  _snr uses ebx esi edi, lpMem,hMemSize,lpFind,lpFindMask,lpReplace,lpReplaceMask,hPatSize
  locals
        hRetValue       dd ?
  endl
  
        mov     ebx,[lpMem]
        mov     ecx,[hMemSize]
        mov     edi,[hFindSize]

        and     [hRetValue],0

        or      ecx,ecx
        jz      .ret
        or      edi,edi
        jz      .ret

  .again:
        mov     al,byte [ebx+ecx-1]
        mov     edx,[hFindStr]
        cmp     al,byte [edx+edi-1]
        jnz     .next

        lea     edx,[edi-1]
        lea     eax,[ecx-1]
        cmp     edx,eax
        ja      .ret

  .search_loop:
        mov     al,byte [ebx+ecx-2]
        mov     esi,[hFindStr]
        dec     ecx
        cmp     al,byte [esi+edx-1]
        jz      @F
        mov     eax,[hFindStrMsk]
        cmp     byte [edx+eax-1],0
        jz      .next_test
  @@:
        dec     edx
        jnz     .search_loop

        lea     eax,[ebx+ecx-1]
        mov     edx,edi
        lea     esi,[eax+edi-1]

  .replace_loop:
        mov     eax,[hReplaceStrMsk]
        cmp     byte [eax+edx-1],0
        jnz     @F
        
        mov     eax,[hReplaceStr]
        mov     al,byte [edx+eax-1]
        inc     [hRetValue]
        mov     byte [esi],al
  
  @@:
        dec     esi
        dec     edx
        jnz     .replace_loop

        cmp     edi,ecx
        ja      .ret

  .next:
        dec     ecx
  
  .next_test:
        or      ecx,ecx
        jnz     .again

  .ret:
        mov     eax,[hRetValue]
        ret
  endp  
    


ps. fixed length searching and tested not much
Post 23 Jul 2006, 07:34
View user's profile Send private message Reply with quote
kohlrak



Joined: 21 Jul 2006
Posts: 1421
Location: Uncle Sam's Pad
kohlrak
What's needed the most (that most wildcard check systems don't have) is for the wild card to be only restricted by spaces when checking for the search word... for example:

context

F##udge

Searcher *udge

and still find the word in the context.

Also to be able to tell when it's not at the end of the searched for word (so we could say "F*e" and still find teh word in the context).

But i have a feeling i'm gonna end up makin' that code and have to look at all yours for a base. (and no, incase you're worried since i'm new, i really do mean look, not copy and paste. lol)
Post 23 Jul 2006, 14:18
View user's profile Send private message Visit poster's website AIM Address Yahoo Messenger MSN Messenger Reply with quote
f0dder



Joined: 19 Feb 2004
Posts: 3170
Location: Denmark
f0dder
PCRE Wink
Post 23 Jul 2006, 14:31
View user's profile Send private message Visit poster's website Reply with quote
kohlrak



Joined: 21 Jul 2006
Posts: 1421
Location: Uncle Sam's Pad
kohlrak
PCRE?
Post 23 Jul 2006, 15:25
View user's profile Send private message Visit poster's website AIM Address Yahoo Messenger MSN Messenger Reply with quote
f0dder



Joined: 19 Feb 2004
Posts: 3170
Location: Denmark
f0dder
Post 23 Jul 2006, 19:15
View user's profile Send private message Visit poster's website Reply with quote
kohlrak



Joined: 21 Jul 2006
Posts: 1421
Location: Uncle Sam's Pad
kohlrak
oh cool, so we already have it, we just need to open up an exe in perle and locate it and change the code so it works better. lol
Post 23 Jul 2006, 19:28
View user's profile Send private message Visit poster's website AIM Address Yahoo Messenger MSN Messenger Reply with quote
f0dder



Joined: 19 Feb 2004
Posts: 3170
Location: Denmark
f0dder
You didn't look very closely, did you? Wink

It's a C library that provides perl compatible regular expressions. Since it's in C, it's linkable from just about everything. If you need the power of regular expressions (ie, more than just '?' and '*'), it's hard to find anything much better than this.
Post 23 Jul 2006, 19:33
View user's profile Send private message Visit poster's website Reply with quote
kohlrak



Joined: 21 Jul 2006
Posts: 1421
Location: Uncle Sam's Pad
kohlrak
Fine, we'll open it up. lol
Post 23 Jul 2006, 19:59
View user's profile Send private message Visit poster's website AIM Address Yahoo Messenger MSN Messenger Reply with quote
f0dder



Joined: 19 Feb 2004
Posts: 3170
Location: Denmark
f0dder
It's overkill if you only need '?' and '*' though. But if you want full regular expression support, it's more sane to use PCRE than starting your own - it's a massive task to get regex working, stable and efficient.
Post 23 Jul 2006, 20:04
View user's profile Send private message Visit poster's website Reply with quote
kohlrak



Joined: 21 Jul 2006
Posts: 1421
Location: Uncle Sam's Pad
kohlrak
the simple idea is to take what i want out the teh DLL or whatever (and then cutting out the junk of that and rewriting some of the loops more efficiantly and such.
Post 23 Jul 2006, 20:19
View user's profile Send private message Visit poster's website AIM Address Yahoo Messenger MSN Messenger Reply with quote
ronware



Joined: 08 Jan 2004
Posts: 179
Location: Israel
ronware
PCRE is an excellent and quite efficient library. I doubt "rewriting some of the loops" will be a simple excersize or worth the time you spend on it.
Post 25 Jul 2006, 17:58
View user's profile Send private message Visit poster's website AIM Address Yahoo Messenger Reply with quote
kohlrak



Joined: 21 Jul 2006
Posts: 1421
Location: Uncle Sam's Pad
kohlrak
Eh, maybe, maybe not... I'm thinking about working on a new project some day where i'll build a little digital assistant like thing with instant message capabilities. I won't go into all the details, but it won't require people to pay for some wireless service...
Post 25 Jul 2006, 18:45
View user's profile Send private message Visit poster's website AIM Address Yahoo Messenger MSN Messenger Reply with quote
f0dder



Joined: 19 Feb 2004
Posts: 3170
Location: Denmark
f0dder
kohlrak wrote:
Eh, maybe, maybe not... I'm thinking about working on a new project some day where i'll build a little digital assistant like thing with instant message capabilities. I won't go into all the details, but it won't require people to pay for some wireless service...


What does that have to do with this thread? Confused

_________________
Image - carpe noctem
Post 26 Jul 2006, 08:06
View user's profile Send private message Visit poster's website Reply with quote
kohlrak



Joined: 21 Jul 2006
Posts: 1421
Location: Uncle Sam's Pad
kohlrak
Well.... Directly to the post itself... nothing... lol But it points out that editing and using the smallest code possible would be required, even if it seems unimportant.
Post 26 Jul 2006, 08:15
View user's profile Send private message Visit poster's website AIM Address Yahoo Messenger MSN Messenger Reply with quote
wht36



Joined: 18 Sep 2005
Posts: 106
wht36
I've found a wildcard matching routine by comrade ( http://comrade.ownz.com/ ) and made some changes to so it doesn't look like his original routine anymore, but the idea was his.

The wmatch routine is the actual routine. I've included some stuff in front so one can assemble and test it.


Code:
org     100h
        mov     si,mask
     mov     di,string
   call    wmatch
      add     al,'0'
    int     29h
 ret

mask:    db      "*",0
string:  db      "Hello",0

;Compare nul terminated SI mask to DI string
;Return AX non-zero if match
wmatch:
.new:     xor     cx, cx          ; clear wildcard indicator
.chkz:        sub     ax, ax          ; initialize return value
   cmp     al, [si]        ; check for end of mask
     jne     .next
       cmp     byte [di], 1    ; return TRUE if end of source
      adc     ax, cx          ; OR if end of mask is wildcard
     ret
.cmp3:       inc     di              ;  else if no match
 cmp     [di], ah
    jz      .error
.next:    mov     al, [si]
    cmp     al, "*"
   je      .wild           ; if * then set wildcard indicator to TRUE
  jcxz    .nowc           ; if not wildcard indicator then match char
 sub     bx,bx           ; Wildcard loop - initialize base counter
.cmp2: test    ax, ax          ;  if no more char in mask then end current match
   jz      .mtch
       cmp     al, "?"               ;  if ? then skip one char
  je      .sc
 cmp     al, [bx+di]
 jne     .cmp3
.sc:       inc     bx              ; Increment base counter if match
   mov     al, [bx+si]     ; 
  cmp     al, "*"               ; If next byte in mask is * then end current match
  jne     .cmp2
.mtch:     inc     di
  inc     si              ; move to next byte in source & mask
    jmp     .new
.wild:      inc     cx              ;       set wildcard indicator to TRUE
      inc     si              ;       move to next byte in mask
   jmp     .chkz           ;       check for end of mask & end of source
.nowc: cmp     al, "?"               ; if ? then
 je      .mtch           ;       match next char
     cmp     al, [di]        ; else if char matches
      je      .mtch
.error:    xor     ax, ax  ;  or return FALSE
  ret    
Post 15 Nov 2008, 06:49
View user's profile Send private message Reply with quote
comrade



Joined: 16 Jun 2003
Posts: 1137
Location: Russian Federation
comrade
It is amazing you were able to understand what it does, and actually rename the labels and add comments. I wrote this many years ago when I did neither, and looking at it now, I have no idea what it does... this day I would use PCRE though. It even has JIT-like support for performance Smile
Post 27 Feb 2009, 14:42
View user's profile Send private message Visit poster's website AIM Address Yahoo Messenger MSN Messenger ICQ Number Reply with quote
wht36



Joined: 18 Sep 2005
Posts: 106
wht36
PCRE is very powerful but I like your code very much. For simple jobs I think it is very lovely indeed. Thanks again for providing the code!
Post 21 Mar 2009, 10:53
View user's profile Send private message Reply with quote
booter



Joined: 08 Dec 2006
Posts: 67
booter
It's a bit off-topic, but a while ago I wrote a function that looks for a substring in a string (case insensitive). It's rather easy (though less effective) to implement wildcard search based on searching for substrings. Anyway, below is my code, not optimal, though.
Code:
macro Return    rc      {mov  dword [esp+28],rc}  ; inside pushad/popad

proc  Pos strptr:DWORD,search:DWORD
  pushad
  mov    eax,[strptr]
  mov    edi,[search]
  dec    eax
  .tst: ; iterate source
  inc    eax
  mov    edx,edi  ; edx->search
  .tst2: ; iterate search
  mov    cl,byte [edx]
  cmp    cl,0
  je     .found  ; end of search str
  mov    ch,byte [eax]
  cmp    ch,0 ; if we got here - end of source
  je     .fail
  cmp    ch,cl
  je     .same
  ; case insensitivity
  jnc    .nx   ; ch < cl
  xchg   cl,ch
  .nx:   ; now ch > cl, meaning only cl may be lower case
  add    cl,20h  ; to upper case
  cmp    cl,ch
  jne    .tst  ; different
  cmp    cl,61h ; 'A'
  jc     .tst  ; out of range
  cmp    cl,7Bh ;  'Z'+1
  jnc    .tst  ; out of range
  .same:
  inc    edx   ; next in search
  inc    eax   ; next in source
  jmp    .tst2
  .fail:
  Return 0
  popad
  ret
  .found:
  sub    eax,[strptr]   ; current offset
  add    eax,edi
  sub    eax,edx        ; - search len
  inc    eax            ; offset to Pos
  Return eax 
  popad
  ret
endp
    
Post 29 Apr 2009, 06:51
View user's profile Send private message Reply with quote
Madis731



Joined: 25 Sep 2003
Posts: 2141
Location: Estonia
Madis731
Post 29 Apr 2009, 07:52
View user's profile Send private message Visit poster's website Yahoo Messenger MSN Messenger Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  


< Last Thread | Next Thread >
Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum


Copyright © 1999-2020, Tomasz Grysztar. Also on YouTube, Twitter.

Website powered by rwasa.