flat assembler
Message board for the users of flat assembler.

Index > Windows > 1.5Kb Expression Evaluator

Author
Thread Post new topic Reply to topic
rain_storm



Joined: 05 Apr 2007
Posts: 67
Location: Ireland
rain_storm 02 Aug 2011, 22:10
This is a port of strchr's expression evaluator to FASM. It has been sleeping on my hard drive for a couple of years now. I managed to crunch it down to 1536 bytes of code + data + PE header. I leave it as an exercise to the reader to do better.

What does it do? First of all this evaluator does not support assignment.
It compiles expressions of the following form to x87 floating point.
Code:
((1+2)*(3-4) - (5+6)*(7-8)) / 9
    


The compilation is done to memory. The mnemonics are printed to the console as they are being compiled. Then the compiler executes the compiled code to obtain the result of the expression, which is also printed to the console.

Code:
 ;
 ; CODEGEN.ASM
 ;
 ; An FASM port of the expression evaluator from strchr.com
 ; http://www.strchr.com/expression_evaluator
 ;
 ; Please note that assignment '=' is not supported. This can
 ; handle expressions consisting of numbers, parentheses, a leading
 ; unary '-' or '+', and the following binary operators:
 ;  + - * /
 ;

 format  pe console
 entry   WinMainCRTStartup

 include 'win32a.inc'

 ; Element count of the aCode / aData buffers (declared with dd) and byte
 ; count of the aInput line buffer; also the byte count requested from ReadFile.
 MAX_BUFFER_SIZE = 0x1000

;mnemonics   opcodes
; Each OP_* value is a 16-bit word that EmitCode writes to the code buffer
; with stosw. x86 stores the low byte first, so the constant reads "backwards"
; compared with the byte sequence in memory: 0xC1DE is emitted as DE C1.
 OP_FADDP  = 0xC1DE ; bytes DE C1
 OP_FSUBP  = 0xE9DE ; bytes DE E9
 OP_FMULP  = 0xC9DE ; bytes DE C9
 OP_FDIVP  = 0xF9DE ; bytes DE F9
 OP_FCHS   = 0xE0D9 ; bytes D9 E0
 OP_RETNOP = 0x90C3 ; bytes C3 90 (ret followed by a one-byte nop as padding)
 OP_FLD    = 0x05DD ; bytes DD 05, EmitCode appends a 32-bit address
 OP_FSTP   = 0x1DDD ; bytes DD 1D, EmitCode appends a 32-bit address

; CG_* are the indices passed to the CodeGen macro. They select an entry in
; both tblOpCodes and tblMnemonics, so the order here must match both tables.
; EmitCode relies on the ordering: indices below CG_FLD take no operand,
; CG_FLD and CG_FSTP are followed by an address.
 CG_FADDP  = 0
 CG_FSUBP  = 1
 CG_FMULP  = 2
 CG_FDIVP  = 3
 CG_FCHS   = 4
 CG_RETNOP = 5
 CG_FLD    = 6
 CG_FSTP   = 7

;error codes
; Returned in ecx by the parser routines (0 = success). WinMain uses a
; non-zero code, minus one, as the index into tblError.
 NO_ERROR    = 0
 PAREN_ERROR = 1
 UNREC_ERROR = 2
 DIVID_ERROR = 3
 NOMEM_ERROR = 4
 INVAL_ERROR = 5

 ; m2m Target, Source
 ; Copies Source to Target through the stack (push Source / pop Target), which
 ; allows a memory-to-memory move without using a scratch register.
 ; Not invoked anywhere in the visible listing.
 macro m2m Target, Source
 {
       push    Source
       pop     Target
 }

 ;section '.libs' import data readable writable
 data import
      library kernel32, 'kernel32.dll',\
              msvcrt,   'msvcrt.dll'

      include 'api\kernel32.inc'
      import  msvcrt,\
              printf, 'printf',\
              strtod, 'strtod'
 end data
      ;temporarily this read only data is kept here for size reasons
      ;it should be kept in the data section or read in from file

      ; Error message pointers, indexed by (error code - 1); the order must
      ; follow PAREN_ERROR .. INVAL_ERROR.
      tblError     dd szParenErr
                   dd szUnRecErr
                   dd szDividErr
                   dd szMemoryErr
                   dd szInvalidErr
      ; Text printed for each generated instruction, indexed by CG_* value.
      ; The first six entries are plain names printed through szOperation;
      ; the last two are complete printf formats taking an address.
      tblMnemonics dd szFaddp
                   dd szFsubp
                   dd szFmulp
                   dd szFdivp
                   dd szFchs
                   dd szReturn
                   dd szFldVar
                   dd szFstpVar
      ; 16-bit opcode words, indexed by CG_* value (same order as tblMnemonics).
      tblOpCodes   dw OP_FADDP
                   dw OP_FSUBP
                   dw OP_FMULP
                   dw OP_FDIVP
                   dw OP_FCHS
                   dw OP_RETNOP
                   dw OP_FLD
                   dw OP_FSTP
      ; printf formats used by WinMain
      szHeader     db 'Enter an expression (or empty string to exit)',0x0D,0x0A,0
      szResult     db 'Result : %f',0x0D,0x0A,0
      szError      db 'Error : %s',0x0D,0x0A,0
      ; error texts referenced from tblError
      ; NOTE(review): 'charictor' below is a misspelling of 'character' in a
      ; user-visible message; left unchanged here because it is runtime text.
      szParenErr   db 'Unmatched parenthesis!',0
      szUnRecErr   db 'Unrecognised charictor!',0
      szDividErr   db 'Divide by zero!',0
      szMemoryErr  db 'Out of memory!',0
      szInvalidErr db 'Invalid opcode!',0
      ; mnemonic texts referenced from tblMnemonics
      szFaddp      db 'faddp',0
      szFsubp      db 'fsubp',0
      szFmulp      db 'fmulp',0
      szFdivp      db 'fdivp',0
      szFchs       db 'fchs',0
      szReturn     db 'ret',0
      szFldVar     db '  fld   [%p]',0x0D,0x0A,0 ; pointer
      szFstpVar    db '  fstp  [%p]',0x0D,0x0A,0 ; pointer
      szOperation  db '  %s',0x0D,0x0A,0        ; szOpString
     ;szFldVal     db '  fld   %g',0x0D,0x0A,0   ; value



 ;section '.text' code readable writable executable
   ; Program entry point (named by the 'entry' directive).
   ; Sets ebp to the address of lpEndPos so that the neighbouring variables
   ; can be reached as [ebp +/- offset], stores the address of the input
   ; buffer in lpHeap, then falls through into WinMain.
   WinMainCRTStartup:
        ; all data accesses will be an offset from this pointer (generates smaller opcodes)
        mov     ebp, lpEndPos
       ;invoke  GetModuleHandleA
       ;mov     [ebp - 0x14], eax  ; hInstance
       ;invoke  GetCommandLineA
       ;mov     [ebp - 0x10], eax  ; lpCmdLine
       ;invoke  GetProcessHeap
       ;mov     [ebp - 0x0C], eax  ; hHeap
       ;invoke  HeapAlloc, eax, HEAP_ZERO_MEMORY, MAX_BUFFER_SIZE
       ;mov     [ebp - 0x08], eax  ; lpHeap
        ; [ebp - 0x08] is lpHeap; for now it points at the static aInput buffer
        ; instead of a heap allocation (see the disabled HeapAlloc call above)
        mov     dword [ebp - 0x08], aInput ; lpHeap temporarily its a local buffer should be a heap

   ;WinMain main flow control routine
   ;
   ; Prints the prompt once, then loops: read a line from standard input,
   ; hand it to Statement, and print either the result or an error message.
   ; The loop ends when the line is empty (CR LF only) or when nothing could
   ; be read (end of input / read failure).
   ;
   ; In:    ebp = address of lpEndPos (set up by WinMainCRTStartup)
   ;        [ebp - 0x08] (lpHeap) = address of the input buffer
   ; Does not return; leaves through ExitProcess.
   WinMain:
        ; print header
        cinvoke printf, szHeader

   .Main:
        ; get input from console
        ; The four ReadFile arguments after the handle are pushed by hand
        ; first; "invoke ReadFile, eax" then only pushes the handle returned
        ; by GetStdHandle on top of them.
        pusha
        mov     edi, [ebp - 0x08] ; lpHeap
        push    NULL              ; lpOverlapped
        push    lpPointer         ; lpNumberOfBytesRead
        push    MAX_BUFFER_SIZE   ; nNumberOfBytesToRead
        push    edi               ; lpBuffer
        ; zero the whole buffer so the text read is always NUL terminated
        mov     ecx, MAX_BUFFER_SIZE
        xor     eax, eax
        rep     stosb
        invoke  GetStdHandle, STD_INPUT_HANDLE
        invoke  ReadFile, eax
        popa

        ; if nothing was read (end of file on redirected input, or ReadFile
        ; failed) then exit; otherwise the zeroed buffer would be reported as
        ; an unrecognised statement on every iteration, forever.
        ; ReadFile zeroes the byte count before doing any other work.
        cmp     dword [ebp - 0x04], 0 ; lpPointer = number of bytes read
        jz      .Finish

        ; if input = newline then exit
        mov     esi, [ebp - 0x08] ; lpHeap
        cmp     word [esi], 0x0A0D
        jz      .Finish

        ; evaluate the statement (returns the error code in ecx, 0 = success)
        call    Statement

   .CheckForErrors:
        jecxz   .PrintResult
        ; error codes start at 1, hence the - 0x04 bias on the table address
        cinvoke printf, szError, [tblError - 0x04 + ecx*0x04]
        jmp     .Main

   .PrintResult:
        ; ebp + 0x14 -> qword qResult low, ebp + 0x18 -> qword qResult high
        cinvoke printf, szResult, dword [ebp + 0x14], dword [ebp + 0x18]
        jmp     .Main

   .Finish:
        ; return code zero for success
        invoke  ExitProcess, 0


   ;-----------------------------------------------------------------------------
   ;Generate code and data
   ;-----------------------------------------------------------------------------
   ;
   ; CodeGen <CG_* index>
   ;   Loads the index into cl and calls EmitCode.
   ;
   ; EmitCode
   ;   In:    cl  = CG_* index of the instruction to append
   ;          edi = write position in the code buffer
   ;          st0 = value to spill (only when cl = CG_FLD)
   ;   Out:   edi advanced past the emitted bytes, ecx = 0
   ;          eax is restored by popa to the value passed to printf
   ;   Side effects: prints the mnemonic; for CG_FLD pops st0 into the data
   ;          buffer slot at [lpIndex] and advances lpIndex by 8.
   ;
 macro CodeGen opCode
 {
       mov     cl, opCode
       call    EmitCode
 }

   EmitCode:
       movzx    ecx, cl                     ; widen the index so it can scale table addresses
       mov      ax, [tblOpCodes + ecx*2]    ; fetch the two opcode bytes ...
       stosw                                ; ... and append them to the generated code
       cmp      cl, CG_FLD                  ; indices are ordered: plain ops, then fld, then fstp
       ja       .StoreVar                   ; above CG_FLD -> fstp [qResult]
       je       .LoadVar                    ; equal        -> fld  [data slot]

       ; below CG_FLD: an instruction without operand
       mov      eax, [tblMnemonics + ecx*4] ; eax = mnemonic text (printf argument)
       mov      ecx, szOperation            ; ecx = format string that prints it
       jmp      .PrintMnemonic

   .LoadVar:
       mov      eax, [lpIndex]              ; eax = address of the next free qword in the data buffer
       fstp     qword [eax]                 ; move the parsed constant from st0 into that slot
       add      [lpIndex], 0x00000008       ; the following constant gets the following slot
       jmp      .Address

   .StoreVar:
       mov      eax, qResult                ; the result is always stored to qResult

   .Address:
       stosd                                ; append the 32 bit operand address after the opcode
       mov      ecx, [tblMnemonics + ecx*4] ; here the table entry is itself the format string

   .PrintMnemonic:
       pusha                                ; printf may change registers the parser relies on
       cinvoke  printf, ecx, eax            ; ecx = format, eax = mnemonic text or operand address
       popa
       xor      ecx, ecx                    ; report success to the caller
       ret

   ;-----------------------------------------------------------------------------
   ;Parse a statement
   ;-----------------------------------------------------------------------------
   ; Compiles the line at esi into aCode and, if it compiled cleanly and is
   ; followed by CR LF, executes the generated code. The generated code
   ; stores its result in qResult.
   ;
   ; In:    esi = address of the source text
   ; Out:   ecx = error code (0 = success, result is in qResult)
   ;        A division by zero in the executed code is reported as DIVID_ERROR.
   ; Uses:  eax, edx, edi, esi, the x87 state, plus whatever the parser
   ;        routines change.
   Statement:                           ;
        finit                           ; reset the fpu state
        mov     edi, aCode              ; point edi to code buffer for storing opcodes
        mov     [lpIndex], aData        ; reset data index pointer for storing values
                                        ;
   .IsExpression:                       ;
        xor     edx, edx                ; reset parenthesis counter
        xor     ecx, ecx                ; reset error message
        stdcall ParseExpression         ; parse an expression
        jecxz   .GetNewLine             ; trap any errors
        ret                             ; return error code
                                        ;
   .GetNewLine:                         ;
        cmp     word [esi], 0x0A0D      ; have we reached the end of this line
        jz      .GotStatement           ; if so the statement is complete
        mov     cl, UNREC_ERROR         ; anything else after the expression is an error
       ;fldz                            ; invalidate statement
        ret                             ; don't execute the expression
                                        ;
   .GotStatement:                       ;
        CodeGen CG_RETNOP               ; emit a return opcode to avoid executing invalid opcodes and/or data
        fnclex                          ; drop exception flags left over from parsing
        call    aCode                   ; execute the code that was generated for this expression
        fnstsw  ax                      ; fetch the x87 status word
        test    al, 0x04                ; bit 2 = ZE, set by a division of a finite value by zero
        jz      .Done                   ; no division by zero: ecx is still 0 from CodeGen
        mov     cl, DIVID_ERROR         ; report it instead of printing an infinite result
                                        ;
   .Done:                               ;
        ret                             ; return statement

   ;-----------------------------------------------------------------------------
   ;Parse an expression
   ;-----------------------------------------------------------------------------
   ; Parses a complete expression and, on success, emits the instruction that
   ; stores the result to qResult.
   ;
   ; In:    esi = source pointer, edi = code write pointer,
   ;        edx = 0 (open parenthesis counter), ecx = 0
   ; Out:   ecx = error code (0 = success), esi/edi advanced
   ParseExpression:                     ;
        call    ParseSummands           ; parse addition and subtraction
        jecxz   .ParenthesisAreClosed   ; trap any errors
        ret                             ; return error code
                                        ;
   .ParenthesisAreClosed:               ;
        or      edx, edx                ; have all open parenthesis been closed
        jz      .GotExpression          ; if so then return our result
        mov     cl, PAREN_ERROR         ; issue an error code
       ;fldz                            ; invalidate expression
        ret                             ; return now: falling into CodeGen would
                                        ; overwrite cl and EmitCode would zero ecx,
                                        ; silently discarding the error
                                        ;
   .GotExpression:                      ;
       ;fstp    qword [ebp + 0x14]      ; finally we have our result store it in qResult
        CodeGen CG_FSTP                 ; generate code for storing result (leaves ecx = 0)
        ret                             ; return expression

   ;-----------------------------------------------------------------------------
   ;Parse an addition or subtraction
   ;-----------------------------------------------------------------------------
   ; Parses  factor { ('+' | '-') factor }  and emits faddp / fsubp after each
   ; right-hand factor.
   ;
   ; In:    esi = source pointer, edi = code write pointer, ecx = 0
   ; Out:   ecx = error code (0 = success), esi/edi advanced,
   ;        eax = dword at esi when returning successfully
   ParseSummands:                       ;
        call    ParseFactors            ; parse multiplication and division
                                        ;
   .GrabAnotherSummand:                 ;
        jecxz   .GetSummandOp           ; trap any errors
        ret                             ; return error code
                                        ;
   .GetSummandOp:                       ;
        call    SkipWhiteSpace          ; skip over white space
       ;mov     [ebp + 0x10], eax;      ; save the operation string  in szSummandOp
       ;mov     [ebp + 0x08], esi       ; save the pointer position  in lpSummand
                                        ;
   .IsAtomOrSummand:                    ; check if we are dealing with an atom or a summand
        cmp     al, '-'                 ; check for a subtraction operation
        jz      .SkipOverSummandOp      ; if it was a subtraction operation skip over it
        cmp     al, '+'                 ; check for an addition operation
        jz      .SkipOverSummandOp      ; if it was an addition operation skip over it
                                        ;
   .GotSummand:                         ;
        ret                             ; return summand
                                        ;
   .SkipOverSummandOp:                  ;
        inc     esi                     ; skip over summand operation
        push    eax                     ; preserve summand operation
        call    ParseFactors            ; parse multiplication and division
        pop     eax                     ; restore summand operation (ecx and flags untouched)
        jecxz   .CheckSummandOp         ; only emit the operation if the right operand parsed
        ret                             ; otherwise return the error code; CodeGen would
                                        ; reset ecx to 0 and hide it (e.g. for "1+")
                                        ;
   .CheckSummandOp:                     ;
       ;cmp     byte [ebp + 0x10], '-'  ; is the operation in szSummandOp a subtraction
        cmp     al, '-'                 ; was the summand operation a subtraction
        jnz     .DoPlus                 ; if not go do an addition
                                        ;
   .DoSubtract:                         ;
       ;fsubp                           ; summand = left factor - right factor
        CodeGen CG_FSUBP                ; generate code for subtraction
        jmp     .GrabAnotherSummand     ; continue parsing more factors
                                        ;
   .DoPlus:                             ;
       ;faddp                           ; summand = left factor + right factor
        CodeGen CG_FADDP                ; generate code for addition
        jmp     .GrabAnotherSummand     ; continue parsing more factors

   ;-----------------------------------------------------------------------------
   ;Parse a multiplication or division
   ;-----------------------------------------------------------------------------
   ; Parses  atom { ('*' | '/') atom }  and emits fmulp / fdivp after each
   ; right-hand atom.
   ;
   ; In:    esi = source pointer, edi = code write pointer, ecx = 0
   ; Out:   ecx = error code (0 = success), esi/edi advanced,
   ;        eax = dword at esi when returning successfully
   ParseFactors:                        ;
        call    ParseAtom               ; parse a number or a sub-expression
                                        ;
   .GrabAnotherFactor:                  ;
        jecxz   .GetFactorOp            ; trap any errors
        ret                             ; return error code
                                        ;
   .GetFactorOp:                        ;
        call    SkipWhiteSpace          ; skip over white space
       ;mov     [ebp + 0x0C], eax       ; save the operation string in szFactorOp
       ;mov     [ebp + 0x04], esi       ; save the pointer position in lpFactor
                                        ;
   .IsAtomOrFactor:                     ; check if we are dealing with an atom or a factor
        cmp     al, '/'                 ; check for a division operation
        jz      .SkipOverFactorOp       ; if it was a division operation skip over it
        cmp     al, '*'                 ; check for a multiplication operation
        jz      .SkipOverFactorOp       ; if it was a multiplication operation skip over it
                                        ;
   .GotFactor:                          ;
        ret                             ; otherwise return factor
                                        ;
   .SkipOverFactorOp:                   ;
        inc     esi                     ; skip over factor operation
        push    eax                     ; preserve factor operation
        call    ParseAtom               ; parse a number or a sub-expression
        pop     eax                     ; restore factor operation (ecx and flags untouched)
        jecxz   .CheckFactorOp          ; only emit the operation if the right operand parsed
        ret                             ; otherwise return the error code; CodeGen would
                                        ; reset ecx to 0 and hide it (e.g. for "2*")
                                        ;
   .CheckFactorOp:                      ;
       ;cmp     byte [ebp + 0x0C], '/'  ; is the operation in szFactorOp a division
        cmp     al, '/'                 ; was the factor operation a division
        jnz     .DoMultiply             ; if not go do a multiplication
                                        ;
   .DoDivide:                           ;
       ;fdivp                           ; factor = left atom / right atom
        CodeGen CG_FDIVP                ; generate code for division
        jmp     .GrabAnotherFactor      ; continue parsing more factors
                                        ;
   .DoMultiply:                         ;
       ;fmulp                           ; factor = left atom * right atom
        CodeGen CG_FMULP                ; generate code for multiplication
        jmp     .GrabAnotherFactor      ; continue parsing more factors

   ;-----------------------------------------------------------------------------
   ;Parse a number or a sub-expression
   ;-----------------------------------------------------------------------------
   ; Parses  ['-'] ['+'] ( '(' summands ')' | number )  and emits the code
   ; that leaves its value on the x87 stack at run time.
   ;
   ; In:    esi = source pointer, edi = code write pointer,
   ;        edx = open parenthesis counter, ecx = 0,
   ;        ebp = address of lpEndPos (receives strtod's end pointer)
   ; Out:   ecx = error code (0 = success), esi/edi advanced
   ; Uses:  eax, ebx (ebx = 0 or -1: unary minus seen)
   ;
   ; Note: no white space is skipped between a sign and '('.
   ParseAtom:                           ;
        call    SkipWhiteSpace          ; skip over spaces; al = first character of the atom
                                        ;
   .UnaryMinus:                         ;
        xor     ebx, ebx                ; unary minus = false
        cmp     al, '-'                 ; check for unary minus
        jnz     .UnaryPlus              ; if not a unary minus check for unary plus
        inc     esi                     ; skip over unary minus
        not     ebx                     ; unary minus = true (ebx = -1)
                                        ;
   .UnaryPlus:                          ;
        cmp     byte [esi], '+'         ; check for unary plus (also accepted after a '-')
        jnz     .OpenParenthesis        ; if not unary plus check for open parenthesis
        inc     esi                     ; skip over unary plus
                                        ;
   .OpenParenthesis:                    ;
        cmp     byte [esi], '('         ; check for open parenthesis
        jnz     .Value                  ; if not open parenthesis get atom value
        inc     esi                     ; skip over open parenthesis
        inc     edx                     ; increase the parenthesis counter
        push    ebx                     ; the nested parse reuses ebx, keep our unary minus flag
        call    ParseSummands           ; parse the sub expression
        pop     ebx                     ; restore the unary minus
        jecxz   .CloseParenthesis       ; trap any errors
        ret                             ; return error code
                                        ;
   .CloseParenthesis:                   ;
        cmp     byte [esi], ')'         ; check for closing parenthesis
        jz      .GotSubExpression       ; if we matched the parenthesis then we have a sub expression
        mov     cl, PAREN_ERROR         ; issue an error code
       ;fldz                            ; invalidate atom
        ret                             ; return error code (edx is left incremented)
                                        ;
   .GotSubExpression:                   ;
        inc     esi                     ; skip over the closing parenthesis
        dec     edx                     ; decrease the parenthesis counter
        jmp     .DoUnaryMinus           ; go deal with that unary minus
                                        ;
   .Value:                              ;
        pusha                           ; preserve parser state across the C call
        cinvoke strtod, esi, ebp        ; convert string to double; result comes back in st0,
                                        ; the end-of-number pointer is written to [ebp] (lpEndPos)
        popa                            ; restore parser state (st0 is not affected)
        cmp     esi, [ebp]              ; nothing converted if the end pointer equals the start
        jnz     .SkipOverAtom           ; otherwise move on past the number
        mov     cl, UNREC_ERROR         ; issue an error code
       ;fldz                            ; invalidate atom
        ret                             ; return error code; strtod's return value stays on the x87 stack
                                        ;
   .SkipOverAtom:                       ;
        mov     esi, [ebp]              ; continue parsing right after the number
        CodeGen CG_FLD                  ; EmitCode pops st0 into the data buffer and emits fld [slot]
                                        ;
   .DoUnaryMinus:                       ;
        or      ebx, ebx                ; was there a unary minus to deal with
        jz      .GotAtom                ; if not then just return the value
       ;fchs                            ; otherwise handle the unary minus
        CodeGen CG_FCHS                 ; generate code to negate the value
                                        ;
   .GotAtom:                            ;
        ret                             ; return atom

   ;-----------------------------------------------------------------------------
   ;Skip over spaces
   ;-----------------------------------------------------------------------------
   ; SkipWhiteSpace      advance esi to the first byte that is not a space
   ; SkipOverWhiteSpace  same, but step over the byte at esi first
   ;
   ; In:    esi = source pointer
   ; Out:   esi = address of the first non-space byte
   ;        eax = dword read from that address (al = the character itself)
   SkipOverWhiteSpace:
        inc     esi                     ; step past the current byte
   SkipWhiteSpace:
        mov     eax, [esi]              ; fetch the character (plus the three bytes after it)
        cmp     al, ' '                 ; still looking at a space?
        je      SkipOverWhiteSpace      ; yes: move on and look again
        ret                             ; no: eax already holds the return value

   ; executable code buffer
   ; Statement writes the generated instructions here (through edi) and then
   ; calls this address. Size: MAX_BUFFER_SIZE dwords.
   aCode:        dd MAX_BUFFER_SIZE dup ?

 ;section '.data' data readable writable
     ; The variables below are addressed relative to ebp, which the entry
     ; point sets to the address of lpEndPos; their order and sizes therefore
     ; must not change without updating the [ebp +/- n] operands in the code.
     ;hInstance   dd ?
     ;lpCmdLine   dd ?
     ;hHeap       dd ?
     lpHeap      dd ?   ; ebp - 0x08 : address of the input buffer
     lpPointer   dd ?   ; ebp - 0x04 : receives the byte count from ReadFile
     lpEndPos    dd ?   ; ebp        : receives the end pointer from strtod
     lpFactor    dd ?   ; ebp + 0x04 : referenced only by disabled code
     lpSummand   dd ?   ; ebp + 0x08 : referenced only by disabled code
     szFactorOp  dd ?   ; ebp + 0x0C : referenced only by disabled code
     szSummandOp dd ?   ; ebp + 0x10 : referenced only by disabled code
     qResult     dq ?   ; ebp + 0x14 : double written by the generated fstp
     lpIndex     dd ?   ; ebp + 0x1C : next free qword slot in aData
     ; readable / writable data buffer
     ; holds the numeric constants of the expression, one qword each
     aData       dd MAX_BUFFER_SIZE dup ?
     ; source code input buffer
     ; MAX_BUFFER_SIZE bytes plus one extra byte that the read never fills
     ; (presumably to guarantee a terminating zero - TODO confirm)
     aInput      db MAX_BUFFER_SIZE dup ?, ?
    


Description:
Download
Filename: CODEGEN.ZIP
Filesize: 5.7 KB
Downloaded: 281 Time(s)

Post 02 Aug 2011, 22:10
View user's profile Send private message Reply with quote
typedef



Joined: 25 Jul 2010
Posts: 2909
Location: 0x77760000
typedef 03 Aug 2011, 00:53
Sleek, maybe you could also support parentheses and add an option so we don't have to see the verbosity.

Laughing
Post 03 Aug 2011, 00:53
View user's profile Send private message Reply with quote
rain_storm



Joined: 05 Apr 2007
Posts: 67
Location: Ireland
rain_storm 03 Aug 2011, 19:23
The verbose output is the whole point of the exercise. Actually, the exercise was to get that level of output in such a small size, hence the name "CODEGEN".
Post 03 Aug 2011, 19:23
View user's profile Send private message Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  


< Last Thread | Next Thread >
Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum


Copyright © 1999-2025, Tomasz Grysztar. Also on GitHub, YouTube.

Website powered by rwasa.