;
; this is an md4 algorithm which creates 2 hashes at once
;
; i've just switched from MASM to FASM recently because of problems with SSE2 code
; i can't assemble this code, now, though it did work with MASM syntax.
;
; i encounter this problem.
;====================================
;flat assembler  version 1.64
;fasy.asm [285]:
;      FF   d[t1], eax, ecx, esi, (05*4), 07, d[t2],ebx,edx,edi
;fasy.asm [68] FF [28]:
;    lea   dwa1, [ebp + dwa1]
;error: invalid address.
;
;
; i thought with the conditional <if rS = 7> this code would be skipped,but it doesn't seem to be.
; anyone know what the problem is?
;
;

format PE console 4.0
entry md4

include 'include/win32a.inc'
                               ; md4 input constants
A_CONSTANT  equ  067452301h
B_CONSTANT  equ  0efcdab89h
C_CONSTANT  equ  098badcfeh
D_CONSTANT  equ  010325476h

d equ   dword

if defined NTLM1
   LOWER_LIMIT   EQU   5*4              ; ntlm1  (10 characters)
else
   LOWER_LIMIT   EQU   3*4              ; md4 (12 characters)
end if

;######################################################
macro FF dwa1,dwb1,dwc1,dwd1,dwx,rS,dwa2,dwb2,dwc2,dwd2
{
 mov   ebp, dwc1
 mov   esp, dwc2

 xor   ebp, dwd1
 xor   esp, dwd2

 and   ebp, dwb1
 and   esp, dwb2

 xor   ebp, dwd1
 xor   esp, dwd2

 if dwx <= LOWER_LIMIT | dwx = 14*4
    add   ebp, d[md_input1+dwx]
    add   esp, d[md_input2+dwx]
 end if

 if rS = 7
    add   ebp, dwa1
    add   esp, dwa2

    rol   ebp, rS
    rol   esp, rS

    mov   dwa1, ebp
    mov   dwa2, esp
 else
    lea   dwa1, [ebp + dwa1]
    lea   dwa2, [esp + dwa2]

    rol  dwa1, rS
    rol  dwa2, rS
 end if
}
;######################################################
macro GG dwa1,dwb1,dwc1,dwd1,dwx,rS,dwa2,dwb2,dwc2,dwd2
{
   mov   ebp, dwc1
   mov   esp, dwc1

   and   ebp, dwd1
   or    esp, dwd1
   and   esp, dwb1
   or    ebp, esp

   if dwx <= LOWER_LIMIT | dwx = 14*4
      add   ebp, d[md_input1+dwx]
   end if

   if rS = 5
      add   ebp, dwa1
      add   ebp, 05a827999h

      rol   ebp, rS
      mov   dwa1, ebp
   else
      ;lea   dwa1, [ebp + dwa1 + 05a827999h]
      rol   dwa1, rS
   end if

   mov   ebp, dwc2
   mov   esp, dwc2

   and   ebp, dwd2
   or    esp, dwd2
   and   esp, dwb2
   or    ebp, esp
   
   if dwx <= LOWER_LIMIT | dwx = 14*4
      add   ebp, d[md_input2+dwx]
   end if

   if rS = 5
      add   ebp, dwa2
      add   ebp, 05a827999h

      rol   ebp, rS
      mov   dwa2, ebp
   else
      lea   dwa2, [ebp + dwa2 + 05a827999h]
      rol  dwa2, rS
   end if
}
;######################################################
macro HH dwa1,dwb1,dwc1,dwd1,dwx,rS,dwa2,dwb2,dwc2,dwd2
{
 mov   ebp, dwb1
 mov   esp, dwb2

 xor   ebp, dwc1
 xor   esp, dwc2

 xor   ebp, dwd1
 xor   esp, dwd2
 
 if dwx <= LOWER_LIMIT | dwx = 14*4
    add   ebp, d[md_input1+dwx]
    add   esp, d[md_input2+dwx]
 end if

 if rS = 9
    add   ebp, dwa1
    add   esp, dwa2

    add   ebp, 06ed9eba1h
    add   esp, 06ed9eba1h

    rol   ebp, rS
    rol   esp, rS

    mov   dwa1, ebp
    mov   dwa2, esp
 else
    lea   dwa1, [ebp + dwa1 + 06ed9eba1h]
    lea   dwa2, [esp + dwa2 + 06ed9eba1h]

    rol  dwa1, rS
    rol  dwa2, rS
 end if
}
;######################################################
section '.data' data readable writeable

hash_format     db   10,'Hash %d:%08x %08x %08x %08x',00

t1   rd      1
t2   rd      1

md_input1    equ     string_a
md_input2    equ     string_b

temp_ebp     rd  1
temp_esp     rd  1

string_a     dw  'a','a','a','a',80h   ; should be FA5664875FFADF0AF61ABF9B097FA46F
string_a_len equ 4*2
             dw  64   dup (0)

string_b     dw  'w','i','l','l','i','a','m',80h  ; should be 6B6E0FB2ED246885B98586C73B5BFB77
string_b_len equ 7*2
             dw  64   dup (0)

section '.code' code readable executable

md4:
      mov   d[temp_ebp], ebp
      mov   d[temp_esp], esp

      mov   eax, string_a_len
      mov   ebx, string_b_len            ; calc bits
      shl   eax, 3
      shl   ebx, 3

      mov   d[string_a+14*4], eax ;string_a_len*8         ; number of bits
      mov   d[string_b+14*4], ebx ;string_b_len*8

      ;mov   eax, A_CONSTANT
      ;mov   ebx, A_CONSTANT

      ;mov   ecx, B_CONSTANT
      ;mov   edx, B_CONSTANT

      ;mov   esi, C_CONSTANT
      ;mov   edi, C_CONSTANT

      ;mov   d[t1], D_CONSTANT
      ;mov   d[t2], D_CONSTANT

      ;FF   eax, ecx, esi, d[t1], (00*4), 03, ebx,edx,edi,d[t2]
      ;FF   d[t1], eax, ecx, esi, (01*4), 07, d[t2],ebx,edx,edi
      ;FF   esi, d[t1], eax, ecx, (02*4), 11, edi,d[t2],ebx,edx
      ;FF   ecx, esi, d[t1], eax, (03*4), 19, edx,edi,d[t2],ebx

      ;=======================================
      mov   eax, d[md_input1+00*04]
      mov   ebx, d[md_input2+00*04]

      mov   esi, d[md_input1+01*04]
      mov   edi, d[md_input2+01*04]

      mov   ecx, d[md_input1+02*04]
      mov   edx, d[md_input2+02*04]

      lea   eax, [eax-1]
      lea   ebx, [ebx-1]

      rol   eax, 3
      rol   ebx, 3
      ;=======================================
      mov   ebp, (B_CONSTANT xor C_CONSTANT)
      mov   esp, (B_CONSTANT xor C_CONSTANT)

      and   ebp, eax
      and   esp, ebx

      xor   ebp, C_CONSTANT
      xor   esp, C_CONSTANT

      lea   ebp, [ebp + esi + D_CONSTANT]
      lea   esp, [esp + edi + D_CONSTANT]

      rol   ebp, 7
      rol   esp, 7

      mov   [t1], ebp
      mov   [t2], esp
      ;=======================================
      mov   esi, eax
      mov   edi, ebx

      xor   esi, B_CONSTANT
      xor   edi, B_CONSTANT

      and   esi, ebp
      and   edi, esp

      xor   esi, B_CONSTANT
      xor   edi, B_CONSTANT

      lea   esi, [esi + ecx + C_CONSTANT]
      lea   edi, [edi + edx + C_CONSTANT]

      rol   esi, 11
      rol   edi, 11
      ;=======================================
      mov   ecx, ebp
      mov   edx, esp

      xor   ecx, eax
      xor   edx, ebx

      and   ecx, esi
      and   edx, edi

      xor   ecx, eax
      xor   edx, ebx

      lea   ecx, [ecx + B_CONSTANT]
      lea   edx, [edx + B_CONSTANT]

      rol   ecx, 19
      rol   edx, 19

      FF   eax, ecx, esi, d[t1], (04*4), 03, ebx,edx,edi,d[t2]
      FF   d[t1], eax, ecx, esi, (05*4), 07, d[t2],ebx,edx,edi
      FF   esi, d[t1], eax, ecx, (06*4), 11, edi,d[t2],ebx,edx
      FF   ecx, esi, d[t1], eax, (07*4), 19, edx,edi,d[t2],ebx

      FF   eax, ecx, esi, d[t1], (08*4), 03, ebx,edx,edi,d[t2]
      FF   d[t1], eax, ecx, esi, (09*4), 07, d[t2],ebx,edx,edi
      FF   esi, d[t1], eax, ecx, (10*4), 11, edi,d[t2],ebx,edx
      FF   ecx, esi, d[t1], eax, (11*4), 19, edx,edi,d[t2],ebx

      FF   eax, ecx, esi, d[t1], (12*4), 03, ebx,edx,edi,d[t2]
      FF   d[t1], eax, ecx, esi, (13*4), 07, d[t2],ebx,edx,edi
      FF   esi, d[t1], eax, ecx, (14*4), 11, edi,d[t2],ebx,edx
      FF   ecx, esi, d[t1], eax, (15*4), 19, edx,edi,d[t2],ebx

      ;=======================================================

      GG   eax, ecx, esi, d[t1], (00*4), 03, ebx,edx,edi,d[t2]
      GG   d[t1], eax, ecx, esi, (04*4), 05, d[t2],ebx,edx,edi
      GG   esi, d[t1], eax, ecx, (08*4), 09, edi,d[t2],ebx,edx
      GG   ecx, esi, d[t1], eax, (12*4), 13, edx,edi,d[t2],ebx

      GG   eax, ecx, esi, d[t1], (01*4), 03, ebx,edx,edi,d[t2]
      GG   d[t1], eax, ecx, esi, (05*4), 05, d[t2],ebx,edx,edi
      GG   esi, d[t1], eax, ecx, (09*4), 09, edi,d[t2],ebx,edx
      GG   ecx, esi, d[t1], eax, (13*4), 13, edx,edi,d[t2],ebx

      GG   eax, ecx, esi, d[t1], (02*4), 03, ebx,edx,edi,d[t2]
      GG   d[t1], eax, ecx, esi, (06*4), 05, d[t2],ebx,edx,edi
      GG   esi, d[t1], eax, ecx, (10*4), 09, edi,d[t2],ebx,edx
      GG   ecx, esi, d[t1], eax, (14*4), 13, edx,edi,d[t2],ebx

      GG   eax, ecx, esi, d[t1], (03*4), 03, ebx,edx,edi,d[t2]
      GG   d[t1], eax, ecx, esi, (07*4), 05, d[t2],ebx,edx,edi
      GG   esi, d[t1], eax, ecx, (11*4), 09, edi,d[t2],ebx,edx
      GG   ecx, esi, d[t1], eax, (15*4), 13, edx,edi,d[t2],ebx

      ;=======================================================

      HH   eax, ecx, esi, d[t1], (00*4), 03, ebx,edx,edi,d[t2]
      HH   d[t1], eax, ecx, esi, (08*4), 09, d[t2],ebx,edx,edi
      HH   esi, d[t1], eax, ecx, (04*4), 11, edi,d[t2],ebx,edx
      HH   ecx, esi, d[t1], eax, (12*4), 15, edx,edi,d[t2],ebx

      HH   eax, ecx, esi, d[t1], (02*4), 03, ebx,edx,edi,d[t2]
      HH   d[t1], eax, ecx, esi, (10*4), 09, d[t2],ebx,edx,edi
      HH   esi, d[t1], eax, ecx, (06*4), 11, edi,d[t2],ebx,edx
      HH   ecx, esi, d[t1], eax, (14*4), 15, edx,edi,d[t2],ebx

      HH   eax, ecx, esi, d[t1], (01*4), 03, ebx,edx,edi,d[t2]
      HH   d[t1], eax, ecx, esi, (09*4), 09, d[t2],ebx,edx,edi
      HH   esi, d[t1], eax, ecx, (05*4), 11, edi,d[t2],ebx,edx
      HH   ecx, esi, d[t1], eax, (13*4), 15, edx,edi,d[t2],ebx

      HH   eax, ecx, esi, d[t1], (03*4), 03, ebx,edx,edi,d[t2]
      HH   d[t1], eax, ecx, esi, (11*4), 09, d[t2],ebx,edx,edi
      HH   esi, d[t1], eax, ecx, (07*4), 11, edi,d[t2],ebx,edx
      HH   ecx, esi, d[t1], eax, (15*4), 15, edx,edi,d[t2],ebx
      
      mov  ebp, d[temp_ebp]
      mov  esp, d[temp_esp]

      bswap     eax
      bswap     ebx
      bswap     ecx
      bswap     edx
      bswap     esi
      bswap     edi
      pushad
      mov   eax, d[t1]
      mov   ebx, d[t2]
      bswap eax
      bswap ebx
      mov   d[t1],eax
      mov   d[t2],ebx
      popad

      push   edx
      invoke printf,hash_format,1,eax,ecx,esi,d[t1]
      add    esp, 6*4

      pop    edx
      invoke printf,hash_format,2,ebx,edx,edi,d[t2]
      add    esp, 6*4

      invoke ExitProcess,0

section '.idata' import data readable writeable

   library kernel,'kernel32.dll',msvcrt,'msvcrt.dll'

   import kernel,\
	  ExitProcess,'ExitProcess'

   import msvcrt,\
          printf,'printf'