flat assembler
Message board for the users of flat assembler.

Index > Windows > [HELP] - Reading formatted text file

Author
Thread Post new topic Reply to topic
ctl3d32



Joined: 30 Dec 2009
Posts: 206
Location: Brazil
ctl3d32 24 Mar 2011, 02:21
Hi folks!

Below is the source of my formatted text file reader (comma delimited array elements reader) and Data.txt file. If you have the patience to read it, could you, please, tell me if it is the right way to do it or if i'll have to take a completely new path?
The program reads the comma-delimited numbers in 'Data.txt', calculates their sum and prints the result in a message box.

Thanks,
ctl3d32

Code:
format PE GUI 4.0
entry start

include 'win32ax.inc'

section '.text' code readable executable

  start:
        ; Program entry point: reads Data.txt into lpBuffer, parses the
        ; comma-delimited numbers into a heap-allocated float array, sums
        ; the elements and shows the total in a message box.
        invoke  GetModuleHandle,0
        mov     [hProcess],eax
        invoke  CreateFile,lpFileName,\
                           GENERIC_READ,\
                           FILE_SHARE_READ,\
                           NULL,\
                           OPEN_EXISTING,\
                           FILE_ATTRIBUTE_NORMAL,\
                           NULL
        cmp     eax,INVALID_HANDLE_VALUE
        je      exit                    ; nothing to clean up yet
        mov     [hFile],eax
        invoke  ReadFile,[hFile],\
                         lpBuffer,\
                         [nNumberOfBytesToRead],\
                         lpNumberOfBytesRead,\
                         NULL
        test    eax,eax
        jz      .closehandle
        ; Terminate the text explicitly: GetArrayFromFile scans up to a NUL.
        ; NOTE(review): this (and the ReadFile above) is only safe when
        ; lpBuffer is at least nNumberOfBytesToRead+1 bytes - verify the
        ; declarations.
        mov     eax,[lpNumberOfBytesRead]
        mov     byte [lpBuffer+eax],0
        ; flOptions = 0 (not HEAP_GENERATE_EXCEPTIONS) so that a failed
        ; HeapAlloc returns NULL and the check below can actually fire.
        invoke  HeapCreate,0,\
                           [dwInitialSize],\
                           [dwMaximumSize]
        test    eax,eax
        jz      .closehandle
        mov     [hHeap],eax
        invoke  HeapAlloc,[hHeap],\
                          HEAP_ZERO_MEMORY,\
                          [dwInitialSize]
        test    eax,eax
        jz      .destroyheap
        mov     [hAllocHeap],eax
        stdcall GetArrayFromFile,lpBuffer,[hAllocHeap],MyArray
        mov     ebx,[MyArray.length]    ; ebx = 1-based index, counts down to 0
  .loop:
        test    ebx,ebx
        jz      .printresult
        stdcall getArrayElem,MyArray,ebx,value
        movss   xmm0,[sum]
        addss   xmm0,[value]            ; sum += A(ebx)
        movss   [sum],xmm0
        dec     ebx
        jmp     .loop
  .printresult:
        ; Variadic C functions take floating-point arguments as double,
        ; so widen the single-precision sum before formatting it.
        fld     dword [sum]
        fstp    qword [sum2]
        ; sprintf is cdecl: cinvoke removes the arguments after the call.
        cinvoke sprintf,buff,fmt,double [sum2]
        ; NOTE: a NULL caption makes MessageBox use its default title.
        invoke  MessageBox,NULL,buff,NULL,MB_OK
  .freeheap:
        invoke  HeapFree,[hHeap],0,[hAllocHeap]
  .destroyheap:
        invoke  HeapDestroy,[hHeap]
  .closehandle:
        invoke  CloseHandle,[hFile]
  exit:
        invoke  ExitProcess,0

;-----------------------------------------------------------------------
; GetArrayFromFile - parse NUL-terminated delimited text into floats
; Convention: stdcall (fasm proc), 32-bit
; In:    lpbuffer = address of the NUL-terminated text
;        hMemory  = address of the destination array of 32-bit floats
;        hArray   = address of an Array structure to fill in
; Out:   Array.lpbufferArray = hMemory, Array.length = elements stored
;        eax = number of elements stored
; Notes: ',' (2Ch), CR (0Dh) and LF (0Ah) each end a token; spaces and
;        tabs are ignored; empty tokens are skipped. Characters beyond
;        63 in a single token are dropped.
;        NOTE(review): the capacity of hMemory is not known here, so the
;        caller must provide room for every number in the text.
;-----------------------------------------------------------------------
proc GetArrayFromFile uses ebx ecx edx edi esi,lpbuffer,hMemory,hArray
  locals
    buffer rb 64h
  endl
        mov     ebx,[lpbuffer]          ; ebx = read cursor in the text
        lea     edx,[buffer]            ; edx = write cursor in the token
        mov     edi,[hMemory]           ; edi = destination float array
        mov     eax,[hArray]
        mov     [eax+Array.lpbufferArray],edi
        mov     dword [eax+Array.length],0
        xor     esi,esi                 ; esi = elements stored so far
  .scan:
        mov     al,byte [ebx]
        test    al,al
        jz      .flush                  ; end of text: cursor stays on the NUL
        inc     ebx
        cmp     al,2Ch                  ; ','
        je      .flush
        cmp     al,0Dh                  ; CR
        je      .flush
        cmp     al,0Ah                  ; LF
        je      .flush
        cmp     al,20h                  ; space
        je      .scan
        cmp     al,09h                  ; tab
        je      .scan
        lea     ecx,[buffer+64h-1]      ; last byte is reserved for the NUL
        cmp     edx,ecx
        jae     .scan                   ; token too long: drop the excess
        mov     byte [edx],al
        inc     edx
        jmp     .scan
  .flush:
        mov     byte [edx],0
        cmp     byte [buffer],0
        je      .next                   ; empty token: nothing to convert
        lea     edx,[buffer]
        cinvoke atof,edx                ; cdecl; result is returned in st0
        fstp    dword [edi+esi*4]
        inc     esi
        mov     eax,[hArray]
        mov     [eax+Array.length],esi
  .next:
        lea     edx,[buffer]            ; restart the token (atof may clobber edx)
        cmp     byte [ebx],0
        jne     .scan
        mov     eax,esi
        ret
endp

proc getArrayElem uses ebx,A,i,v
     ; Copies element i of an Array to the dword pointed to by v.
     ; Inputs: A - address of an Array structure (by Ref)
     ;         i - 1-based element index
     ; Output: v - address that receives the value at A(i) (by Ref)
     ;         eax = 1 when the element was copied,
     ;         eax = 0 when i is 0 or greater than A.length ([v] untouched)
     ; Clobbers: eax, xmm0, flags
     mov     ebx,[A]                       ;ebx holds the structure base address
     mov     eax,[i]
     dec     eax                           ;zero-based index; i = 0 wraps to 0FFFFFFFFh
     cmp     eax,[ebx+Array.length]
     jae     .outofrange                   ;unsigned compare also rejects the wrapped i = 0
     mov     ebx,[ebx+Array.lpbufferArray] ;ebx holds the array's address
     movss   xmm0,[ebx+eax*4]              ;xmm0 = A(i)
     mov     ebx,[v]
     movss   [ebx],xmm0                    ;store the value at the address in v
     mov     eax,1
     ret
  .outofrange:
     xor     eax,eax
     ret
endp

section '.bss' readable writeable

  align 16

  ; Largest number of bytes requested from the file in one ReadFile call.
  ; lpBuffer is sized from it so the read can never overrun the buffer.
  FILE_READ_MAX = 200h

  ; Array descriptor: element count plus the address of the element storage.
  struct Array
    length        dd ?
    lpbufferArray dd ?
  ends

  hProcess            dd ?
  hFile               dd ?
  lpNumberOfBytesRead dd ?
  lpBuffer            rb FILE_READ_MAX+1 ; +1 keeps room for a terminating NUL
  hHeap               dd ?
  hAllocHeap          dd ?
  MyArray             Array
  value               dd ?
  sum                 dd ?
  sum2                dq ?
  buff                rb 40h             ; 'Sum is: ' + any float printed with %.3f + NUL

section '.data' data readable writeable

  nNumberOfBytesToRead dd FILE_READ_MAX
  lpFileName           db 'Data.txt',0
  dwInitialSize        dd 2048
  dwMaximumSize        dd 4096
  fmt                  db 'Sum is: %.3f',0

section '.idata' import data readable writeable

  library kernel32,'KERNEL32.DLL',\
          user32,'USER32.DLL',\
          msvcrt,'MSVCRT.DLL'

  include 'api\kernel32.inc'
  include 'api\user32.inc'

  import  msvcrt,\
          atof,'atof',\
          sprintf,'sprintf'
    


Data.txt
Code:
1.34, 3.45, 6.78
2.00,-45.1, 1.00
    
Post 24 Mar 2011, 02:21
View user's profile Send private message Reply with quote
madmatt



Joined: 07 Oct 2003
Posts: 1045
Location: Michigan, USA
madmatt 24 Mar 2011, 16:48
a simpler way would be to use 'fscanf':

local x:DWORD, y:DWORD, z:DWORD
; fscanf takes the stream as its first argument; hStream is a placeholder
; for the FILE* returned by fopen.
cinvoke fscanf, [hStream], "%f %f %f", addr x, addr y, addr z

do this for each set of 3 floats.
Post 24 Mar 2011, 16:48
View user's profile Send private message Reply with quote
vid
Verbosity in development


Joined: 05 Sep 2003
Posts: 7105
Location: Slovakia
vid 24 Mar 2011, 17:13
Using format string " %f , %f , %f" should also skip commas from input. Value other than 3 returned by fscanf() means there was some input error. Note that this way treats all whitespace the same - all spaces, tabs, and end-of-line characters are skipped by space in format string.
Post 24 Mar 2011, 17:13
View user's profile Send private message Visit poster's website AIM Address MSN Messenger ICQ Number Reply with quote
ctl3d32



Joined: 30 Dec 2009
Posts: 206
Location: Brazil
ctl3d32 24 Mar 2011, 17:27
Thanks guys, but let's consider that 'Data.txt' contains an unknown number of elements, i.e., 'Data.txt' could be any of, for example, this:
Code:
1.34, 3.45, 6.78, 2.00,-45.1, 1.00, 4.65 
    

or this:
Code:
1.34, 3.45, 6.78, 2.00
3.34, 7.45, 4.78, 1.00
1.94, 3.49, 9.79, 7.00
    

"%f, %f, %f" means to me that my input data structure has to be fixed and known before using fscanf.

What i mean is: if i want to read the first example, should i use "%f, %f, %f, %f, %f, %f, %f"?
If yes, that is not what i want. Because i don't know how many elements my arrays will have.

Thanks,
ctl3d32
Post 24 Mar 2011, 17:27
View user's profile Send private message Reply with quote
madmatt



Joined: 07 Oct 2003
Posts: 1045
Location: Michigan, USA
madmatt 24 Mar 2011, 19:50
If you don't know the count or the count keeps changing you can read in one float at a time.

Code:
local floatdata:DWORD, count:DWORD

; Reads one float per iteration until fscanf stops converting.
; hStream is a placeholder for the FILE* returned by fopen;
; floatarraydata is the destination dword array (capacity is not checked here).
mov [count],0
.repeat
   ; "%f" skips leading whitespace; " ," then swallows optional blanks and one comma
   cinvoke fscanf, [hStream], "%f ,", addr floatdata
   .if eax = 1                     ; exactly one value converted
      mov   edx, [count]
      fld   [floatdata]
      fstp  dword [floatarraydata + edx*4]
      inc   [count]                ; count only the values actually stored
   .endif
.until eax <> 1 ; 0 = input did not match, EOF (-1) = end of file


Not sure how fast this would go on large data sets.
Post 24 Mar 2011, 19:50
View user's profile Send private message Reply with quote
JohnFound



Joined: 16 Jun 2003
Posts: 3499
Location: Bulgaria
JohnFound 24 Mar 2011, 20:38
Recently I tend to use binary file formats, as more asm-adequate. This simple transition can save you tons of code lines and to speed up the program (depending on the size of the files processed).

At the same time, using binary files makes them hard to be manually edited.
So, I thought about some common binary data format that would be easy for humans to read and edit (with any hex editor, or with a very simple specialized editor) and at the same time allow natural machine processing.
I don't have solution for such format (or even suggestion) but wrote this just as a ground for meditation. Smile
Post 24 Mar 2011, 20:38
View user's profile Send private message Visit poster's website ICQ Number Reply with quote
vid
Verbosity in development


Joined: 05 Sep 2003
Posts: 7105
Location: Slovakia
vid 24 Mar 2011, 22:16
madmatt: You forgot to include space in your input format string. Should be " %f", in order to skip blanks between numbers (or is this done by default? I don't think so).

JohnFound: Depends on what you are willing to use. Compared to no library at all, I agree with you. Compared to using libc, I think direct binary format is less optimal solution.
Post 24 Mar 2011, 22:16
View user's profile Send private message Visit poster's website AIM Address MSN Messenger ICQ Number Reply with quote
JohnFound



Joined: 16 Jun 2003
Posts: 3499
Location: Bulgaria
JohnFound 24 Mar 2011, 23:01
vid: Using text files to store data have only two real advantages 1. The format is human readable and editable, so the programmer can leave the editing to the user, not bothering with the complex data editors.
2. The format can be complex and thus flexible and expandable.

On the other hand, the programs deal with binary formats very fast and easy, what is not true for text formats.
Using reusable code (libc for example) only makes creating the program easy, not the execution.
As a price for the programmer's laziness, the program has to do unnecessarily complex processing only to read (write) several bytes of information.
My opinion is that maybe it is possible to be created format that is equally comfortable for the machine and for the human.
Post 24 Mar 2011, 23:01
View user's profile Send private message Visit poster's website ICQ Number Reply with quote
vid
Verbosity in development


Joined: 05 Sep 2003
Posts: 7105
Location: Slovakia
vid 24 Mar 2011, 23:20
100% agree about pros and cons of each way, just my priorities are apparently different Smile
Post 24 Mar 2011, 23:20
View user's profile Send private message Visit poster's website AIM Address MSN Messenger ICQ Number Reply with quote
typedef



Joined: 25 Jul 2010
Posts: 2909
Location: 0x77760000
typedef 25 Mar 2011, 00:27
@ctl3d32, you can just use SQLLite to keep records.
Post 25 Mar 2011, 00:27
View user's profile Send private message Reply with quote
revolution
When all else fails, read the source


Joined: 24 Aug 2004
Posts: 20486
Location: In your JS exploiting you and your system
revolution 25 Mar 2011, 00:31
typedef wrote:
@ctl3d32, you can just use SQLLite to keep records.
And maybe SQLite also. Razz
Post 25 Mar 2011, 00:31
View user's profile Send private message Visit poster's website Reply with quote
ctl3d32



Joined: 30 Dec 2009
Posts: 206
Location: Brazil
ctl3d32 25 Mar 2011, 00:59
SQLite is too much for me. Too complex. Embarassed
Post 25 Mar 2011, 00:59
View user's profile Send private message Reply with quote
madmatt



Joined: 07 Oct 2003
Posts: 1045
Location: Michigan, USA
madmatt 25 Mar 2011, 23:32
JohnFound wrote:
Recently I tend to use binary file formats, as more asm-adequate. This simple transition can save you tons of code lines and to speed up the program (depending on the size of the files processed).

At the same time, using binary files makes them hard to be manually edited.
So, I thought about some common binary data format that would be easy for humans to read and edit (with any hex editor, or with a very simple specialized editor) and at the same time allow natural machine processing.
I don't have solution for such format (or even suggestion) but wrote this just as a ground for meditation. Smile


If fasm is used to generate the binary file, you would also be able to edit it easily.

vid wrote:
madmatt: You forgot to include space in your input format string. Should be " %f", in order to skip blanks between numbers (or is this done by default? I don't think so).


Not sure about this, ctl3d32 will just have to try it and see.

_________________
Gimme a sledge hammer! I'LL FIX IT!
Post 25 Mar 2011, 23:32
View user's profile Send private message Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  


< Last Thread | Next Thread >
Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum


Copyright © 1999-2025, Tomasz Grysztar. Also on GitHub, YouTube.

Website powered by rwasa.