flat assembler
Message board for the users of flat assembler.

Index > Windows > Good console input/output on Win32

Author
Thread Post new topic Reply to topic
Grom PE



Joined: 13 Mar 2008
Posts: 114
Location: i@grompe.org.ru
Grom PE 19 Feb 2017, 09:23
There's a lot of misinformation floating around the internet claiming that the Windows console doesn't fully support Unicode, along with broken implementations and obscure C/C++ workarounds, so I decided to make a showcase of proper, pure-WinAPI handling of Unicode in the console.

Code:
; Good console input/output on Win32
; for flat assembler by Grom PE
; What it does:
; - reads unicode text from keyboard and outputs unicode text to console
; - if redirects are used, the redirected input or output is assumed to
;   be in the current console codepage, as UTF-16 files and
;   stdin/stdout communication on Windows is not a standard or expected
;   thing. In this case, as chcp doesn't work with UTF-16, UTF-8 is the
;   only way to have full range unicode support. Beware that UTF-8
;   "codepage" will break every single program that's not specifically
;   made to handle this.
;
; Notes:
; - obviously, your console font should be set to unicode
; - to support unicode input/output independent from current locale,
;   need to use console-specific functions instead of file-specific.
; - but console functions cannot work with redirected input/output, so
;   need to detect the redirect and use file functions if so.
; - could use GetFileType to detect redirects (classic way) instead of 
;   GetConsoleScreenBufferInfo & GetConsoleMode, but then redirect
;   to/from NUL won't be detected.
; - it is assumed that you want redirected input and/or output in
;   current console codepage (chcp to change, 65001 for UTF-8 is also
;   supported)
; - cmd /u/c "echo [text]|program" still won't work with full unicode
;   because cmd thinks we won't be able to work with it and transmits
;   in current locale anyway. Only UTF-8 "codepage" can be used to
;   transmit full unicode via pipes.

format PE console

include 'win32a.inc'

BUFFER_SIZE = 1024

  ; ---------------------------------------------------------------------
  ; Program entry: read one chunk of text from stdin and echo it to stdout.
  ;
  ; Console handles are served with ReadConsoleW/WriteConsoleW (UTF-16).
  ; Redirected handles are served with ReadFile/WriteFile, converting
  ; from/to the current console code page.
  ;
  ; Buffer roles:
  ;   buffer      - UTF-16 text (at most BUFFER_SIZE/2 code units)
  ;   buffer_copy - code-page bytes read from / written to a redirected handle
  ; Both reads are capped at BUFFER_SIZE/2 units so the UTF-16 form always
  ; fits in `buffer` (BUFFER_SIZE bytes).
  ; ---------------------------------------------------------------------

  ; ---- detect output redirection --------------------------------------
  ; If the console-only query fails, the handle is not a console.
  invoke GetStdHandle, STD_OUTPUT_HANDLE
  mov    [h_out], eax
  invoke GetConsoleScreenBufferInfo, eax, buffer ; returned data is ignored
  test   eax, eax
  jnz    .out_checked
  mov    ebx, output_redirected
  call   printcstr
  mov    [redir_out], 1
.out_checked:

  ; ---- detect input redirection ---------------------------------------
  invoke GetStdHandle, STD_INPUT_HANDLE
  mov    [h_in], eax
  invoke GetConsoleMode, eax, buffer ; returned data is ignored
  test   eax, eax
  jnz    .in_checked
  mov    ebx, input_redirected
  call   printcstr
  mov    [redir_in], 1
.in_checked:

  ; ---- read ------------------------------------------------------------
  mov    ebx, reading
  call   printcstr

  mov    [charsrw], 0   ; a failed read then reports 0 instead of stale data
  cmp    [redir_in], 1
  je     .file_in

.console_in:
  ; ReadConsoleW counts UTF-16 characters, not bytes:
  ; BUFFER_SIZE/2 characters fill exactly BUFFER_SIZE bytes of `buffer`.
  invoke ReadConsoleW, [h_in], buffer, BUFFER_SIZE/2, charsrw, 0
  jmp    .read_done

.file_in:
  ; Read at most BUFFER_SIZE/2 bytes: every byte can become at most one
  ; UTF-16 code unit, so the converted text always fits in `buffer`.
  invoke ReadFile, [h_in], buffer_copy, BUFFER_SIZE/2, charsrw, 0
  cmp    [charsrw], 0
  je     .read_done     ; empty input / EOF: MultiByteToWideChar fails on length 0

  invoke GetConsoleCP
  invoke MultiByteToWideChar, eax, 0, buffer_copy, [charsrw], buffer, BUFFER_SIZE/2
  test   eax, eax
  jz     mb_error
  mov    [charsrw], eax ; now the number of UTF-16 code units in `buffer`
.read_done:

  mov    ebx, [charsrw]
  call   printnum

  ; ---- write -----------------------------------------------------------
  mov    ebx, writing
  call   printcstr

  cmp    [redir_out], 1
  je     .file_out

.console_out:
  invoke WriteConsoleW, [h_out], buffer, [charsrw], charsrw, 0
  jmp    .write_done

.file_out:
  xor    eax, eax       ; eax = number of bytes to write (0 when nothing was read)
  cmp    [charsrw], eax
  je     .write_bytes   ; WideCharToMultiByte fails on length 0, so skip it

  ; Convert into the separate byte buffer. If the encoded text needs more
  ; than BUFFER_SIZE bytes the call returns 0 and the error is reported.
  invoke GetConsoleOutputCP
  invoke WideCharToMultiByte, eax, 0, buffer, [charsrw], buffer_copy, BUFFER_SIZE, 0, 0
  test   eax, eax
  jz     mb_error
.write_bytes:
  invoke WriteFile, [h_out], buffer_copy, eax, charsrw, 0
.write_done:

  mov    ebx, [charsrw] ; characters (console) or bytes (file) actually written
  call   printnum

  invoke ExitProcess, 0

; mb_error - reached when a code-page <-> UTF-16 conversion returns 0.
; Prints the error message via printcstr and ends the process with
; exit code 1; control never comes back to the caller.
mb_error:
  lea    ebx, [error_while_converting]
  call   printcstr
  push   1
  call   [ExitProcess]

; printnum - format a count with wsprintfA and print the resulting line
; in:  ebx = number to output
; The formatted text is built in buffer_printnum using the `fmt` template,
; then printing is delegated to printcstr (tail jump, so printcstr's `ret`
; returns straight to our caller).
printnum:
  cinvoke wsprintfA, buffer_printnum, fmt, ebx ; cdecl: macro pops the 3 args
  mov   ebx, buffer_printnum
  jmp   printcstr

; printcstr - write a NUL-terminated byte string to [h_out] using WriteFile
; in:       ebx = address of the string to output
; out:      eax = WriteFile return value
; clobbers: whatever lstrlen/WriteFile clobber (ebx is still used after
;           the lstrlen call, so it is expected to survive both calls)
;
; The "bytes written" out-parameter gets its own scratch dword on the
; stack, so WriteFile never writes over one of its own arguments.
printcstr:
  push  eax             ; reserve one scratch dword (value is irrelevant)
  mov   eax, esp        ; eax -> scratch dword
  push  0               ; WriteFile: lpOverlapped = NULL
  push  eax             ; WriteFile: lpNumberOfBytesWritten -> scratch
  push  ebx
  call  [lstrlen]       ; eax = string length; stdcall pops its argument
  push  eax             ; WriteFile: nNumberOfBytesToWrite
  push  ebx             ; WriteFile: lpBuffer
  push  [h_out]         ; WriteFile: hFile
  call  [WriteFile]     ; stdcall pops the five arguments
  add   esp, 4          ; drop the scratch dword, keeping eax intact
  ret

align 4

data import

 library kernel32,'kernel32.DLL',user32,'user32.dll'

 import kernel32,\
  ExitProcess,'ExitProcess',\
  GetStdHandle,'GetStdHandle',\
  GetConsoleScreenBufferInfo,'GetConsoleScreenBufferInfo',\
  GetConsoleMode,'GetConsoleMode',\
  GetConsoleCP,'GetConsoleCP',\
  GetConsoleOutputCP,'GetConsoleOutputCP',\
  WriteFile,'WriteFile',\
  ReadFile,'ReadFile',\
  WriteConsoleW,'WriteConsoleW',\
  ReadConsoleW,'ReadConsoleW',\
  MultiByteToWideChar,'MultiByteToWideChar',\
  WideCharToMultiByte,'WideCharToMultiByte',\
  lstrlen,'lstrlen'

 import user32, \
  wsprintfA, 'wsprintfA'

end data

; Status and error messages. Each is a CR/LF-terminated, NUL-terminated
; byte string whose address is loaded into ebx before calling printcstr.
input_redirected db ':: input redirected',13,10,0
output_redirected db ':: output redirected',13,10,0
reading db ':: reading console input:',13,10,0
writing db ':: writing console output:',13,10,0
error_while_converting db 'Error while converting the string!',13,10,0
; wsprintfA format string used by printnum; %d receives the count from ebx.
fmt db '%d characters or bytes',13,10,13,10,0

align 4

; Working storage (reserved, no initial values given here).
; buffer: main I/O buffer, BUFFER_SIZE bytes. Also used as a throw-away
; destination for the console queries made during redirect detection.
buffer rb BUFFER_SIZE
buffer_copy rb BUFFER_SIZE ; being safe: haven't tested if multibyte conversion
                           ; functions can handle overlapping or same buffers
; Destination for the text formatted by wsprintfA in printnum.
buffer_printnum rb 128
; Count received from the read/write calls, then reused as the length for
; the following conversion/write and as the number printed by printnum.
charsrw rd 1
; Standard handles as returned by GetStdHandle.
h_in rd 1
h_out rd 1
; Redirect flags: set to 1 when the matching console query fails.
; NOTE(review): never explicitly cleared; the code compares them against 1,
; so it presumably relies on reserved data starting out as zero - confirm.
redir_in rb 1
redir_out rb 1
    


Attached the same source file for convenience.


Description: goodconsoleio.asm
Download
Filename: goodconsoleio.zip
Filesize: 1.81 KB
Downloaded: 511 Time(s)

Post 19 Feb 2017, 09:23
View user's profile Send private message Visit poster's website Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  


< Last Thread | Next Thread >
Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum


Copyright © 1999-2024, Tomasz Grysztar. Also on GitHub, YouTube.

Website powered by rwasa.