flat assembler
Message board for the users of flat assembler.

Index > Windows > Good console input/output on Win32

Thread Post new topic Reply to topic
Grom PE

Joined: 13 Mar 2008
Posts: 114
Location: i@grompe.org.ru
Grom PE
There's a lot of misinformation floating around the internet claiming that the Windows console doesn't fully support Unicode, along with broken implementations and obscure C/C++ workarounds, so I decided to make a showcase of proper, pure WinAPI handling of Unicode in the console.

; Good console input/output on Win32
; for flat assembler by Grom PE
; What it does:
; - reads unicode text from keyboard and outputs unicode text to console
; - if redirects are used, the redirected input or output is assumed to
;   be in the current console codepage, as UTF-16 files and
;   stdin/stdout communication on Windows is not a standard or expected
;   thing. In this case, as chcp doesn't work with UTF-16, UTF-8 is the
;   only way to have full range unicode support. Beware that UTF-8
;   "codepage" will break every single program that's not specifically
;   made to handle this.
; Notes:
; - obviously, your console font should be set to a Unicode font
; - to support unicode input/output independent from current locale,
;   need to use console-specific functions instead of file-specific.
; - but console functions cannot work with redirected input/output, so
;   need to detect the redirect and use file functions if so.
; - could use GetFileType to detect redirects (classic way) instead of 
;   GetConsoleScreenBufferInfo & GetConsoleMode, but then redirect
;   to/from NUL won't be detected.
; - it is assumed that you want redirected input and/or output in the
;   current console codepage (chcp to change, 65001 for UTF-8 is also
;   supported)
; - cmd /u/c "echo [text]|program" still won't work with full unicode
;   because cmd thinks we won't be able to work with it and transmits
;   in current locale anyway. Only UTF-8 "codepage" can be used to
;   transmit full unicode via pipes.

format PE console
entry start

include 'win32a.inc'

; Syntax: flat assembler (Intel operand order). Target: 32-bit Windows.
; ABI: stdcall for all kernel32 calls (callee pops its arguments),
;      cdecl for wsprintfA (caller pops).

; Size in bytes of each of the two transfer buffers below.
BUFFER_SIZE = 4096

;-----------------------------------------------------------------------
; start - program entry point
; Reads one chunk of text from standard input and writes it back to
; standard output. Console handles use ReadConsoleW/WriteConsoleW
; (UTF-16); redirected handles use ReadFile/WriteFile with conversion
; from/to the current console input/output codepage.
; Exit code: 0 on success, 1 if a codepage conversion fails.
;-----------------------------------------------------------------------
start:
  ; Output is treated as redirected when the handle is not a console
  ; screen buffer (GetConsoleScreenBufferInfo returns 0).
  invoke GetStdHandle, STD_OUTPUT_HANDLE
  mov    [h_out], eax
  invoke GetConsoleScreenBufferInfo, eax, buffer ; ignore data received
  test   eax, eax
  jnz    @f
  mov    ebx, output_redirected
  call   printcstr
  mov    [redir_out], 1
@@:
  ; Input is treated as redirected when GetConsoleMode returns 0.
  invoke GetStdHandle, STD_INPUT_HANDLE
  mov    [h_in], eax
  invoke GetConsoleMode, eax, buffer ; ignore data received
  test   eax, eax
  jnz    @f
  mov    ebx, input_redirected
  call   printcstr
  mov    [redir_in], 1
@@:
  mov    ebx, reading
  call   printcstr

  ; The same five arguments serve both ReadConsoleW and ReadFile.
  ; The count is BUFFER_SIZE/2 so that it is safe under either meaning:
  ; - ReadConsoleW counts UTF-16 code units (2 bytes each), so at most
  ;   BUFFER_SIZE bytes land in buffer;
  ; - ReadFile counts bytes, and the later conversion yields at most one
  ;   UTF-16 unit per input byte, which fits the BUFFER_SIZE/2-unit
  ;   destination.
  push   0
  push   charsrw
  push   BUFFER_SIZE/2
  push   buffer
  push   [h_in]
  cmp    [redir_in], 1
  je     .file_in
  invoke ReadConsoleW
  jmp    @f
.file_in:
  invoke ReadFile
  ; Nothing read (empty file, NUL, end of pipe): skip the conversion,
  ; because MultiByteToWideChar returns 0 for a zero-length source and
  ; that would be misreported as a conversion error.
  cmp    [charsrw], 0
  je     @f

  ; Convert out of a copy so source and destination never overlap.
  mov    esi, buffer
  mov    edi, buffer_copy
  mov    ecx, [charsrw]                 ; ecx = bytes read
  rep    movsb

  invoke GetConsoleCP
  invoke MultiByteToWideChar, eax, 0, buffer_copy, [charsrw], buffer, BUFFER_SIZE/2
  test   eax, eax
  jz     mb_error
  mov    [charsrw], eax                 ; charsrw = UTF-16 units in buffer
@@:
  mov    ebx, [charsrw]
  call   printnum
  mov    ebx, writing
  call   printcstr

  ; The same five arguments serve both WriteConsoleW and WriteFile;
  ; the file path patches the count after conversion.
  push   0
  push   charsrw
  push   [charsrw]
  push   buffer
  push   [h_out]
  cmp    [redir_out], 1
  je     .file_out
  invoke WriteConsoleW
  jmp    @f
.file_out:
  ; Zero units to write: skip the conversion (WideCharToMultiByte
  ; returns 0 for a zero-length source) and write zero bytes.
  cmp    [charsrw], 0
  je     .write_file

  mov    esi, buffer
  mov    edi, buffer_copy
  mov    ecx, [charsrw]                 ; ecx = UTF-16 units, 2 bytes each
  rep    movsw

  invoke GetConsoleOutputCP
  invoke WideCharToMultiByte, eax, 0, buffer_copy, [charsrw], buffer, BUFFER_SIZE, 0, 0
  test   eax, eax
  jz     mb_error                       ; also taken when the result does not fit
  mov    [charsrw], eax
  mov    [esp+8], eax                   ; patch the pushed count: bytes, not units
.write_file:
  invoke WriteFile
@@:
  mov    ebx, [charsrw]
  call   printnum
  invoke ExitProcess, 0

;-----------------------------------------------------------------------
; mb_error - report a failed codepage conversion and exit with code 1.
; Does not return.
;-----------------------------------------------------------------------
mb_error:
  mov    ebx, error_while_converting
  call   printcstr
  invoke ExitProcess, 1

;-----------------------------------------------------------------------
; printnum - format a number with fmt and print it
; In:    ebx = number to output
; Clobb: eax, ecx, edx, ebx, flags
;-----------------------------------------------------------------------
printnum:
  push  ebx
  push  fmt
  push  buffer_printnum
  call  [wsprintfA]
  add   esp, 12                         ; wsprintfA is cdecl: caller cleans up
  mov   ebx, buffer_printnum
  jmp   printcstr                       ; tail call; printcstr returns to our caller

;-----------------------------------------------------------------------
; printcstr - write a zero-terminated byte string to [h_out] via WriteFile
; In:    ebx = string to output (preserved)
; Out:   eax = WriteFile result
; Clobb: ecx, edx, flags
;-----------------------------------------------------------------------
printcstr:
  push  ebx
  call  [lstrlen]                       ; eax = length in bytes
  push  eax                             ; scratch dword that receives bytes written
  mov   edx, esp
  push  0                               ; lpOverlapped
  push  edx                             ; lpNumberOfBytesWritten -> scratch
  push  eax                             ; nNumberOfBytesToWrite
  push  ebx                             ; lpBuffer
  push  [h_out]                         ; hFile
  call  [WriteFile]
  pop   edx                             ; discard scratch, keep eax
  ret

align 4

data import

 library kernel32,'kernel32.DLL',user32,'user32.dll'

 import kernel32,\
  ExitProcess,'ExitProcess',\
  GetConsoleCP,'GetConsoleCP',\
  GetConsoleMode,'GetConsoleMode',\
  GetConsoleOutputCP,'GetConsoleOutputCP',\
  GetConsoleScreenBufferInfo,'GetConsoleScreenBufferInfo',\
  GetStdHandle,'GetStdHandle',\
  MultiByteToWideChar,'MultiByteToWideChar',\
  ReadConsoleW,'ReadConsoleW',\
  ReadFile,'ReadFile',\
  WideCharToMultiByte,'WideCharToMultiByte',\
  WriteConsoleW,'WriteConsoleW',\
  WriteFile,'WriteFile',\
  lstrlen,'lstrlenA'

 import user32, \
  wsprintfA, 'wsprintfA'

end data

; Status messages (zero-terminated, written as raw bytes by printcstr).
input_redirected db ':: input redirected',13,10,0
output_redirected db ':: output redirected',13,10,0
reading db ':: reading console input:',13,10,0
writing db ':: writing console output:',13,10,0
error_while_converting db 'Error while converting the string!',13,10,0
fmt db '%d characters or bytes',13,10,13,10,0

align 4

buffer rb BUFFER_SIZE      ; data handed to the read/write calls
buffer_copy rb BUFFER_SIZE ; being safe: haven't tested if multibyte conversion
                           ; functions can handle overlapping or same buffers
buffer_printnum rb 128     ; wsprintfA output for printnum
charsrw rd 1               ; characters or bytes read/written by the last call
h_in rd 1                  ; standard input handle
h_out rd 1                 ; standard output handle
redir_in rb 1              ; set to 1 when input is not a console
redir_out rb 1             ; set to 1 when output is not a console

Attached the same source file for convenience.

Description: goodconsoleio.asm
Filename: goodconsoleio.zip
Filesize: 1.81 KB
Downloaded: 172 Time(s)

Post 19 Feb 2017, 09:23
View user's profile Send private message Visit poster's website Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  

< Last Thread | Next Thread >
Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum

Copyright © 1999-2020, Tomasz Grysztar.

Powered by rwasa.