Grom PE

Grom PE
There's a lot of misinformation floating around the internet claiming that the Windows console doesn't fully support Unicode, along with broken implementations and obscure C/C++ workarounds, so I decided to make a showcase of proper, pure-WinAPI handling of Unicode in the console.

; Good console input/output on Win32
; for flat assembler by Grom PE
; What it does:
; - reads unicode text from keyboard and outputs unicode text to console
; - if redirects are used, the redirected input or output is assumed to
;   be in the current console codepage, as UTF-16 files and
;   stdin/stdout communication on Windows is not a standard or expected
;   thing. In this case, as chcp doesn't work with UTF-16, UTF-8 is the
;   only way to have full range unicode support. Beware that UTF-8
;   "codepage" will break every single program that's not specifically
;   made to handle this.
; Notes:
; - obviously, your console font should be set to a unicode one
; - to support unicode input/output independent from current locale,
;   need to use console-specific functions instead of file-specific.
; - but console functions cannot work with redirected input/output, so
;   need to detect the redirect and use file functions if so.
; - could use GetFileType to detect redirects (classic way) instead of 
;   GetConsoleScreenBufferInfo & GetConsoleMode, but then redirect
;   to/from NUL won't be detected.
; - it is assumed that you want redirected input and/or output in the
;   current console codepage (chcp to change, 65001 for UTF-8 is also
;   supported)
; - cmd /u/c "echo [text]|program" still won't work with full unicode
;   because cmd thinks we won't be able to work with it and transmits
;   in current locale anyway. Only UTF-8 "codepage" can be used to
;   transmit full unicode via pipes.

format PE console

include 'win32a.inc'


  invoke GetStdHandle, STD_OUTPUT_HANDLE
  mov    [h_out], eax
  invoke GetConsoleScreenBufferInfo, eax, buffer ; ignore data received
  test   eax, eax
  jne    @f
  mov    ebx, output_redirected
  call   printcstr
  mov    [redir_out], 1

  invoke GetStdHandle, STD_INPUT_HANDLE
  mov    [h_in], eax
  invoke GetConsoleMode, eax, buffer ; ignore data received
  test   eax, eax
  jne    @f
  mov    ebx, input_redirected
  call   printcstr
  mov    [redir_in], 1
  mov    ebx, reading
  call   printcstr

  push   0
  push   charsrw
  push   BUFFER_SIZE
  push   buffer
  push   [h_in] ; exact same arguments for both ReadConsoleW and ReadFile
  cmp    [redir_in], 1
  je     .file_in
  invoke ReadConsoleW
  jmp    @f
  invoke ReadFile

  mov    esi, buffer
  mov    edi, buffer_copy
  mov    ecx, [charsrw]
  rep    movsb

  invoke GetConsoleCP
  invoke MultiByteToWideChar, eax, 0, buffer_copy, [charsrw], buffer, BUFFER_SIZE/2
  test   eax, eax
  jz     mb_error
  mov    [charsrw], eax

  mov    ebx, [charsrw]
  call   printnum
  mov    ebx, writing
  call   printcstr

  push   0
  push   charsrw
  push   [charsrw]
  push   buffer
  push   [h_out] ; same arguments for both WriteConsoleW and WriteFile
  cmp    [redir_out], 1
  je     .file_out
  invoke WriteConsoleW
  jmp    @f
  mov    esi, buffer
  mov    edi, buffer_copy
  mov    ecx, [charsrw]
  rep    movsw

  invoke GetConsoleOutputCP
  invoke WideCharToMultiByte, eax, 0, buffer_copy, [charsrw], buffer, BUFFER_SIZE, 0, 0
  test   eax, eax
  jz     mb_error
  mov    [charsrw], eax
  mov    [esp+8], eax ; fixes UTF-8 character count

  invoke WriteFile

  mov    ebx, [charsrw]
  call   printnum
  invoke ExitProcess, 0

  mov    ebx, error_while_converting
  call   printcstr
  invoke ExitProcess, 1

; ebx = number to output
  push  ebx
  push  fmt
  push  buffer_printnum
  call  [wsprintfA]
  add   esp, 12
  mov   ebx, buffer_printnum
  jmp   printcstr

; ebx = string to output
  push  0
  push  esp
  push  ebx
  call  [lstrlen]
  push  eax
  push  ebx
  push  [h_out]
  call  [WriteFile]

align 4

data import

 library kernel32,'kernel32.DLL',user32,'user32.dll'

 import kernel32,\

 import user32, \
  wsprintfA, 'wsprintfA'

end data

input_redirected db ':: input redirected',13,10,0
output_redirected db ':: output redirected',13,10,0
reading db ':: reading console input:',13,10,0
writing db ':: writing console output:',13,10,0
error_while_converting db 'Error while converting the string!',13,10,0
fmt db '%d characters or bytes',13,10,13,10,0

align 4

buffer rb BUFFER_SIZE
buffer_copy rb BUFFER_SIZE ; being safe: haven't tested if multibyte conversion
                           ; functions can handle overlapping or same buffers
buffer_printnum rb 128
charsrw rd 1
h_in rd 1
h_out rd 1
redir_in rb 1
redir_out rb 1

Attached the same source file for convenience.

Description: goodconsoleio.asm
Filename: goodconsoleio.zip
Filesize: 1.81 KB
Downloaded: 172 Time(s)

Post 19 Feb 2017, 09:23
