Grom PE
Joined: 13 Mar 2008
Posts: 114
Location: i@grompe.org.ru
|
There's a lot of misinformation floating around the internet claiming that the Windows console doesn't fully support Unicode, along with broken implementations and obscure C/C++ workarounds, so I decided to make a showcase of proper, pure WinAPI handling of Unicode in the console.
; Good console input/output on Win32
; for flat assembler by Grom PE
; What it does:
; - reads unicode text from keyboard and outputs unicode text to console
; - if redirects are used, the redirected input or output is assumed to
; be in the current console codepage, as UTF-16 files and
; stdin/stdout communication on Windows is not a standard or expected
; thing. In this case, as chcp doesn't work with UTF-16, UTF-8 is the
; only way to have full range unicode support. Beware that UTF-8
; "codepage" will break every single program that's not specifically
; made to handle this.
;
; Notes:
; - obviously, your console font should be set to unicode
; - to support unicode input/output independent from current locale,
; need to use console-specific functions instead of file-specific.
; - but console functions cannot work with redirected input/output, so
; need to detect the redirect and use file functions if so.
; - could use GetFileType to detect redirects (classic way) instead of
; GetConsoleScreenBufferInfo & GetConsoleMode, but then redirect
; to/from NUL won't be detected.
; - it is assumed that you want redirected input and/or output in
; the current console codepage (use chcp to change it; 65001 for UTF-8
; is also supported)
; - cmd /u/c "echo [text]|program" still won't work with full unicode
; because cmd thinks we won't be able to work with it and transmits
; in current locale anyway. Only UTF-8 "codepage" can be used to
; transmit full unicode via pipes.
format PE console
include 'win32a.inc'
BUFFER_SIZE = 1024
; ----------------------------------------------------------------------
; Program body (execution starts at the first instruction below).
;
; 1. Fetch the std handles and detect redirection: the console-only
;    calls GetConsoleScreenBufferInfo / GetConsoleMode return zero for
;    a handle that is not a console.
; 2. Read one chunk of input: ReadConsoleW for a real console (UTF-16),
;    otherwise ReadFile followed by a conversion from the console input
;    codepage to UTF-16.
; 3. Write it back: WriteConsoleW for a real console, otherwise convert
;    UTF-16 to the console output codepage and use WriteFile.
;
; Sizing: at most BUFFER_SIZE/2 units are read (UTF-16 characters from
; the console, bytes from a file).  That keeps the UTF-16 text within
; the BUFFER_SIZE bytes of `buffer` and `buffer_copy` in both paths.
; A conversion whose result does not fit the destination makes the API
; return 0 and is reported through mb_error.
; ----------------------------------------------------------------------
        invoke  GetStdHandle, STD_OUTPUT_HANDLE
        mov     [h_out], eax
        invoke  GetConsoleScreenBufferInfo, eax, buffer ; data ignored, only success matters
        test    eax, eax
        jnz     .probe_input
        mov     ebx, output_redirected
        call    printcstr
        mov     [redir_out], 1

.probe_input:
        invoke  GetStdHandle, STD_INPUT_HANDLE
        mov     [h_in], eax
        invoke  GetConsoleMode, eax, buffer     ; data ignored, only success matters
        test    eax, eax
        jnz     .read_input
        mov     ebx, input_redirected
        call    printcstr
        mov     [redir_in], 1

.read_input:
        mov     ebx, reading
        call    printcstr

        ; ReadConsoleW and ReadFile take the same five arguments, so they
        ; are pushed once and the callee is chosen afterwards.
        push    0                       ; pInputControl / lpOverlapped
        push    charsrw                 ; receives units actually read
        push    BUFFER_SIZE/2           ; UTF-16 chars (console) or bytes (file)
        push    buffer
        push    [h_in]
        cmp     [redir_in], 1
        je      .file_in

        invoke  ReadConsoleW
        jmp     .input_ready

.file_in:
        invoke  ReadFile
        cmp     [charsrw], 0
        je      .input_ready            ; empty input: nothing to convert

        ; Convert out of a separate copy so source and destination of
        ; MultiByteToWideChar never overlap.
        mov     esi, buffer
        mov     edi, buffer_copy
        mov     ecx, [charsrw]
        rep movsb
        invoke  GetConsoleCP
        invoke  MultiByteToWideChar, eax, 0, buffer_copy, [charsrw], buffer, BUFFER_SIZE/2
        test    eax, eax
        jz      mb_error
        mov     [charsrw], eax          ; now a count of UTF-16 characters

.input_ready:
        mov     ebx, [charsrw]
        call    printnum
        mov     ebx, writing
        call    printcstr

        ; WriteConsoleW and WriteFile also share one argument layout.
        push    0                       ; lpReserved / lpOverlapped
        push    charsrw                 ; receives units actually written
        push    [charsrw]               ; units to write (patched below for files)
        push    buffer
        push    [h_out]
        cmp     [redir_out], 1
        je      .file_out

        invoke  WriteConsoleW
        jmp     .output_done

.file_out:
        cmp     [charsrw], 0
        je      .write_file             ; nothing to convert, write zero bytes

        mov     esi, buffer
        mov     edi, buffer_copy
        mov     ecx, [charsrw]          ; count of 16-bit units
        rep movsw
        invoke  GetConsoleOutputCP
        invoke  WideCharToMultiByte, eax, 0, buffer_copy, [charsrw], buffer, BUFFER_SIZE, 0, 0
        test    eax, eax
        jz      mb_error
        mov     [charsrw], eax
        mov     [esp+8], eax            ; replace the pushed count with the byte count

.write_file:
        invoke  WriteFile

.output_done:
        mov     ebx, [charsrw]
        call    printnum
        invoke  ExitProcess, 0

; Conversion failure: report it and exit with code 1.
mb_error:
        mov     ebx, error_while_converting
        call    printcstr
        invoke  ExitProcess, 1
; printnum - format a number with `fmt` and print the resulting text
; In:  ebx = number to output
; Uses buffer_printnum as scratch space; finishes by tail-jumping into
; printcstr, so it returns to printnum's caller from there.
printnum:
        cinvoke wsprintfA, buffer_printnum, fmt, ebx    ; cdecl: macro cleans the stack
        mov     ebx, buffer_printnum
        jmp     printcstr
; printcstr - write a zero-terminated string to [h_out] with WriteFile
; In:  ebx = address of the string to output
; Out: eax = WriteFile result (non-zero on success)
; The bytes-written counter required by WriteFile gets its own stack
; dword instead of aliasing the lpOverlapped argument slot, so the call
; does not depend on when WriteFile reads its arguments.
printcstr:
        invoke  lstrlen, ebx            ; eax = string length
        push    0                       ; local dword: bytes written
        mov     ecx, esp                ; ecx -> that local
        invoke  WriteFile, [h_out], ebx, eax, ecx, 0
        pop     ecx                     ; drop the local, keep eax intact
        ret
align 4
data import
library kernel32,'kernel32.DLL',user32,'user32.dll'
import kernel32,\
ExitProcess,'ExitProcess',\
GetStdHandle,'GetStdHandle',\
GetConsoleScreenBufferInfo,'GetConsoleScreenBufferInfo',\
GetConsoleMode,'GetConsoleMode',\
GetConsoleCP,'GetConsoleCP',\
GetConsoleOutputCP,'GetConsoleOutputCP',\
WriteFile,'WriteFile',\
ReadFile,'ReadFile',\
WriteConsoleW,'WriteConsoleW',\
ReadConsoleW,'ReadConsoleW',\
MultiByteToWideChar,'MultiByteToWideChar',\
WideCharToMultiByte,'WideCharToMultiByte',\
lstrlen,'lstrlen'
import user32, \
wsprintfA, 'wsprintfA'
end data
; --- Zero-terminated status messages printed through printcstr ---
input_redirected db ':: input redirected',13,10,0
output_redirected db ':: output redirected',13,10,0
reading db ':: reading console input:',13,10,0
writing db ':: writing console output:',13,10,0
error_while_converting db 'Error while converting the string!',13,10,0
; wsprintfA format string used by printnum
fmt db '%d characters or bytes',13,10,13,10,0
align 4
; --- Uninitialized working storage ---
; buffer: BUFFER_SIZE bytes; passed to the read and write calls and used
; as the destination of both codepage conversions
buffer rb BUFFER_SIZE
; buffer_copy: BUFFER_SIZE bytes; holds the source text for the conversions
buffer_copy rb BUFFER_SIZE ; being safe: haven't tested if multibyte conversion
; functions can handle overlapping or same buffers
; buffer_printnum: destination of wsprintfA in printnum
buffer_printnum rb 128
; charsrw: count received from the read/write calls and from the conversions
charsrw rd 1
; standard input / output handles obtained from GetStdHandle
h_in rd 1
h_out rd 1
; byte flags, set to 1 when the corresponding handle is not a console
redir_in rb 1
redir_out rb 1
Attached the same source file for convenience.
Description: |
goodconsoleio.asm |
 Download |
Filename: |
goodconsoleio.zip |
Filesize: |
1.81 KB |
Downloaded: |
603 Time(s) |
|
|