flat assembler
Message board for the users of flat assembler.
![]() Goto page Previous 1, 2, 3 Next |
Author |
|
vivik 24 Mar 2018, 06:50
Tried this, it compiled:
Code: #define UNICODE #define _UNICODE #include <windows.h> #include <tchar.h> #pragma comment(lib,"User32.lib") //void mainCRTStartup(void) int WinMainCRTStartup() { SetThreadAffinityMask(GetCurrentThread(),1); { LARGE_INTEGER t, f; QueryPerformanceCounter(&t); QueryPerformanceFrequency(&f); } return 30; } More ugly than a C++ way, but oh well. I actually want to have a "del" keyword, so that I can delete local variables without those code blocks. I can dream. Also, looks like I can't use (double) anywhere either. Not a big deal though, since I mostly work with float. Options I had to set: linker -> advanced -> entry point linker -> system -> subsystem -> windows linker -> manifest file -> generate -> NO for debug: linker -> debugging -> generate debug info: YES c/c++ -> general -> debug information format: edit and continue 1>cl : Command line error D8016 : '/O2' and '/ZI' command-line options are incompatible c/c++ -> optimization -> optimization: disabled ...nope, need to view optimized assembly c/c++ -> general -> debug information format: program database |
|||
![]() |
|
vivik 25 Mar 2018, 16:50
Wrote a simple C-style program, it works without runtime, but it rearranges functions for some reason. I wanted main() to be the first function in my exe, but it actually goes last.
Either I should write in assembly, or I should make my own compiler. Here is the code: Code: #define STRICT #define WIN32_LEAN_AND_MEAN #define UNICODE #define _UNICODE #include <windows.h> #include <windowsx.h> #pragma warning( disable : 4133 ) //incompatible types - from 'const char *' to 'LPCWSTR' //VARS const int g_paranoid = 1; const int g_window_w = 320; const int g_window_h = 240; //used by: //int_to_wstring wchar_t g_tempzero[12]; const wchar_t* g_tempzero_endzero = &g_tempzero[10]; //[10] and [11] will always be 0, for wchar_t and for char. const char* g_tempzero_hexstart = &g_tempzero[1]; //10 - 1 - 8 //(char*)g_tempzero_endzero - 8; const HINSTANCE g_hInstance = 0x400000; //g_hInstance = GetModuleHandle(NULL); //g_hInstance = __ImageBase; const wchar_t g_appname[] = L"showjpg"; //FUNCAHEAD LRESULT CALLBACK WndProc( HWND hwnd, UINT Message, WPARAM wParam, LPARAM lParam ); void FUCK(); wchar_t* wcscpy_return_end(wchar_t* dst, wchar_t* src); char* strcpy_return_end(char* dst, char* src); wchar_t* int_to_wstring(int the_int); char* int_to_string(int the_int); char* int_to_hexstring(UINT32 the_int); int wstring_to_int(wchar_t* the_string); void main() { MSG msg; BOOL res; ZeroMemory( &msg, sizeof( msg ) ); //MessageBox(0, L"HAHA", L"HAHA", 0); { WNDCLASS WndClass; WndClass.style = CS_HREDRAW | CS_VREDRAW; WndClass.lpfnWndProc = WndProc; WndClass.cbClsExtra = 0; WndClass.cbWndExtra = 0; WndClass.hInstance = g_hInstance; WndClass.hIcon = NULL; WndClass.hCursor = LoadCursor(NULL, IDC_ARROW); WndClass.hbrBackground = (HBRUSH)(COLOR_WINDOW + 1); WndClass.lpszMenuName = NULL/*MAKEINTRESOURCE(IDR_MENU)*/; WndClass.lpszClassName = g_appname; res = RegisterClass (&WndClass); if (!res) FUCK(); } { HWND hwnd; RECT rc; // Calculate size of window based on desired client window size rc.left = 0; rc.top = 0; rc.right = g_window_w; rc.bottom = g_window_h; AdjustWindowRect( &rc, WS_OVERLAPPEDWINDOW, FALSE ); /*HWND */hwnd = CreateWindow(g_appname, g_appname, 
WS_OVERLAPPEDWINDOW, CW_USEDEFAULT, CW_USEDEFAULT, rc.right-rc.left, rc.bottom-rc.top, NULL, NULL, g_hInstance, NULL); if (hwnd == NULL) FUCK(); ShowWindow(hwnd, SW_SHOWDEFAULT); UpdateWindow(hwnd); } while( msg.message != WM_QUIT ) { if( PeekMessage( &msg, NULL, 0U, 0U, PM_REMOVE ) ) { //TranslateMessage( &msg ); DispatchMessage( &msg ); } else { Sleep(1); } } /*while (1) { while (PeekMessage(&Msg, g_hwndMain, 0, 0, PM_NOREMOVE)) { BOOL bGetResult = GetMessage(&Msg, NULL, 0, 0); TranslateMessage(&Msg); DispatchMessage(&Msg); if (bGetResult==0) g_bRunning = false; } if (g_bRunning) { CheckSurfaces(); HeartBeat(); } }*/ } LRESULT CALLBACK WndProc(HWND hwnd, UINT msg, WPARAM wParam, LPARAM lParam) { switch( msg ) { case WM_KEYDOWN: case WM_RBUTTONDOWN: case WM_CLOSE: { DestroyWindow(hwnd); return 0; } case WM_DESTROY: { PostQuitMessage( 0 ); return 0; } } return DefWindowProc(hwnd, msg, wParam, lParam); //return 0; } void FUCK() { wchar_t* numstring; int numstring_length; wchar_t* buf; wchar_t* buf2; wchar_t* buf3; wchar_t* buf4; wchar_t* buf5; DWORD error = GetLastError(); wchar_t* errorString; FormatMessageW( FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, error, MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), (LPWSTR)&errorString, 0, NULL ); /*wchar_t* */numstring = int_to_wstring(error); /*int */numstring_length = (wchar_t*)g_tempzero_endzero - numstring; /*wchar_t* */buf = (wchar_t*)LocalAlloc(LMEM_ZEROINIT, (lstrlenW((LPCWSTR)errorString) + numstring_length) * sizeof(TCHAR)); /*wchar_t* */buf2 = wcscpy_return_end(buf, L"FUCK | "); /*wchar_t* */buf3 = wcscpy_return_end(buf2, numstring); /*wchar_t* */buf4 = wcscpy_return_end(buf3, L" | "); /*wchar_t* */buf5 = wcscpy_return_end(buf4, errorString); //StringCchPrintf((LPTSTR)lpDisplayBuf, // LocalSize(lpDisplayBuf) / sizeof(TCHAR), // TEXT("FUCK: %d: %s"), // error, errorString); MessageBox(0, buf, L"!!!!", 0); LocalFree(buf); LocalFree(errorString); __debugbreak(); } 
wchar_t* wcscpy_return_end(wchar_t* dst, wchar_t* src) { while(1) { wchar_t temp = *src++; *dst++ = temp; if (! temp) return dst-1; } }; char* strcpy_return_end(char* dst, char* src) { while(1) { char temp = *src++; *dst++ = temp; if (! temp) return dst-1; } }; wchar_t* int_to_wstring(int the_int) { //if (the_int > 9999999999) int ostatok; wchar_t* cur = g_tempzero_endzero; cur--; while (the_int) { ostatok = the_int % 10; the_int = the_int / 10; *cur = L'0' + ostatok; cur--; } cur++; return cur; } char* int_to_string(int the_int) { //if (the_int > 9999999999) //LATER: add overflow check int ostatok; char* cur = (char*)g_tempzero_endzero; cur--; while (the_int) { ostatok = the_int % 10; the_int = the_int / 10; *cur = '0' + ostatok; cur--; } cur++; return cur; } char* int_to_hexstring(UINT32 the_int) { //if (the_int > 9999999999) int ostatok; char* cur = (char*)g_tempzero_endzero; cur--; while (the_int) { ostatok = the_int % 16; the_int = the_int / 16; if (ostatok < 10) { *cur = '0' + ostatok; } else { *cur = 'a' + ostatok - 10; } cur--; } while(cur != g_tempzero_hexstart) { *cur--=' '; } cur++; return cur; } int wstring_to_int(wchar_t* the_string) { wchar_t* cur = the_string; int the_int = 0; int x_num = 1; //find the zero while (*cur!=0) { cur++; } cur--; while (cur >= the_string) { the_int += (*cur - L'0')*x_num; x_num *= 10; cur--; } return the_int; } |
|||
![]() |
|
donn 25 Mar 2018, 20:49
Regarding: "I don't want to keep variable declarations at the beginning of a function"
I used to feel the same way and found separation of instructions and data somewhat arbitrary. I think this separation improves performance, however, and I now like being reminded of the fact. I can't find where I originally read this performance note, but AMD states Quote: "Avoid placing code and data together within a cache line, especially if the data becomes modified." |
|||
![]() |
|
vivik 26 Mar 2018, 05:37
>>I don't want to keep variable declarations at the beginning of a function
>I think this separation improves performance It doesn't, it just makes life for compilers easier. I haven't seen a compiler that does a good job at creating local variables. Usually they just increase esp once and access them like [ebp-4] [ebp-8], instead of actually using push pop instructions. (This needs checking though.) >Avoid placing code and data together within a cache line, especially if the data becomes modified. That doesn't apply here. C code and local variables are separated no matter what. C code is in the code section, and local variables are in the stack. They are always at least 0x1000 bytes apart. |
|||
![]() |
|
DimonSoft 26 Mar 2018, 06:50
vivik wrote: It doesn't, it just makes life for compilers easier. I haven't seen a compiler that does a good job at creating local variables. Usually they just increase esp once and access them like [ebp-4] [ebp-8], instead of actually using push pop instructions. (This needs checking though.) But add esp, XX is the effective way to do that. The only reason to do something more complex is when the total size of local variables exceeds page size, but even then push/pop is not necessarily the best way to go, and even if they’re chosen, they’re not performed thousands of times anyway. And creating/destroying local variables at the machine-code level is the bad job. Code: for (int i = 0; i < 1000000; i++) { if (condition) { int x, y; ... } else { int v, w; ... } } The effective way is to allocate the piece of stack large enough to hold all the local variables that can exist simultaneously. Simplified version—sum of all the local variable sizes. You don’t want to increment and decrement stack pointer 1000000 times, your push/pop series are even worse from both performance and code size points of view. Besides, one allocation per procedure makes it possible to use the same ebp+XX offsets throughout the procedure which might be useful for code compression later or even make it easier for CPU to perform some optimizations. |
|||
![]() |
|
DimonSoft 26 Mar 2018, 07:01
P.S.
With most modern C/C++ compilers the place one declares local variables seems to make little to no difference since the compilers are going to put them all in one place anyway. But… 1) A C++ compiler would be required to call constructors and destructors for the stack-allocated objects on each enter/exit to/from the block. I can’t see how this can be eliminated without the knowledge of the actual algorithm that a compiler is not really capable to capture. 2) Declaring variables once at the top of a procedure makes code more self-documented and makes a programmer pay more attention to whether his/her procedure is good from design point of view (say, SRP). This is what makes one of the differences between Pascal/Delphi and C/C++ communities, IMHO: some of us like languages which insist on proper thinking and design, the others prefer the languages that let you do nearly everything including shooting your own feet. Wise ones take the best from the two worlds: use practices insisted by Pascal/Delphi while taking advantage of the freedom C/C++ gives in the corner cases. |
|||
![]() |
|
vivik 26 Mar 2018, 12:58
So, the question we are talking about now is, choosing between code like this:
Code: sub esp, c mov [esp+4],eax mov [esp+8],ecx mov [esp+c],edx ... add esp, c and this: Code: push eax
push ecx
push edx
...
add esp, c In short, as I understand it, first is faster, but second is smaller. Second is smaller, because "push eax" takes only one byte to encode (0x50). First is faster, because on cpu, operations are often executed in parallel. Second "push" has to wait for the first "push" to complete, because they both read and write the same register, esp. (Edit: push after push isn't an issue in all intel processors since 2003 (year the Pentium-M was released), but accessing some value by esp register right after push is still an issue). Personally, I'll choose the smaller way for the most of the program, and faster way for places where I can notice the difference. Because I can measure program size without a problem, but I can't even notice the speed difference for the most part, even on my computer. The real problem is using registers properly, and using good calling conventions. This requires either a "whole program optimization" (which is quite slow to compile), or to specify a custom calling convention for each function manually (which you have a quite limited choice of, in c at least), or writing the whole program manually in assembly (which most people never bother to do, because it's super slow). I will try the whole program optimization and optimization for size, need to see how good this is. By the way, got reminded about "cpu instruction parallelization" here https://stackoverflow.com/questions/49485395/what-c-c-compiler-can-use-push-pop-instructions-for-creating-local-variables . Out of 4 people who commented, one (edit: 3.5) was actually helpful. Good ratio, huh. Kind of hard of getting this problem solved by asking a bunch of completely unrelated questions, need to introduce people to the problem again and again and again and again and again. Got the attention of one super knowledgeable guy eventually, really happy for that. He sort of retold this post though (with extra info and confirmation, invaluable), and I stole this method from fasm sources. 
Last edited by vivik on 27 Mar 2018, 19:05; edited 3 times in total |
|||
![]() |
|
revolution 26 Mar 2018, 13:11
vivik wrote: Because I can measure program size without a problem, but I can't even notice the speed difference for the most part, even on my computer. |
|||
![]() |
|
DimonSoft 26 Mar 2018, 15:40
vivik wrote: So, the question we are talking about now is, choosing between code like this: Push is definitely the way to go for relatively small-typed local initialized variables. Sorry, if this was what you meant here: vivik wrote: It doesn't, it just makes life for compilers easier. I haven't seen a compiler that does a good job at creating local variables. Usually they just increase esp once and access them like [ebp-4] [ebp-8], instead of actually using push pop instructions. (This needs checking though.) But then I wonder where have you found a compiler which doesn’t use push for initialized local variables. Even Delphi compiler with optimizations off (which is known to generate code in a very straightforward manner) seems to use push in this rare case. |
|||
![]() |
|
vivik 26 Mar 2018, 17:09
Anybody knows how this option works in vs2005? The "Function Order (/ORDER:[file])", in "Configuration Properties -> Linker -> Optimization".
I've set it to "functionorder.txt", with this inside: Code: _main _WndProc The error is: Code: fatal error LNK1117: syntax error in option 'ORDER:functionorder.txt' I only found this page about it: https://msdn.microsoft.com/en-us/library/00kh39zz.aspx @DimonSoft I didn't realize there was an actual reason for not using push pop, thought it's just easier to generate code with esp and ebp that don't change. I'm still learning all this, right now. By the way, this is the reason the creation (and deletion) of variables mid function is a big deal for me, it will allow to use push pop more neatly, and will allow to reuse the stack space once its value is no longer needed. Though, compilers already can detect the last use of a variable without the explicit deletion. And I'm not sure what code compilers choose for mid function variable creation. I don't know anything, I need to study the output. |
|||
![]() |
|
donn 26 Mar 2018, 21:14
Regarding:
Quote: think this separation improves performance I meant the underlying instructions, not C, but beneath. Thinking about where they go ultimately or with best performance is how I like to make sense of language constructs (C in this case) that seem semantically arbitrary at times. Focusing on performance makes things less frustrating at times (or more if you get obsessed with it). There's a lot of value and room for optimization with continuous memory locations and unit-stride memory (think you were working with Direct2D images), along with prefetching if possible. The function reordering technique seemed interesting, as well... |
|||
![]() |
|
rugxulo 27 Mar 2018, 04:10
vivik wrote:
https://en.wikipedia.org/wiki/Register_renaming vivik wrote:
FreePascal uses register calling convention by default (like Delphi). Its IDE has good Windows support and a built-in debugger. It also supports nested functions (although so do GCC, TCC, maybe others??). Just saying, if you're constantly fighting against your tools, try something else. Hey, DimonSoft agrees with me. ![]() |
|||
![]() |
|
rugxulo 27 Mar 2018, 04:22
vivik wrote:
Nested functions can heavily simplify your code. If you really want to reuse stack space more wisely, factor your code into smaller functions. (Or use a union??) There was some comment somewhere where even Walter Bright bragged about the usefulness of nested functions, but I can't find it. EDIT: found it! Walter Bright wrote:
|
|||
![]() |
|
vivik 27 Mar 2018, 06:38
@rugxulo
Sorry, can you please compile this program with free pascal (or anything else), for 32bit windows? I want to see if the result will fit in 4 kilobytes. This probably requires to add some flags, to cut out runtime. Code: void main() { MessageBoxW(0, L"hello", L"world", 0); } Also, I heard here https://en.wikipedia.org/wiki/X86_calling_conventions about safecall, that encapsulates com exceptions. I'm not sure that this is a good thing, because while playing with direct3d for a bit I haven't seen any exceptions. I don't know if it's possible for direct3d functions to raise an exception, so it's inconvenient if safecall will generate extra code to catch them. I probably should register on some free pascal forum for this... |
|||
![]() |
|
DimonSoft 27 Mar 2018, 15:00
vivik wrote: Also, I heard here https://en.wikipedia.org/wiki/X86_calling_conventions about safecall, that encapsulates com exceptions. I'm not sure that this is a good thing, because while playing with direct3d for a bit I haven't seen any exceptions. I don't know if it's possible for direct3d functions to raise an exception, so it's inconvenient if safecall will generate extra code to catch them. The whole idea of safecall is to avoid the case when a procedure is called by COM (i.e. is a callback) and throws an exception. COM requires that error codes are used instead of exceptions. This is required because of different ways exceptions are implemented in different languages while COM is intended to be language-agnostic. So, safecall basically means that a compiler inserts a large try…except clause which returns an error code corresponding to any exception thrown within the procedure. UPD. I’ve forgotten that the reverse conversion is also performed which basically goes like Code: hr := SomeSafecallProcedure(…); if Failed(hr) then raise … i.e. throws an exception if the procedure being called is safecall and the return value means failure. rugxulo wrote: FreePascal uses register calling convention by default (like Delphi). Its IDE has good Windows support and a built-in debugger. It also supports nested functions (although so do GCC, TCC, maybe others??). Just saying, if you're constantly fighting against your tools, try something else. Hey, DimonSoft agrees with me. Well, I wouldn’t say I’m a big fan of nested functions since it’s difficult to choose a consistent coding style for them (paddings, etc.). What I definitely agree is that fighting against tools is evil and that Pascal descendants are generally better for creating high-quality software (from the ISO/IEC 25010 point of view). |
|||
![]() |
|
vivik 27 Mar 2018, 20:17
Can't make the "main" be the first function in the exe. Tried a different linker for this "golink main.obj user32.dll kernel32.dll /entry _main", but no luck, I guess the order is changed in the object file itself...
|
|||
![]() |
|
vivik 02 Apr 2018, 18:42
Interesting, if I replace WinMain with something else, malloc breaks (program doesn't return from malloc, it just closes). I guess there is some initialization of it at the beginning, which tampering with entry point removes.
|
|||
![]() |
|
rugxulo 03 Apr 2018, 17:54
vivik wrote: I probably should register on some free pascal forum for this... Well, yes, please do register on some appropriate forum because I'm not a good reference since I don't understand or target native Windows. I just meant, in general, that FPC is very very good and should do what you want. vivik wrote: @rugxulo I don't really do Windows, so I'm a total noob regarding that. But a quick search online finds this: Code: Program Hello; uses windows; begin MessageBox(0,'Hello','Hello',0); end. I'm not 100% sure which switches are optimal, but "fpc -CX -XXs -O3 -Mdelphi hello.pas" gives me 31,232 bytes (PE/Win32, static, no third-party .DLLs needed). UPX 3.94 --ultra-brute halves that to 15,360 bytes. I know that's not quite 4 kb, but I don't know how else to improve that. Certainly there is a way, but I dunno what! But I did also find an interesting website (circa 2011) about slimming Delphi, which claims, "Now you have an application of approximately 1Kb written in Delphi". (AFAIK, there is a 32-bit freeware Delphi Starter version nowadays, but I'd blindly prefer FPC instead.) P.S. Also see FPC's wiki about Size Matters for more details. |
|||
![]() |
|
vivik 08 Apr 2018, 10:30
How can I import kernel32 functions by ordinal? This link https://msdn.microsoft.com/en-us/library/e7tsx612.aspx says something about .def files, where are they in vs2005? What should I change in project options?
|
|||
![]() |
|
Goto page Previous 1, 2, 3 Next < Last Thread | Next Thread > |
Forum Rules:
|
Copyright © 1999-2023, Tomasz Grysztar. Also on GitHub, YouTube.
Website powered by rwasa.