Usually an HLL wraps our useful source code in HLL runtime stuff, which keeps growing from version to version.
This topic will always contain 6 posts: _DllMainCRTStartup and its callee tree, DllCanUnloadNow, DllGetClassObject, DllRegisterServer, and DllUnregisterServer, each with its subcall tree. The 6th post is reserved for the definitions of the structures used.
You can see how the same tasks are implemented in x86 and x64.
Note 1: The explored binaries have some specifics: the VC++6.0 projects (both x86 and x64) have thread library calls disabled; the VC++7.0 project exists only for x86 and has thread library calls enabled.
Note 2: The attached asm sources are not identical to what VC++ generates, but they have exactly the same functionality.
Note 3: My first 6 posts will contain inaccuracies and assumptions; however, they will be continuously edited.
_DllMainCRTStartup - it is included in every VC++ DLL with an invariable implementation (invariable regardless of the VC++ version, invariable regardless of payload code specifics (such as calling DisableThreadLibraryCalls, or implementing the DLL as a simple DLL or as a COM server), completely invariable)
BOOL __stdcall _DllMainCRTStartup(HANDLE hinstDLL, DWORD fdwReason, VOID *lpReserved)
x86 VC++6.0, 7.0: DllEntryPoint:
; DLL entry point, x86 variant: forwards the notification to the optional
; [rawDllMain] hook, to _CRT_INIT and to DllMain.
; Register roles after the prologue:
;   ebx = hinstDLL
;   esi = fdwReason (negated in place before the first test)
;   edi = lpReserved, then reloaded with the [rawDllMain] pointer
; The argument triple is pushed once; each "sub esp, 12" before a call appears
; to re-expose that same triple after the previous callee released it.
; NOTE(review): no definition lines are visible for the branch targets used
; here (.attachOnly, .common, .rawskip1, .dettachOnly, .exit) - presumably
; they were lost from the listing; confirm against the original post.
.hinstDLL = _stackarg(0) ; where "_stackarg equ esp+ptrsize+ptrsize*" and ptrsize for x86 equal to 4
.fdwReason = _stackarg(1)
.lpReserved = _stackarg(2)
mov edi, edi ; x86 function start signature
; save the three registers this routine uses as long-lived storage
push ebx
push esi
push edi
; the "+3*_push" term compensates for the three pushes above
mov ebx, [.hinstDLL+3*_push] ; where "_push equ ptrsize" and ptrsize for x86 equal to 4
mov esi, [.fdwReason+3*_push]
mov edi, [.lpReserved+3*_push]
; build the (hinstDLL, fdwReason, lpReserved) argument triple, last argument first
push edi
push esi
push ebx
mov edi, [rawDllMain]
neg esi ; with negating it is simple to separate attaches & detaches, process & thread in 1 test operation
test esi, 2
; NOTE(review): (-fdwReason) AND 2 can only be 0 or 2, so PF always mirrors ZF
; after this test; as listed, the jnp/jp branches below can never be taken -
; confirm how process and thread notifications were meant to be told apart.
jnz .attachOnly
jnp .common
mov eax, [skip_DLL_PROCESS_DETACH]
test eax, eax
jnz .common
; drop the pushed argument triple before leaving
add esp, 12
jmp .exit
; call the raw hook only when the pointer is non-null; a zero result aborts
test edi, edi
jz .rawskip1
call edi
test eax, eax
jz .exit
sub esp, 12
call _CRT_INIT
test eax, eax
jz .exit
sub esp, 12
call DllMain
; ebx now carries the DllMain result instead of hinstDLL
mov ebx, eax
test esi, 2
jz .dettachOnly
jp .exit
test eax, eax
jz .exit
mov [_stackarg(1) +_beforesettingparameters(3) +_beforecall], eax ; where "_beforesettingparameters equ (-ptrsize)*", "_beforecall equ -ptrsize" and ptrsize for x86 equal to 4
sub esp, 12
call _CRT_INIT
; combine the _CRT_INIT result with the saved DllMain result
and eax, ebx
jz .exit
test esi, 2
jnz .exit
test edi, edi
jz .exit
sub esp, 12
call edi
; epilogue: restore the saved registers; "ret $0C" releases the 12 argument bytes
pop edi
pop esi
pop ebx
ret $0C
In every x64 fastcall procedure we have room for storing 5 registers:
4 of them can be stored in the stack shadow space that the caller creates according to the calling convention,
and 1 register can be stored in the slot the callee would otherwise spend on stack alignment.
BOOL __fastcall _DllMainCRTStartup(HANDLE hinstDLL, DWORD fdwReason, VOID *lpReserved)
x64 VC++6.0: DllEntryPoint:
; DLL entry point, x64 variant: same call sequence as the x86 listing
; ([rawDllMain] hook, _CRT_INIT, DllMain), with arguments reloaded into
; rcx/rdx/r8 before every call.
; Register roles after the prologue: rbx = rdx, rbp = rcx, rsi = r8,
; r10 = [rawDllMain], edi = DllMain result.
; NOTE(review): rbp receives rcx (hinstDLL per the prototype), yet the
; neg/test sequences below operate on rbp while the x86 listing negates
; fdwReason - confirm which register is meant to carry fdwReason.
; NOTE(review): r10 is a volatile register in the Microsoft x64 convention, so
; its value is not guaranteed to survive the calls below - confirm.
; NOTE(review): the branch targets (.attachOnly, .common, .rawskip1,
; .dettachOnly, .exit) and a final "ret" are not visible in this listing.
add rsp, $28 ;return stack to state before placing stack shadow (that always 4*8=$20 bytes size) and placing 8 byte retaddres
push rbx ;use shadow for storing register
push rbp ;use shadow for storing register
push rsi ;use shadow for storing register
push r10 ;use shadow for storing register
sub rsp, 8 ;skip retaddr
push rdi ;use alignment of stack for storing register, instead of useless analogue "and rsp,not $F"
sub rsp, $20 ;make minimal room 4*8 for first 4 param shadow in stack for subcalls
mov rbx, rdx
mov rbp, rcx
mov rsi, r8
mov r10, [rawDllMain]
; non-destructive test: rbp is saved, negated, tested and restored;
; "pop" leaves the flags produced by "test" untouched
push rbp
neg rbp
test rbp, 2
pop rbp
; NOTE(review): the test result can only be 0 or 2, so PF mirrors ZF and the
; jnp/jp branches cannot be taken as listed - confirm.
jnz .attachOnly
jnp .common
mov rax, [skip_DLL_PROCESS_DETACH]
test rax, rax
jnz .common
jmp .exit
; call the raw hook only when the pointer is non-null; a zero result aborts
test r10, r10
jz .rawskip1
call r10
test eax, eax
jz .exit
; reload the three arguments for _CRT_INIT
mov rdx, rbx
mov rcx, rbp
mov r8, rsi
call _CRT_INIT
test eax, eax
jz .exit
; reload the three arguments for DllMain
mov rdx, rbx
mov rcx, rbp
mov r8, rsi
call DllMain
; keep the DllMain result in edi
mov edi, eax
push rbp
neg rbp
test rbp, 2
pop rbp
jz .dettachOnly
jp .exit
test eax, eax
jz .exit
; overwrites rbp with the DllMain result (the 32-bit write clears the upper half)
mov ebp, eax
mov rdx, rbx
mov rcx, rbp
mov r8, rsi
call _CRT_INIT
; combine the _CRT_INIT result with the saved DllMain result
and eax, edi
jz .exit
push rbp
neg rbp
test rbp, 2
pop rbp
jnz .exit
test r10, r10
jz .exit
mov rdx, rbx
mov rcx, rbp
mov r8, rsi
call r10
; epilogue: mirror image of the prologue
add rsp, $20
pop rdi
add rsp, 8
pop r10
pop rsi
pop rbp
pop rbx
sub rsp, $28
subcall _CRT_INIT of _DllMainCRTStartup
BOOL __stdcall _CRT_INIT(HANDLE hinstDLL, DWORD fdwReason, VOID *lpReserved)
x86 VC++6.0: _CRT_INIT:
; CRT start-up/shutdown helper, x86 VC++6.0 variant.
; Dispatches on fdwReason: 1 falls through to the initialisation path,
; 0 goes to .ProcDetach, anything above 1 goes to .ThreadCases.
; Returns eax = 1 or eax = 0 and releases its 12 argument bytes.
; NOTE(review): the definition lines of .ProcDetach, .ThreadCases, .retTrue,
; .retFalse, .loop___onexit, .Free and .exit are not visible in this listing.
.hinstDLL = _stackarg(0) ; where "_stackarg equ esp+ptrsize+ptrsize*" and ptrsize for x86 equal to 4
.fdwReason = _stackarg(1)
.lpReserved = _stackarg(2)
mov edi, edi ; x86 function start signature
cmp [.fdwReason], 1
jb .ProcDetach
ja .ThreadCases
; fdwReason == 1: request a table of 32 pointers
mov ecx, 32*sizeof.ptr
call _malloc_crt
; begin and end both point at the new table, i.e. the table starts out empty
mov [___onexitbegin], eax
mov [___onexitend], eax
; allocation failure leaves with eax = 0
test eax, eax
jz .exit
; clear the first table entry
mov [eax], 0
;ccall msvcrt.dll.__initterm (___xc_a,___xc_z)
;description: call list of functions from ___xc_a to ___xc_z
;body: list has only one item
call ___security_init_cookie
;end body
inc [crt_init_done]
jmp .retTrue
; presumably the .ProcDetach path: decrement the counter and fail if it
; goes negative
sub [crt_init_done],1
js .retFalse
; nothing to run when the table was never allocated
mov eax, [___onexitbegin]
test eax, eax
jz .retTrue
; walk the table backwards, one pointer per step, until the cursor drops
; below the table start
sub [___onexitend], sizeof.ptr
mov ecx, [___onexitend]
cmp ecx, eax
jb .Free
; call the entry unless it is null
mov eax, [ecx]
test eax, eax
jz .loop___onexit
call eax
mov eax, [___onexitbegin]
jmp .loop___onexit
; release the table (eax = table start) and mark it as gone;
; "pop ecx" discards the argument pushed for the call
push eax
call msvcrt.dll.__free
pop ecx
mov [___onexitbegin], 0
; eax = 1
xor eax, eax
inc eax
jmp .exit
; eax = 0
xor eax, eax
ret $0C
BOOL __fastcall _CRT_INIT(HANDLE hinstDLL, DWORD fdwReason, VOID *lpReserved)
x64 VC++6.0: _CRT_INIT:
; CRT start-up/shutdown helper, x64 VC++6.0 variant; same structure as the
; x86 VC++6.0 listing with fdwReason taken from rdx.
; NOTE(review): the definition lines of .ProcDetach, .ThreadCases, .retTrue,
; .retFalse, .loop___onexit, .Free and .exit, and a final "ret", are not
; visible in this listing.
; reserve $28 bytes of stack for the duration of the routine
sub rsp, $28
cmp rdx, 1
jb .ProcDetach
ja .ThreadCases
; fdwReason == 1: request a table of 32 pointers
mov ecx, 32*sizeof.ptr
call _malloc_crt
; begin and end both point at the new table, i.e. the table starts out empty
mov [___onexitbegin], rax
mov [___onexitend], rax
; allocation failure leaves with eax = 0
test rax, rax
jz .exit
; clear the first table entry
mov [rax], 0
;fastcall msvcrt.dll.__initterm (___xc_a,___xc_z)
;description: call list of functions from ___xc_a to ___xc_z
;body: list has only one item
call ___security_init_cookie
;end body
inc [crt_init_done]
jmp .retTrue
; presumably the .ProcDetach path: decrement the counter and fail if it
; goes negative
sub [crt_init_done],1
js .retFalse
; nothing to run when the table was never allocated
mov rax, [___onexitbegin]
test rax, rax
jz .retTrue
; walk the table backwards, one pointer per step, until the cursor drops
; below the table start
sub [___onexitend], sizeof.ptr
mov rcx, [___onexitend]
cmp rcx, rax
jb .Free
; call the entry unless it is null
mov rax, [rcx]
test rax, rax
jz .loop___onexit
call rax
mov rax, [___onexitbegin]
jmp .loop___onexit
; release the table (rcx = table start) and mark it as gone
mov rcx, rax
call msvcrt.dll.__free
mov [___onexitbegin], 0
; eax = 1
xor eax, eax
inc eax
jmp .exit
; eax = 0
xor eax, eax
add rsp, $28
As we can see, _CRT_INIT for the variant with disabled thread library calls performs only one useful action - it calls ___security_init_cookie.
Of course, it also initializes a table of up to 32 procedure entries, which are called one by one on _CRT_INIT(...,dllProcessDetach,...); after all such calls are made, the table is freed. But this table is initially empty, and it looks like it is never filled anywhere in the code.
BOOL __stdcall _CRT_INIT(HANDLE hinstDLL, DWORD fdwReason, VOID *lpReserved)
x86 VC++7.0: _CRT_INIT:
; CRT start-up/shutdown helper, x86 VC++7.0 variant.
; Dispatches on fdwReason: 1 falls through to the initialisation chain,
; 0 goes to .ProcDetach, anything above 1 goes to .ThreadCases.
; NOTE(review): the definition lines of the branch targets (.ProcDetach,
; .ThreadCases, .ThreadDetach, .retTrue, .retFalse, .exit, .Free,
; .heap_terminate, .multythreading_terminate, .io_terminate, .after__cexit,
; .skip_BuildNumber_patch_for_nonNT) are not visible in this listing.
.hinstDLL = _stackarg(0) ; where "_stackarg equ esp+ptrsize+ptrsize*" and ptrsize for x86 equal to 4
.fdwReason = _stackarg(1)
.lpReserved = _stackarg(2)
mov edi, edi ; x86 function start signature
cmp [.fdwReason], 1
jb .ProcDetach
ja .ThreadCases
push OSverINFOa ;in original OSverINFOa located in stack, and its border tested with alloca_probe
call [GetVersionExA] ; require win2000+
test eax, eax
jz .retFalse
mov eax,[OSverINFOa.dwPlatformId]
; NOTE(review): "mov" does not change flags, and ZF is clear on this path
; because the jz above was not taken, so this jz cannot be taken as listed;
; a compare on dwPlatformId appears to be missing - confirm.
jz .skip_BuildNumber_patch_for_nonNT
or [OSverINFOa.dwBuildNumber], $8000
; initialisation chain: each step that reports failure jumps to the matching
; teardown target
call __heap_init
pop ecx
test eax, eax
jz .exit
call multythreading_init
test eax, eax
jz .heap_terminate
call sub_10008198
call [GetCommandLineA]
call ___crtGetEnvironmentStringsA
mov [lpEnvStringsA], eax
; the next three steps treat a negative result as failure
call __ioinit
test eax, eax
jl .multythreading_terminate
call sub_10007FD4
test eax, eax
jl .io_terminate
call __setenvp
test eax, eax
jl .io_terminate
; this step treats any non-zero result as failure
call sub_100078CE
test eax, eax
jnz .io_terminate
inc [crt_init_done]
jmp .retTrue
; teardown after a failed initialisation, in reverse order of set-up
call __ioterm
call __mtterm
call __heap_term
jmp .retFalse
; presumably the .ProcDetach path: decrement the counter and fail if it
; goes negative
sub [crt_init_done],1
js .retFalse
; __cexit is skipped when [skip__cexit] is non-zero
cmp [skip__cexit],0
jnz .after__cexit
call __cexit
call __ioterm
call __mtterm
call __heap_term
jmp .retTrue
; presumably the .ThreadCases path: bit 0 of fdwReason selects .ThreadDetach
test [.fdwReason], 1
jnz .ThreadDetach
; obtain a block through sub_10007A9C(1, $88); a null result leaves with eax = 0
ccall sub_10007A9C, 1, $88
test eax, eax
jz .exit
; store the block under the [dwTlsIndex] slot
push eax
push [dwTlsIndex]
call [TlsSetValue]
sub esp,4 ; place 1st param same as in previous
test eax, eax
jz .Free
call __initptd
;pop ecx - make it later
call [GetCurrentThreadId]
; ecx = the block; field at +4 is set to -1, field at +0 gets the thread id
pop ecx
or [ecx+4],-1
mov [ecx+0],eax
jmp .retTrue
; presumably the .Free path: release the block and report failure
call msvcrt.dll.__free
pop ecx
xor eax, eax
jmp .exit
; presumably the .ThreadDetach path: sub_10007657(0), argument discarded by
; the caller afterwards
push 0
call sub_10007657
pop ecx
; eax = 1
xor eax, eax
inc eax
ret $0C
subcall DllMain of _DllMainCRTStartup
BOOL __stdcall DllMain(HANDLE hinstDLL, DWORD fdwReason, VOID *lpReserved)
x86 VC++6.0: DllMain:
; User-level DllMain, x86 VC++6.0 variant.
; fdwReason == 1 falls through to the ATL module set-up, fdwReason == 0 goes
; to .ProcDetach, anything above 1 goes straight to .retTrue.
; NOTE(review): the definition lines of .ProcDetach and .retTrue are not
; visible in this listing.
.hinstDLL = _stackarg(0) ; where "_stackarg equ esp+ptrsize+ptrsize*" and ptrsize for x86 equal to 4
.fdwReason = _stackarg(1)
.lpReserved = _stackarg(2)
mov edi, edi ; x86 function start signature
cmp [.fdwReason], 1
jb .ProcDetach
ja .retTrue
; arguments for AtlModuleInit, last one first: hinstDLL, ATLObjMap, CLayerUIModule
push [.hinstDLL]
push ATLObjMap
mov ecx, CLayerUIModule
push ecx
;mov [lpCLayerUIModule], ecx ; such tasks for design time in assembly but not for real time code
; fill the size/version header of the module structure before the call
mov [ecx+ATL_BASE_MODULE30.cbSize], sizeof.ATL_BASE_MODULE30
mov [ecx+ATL_BASE_MODULE30.pguidVer], GUID_ATLVer30
mov [ecx+ATL_BASE_MODULE30.dwAtlBuildVer], ver3.0
call [AtlModuleInit]
;test eax, eax
;js .skip_ATLModule_ver_3_1_specific
push [.hinstDLL]
call SHFusionInitializeFromModuleID
sub esp,8 ; keep .hinstDLL saved in stack from trashing by calling of LinkWindow_RegisterClass
call LinkWindow_RegisterClass
call [DisableThreadLibraryCalls]
; rebalance the stack; the popped values are not used
pop eax
pop eax
jmp .retTrue
; presumably the .ProcDetach path: undo the set-up
call SHFusionUninitialize
push CLayerUIModule
call [AtlModuleTerm]
; eax = 1
xor eax, eax
inc eax
ret $0C
BOOL __fastcall DllMain(HANDLE hinstDLL, DWORD fdwReason, VOID *lpReserved)
x64 VC++6.0: DllMain:
; User-level DllMain, x64 VC++6.0 variant.
; rbx keeps hinstDLL (copied from rcx) across the calls.
; fdwReason (rdx) == 1 falls through to the ATL module set-up, 0 goes to
; .ProcDetach, anything above 1 goes straight to .retTrue.
; NOTE(review): the definition lines of .ProcDetach, .retTrue and
; .skip_ATLModule_ver_3_1_specific, and a final "ret", are not visible in
; this listing.
push rbx
sub rsp, $20
mov rbx, rcx
cmp rdx, 1
jb .ProcDetach
ja .retTrue
; arguments for the module init call: rcx = CLayerUIModule, rdx = ATLObjMap,
; r8 = hinstDLL
mov r8, rbx
mov rdx, ATLObjMap
mov rcx, CLayerUIModule
;mov [lpCLayerUIModule], rcx ; such tasks for design time in assembly but not for real time code
; fill the size/version header of the module structure before the call
mov [rcx+ATL_BASE_MODULE31.cbSize], sizeof.ATL_BASE_MODULE31
mov [rcx+ATL_BASE_MODULE31.pguidVer], GUID_ATLVer30
mov [rcx+ATL_BASE_MODULE31.dwAtlBuildVer], ver3.1
call [ATLModuleInit]
; a negative result skips the 3.1-specific field
test eax, eax
js .skip_ATLModule_ver_3_1_specific
; NOTE(review): rcx is used here after a call; rcx is volatile in the
; Microsoft x64 convention, so it may no longer hold CLayerUIModule - confirm.
mov [rcx+ATL_BASE_MODULE31.field3_1], 1
mov rcx, rbx
call SHFusionInitializeFromModuleID
call LinkWindow_RegisterClass
mov rcx, rbx
call [DisableThreadLibraryCalls]
jmp .retTrue
; presumably the .ProcDetach path: undo the set-up
call SHFusionUninitialize
lea rcx, [CLayerUIModule]
call [AtlModuleTerm]
; eax = 1
xor eax, eax
inc eax
; epilogue: mirror image of the prologue
add rsp, $20
pop rbx
BOOL __stdcall DllMain(HANDLE hinstDLL, DWORD fdwReason, VOID *lpReserved)
x86 VC++7.0: DllMain:
; Stub DllMain: none of the three arguments is read and every notification
; is answered with eax = 1.
.hinstDLL = _stackarg(0) ; where "_stackarg equ esp+ptrsize+ptrsize*" and ptrsize for x86 equal to 4
.fdwReason = _stackarg(1)
.lpReserved = _stackarg(2)
mov edi, edi ; two-byte no-op kept as the x86 function start signature
mov eax, 1 ; return value: 1
ret $0C ; release the 12 bytes of arguments on return
In the case of VC++7.0 all initialization happens in _CRT_INIT. Nothing is left for DllMain.
AtlModuleInit is exported from ATL.DLL.
HRESULT __stdcall AtlModuleInit(struct _ATL_MODULE_30 *ATL__CComModule, struct _ATL_OBJMAP_ENTRY *ATLObjMap, HANDLE hinstDLL)
x86 VC++6.0: AtlModuleInit:
; Initialises an ATL module structure, x86 variant.
; esi = module pointer, ebx = 0 (used as the zero source), edi = current
; object-map entry in the final loop.
; Each InitializeCriticalSection call is bracketed by TryLevel updates so that
; the matching scope-table handler is active only around that call.
; NOTE(review): the definition lines of AtlModuleInit.retE_INVALIDARG,
; .sizeOK, .ForEachObjEntry, .retHRESULT_S_OK and .Epilog, and the
; instruction loading the error code, are not visible in this listing.
.ATL__CComModule = _stackarg(0) ; where "_stackarg equ esp+ptrsize+ptrsize*" and ptrsize for x86 equal to 4
.ATLObjMap = _stackarg(1)
.hinstDLL = _stackarg(2)
; NOTE(review): empty virtual block - presumably it declared the .ms_exc
; frame fields used below; confirm.
virtual at ebp-$18
end virtual
; set up the SEH frame with this routine's scope table
push $0C
push offset _SCOPETABLE_AtlModuleInit
call _SEH_prolog
; reject a null module pointer
mov esi, [.ATL__CComModule]
test esi, esi
jz AtlModuleInit.retE_INVALIDARG
; only structure sizes $64 and $84 are accepted
mov eax, [esi+_ATL_MODULE_30.cbSize]
cmp eax,$64
jz AtlModuleInit.sizeOK
cmp eax,$84
jz AtlModuleInit.sizeOK
jmp AtlModuleInit.Epilog
; record the object map and use hinstDLL for all three instance handles
mov eax, [.ATLObjMap]
mov [esi+_ATL_MODULE_30.m_pObjMap], eax
mov eax, [.hinstDLL]
mov [esi+_ATL_MODULE_30.m_hInst], eax
mov [esi+_ATL_MODULE_30.m_hInstResource], eax
mov [esi+_ATL_MODULE_30.m_hInstTypeLib], eax
xor ebx,ebx
mov [esi+_ATL_MODULE_30.m_nLockCnt], ebx
mov [esi+_ATL_MODULE_30.m_hHeap], ebx
; guarded region 0: m_csTypeInfoHolder
mov [.ms_exc.registration.TryLevel], ebx
lea eax, [esi+_ATL_MODULE_30.m_csTypeInfoHolder]
push eax
call [InitializeCriticalSection]
or [.ms_exc.registration.TryLevel], -1
; guarded region 1: m_csWindowCreate
mov [.ms_exc.registration.TryLevel], 1
lea eax, [esi+_ATL_MODULE_30.m_csWindowCreate]
push eax
call [InitializeCriticalSection]
or [.ms_exc.registration.TryLevel], -1
; guarded region 2: m_csObjMap
mov [.ms_exc.registration.TryLevel], 2
lea eax, [esi+_ATL_MODULE_30.m_csObjMap]
push eax
call [InitializeCriticalSection]
or [.ms_exc.registration.TryLevel], -1
; the $64-byte structure has none of the fields below
cmp [esi+_ATL_MODULE_30.cbSize], $64
je AtlModuleInit.retHRESULT_S_OK
mov [esi+_ATL_MODULE_30.m_pCreateWndList], ebx
mov [esi+_ATL_MODULE_30.m_bDestroyHeap], 1
mov [esi+_ATL_MODULE_30.m_dwHeaps], ebx
mov [esi+_ATL_MODULE_30.m_phHeaps], ebx
mov [esi+_ATL_MODULE_30.m_nHeap], ebx
mov [esi+_ATL_MODULE_30.m_pTermFuncs], ebx
; walk the object map; an entry with a null pclsid ends the walk
mov edi, [esi+_ATL_MODULE_30.m_pObjMap]
test edi, edi
jz AtlModuleInit.retHRESULT_S_OK
mov eax, [edi+_ATL_OBJMAP_ENTRY.pclsid]
test eax, eax
jz AtlModuleInit.Epilog
; invoke the entry's RevokeClassObject slot with TRUE
push TRUE
call [edi+_ATL_OBJMAP_ENTRY.RevokeClassObject]
; advance: edi = _NextObjectMapEntry(module, entry)
push edi
push esi
call _NextObjectMapEntry;_NextObjectMapEntry(_ATL_MODULE_30 *,_ATL_OBJMAP_ENTRY *)
mov edi, eax
jmp AtlModuleInit.ForEachObjEntry
; eax = 0
xor eax, eax
call __SEH_epilog
ret $0C
AtlModuleInit SEH part _SCOPETABLE_AtlModuleInit:
; Scope table for AtlModuleInit: three entries, one per guarded
; InitializeCriticalSection call. All share one filter and none has an
; enclosing level (-1); only the handler differs.
; NOTE(review): the definition lines of AtlModuleInit.ExceptionFilter,
; .Level0Handler, .Level1Handler and .Level2Handler are not visible in this
; listing; the code below is presumably the filter followed by the handlers
; for levels 2, 1 and 0 (judging by how many critical sections each one
; deletes and clears) - confirm.
._0.EnclosingLevel dd -1
._0.FilterFunc dd AtlModuleInit.ExceptionFilter
._0.HandlerFunc dd AtlModuleInit.Level0Handler
._1.EnclosingLevel dd -1
._1.FilterFunc dd AtlModuleInit.ExceptionFilter
._1.HandlerFunc dd AtlModuleInit.Level1Handler
._2.EnclosingLevel dd -1
._2.FilterFunc dd AtlModuleInit.ExceptionFilter
._2.HandlerFunc dd AtlModuleInit.Level2Handler
; three zero dwords follow the last entry
dd 3 dup 0
virtual at ebp-$18
end virtual
; filter: fetch the exception code through the exception-pointers record
mov eax, [.ms_exc.exc_ptr]
mov eax, [eax+_EXCEPTION_POINTERS.ExceptionRecord]
mov eax, [eax+EXCEPTION_RECORD.ExceptionCode]
; NOTE(review): "xor ecx, ecx" sets ZF, so "setz cl" always yields 1 as
; listed and the fetched code is not examined; a compare against an expected
; exception code appears to be missing - confirm.
xor ecx, ecx
setz cl
mov eax, ecx
virtual at ebp-$18
end virtual
; handler: restore esp, delete the two critical sections that were already
; initialised, then clear the storage of three critical sections starting at
; m_csTypeInfoHolder
mov esp, [.ms_exc.old_esp]
mov esi, [AtlModuleInit.ATL__CComModule]
lea eax, [esi+_ATL_MODULE_30.m_csWindowCreate]
push eax
call [DeleteCriticalSection]
lea eax, [esi+_ATL_MODULE_30.m_csTypeInfoHolder]
push eax
call [DeleteCriticalSection]
; count is expressed in dwords to match "rep stosd"
inDWORDS equ /4
mov ecx, 3*sizeof.RTL_CRITICAL_SECTION inDWORDS;
xor eax, eax
lea edi, [esi+_ATL_MODULE_30.m_csTypeInfoHolder]
rep stosd
or [.ms_exc.registration.TryLevel], -1
jmp AtlModuleInit.Epilog
virtual at ebp-$18
end virtual
; handler: restore esp, delete m_csTypeInfoHolder, then clear the storage of
; two critical sections starting at m_csTypeInfoHolder
mov esp, [.ms_exc.old_esp]
mov esi, [AtlModuleInit.ATL__CComModule]
lea eax, [esi+_ATL_MODULE_30.m_csTypeInfoHolder]
push eax
call [DeleteCriticalSection]
inDWORDS equ /4
mov ecx, 2*sizeof.RTL_CRITICAL_SECTION inDWORDS;
xor eax, eax
lea edi, [esi+_ATL_MODULE_30.m_csTypeInfoHolder]
rep stosd
or [.ms_exc.registration.TryLevel], -1
jmp AtlModuleInit.Epilog
virtual at ebp-$18
end virtual
; handler: restore esp and clear the storage of one critical section at
; m_csTypeInfoHolder; nothing is deleted on this path
mov esp, [.ms_exc.old_esp]
mov esi, [AtlModuleInit.ATL__CComModule]
inDWORDS equ /4
mov ecx, 1*sizeof.RTL_CRITICAL_SECTION inDWORDS;
xor eax, eax
lea edi, [esi+_ATL_MODULE_30.m_csTypeInfoHolder]
rep stosd
or [.ms_exc.registration.TryLevel], -1
jmp AtlModuleInit.Epilog
HRESULT __fastcall AtlModuleInit(struct _ATL_MODULE_30 *ATL__CComModule, struct _ATL_OBJMAP_ENTRY *ATLObjMap, HANDLE hinstDLL)
x64 VC++6.0: AtlModuleInit:
; Initialises an ATL module structure, x64 variant.
; In:  rcx = module pointer, rdx = object map, r8 = hinstDLL
; Out: eax = 0 on the success path shown at the end of the listing
; rbx = module pointer for the main path; rdi = object-map cursor in the
; final loop, or the clear destination on the exception paths.
; The incoming rcx is pushed into its own shadow slot by the prologue so the
; exception paths can reload the module pointer from the stack.
; NOTE(review): the definition lines of AtlModuleInit.retE_INVALIDARG,
; .sizeOK, .noException_0/1/2, .ForEachObjEntry, .retHRESULT_S_OK and
; .Epilog, the instruction loading the error code, and a final "ret" are not
; visible in this listing.
.ATL__CComModule.shadow = _stackarg(0) ; where "_stackarg equ esp+ptrsize+ptrsize*" and ptrsize for x64 equal to 8
.ATLObjMap.shadow = _stackarg(1)
.hinstDLL.shadow = _stackarg(2)
add rsp, $28 ;return stack to state before placing stack shadow (that always 4*8=$20 bytes size) and placing 8 byte retaddres
push rbx ;use shadow for storing register
push rdi ;use shadow for storing register
push rsi ;use shadow for storing register
push rcx ;use shadow for storing register
sub rsp, $30 ;skip retaddr & alignment & make minimal room 4*8 for first 4 param shadow in stack for subcalls
; reject a null module pointer
test rcx, rcx; ? ATL__CComModule
jz AtlModuleInit.retE_INVALIDARG
; only structure sizes $B0, $F0 and $F8 are accepted
mov eax, [rcx+_ATL_MODULE_30.cbSize]
cmp eax,$B0
jz AtlModuleInit.sizeOK
cmp eax,$F0
jz AtlModuleInit.sizeOK
cmp eax,$F8
jz AtlModuleInit.sizeOK
jmp AtlModuleInit.Epilog
; record the object map and use hinstDLL for all three instance handles
mov rbx, rcx
mov [rbx+_ATL_MODULE_30.m_pObjMap], rdx ; <ATLObjMap
mov [rbx+_ATL_MODULE_30.m_hInst], r8 ; <hinstDLL
mov [rbx+_ATL_MODULE_30.m_hInstResource], r8 ; <hinstDLL
mov [rbx+_ATL_MODULE_30.m_hInstTypeLib], r8 ; <hinstDLL
xor eax,eax
mov [rbx+_ATL_MODULE_30.m_nLockCnt], eax
mov [rbx+_ATL_MODULE_30.m_hHeap], rax
; guarded call 0: m_csTypeInfoHolder
lea rcx, [rbx+_ATL_MODULE_30.m_csTypeInfoHolder]
call [InitializeCriticalSection]
jnExc equ jmp ; patched by 2 byte nop, or ignored in case of exception
jnExc AtlModuleInit.noException_0
; exception path 0: nothing to delete; clear one critical section's storage.
; The count is given in qwords, so the store must be qword-wide (stosq);
; "xor eax,eax" clears all of rax, which is the value stosq writes.
mov rdi, [.ATL__CComModule.shadow+$28]
add rdi, _ATL_MODULE_30.m_csTypeInfoHolder
xor eax,eax
inQWORDS equ /8
mov ecx, 1*sizeof.RTL_CRITICAL_SECTION inQWORDS;
rep stosq
jmp AtlModuleInit.Epilog
; guarded call 1: m_csWindowCreate
lea rcx, [rbx+_ATL_MODULE_30.m_csWindowCreate]
call [InitializeCriticalSection]
jnExc AtlModuleInit.noException_1
; exception path 1: delete m_csTypeInfoHolder, then clear the storage of two
; critical sections starting there (qword count, qword store)
mov rdi, [.ATL__CComModule.shadow+$28]
add rdi, _ATL_MODULE_30.m_csTypeInfoHolder
mov rcx, rdi
call [DeleteCriticalSection]
xor eax,eax
inQWORDS equ /8
mov ecx, 2*sizeof.RTL_CRITICAL_SECTION inQWORDS;
rep stosq
jmp AtlModuleInit.Epilog
; guarded call 2: m_csObjMap
lea rcx, [rbx+_ATL_MODULE_30.m_csObjMap]
call [InitializeCriticalSection]
jnExc AtlModuleInit.noException_2
; exception path 2: delete m_csWindowCreate and m_csTypeInfoHolder, then clear
; the storage of three critical sections starting at m_csTypeInfoHolder
; (qword count, qword store)
mov rdi, [.ATL__CComModule.shadow+$28]
mov rcx, rdi
add rcx, _ATL_MODULE_30.m_csWindowCreate
call [DeleteCriticalSection]
add rdi, _ATL_MODULE_30.m_csTypeInfoHolder
mov rcx, rdi
call [DeleteCriticalSection]
xor eax,eax
inQWORDS equ /8
mov ecx, 3*sizeof.RTL_CRITICAL_SECTION inQWORDS;
rep stosq
jmp AtlModuleInit.Epilog
; the $B0-byte structure has none of the fields below
cmp [rbx+_ATL_MODULE_30.cbSize], $B0
je AtlModuleInit.retHRESULT_S_OK
xor eax, eax
mov [rbx+_ATL_MODULE_30.m_pCreateWndList], rax
mov [rbx+_ATL_MODULE_30.m_bDestroyHeap], 1
mov [rbx+_ATL_MODULE_30.m_dwHeaps], rax
mov [rbx+_ATL_MODULE_30.m_phHeaps], rax
mov [rbx+_ATL_MODULE_30.m_nHeap], rax
mov [rbx+_ATL_MODULE_30.m_pTermFuncs], rax
; walk the object map; an entry with a null pclsid ends the walk
mov rdi, [rbx+_ATL_MODULE_30.m_pObjMap]
test rdi, rdi
jz AtlModuleInit.retHRESULT_S_OK
mov rax, [rdi+_ATL_OBJMAP_ENTRY.pclsid]
test rax, rax
jz AtlModuleInit.Epilog
; invoke the entry's RevokeClassObject slot with TRUE
mov cl, TRUE
call [rdi+_ATL_OBJMAP_ENTRY.RevokeClassObject]
; advance by $48 bytes per entry and continue while the next entry's first
; field is non-zero
add rdi, $48
cmp [rdi],0
jnz AtlModuleInit.ForEachObjEntry
; only the $F8-byte structure has the extra field placed right after
; _ATL_MODULE_30; it is set to 1
cmp [rbx+_ATL_MODULE_30.cbSize], $F8
jb AtlModuleInit.retHRESULT_S_OK
mov [rbx+sizeof._ATL_MODULE_30], 1
; rax = 0
xor rax, rax
; epilogue: mirror image of the prologue
add rsp, $30
pop rcx ;use shadow for storing register
pop rsi ;use shadow for storing register
pop rdi ;use shadow for storing register
pop rbx
sub rsp, $28
We can summarize that AtlModuleInit does nothing useful for ATL module initialization except calling RevokeClassObject(TRUE) (the RevokeClassObject body contains only "ret 4"). And the whole ATL module contains nothing useful except 3 initialized critical sections.
