flat assembler
Message board for the users of flat assembler.
 Home   FAQ   Search   Register 
 Profile   Log in to check your private messages   Log in 
flat assembler > High Level Languages > what happened to fibonacci in gcc? huge code produced...

Author
Thread Post new topic Reply to topic
tthsqe



Joined: 20 May 2009
Posts: 653
what happened to fibonacci in gcc? huge code produced...
c code:

Code:
/* Naive doubly-recursive Fibonacci: values below 2 are returned unchanged,
   everything else is the sum of the two preceding terms. */
int f(int n) {
  return (n < 2) ? n : f(n - 1) + f(n - 2);
}



compiled by g++ 4.8.1 -O3 option:

Code:
# f(int) as emitted by g++ 4.8.1 -O3 (x86-64, Intel syntax, demangled labels).
# In:  edi = n          Out: eax = f(n)
# Saves/restores r15, r14, r13, r12, rbp, rbx and reserves 168 bytes of
# stack for spilled loop state.  The recursion has been flattened into a
# nest of loops (.L28/.L26/.L24/.L22/.L20/.L18/.L16/.L14/.L12); only the
# innermost loop at .L12 still performs a real `call f(int)`.
f(int):
        push    r15
        mov     eax, edi
        push    r14
        push    r13
        push    r12
        push    rbp
        push    rbx
        sub     rsp, 168
        cmp     edi, 1
        mov     DWORD PTR [rsp+108], edi
        jle     .L29
        lea     edx, [rdi-3]
        mov     DWORD PTR [rsp+44], 0
        mov     DWORD PTR [rsp+68], edx
.L28:
        lea     edx, [rax-1]
        cmp     edx, 1
        jle     .L30
        mov     esi, DWORD PTR [rsp+68]
        lea     edx, [rax-2]
        sub     eax, 4
        mov     DWORD PTR [rsp+92], eax
        mov     DWORD PTR [rsp+40], 0
        mov     ecx, edx
        mov     DWORD PTR [rsp+112], edx
        shr     esi
        mov     DWORD PTR [rsp+72], ecx
        lea     edx, [rsi+rsi]
        mov     DWORD PTR [rsp+120], esi
        mov     esi, eax
        sub     esi, edx
        mov     DWORD PTR [rsp+116], esi
.L26:
        mov     edx, DWORD PTR [rsp+72]
        cmp     edx, 1
        mov     eax, edx
        jle     .L31
        sub     eax, 3
        mov     DWORD PTR [rsp+52], 0
        mov     esi, eax
        mov     DWORD PTR [rsp+96], eax
        mov     eax, DWORD PTR [rsp+92]
        shr     eax
        mov     DWORD PTR [rsp+128], eax
        add     eax, eax
        sub     esi, eax
        mov     eax, edx
        sub     eax, 1
        mov     DWORD PTR [rsp+124], esi
        mov     DWORD PTR [rsp+76], eax
.L24:
        mov     esi, DWORD PTR [rsp+76]
        cmp     esi, 1
        mov     eax, esi
        jle     .L32
        sub     eax, 3
        mov     DWORD PTR [rsp+56], 0
        mov     edx, eax
        mov     DWORD PTR [rsp+100], eax
        mov     eax, DWORD PTR [rsp+96]
        shr     eax
        mov     DWORD PTR [rsp+136], eax
        add     eax, eax
        sub     edx, eax
        mov     eax, esi
        sub     eax, 1
        mov     DWORD PTR [rsp+132], edx
        mov     DWORD PTR [rsp+80], eax
.L22:
        mov     edx, DWORD PTR [rsp+80]
        cmp     edx, 1
        mov     eax, edx
        jle     .L33
        sub     eax, 3
        mov     DWORD PTR [rsp+48], 0
        mov     esi, eax
        mov     DWORD PTR [rsp+104], eax
        mov     eax, DWORD PTR [rsp+100]
        shr     eax
        mov     DWORD PTR [rsp+144], eax
        add     eax, eax
        sub     esi, eax
        mov     eax, edx
        sub     eax, 1
        mov     DWORD PTR [rsp+140], esi
        mov     DWORD PTR [rsp+84], eax
.L20:
        mov     esi, DWORD PTR [rsp+84]
        cmp     esi, 1
        mov     eax, esi
        jle     .L34
        sub     eax, 3
        mov     DWORD PTR [rsp+60], 0
        mov     edx, eax
        mov     DWORD PTR [rsp+64], eax
        mov     eax, DWORD PTR [rsp+104]
        shr     eax
        mov     DWORD PTR [rsp+152], eax
        add     eax, eax
        sub     edx, eax
        mov     eax, esi
        sub     eax, 1
        mov     DWORD PTR [rsp+148], edx
        mov     DWORD PTR [rsp+88], eax
.L18:
        mov     edx, DWORD PTR [rsp+88]
        cmp     edx, 1
        mov     eax, edx
        jle     .L35
        sub     eax, 3
        mov     DWORD PTR [rsp+32], 0
        mov     esi, eax
        mov     DWORD PTR [rsp+20], eax
        mov     eax, DWORD PTR [rsp+64]
        shr     eax
        mov     DWORD PTR [rsp+156], eax
        add     eax, eax
        sub     esi, eax
        mov     eax, edx
        sub     eax, 1
        mov     DWORD PTR [rsp+36], esi
        mov     DWORD PTR [rsp+24], eax
.L16:
        mov     ecx, DWORD PTR [rsp+24]
        cmp     ecx, 1
        mov     eax, ecx
        jle     .L36
        mov     eax, DWORD PTR [rsp+20]
        lea     r13d, [rcx-3]
        lea     r12d, [rcx-1]
        mov     DWORD PTR [rsp+12], 0
        mov     esi, r13d
        shr     eax
        mov     DWORD PTR [rsp+28], eax
        add     eax, eax
        sub     esi, eax
        mov     DWORD PTR [rsp+16], esi
.L14:
        cmp     r12d, 1
        mov     eax, r12d
        jle     .L37
        mov     r14d, r13d
        lea     ebp, [r12-3]
        lea     r15d, [r12-1]
        shr     r14d
        xor     ebx, ebx
        lea     eax, [r14+r14]
        sub     ebp, eax
.L12:
        # Innermost loop: the only remaining true recursive call.
        mov     edi, r15d
        sub     r15d, 2
        call    f(int)
        add     ebx, eax
        cmp     r15d, ebp
        jne     .L12
        neg     r14d
        lea     eax, [r13+0+r14*2]
.L10:
        add     ebx, eax
        sub     r12d, 2
        add     DWORD PTR [rsp+12], ebx
        sub     r13d, 2
        cmp     r12d, DWORD PTR [rsp+16]
        jne     .L14
        mov     eax, DWORD PTR [rsp+28]
        mov     edx, DWORD PTR [rsp+20]
        neg     eax
        lea     eax, [rdx+rax*2]
.L9:
        add     eax, DWORD PTR [rsp+12]
        sub     DWORD PTR [rsp+24], 2
        add     DWORD PTR [rsp+32], eax
        mov     eax, DWORD PTR [rsp+36]
        sub     DWORD PTR [rsp+20], 2
        cmp     DWORD PTR [rsp+24], eax
        jne     .L16
        mov     eax, DWORD PTR [rsp+156]
        mov     edx, DWORD PTR [rsp+64]
        neg     eax
        lea     eax, [rdx+rax*2]
.L8:
        add     eax, DWORD PTR [rsp+32]
        sub     DWORD PTR [rsp+88], 2
        add     DWORD PTR [rsp+60], eax
        mov     eax, DWORD PTR [rsp+148]
        sub     DWORD PTR [rsp+64], 2
        cmp     DWORD PTR [rsp+88], eax
        jne     .L18
        mov     eax, DWORD PTR [rsp+152]
        mov     edx, DWORD PTR [rsp+104]
        neg     eax
        lea     eax, [rdx+rax*2]
.L7:
        add     eax, DWORD PTR [rsp+60]
        sub     DWORD PTR [rsp+84], 2
        add     DWORD PTR [rsp+48], eax
        mov     eax, DWORD PTR [rsp+140]
        sub     DWORD PTR [rsp+104], 2
        cmp     DWORD PTR [rsp+84], eax
        jne     .L20
        mov     eax, DWORD PTR [rsp+144]
        mov     esi, DWORD PTR [rsp+100]
        neg     eax
        lea     eax, [rsi+rax*2]
.L6:
        add     eax, DWORD PTR [rsp+48]
        sub     DWORD PTR [rsp+80], 2
        add     DWORD PTR [rsp+56], eax
        mov     eax, DWORD PTR [rsp+132]
        sub     DWORD PTR [rsp+100], 2
        cmp     DWORD PTR [rsp+80], eax
        jne     .L22
        mov     eax, DWORD PTR [rsp+136]
        mov     edx, DWORD PTR [rsp+96]
        neg     eax
        lea     eax, [rdx+rax*2]
.L5:
        add     eax, DWORD PTR [rsp+56]
        sub     DWORD PTR [rsp+76], 2
        add     DWORD PTR [rsp+52], eax
        mov     eax, DWORD PTR [rsp+124]
        sub     DWORD PTR [rsp+96], 2
        cmp     DWORD PTR [rsp+76], eax
        jne     .L24
        mov     eax, DWORD PTR [rsp+128]
        mov     esi, DWORD PTR [rsp+92]
        neg     eax
        lea     eax, [rsi+rax*2]
        jmp     .L4
        # .L37-.L32: a loop level is skipped because its counter is <= 1;
        # zero that level's accumulator and rejoin at the level's exit label.
.L37:
        xor     ebx, ebx
        jmp     .L10
.L36:
        mov     DWORD PTR [rsp+12], 0
        jmp     .L9
.L35:
        mov     DWORD PTR [rsp+32], 0
        jmp     .L8
.L34:
        mov     DWORD PTR [rsp+60], 0
        jmp     .L7
.L33:
        mov     DWORD PTR [rsp+48], 0
        jmp     .L6
.L32:
        mov     DWORD PTR [rsp+56], 0
        jmp     .L5
.L30:
        sub     eax, 2
        mov     DWORD PTR [rsp+40], 0
        mov     DWORD PTR [rsp+112], eax
.L3:
        add     edx, DWORD PTR [rsp+40]
        sub     DWORD PTR [rsp+68], 2
        add     DWORD PTR [rsp+44], edx
        cmp     eax, 1
        jg      .L28
        and     DWORD PTR [rsp+108], 1
        jmp     .L2
.L29:
        mov     DWORD PTR [rsp+44], 0
.L2:
        # Common exit: result = [rsp+108] + [rsp+44], then undo the prologue.
        mov     eax, DWORD PTR [rsp+108]
        add     eax, DWORD PTR [rsp+44]
        add     rsp, 168
        pop     rbx
        pop     rbp
        pop     r12
        pop     r13
        pop     r14
        pop     r15
        ret
.L31:
        mov     DWORD PTR [rsp+52], 0
.L4:
        add     eax, DWORD PTR [rsp+52]
        sub     DWORD PTR [rsp+72], 2
        add     DWORD PTR [rsp+40], eax
        mov     eax, DWORD PTR [rsp+116]
        sub     DWORD PTR [rsp+92], 2
        cmp     DWORD PTR [rsp+72], eax
        jne     .L26
        mov     eax, DWORD PTR [rsp+120]
        mov     edx, DWORD PTR [rsp+68]
        neg     eax
        lea     edx, [rdx+rax*2]
        mov     eax, DWORD PTR [rsp+112]
        jmp     .L3


Can anyone confirm this in their own g++ version?
Post 28 Nov 2013, 12:12
View user's profile Send private message Reply with quote
dogman



Joined: 18 Jul 2013
Posts: 114
Yep! 300 lines.
Post 28 Nov 2013, 15:28
View user's profile Send private message Reply with quote
AsmGuru62



Joined: 28 Jan 2004
Posts: 1384
Location: Toronto, Canada
Maybe GCC inlined every call to f()?
Post 28 Nov 2013, 16:25
View user's profile Send private message Send e-mail Reply with quote
bitRAKE



Joined: 21 Jul 2003
Posts: 2628
Location: dank orb
-Os -m32

ICC is the only one to switch to register passing.
Post 28 Nov 2013, 19:16
View user's profile Send private message Visit poster's website Reply with quote
dogman



Joined: 18 Jul 2013
Posts: 114
Intel makes good compilers. I'm real happy with their Fortran.

bitrake, nice site you found! I'll compile it under a few other compilers and post the results later. I have some that are not on that site.
Post 29 Nov 2013, 07:17
View user's profile Send private message Reply with quote
TmX



Joined: 02 Mar 2006
Posts: 783
Location: Jakarta, Indonesia

dogman wrote:
Intel makes good compilers. I'm real happy with their Fortran.



AFAIK Intel C++ is also praised.
Fortunately there are non commercial versions of their compiler.
Unfortunately, no Windows version. Wink

Quite pricey, though.
Post 29 Nov 2013, 10:15
View user's profile Send private message Reply with quote
ASM-Man



Joined: 11 Jan 2013
Posts: 65

dogman wrote:
Intel makes good compilers. I'm real happy with their Fortran.

bitrake, nice site you found! I'll compile it under a few other compilers and post the results later. I have some that are not on that site.



And I with their C/C++. Very nice really.

_________________
I'm not a native speaker of the english language. So, if you find any mistake what I have written, you are free to fix for me or tell me on. Smile
Post 30 Nov 2013, 15:17
View user's profile Send private message Reply with quote
dogman



Joined: 18 Jul 2013
Posts: 114
Here's the output from 3 other compilers.

CC (Solaris Studio 12.3)


Code:
        .file "test.c"
        .code32

        .set .simple_nop0x90
        .globl __1cBf6Fi_i_
        .type __1cBf6Fi_i_@function
        .local Dlrodata.lrodata
        .local Dldata.ldata

        .ident     "iropt: Sun Compiler Common 12.3 Linux_i386 2011/11/16"
        .ident     "ir2hf: Sun Compiler Common 12.3 Linux_i386 2011/11/16"
        .ident     "ube: Sun Compiler Common 12.3 Linux_i386 2011/11/16"

        .section .text,"ax"
{                   1  }        .align 16,.simple_nop


/ __1cBf6Fi_i_ : Solaris Studio 12.3 output for f(int), AT&T syntax.
/ The argument is loaded from 8(%ebp) into %ebx; the result is returned in %eax.
/ %esi accumulates the f(n-2) side: the compiler tests n-2, n-4, n-6 and n-8
/ against 2 and, when all are >= 2, starts from f(n-9)+f(n-10) and adds
/ f(n-7), f(n-5), f(n-3) in turn; f(n-1) is then called and added last.
/ NOTE(review): the leading annotation columns ("int3]  1  }", "{  5  }") and
/ the BLOCK/FILE/FUNCTION lines are listing residue whose original delimiters
/ appear to have been lost when the page was extracted; they are kept verbatim.
/================================================================================
FUNCTION __1cBf6Fi_i_

/               %rbp - used as frame pointer.

/               arg0 "n":                4(%esp),

__1cBf6Fi_i_:

BLOCK3pred1succ7 8count17.3027

FILE test.c

/    1                !int f(int n) {

int3]           1  }        push    %ebp
int1]           1  }        movl    %esp,%ebp
int3]           1  }        push    %ebx
int3]           1  }        push    %esi

/    2                !  if (n<2)

int4]           2  }        movl    8(%ebp),%ebx            / sym=n
int1]           2  }        cmpl    $2,%ebx
{                   2  }        jl      .CG2.14
{                   5  } .CG3.15:

BLOCK8pred3succ10 11count12.4580


/    3                !    return n;
/    4                !  else
/    5                !    return f(n-1)+f(n-2);

int1]           5  }        leal    -2(%ebx),%esi
int1]           2  }        cmpl    $2,%esi
{                   2  }        jl      .CG4.16
{                   5  } .CG5.17:

BLOCK11pred8succ13 14count11.2266

int1]              }        leal    -4(%ebx),%esi
int1]           2  }        cmpl    $2,%esi
{                   2  }        jl      .CG6.18
{                   5  } .CG7.19:

BLOCK14pred11succ16 17count10.1169

int1]              }        leal    -6(%ebx),%esi
int1]           2  }        cmpl    $2,%esi
{                   2  }        jl      .CG8.20
{                   5  } .CG9.21:

BLOCK17pred14succ19 20count9.11688

int1]              }        leal    -8(%ebx),%esi
int1]           2  }        cmpl    $2,%esi
{                   2  }        jl      .CGA.22
{                   5  } .CGB.23:

BLOCK20pred17succ9count4.55844

int1]           5  }        subl    $12,%esp
int1]              }        leal    -9(%ebx),%eax
int3]           5  }        push    %eax
{                   5  }        call    __1cBf6Fi_i_

BLOCK9pred20succ6count4.55844

int1]           5  }        addl    $4,%esp
int1]           5  }        movl    %eax,%esi
int1]              }        leal    -10(%ebx),%eax
int3]           5  }        push    %eax
{                   5  }        call    __1cBf6Fi_i_
int1]           5  }        addl    $16,%esp

BLOCK6pred9succ19count4.55844

int1]           5  }        addl    %eax,%esi
{                   3  } .CGA.22:
{                   5  } .CGC.24:

BLOCK21pred19succ5count9.11688

int1]           5  }        subl    $12,%esp
int1]              }        leal    -7(%ebx),%eax
int3]           5  }        push    %eax
{                   5  }        call    __1cBf6Fi_i_
int1]           5  }        addl    $16,%esp

BLOCK5pred21succ16count9.11688

int1]           5  }        addl    %eax,%esi
{                   3  } .CG8.20:
{                   5  } .CGD.25:

BLOCK22pred16succ18count10.1169

int1]           5  }        subl    $12,%esp
int1]              }        leal    -5(%ebx),%eax
int3]           5  }        push    %eax
{                   5  }        call    __1cBf6Fi_i_
int1]           5  }        addl    $16,%esp

BLOCK18pred22succ13count10.1169

int1]           5  }        addl    %eax,%esi
{                   3  } .CG6.18:
{                   5  } .CGE.26:

BLOCK23pred13succ4count11.2266

int1]           5  }        subl    $12,%esp
int1]              }        leal    -3(%ebx),%eax
int3]           5  }        push    %eax
{                   5  }        call    __1cBf6Fi_i_
int1]           5  }        addl    $16,%esp

BLOCK4pred23succ10count11.2266

int1]           5  }        addl    %eax,%esi
{                   3  } .CG4.16:
{                   5  } .CGF.27:

BLOCK24pred10succ26count12.4580

int1]           5  }        subl    $12,%esp
int1]           5  }        addl    $-1,%ebx
int3]           5  }        push    %ebx
{                   5  }        call    __1cBf6Fi_i_
int1]           5  }        addl    $16,%esp

BLOCK26pred24succ7count12.4580

int1]           5  }        leal    (%eax,%esi),%ebx
{                   3  } .CG2.14:
{                   5  } .CG10.28:

BLOCK25pred7succ2count4.84477

int1]           5  }        movl    %ebx,%eax
int3]           5  }        pop     %esi
int3]           5  }        pop     %ebx
{                   5  }        leave
{                   5  }        ret
        .size __1cBf6Fi_i_, . - __1cBf6Fi_i_
.CG0:



        .section .data,"aw"
Ddata.data: / Offset 0



        .section .bss,"aw"

Bbss.bss:


        .section .bssf,"aw"


        .section .rodata,"a"
Drodata.rodata: / Offset 0



        .section .picdata,"aw"
Dpicdata.picdata: / Offset 0



        .section .lbss,"awh"
        .type Blbss.lbss@object

Blbss.lbss:


        .section .ldata,"awh"
Dldata.ldata: / Offset 0
        .type Dldata.ldata@object



        .section .lrodata,"ah"
Dlrodata.lrodata: / Offset 0
        .type Dlrodata.lrodata@object




OpenUH (University of Houston version of Open64 compiler)


Code:
        #  /opt/openuh-3.0.29/lib/gcc-lib/x86_64-open64-linux/5.0/be::5.0

        #-----------------------------------------------------------
        # Compiling test.c (/tmp/ccI#.s4ieOz)
        #-----------------------------------------------------------

        #-----------------------------------------------------------
        # Options:
        #-----------------------------------------------------------
        #  Target:WolfdaleISA:ISA_1Endian:littlePointer Size:32
        #  -O3  (Optimization level)
        #  -g0  (Debug level)
        #  -m2  (Report advisories)
        #-----------------------------------------------------------

        .file   1       "/tmp/test.c"


        .text
        .align  2

        .section .except_table_supp"a",@progbits

        .section .except_table"a",@progbits
        .section .text
        .p2align 5,,

        # Program Unit_Z1fi
.globl  _Z1fi
        .type   _Z1fi@function
_Z1fi:  # 0x0
        # .frame        %esp20%esp
        # _temp_gra_spill1 = 8
        .loc    1       1       0
 #   1  int f(int n) {
.LBB1__Z1fi:
.LEH_adjustsp__Z1fi:
        addl $-20,%esp                  # [0
.L_0_1282:
        .loc    1       2       0
 #   2    if (n<2)
        movl 24(%esp),%edx              # [0n
        cmpl $1,%edx                    # [3
        jg .Lt_0_770                    # [4
.LBB3__Z1fi:
        .loc    1       3       0
 #   3      return n;
        movl %edx,%eax                  # [0
        addl $20,%esp                   # [0
        ret                             # [0
        .p2align 4,,15
.Lt_0_770:
        .loc    1       5       0
 #   4    else
 #   5      return f(n-1)+f(n-2);
        movl 24(%esp),%eax              # [0n
        addl $-1,%eax                   # [3
        movl %eax,0(%esp)               # [4id:8
        call _Z1fi                      # [4_Z1fi
.LBB5__Z1fi:
        movl %eax,8(%esp)               # [0_temp_gra_spill1
        movl 24(%esp),%eax              # [0n
        addl $-2,%eax                   # [3
        movl %eax,0(%esp)               # [4id:9
        call _Z1fi                      # [4_Z1fi
.LBB6__Z1fi:
        movl %eax,%edx                  # [0
        movl 8(%esp),%eax               # [0_temp_gra_spill1
        addl %edx,%eax                  # [3
        addl $20,%esp                   # [3
        ret                             # [3
.L_0_1538:
.LDWend__Z1fi:
        .size _Z1fi.LDWend__Z1fi-_Z1fi

        .section .except_table
        .align  0
        .type   .range_table._Z1fi@object
.range_table._Z1fi:     # 0x0
        # offset 0
        .byte   255
        # offset 1
        .byte   0
        .uleb128        .LSDATTYPEB1-.LSDATTYPED1
.LSDATTYPED1:
        # offset 6
        .byte   1
        .uleb128        .LSDACSE1-.LSDACSB1
.LSDACSB1:
        .uleb128        .L_0_1282-_Z1fi
        .uleb128        .L_0_1538-.L_0_1282
        # offset 17
        .uleb128        0
        # offset 21
        .uleb128        0
.LSDACSE1:
        # offset 25
        .sleb128        0
        # offset 29
        .sleb128        0
.LSDATTYPEB1:
        # end of initialization for .range_table._Z1fi
        .section .text
        .align  4
        .section .except_table_supp
        .align  4
        .section .except_table
        .align  4

        .section .eh_frame"a",@progbits
.LEHCIE:
        .4byte  .LEHCIE_end - .LEHCIE_begin
.LEHCIE_begin:
        .4byte 0x0
        .byte   0x010x7a0x500x4c0x000x010x7c0x08
        .byte   0x060x00
        .4byte  __gxx_personality_v0
        .byte   0x000x0c0x040x040x880x01
        .align 4
.LEHCIE_end:
        .4byte  .LFDE1_end - .LFDE1_begin
.LFDE1_begin:
        .4byte  .LFDE1_begin - .LEHCIE
        .4byte  .LBB1__Z1fi
        .4byte  .LDWend__Z1fi - .LBB1__Z1fi
        .byte   0x04
        .4byte  .range_table._Z1fi
        .byte   0x04
        .4byte  .LEH_adjustsp__Z1fi - .LBB1__Z1fi
        .byte   0x0e0x18
        .align 4
.LFDE1_end:

        .section .debug_line""
        .section        .note.GNU-stack,"",@progbits
        .ident  "#Open64 Compiler Version 5.0 : test.c compiled with : -g0 -O3 -march=wolfdale -msse2 -msse3 -mno-3dnow -mno-sse4a -mssse3 -mno-sse41 -mno-sse42 -mno-aes -mno-pclmul -mno-avx -mno-xop -mno-fma -mno-fma4 -m32"





Open64


Code:
        #  /opt/open64-5.0/lib/gcc-lib/x86_64-open64-linux/5.0/be::5.0

        #-----------------------------------------------------------
        # Compiling test.c (/tmp/ccI#.Bc1ETQ)
        #-----------------------------------------------------------

        #-----------------------------------------------------------
        # Options:
        #-----------------------------------------------------------
        #  Target:WolfdaleISA:ISA_1Endian:littlePointer Size:32
        #  -O3  (Optimization level)
        #  -g0  (Debug level)
        #  -m2  (Report advisories)
        #-----------------------------------------------------------

        .file   1       "test.c"


        .text
        .align  2

        .section .except_table_supp"a",@progbits

        .section .except_table"a",@progbits
        .section .text
        .p2align 5,,

        # Program Unit_Z1fi
.globl  _Z1fi
        .type   _Z1fi@function
_Z1fi:  # 0x0
        # .frame        %esp20%esp
        # _temp_gra_spill1 = 8
        .loc    1       1       0
 #   1  int f(int n) {
.LBB1__Z1fi:
.LEH_adjustsp__Z1fi:
        addl $-20,%esp                  # [0
.L_0_1282:
        .loc    1       2       0
 #   2    if (n<2)
        movl 24(%esp),%edx              # [0n
        cmpl $1,%edx                    # [3
        jg .Lt_0_770                    # [4
.LBB3__Z1fi:
        .loc    1       3       0
 #   3      return n;
        movl %edx,%eax                  # [0
        addl $20,%esp                   # [0
        ret                             # [0
        .p2align 4,,15
.Lt_0_770:
        .loc    1       5       0
 #   4    else
 #   5      return f(n-1)+f(n-2);
        movl 24(%esp),%eax              # [0n
        addl $-1,%eax                   # [3
        movl %eax,0(%esp)               # [4id:8
        call _Z1fi                      # [4_Z1fi
.LBB5__Z1fi:
        movl %eax,8(%esp)               # [0_temp_gra_spill1
        movl 24(%esp),%eax              # [0n
        addl $-2,%eax                   # [3
        movl %eax,0(%esp)               # [4id:9
        call _Z1fi                      # [4_Z1fi
.LBB6__Z1fi:
        movl %eax,%edx                  # [0
        movl 8(%esp),%eax               # [0_temp_gra_spill1
        addl %edx,%eax                  # [3
        addl $20,%esp                   # [3
        ret                             # [3
.L_0_1538:
.LDWend__Z1fi:
        .size _Z1fi.LDWend__Z1fi-_Z1fi

        .section .except_table
        .align  0
        .type   .range_table._Z1fi@object
.range_table._Z1fi:     # 0x0
        # offset 0
        .byte   255
        # offset 1
        .byte   0
        .uleb128        .LSDATTYPEB1-.LSDATTYPED1
.LSDATTYPED1:
        # offset 6
        .byte   1
        .uleb128        .LSDACSE1-.LSDACSB1
.LSDACSB1:
        .uleb128        .L_0_1282-_Z1fi
        .uleb128        .L_0_1538-.L_0_1282
        # offset 17
        .uleb128        0
        # offset 21
        .uleb128        0
.LSDACSE1:
        # offset 25
        .sleb128        0
        # offset 29
        .sleb128        0
.LSDATTYPEB1:
        # end of initialization for .range_table._Z1fi
        .section .text
        .align  4
        .section .except_table_supp
        .align  4
        .section .except_table
        .align  4

        .section .eh_frame"a",@progbits
.LEHCIE:
        .4byte  .LEHCIE_end - .LEHCIE_begin
.LEHCIE_begin:
        .4byte 0x0
        .byte   0x010x7a0x500x4c0x000x010x7c0x08
        .byte   0x060x00
        .4byte  __gxx_personality_v0
        .byte   0x000x0c0x040x040x880x01
        .align 4
.LEHCIE_end:
        .4byte  .LFDE1_end - .LFDE1_begin
.LFDE1_begin:
        .4byte  .LFDE1_begin - .LEHCIE
        .4byte  .LBB1__Z1fi
        .4byte  .LDWend__Z1fi - .LBB1__Z1fi
        .byte   0x04
        .4byte  .range_table._Z1fi
        .byte   0x04
        .4byte  .LEH_adjustsp__Z1fi - .LBB1__Z1fi
        .byte   0x0e0x18
        .align 4
.LFDE1_end:

        .section .debug_line""
        .section        .note.GNU-stack,"",@progbits
        .ident  "#Open64 Compiler Version 5.0 : test.c compiled with : -g0 -O3 -march=wolfdale -msse2 -msse3 -mno-3dnow -mno-sse4a -mssse3 -mno-sse41 -mno-sse42 -mno-aes -mno-pclmul -mno-avx -mno-xop -mno-fma4 -m32"





All three look better than gcc at -O3...

_________________
Sources? Ahahaha! We don't need no stinkin' sources!
Post 30 Nov 2013, 18:12
View user's profile Send private message Reply with quote
cod3b453



Joined: 25 Aug 2004
Posts: 616
Well the only way to settle it is to race them Laughing
Post 30 Nov 2013, 18:24
View user's profile Send private message Reply with quote
tthsqe



Joined: 20 May 2009
Posts: 653
cod3b453,
we have already had extensive speed tests (http://board.flatassembler.net/topic.php?t=10158&postdays=0&postorder=asc&start=20) for this academic example. I'm just always interested in what the latest compiler tech is doing. At least for Intel CPUs, I think the fastest human implementation is what I have below. On AMD, you might want to replace the xchg with a push and pop. Though I wouldn't mind if you did run a speed test, to see whether all of that code that gcc wrote actually improves performance.


Code:
; f: naive recursive Fibonacci (32-bit code, fasm syntax).
; In:    eax = n (compared as unsigned: only n <= 1 returns immediately)
; Out:   eax = f(n)
; Saved: ebx is pushed/popped; it carries n-2 and then f(n-1) across the calls.
f:     ;argument passed in eax
    cmp eax,1
    jbe .done               ; n <= 1: return n unchanged
    push ebx
    lea ebx,[eax-2]         ; ebx = n-2, kept for the second call
    dec eax                 ; eax = n-1
    call f                  ; eax = f(n-1)
    xchg eax,ebx            ; eax = n-2, ebx = f(n-1)
    call f                  ; eax = f(n-2)
    add eax,ebx             ; eax = f(n-1) + f(n-2)
    pop ebx
.done:
    ret

Post 06 Dec 2013, 18:52
View user's profile Send private message Reply with quote
bitRAKE



Joined: 21 Jul 2003
Posts: 2628
Location: dank orb
No compiler I know of will generate the simple loop using the XADD instruction.

http://www.asmcommunity.net/forums/topic/?id=14206

_________________
The generation of random numbers is too important to be left to chance - Robert R Coveyou
Post 07 Dec 2013, 07:46
View user's profile Send private message Visit poster's website Reply with quote
tthsqe



Joined: 20 May 2009
Posts: 653
bitrake, could you give a precise rule-based deduction of the fact that your xadd loop correctly implements

Code:
/* Naive doubly-recursive Fibonacci on unsigned values:
   returns n for n < 2, otherwise f(n-1) + f(n-2). */
unsigned int f(unsigned int n) {
  if (n<2)
    return n;
  else
    return f(n-1)+f(n-2);
}


I think compiler technology has not focused on such optimizations. If your bag of tricks is precise enough, you should be able to teach it to the computer.
Post 07 Dec 2013, 11:50
View user's profile Send private message Reply with quote
bitRAKE



Joined: 21 Jul 2003
Posts: 2628
Location: dank orb
The first thing for the compiler to understand is the range of numbers involved, and the function utility (how it is used). Next is to understand the sequential nature of the recursion - caching values is beneficial. XADD is a special case of that value caching which the instruction selector should find.
Post 07 Dec 2013, 20:19
View user's profile Send private message Visit poster's website Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  


< Last Thread | Next Thread >

Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum


Powered by phpBB © 2001-2005 phpBB Group.

Main index   Download   Documentation   Examples   Message board
Copyright © 2004-2016, Tomasz Grysztar.