flat assembler
Message board for the users of flat assembler.

Index > High Level Languages > Sphinx C-- , speed test problem

Author
Thread Post new topic Reply to topic
emil



Joined: 16 Dec 2003
Posts: 76
Location: egypt
emil 26 Sep 2011, 17:44
Hi all

I wrote this code to test which procedure is faster, strlen1 or strlen4.

The problem is that each time I run the code I get different results, so how can I make the measurement more accurate?

Code:
/***************************************
*             Sphinx C--               *  
*                                      *
*    TimerTest demo  By Emil Halim     *
*           26 / 9 / 2011              *
***************************************/
#speed
?useMMX 
#pragma option w32         //create Windows GUI EXE.
#pragma option OBJ         //create OBJ file
#pragma option OS          //speed optimization 
#pragma option J0          //no startup code.

#include <Windows.h> 

#pragma option ia          // allow inline asm
#pragma option LST

extern cdecl _printf();   
#define printf  _printf

// Byte-at-a-time string length: counts the bytes at pStr that precede the
// first zero byte. The count is accumulated directly in EAX and there is no
// explicit return statement, so EAX is presumably what the caller receives
// (NOTE(review): confirm against the Sphinx C-- return convention).
int strlen1(char* pStr)
{
    EAX=0;                      // length counter starts at zero
    while(byte *pStr !=0 )      // keep going until the byte at pStr is zero
     {
        pStr++;                 // step to the next character
        EAX++;                  // one more byte counted
     }
} 

// *** SSE2 version  from MASM forum***
// SSE2 string length. Takes the string address in EAX and leaves the length
// in EAX. The string is scanned in 16-byte steps starting from the 16-byte
// aligned address at or below the string start; mask bits belonging to bytes
// in front of the string are shifted out before the terminator is located.
// The code writes EBX, ECX, EDX and XMM0 and does not save them itself.
// NOTE(review): because whole aligned chunks are examined, memory before the
// string start and after the terminator (within the same chunk) is read too.
int  fastcall strlen4(EAX) 
{     
    EBX = EAX ;                 // keep the original string pointer for the final subtraction
       LEA ECX, DSDWORD[EAX+16]        // string pointer + 16: equals EAX after the first step when the string is aligned
 EAX &= 0xFFFFFFF0;          // round down to a 16-byte boundary for use with SSE2
@shiftOK:     
    XORPS XMM0, XMM0                // zero xmm0 so the byte compare below finds zero bytes
@a1: 
    PCMPEQB XMM0, DSQWORD[EAX]      // ---- inner loop ----- bytes equal to zero become 0xFF in xmm0
    PMOVMSKB EDX, XMM0              // set byte mask in edx (one bit per byte)
     EAX += 16;                      // len counter (best position here)
 TEST EDX,EDX
        JE a1                    // no zero byte found: xmm0 is still all zero, so loop without clearing it
       if(ECX<=EAX) goto a2;     // aligned start, or misalignment already handled: mask is fully valid
    ECX -= EAX;                 // get difference, and cancel "misalign flag"
 SHR EDX, CL                 // shift invalid
        SHL EDX, CL                     // bits out
 JE shiftOK                  // mask became zero: the hit was in front of the string, clear xmm0 and continue
@a2:  
    BSF EDX, EDX                // bit scan for the index
       SUB EAX, EBX                    // subtract original src pointer
    LEA EAX, DSDWORD[EAX+EDX-16]    // add scan index and undo the last +16 step
}

// Input string whose length is measured by the timing test.
char* testStr = "SPHINX C-- is so easy (an intermediate position between Assembler and C)";

// 64-bit storage for the time-stamp counter value read before the timed call.
qword  EAX_EDX_;

// Times ONE call of strlen1 with RDTSC while the process runs at high
// priority, prints the low 32 bits of the tick difference, then opens an
// empty message box.
main()
int i;
{
_start: 

  // Raise the process priority for the duration of the measurement.
  SetPriorityClass( GetCurrentProcess(), HIGH_PRIORITY_CLASS);          
  // Read the time-stamp counter (result in EDX:EAX) and remember it.
  rdtsc
  EAX_EDX_ = EDX:EAX;
  
          strlen1( testStr );
            
  // Read the counter again and subtract the start value to get elapsed ticks.
  rdtsc
  EDX:EAX -= EAX_EDX_;
  i = EAX;                      // only the low 32 bits of the difference are kept
  SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
 
  printf("time is %i",  i );
  // NOTE(review): presumably the message box is here to keep the program open
  // until the user dismisses it - confirm.
  MessageBox(0,"","",0);   
}
    
Post 26 Sep 2011, 17:44
View user's profile Send private message Reply with quote
ronware



Joined: 08 Jan 2004
Posts: 179
Location: Israel
ronware 26 Sep 2011, 18:12
Emil,

The problem is that there is always other activity in other processes and threads. The only way to get consistent results is to make your test last long enough (tens of seconds), and repeat it a number of times.
Post 26 Sep 2011, 18:12
View user's profile Send private message Visit poster's website AIM Address Yahoo Messenger Reply with quote
emil



Joined: 16 Dec 2003
Posts: 76
Location: egypt
emil 26 Sep 2011, 19:32
ok ronware.

so i put a long loop to get your idea , but..............

have the same problem & get negative results

here is the code.
Code:
/***************************************
*             Sphinx C--               *  
*                                      *
*    TimerTest demo  By Emil Halim     *
*           26 / 9 / 2011              *
***************************************/

#pragma option w32         //create Windows GUI EXE.
#pragma option OBJ         //create OBJ file
#pragma option OS          //speed optimization 
#pragma option J0          //no startup code.

#include <Windows.h> 

#pragma option ia          // allow inline asm
#pragma option LST

extern cdecl _printf();   
#define printf  _printf

// Byte-at-a-time string length: counts the bytes at pStr that precede the
// first zero byte. The count is accumulated directly in EAX and there is no
// explicit return statement, so EAX is presumably what the caller receives
// (NOTE(review): confirm against the Sphinx C-- return convention).
int strlen1(char* pStr)
{
    EAX=0;                      // length counter starts at zero
    while(byte *pStr !=0 )      // keep going until the byte at pStr is zero
     {
        pStr++;                 // step to the next character
        EAX++;                  // one more byte counted
     }
} 

// *** SSE2 version  from MASM forum***
// SSE2 string length. Takes the string address in EAX and leaves the length
// in EAX. The string is scanned in 16-byte steps starting from the 16-byte
// aligned address at or below the string start; mask bits belonging to bytes
// in front of the string are shifted out before the terminator is located.
// The code writes EBX, ECX, EDX and XMM0 and does not save them itself.
// NOTE(review): because whole aligned chunks are examined, memory before the
// string start and after the terminator (within the same chunk) is read too.
int  fastcall strlen4(EAX) 
{    
    EBX = EAX ;                 // keep the original string pointer for the final subtraction
        LEA ECX, DSDWORD[EAX+16]        // string pointer + 16: equals EAX after the first step when the string is aligned
    EAX &= 0xFFFFFFF0;          // round down to a 16-byte boundary
@shiftOK:       
    XORPS XMM0, XMM0                // zero xmm0 so the byte compare below finds zero bytes
@a1:    
    PCMPEQB XMM0, DSQWORD[EAX]      // inner loop: bytes equal to zero become 0xFF in xmm0
    PMOVMSKB EDX, XMM0              // collect one mask bit per byte into EDX
    EAX += 16;                      // advance to the next 16-byte step
    TEST EDX,EDX
        JE a1                   // no zero byte found: xmm0 is still all zero, so loop without clearing it
       if(ECX<=EAX) goto a2;    // aligned start, or misalignment already handled: mask is fully valid
    ECX -= EAX;                 // misalignment of the string start; ECX is now small, so the test above succeeds from here on
        SHR EDX, CL                 // shift the bits for bytes in front of the string
        SHL EDX, CL                     // out of the mask
    JE shiftOK                  // mask became zero: the hit was in front of the string, clear xmm0 and continue
@a2:  
    BSF EDX, EDX                // index of the lowest set mask bit = offset of the terminator in this step
        SUB EAX, EBX                    // distance from the original string pointer
    LEA EAX, DSDWORD[EAX+EDX-16]    // add the bit index and undo the last +16 step
}


// Input string whose length is measured by both timing loops.
char* testStr = "SPHINX C-- is so easy (an intermediate position between Assembler and C)";

// 64-bit tick values: each first holds the RDTSC start value of its loop and
// is then overwritten with the elapsed ticks (1 = strlen1, 2 = strlen4).
qword  EAX_EDX_1;
qword  EAX_EDX_2;

// Times `count` calls of strlen1 and then `count` calls of strlen4 with
// RDTSC at high process priority, divides each total by `count`, stores the
// quotients as integers and prints them. The timed regions include the loop
// and call overhead.
// NOTE(review): the thread author reports that this revision printed
// negative values; the cause is not established in the thread.
main()
int i;
int  reslt1, reslt2;
int count; 
{
_start: 

  count = 1000000;              // number of calls per timed loop

  SetPriorityClass( GetCurrentProcess(), HIGH_PRIORITY_CLASS);          
  // Start value for the strlen1 loop (RDTSC returns the counter in EDX:EAX).
  rdtsc
  EAX_EDX_1 = EDX:EAX;
  
     for(i=0; i < count; i++)
      {
          strlen1( testStr );
      }      
  // Elapsed ticks for the strlen1 loop = end value - start value.
  rdtsc
  EAX_EDX_1 = EDX:EAX - EAX_EDX_1;
  
  // Start value for the strlen4 loop.
  rdtsc
  EAX_EDX_2 = EDX:EAX;
  
     for(i=0; i < count; i++)
      {
          strlen4( testStr );
      }      
  // Elapsed ticks for the strlen4 loop.
  rdtsc
  EAX_EDX_2 = EDX:EAX - EAX_EDX_2;
  
  SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS);
 
  // Average ticks per call, computed on the FPU stack and stored as an
  // integer (fistp converts ST(0) to an integer and pops it).
  ST(0) = EAX_EDX_1 / count; 
  fistp   reslt1
  ST(0) = EAX_EDX_2 / count; 
  fistp   reslt2
  printf("strlen1 is %d\nstrlen2 is %d",   reslt1 , reslt2 );
  // NOTE(review): presumably the message box is here to keep the program open
  // until the user dismisses it - confirm.
  MessageBox(0,"","",0);   
}
    
Post 26 Sep 2011, 19:32
View user's profile Send private message Reply with quote
ronware



Joined: 08 Jan 2004
Posts: 179
Location: Israel
ronware 27 Sep 2011, 07:22
emil wrote:
ok ronware.

so i put a long loop to get your idea , but..............

have the same problem & get negative results


Emil -

What exactly are "negative results" in this context? Is it that the difference between them seems to vary up and down? Is the variance very large, or not so much?

Please quantify (I cannot currently run the code so I can't see for myself)

best regards
Ron
Post 27 Sep 2011, 07:22
View user's profile Send private message Visit poster's website AIM Address Yahoo Messenger Reply with quote
emil



Joined: 16 Dec 2003
Posts: 76
Location: egypt
emil 27 Sep 2011, 16:03
well,


i will demonstrate that what the program did.

First I used SetPriorityClass to reduce the effect of multitasking and get the most accurate results, then called rdtsc to get the clock time and saved it in the EAX_EDX_1 variable.

Then I put in a test loop to measure its time: after the loop finishes I call rdtsc to get the new clock time, then subtract the first value EAX_EDX_1 from it to get the actual time of our test.

The time difference we get is for the entire loop, so to get the time per iteration I divide it by the loop counter.

the problem is that the last result is negative !!!!!!!!!!

That cannot be right: the result of the second rdtsc call is greater than the result of the first call, so the difference should always be positive.
Post 27 Sep 2011, 16:03
View user's profile Send private message Reply with quote
emil



Joined: 16 Dec 2003
Posts: 76
Location: egypt
emil 30 Sep 2011, 18:05
Ok ,

I get good results if I change the type of the reslt1 and reslt2 variables from int to double, and change the fistp instruction to fstp.

So here is the corrected code; I hope it will be useful to someone.

Code:

/*************************************** 
*             Sphinx C--               *   
*                                      * 
*    TimerTest demo  By Emil Halim     * 
*           30 / 9 / 2011              * 
***************************************/ 

#pragma option w32         //create Windows GUI EXE. 
#pragma option OBJ         //create OBJ file 
#pragma option OS          //speed optimization  
#pragma option J0          //no startup code. 

#include <Windows.h>  

#pragma option ia          // allow inline asm 
#pragma option LST 

extern cdecl _printf();    
#define printf  _printf 

// Byte-at-a-time string length: counts the bytes at pStr that precede the
// first zero byte. The count is accumulated directly in EAX and there is no
// explicit return statement, so EAX is presumably what the caller receives
// (NOTE(review): confirm against the Sphinx C-- return convention).
int strlen1(char* pStr) 
{ 
    EAX=0;                      // length counter starts at zero
    while(byte *pStr !=0 )      // keep going until the byte at pStr is zero
     { 
        pStr++;                 // step to the next character
        EAX++;                  // one more byte counted
     } 
}  

// *** SSE2 version  from MASM forum*** 
// SSE2 string length. Takes the string address in EAX and leaves the length
// in EAX. The string is scanned in 16-byte steps starting from the 16-byte
// aligned address at or below the string start; mask bits belonging to bytes
// in front of the string are shifted out before the terminator is located.
// The code writes EBX, ECX, EDX and XMM0 and does not save them itself.
// NOTE(review): because whole aligned chunks are examined, memory before the
// string start and after the terminator (within the same chunk) is read too.
int  fastcall strlen4(EAX)  
{        
        EBX = EAX ;                  // keep the original string pointer for the final subtraction
        LEA ECX, DSDWORD[EAX+16]         // string pointer + 16: equals EAX after the first step when the string is aligned
        EAX &= 0xFFFFFFF0;               // round down to a 16-byte boundary
@shiftOK:        
    XORPS XMM0, XMM0             // zero xmm0 so the byte compare below finds zero bytes
@a1:     
    PCMPEQB XMM0, DSQWORD[EAX]   // inner loop: bytes equal to zero become 0xFF in xmm0
        PMOVMSKB EDX, XMM0               // collect one mask bit per byte into EDX
        EAX += 16;                       // advance to the next 16-byte step
        TEST EDX,EDX 
        JE a1                    // no zero byte found: xmm0 is still all zero, so loop without clearing it
        if(ECX<=EAX) goto a2;    // aligned start, or misalignment already handled: mask is fully valid
        ECX -= EAX;                  // misalignment of the string start; ECX is now small, so the test above succeeds from here on
        SHR EDX, CL                  // shift the bits for bytes in front of the string
        SHL EDX, CL                      // out of the mask
        JE shiftOK               // mask became zero: the hit was in front of the string, clear xmm0 and continue
@a2:     
    BSF EDX, EDX                     // index of the lowest set mask bit = offset of the terminator in this step
        SUB EAX, EBX                     // distance from the original string pointer
        LEA EAX, DSDWORD[EAX+EDX-16]     // add the bit index and undo the last +16 step
} 


// Input string whose length is measured by both timing loops.
char* testStr = "SPHINX C-- is so easy (an intermediate position between Assembler and C)"; 

// 64-bit tick values: each first holds the RDTSC start value of its loop and
// is then overwritten with the elapsed ticks (1 = strlen1, 2 = strlen4).
qword  EAX_EDX_1; 
qword  EAX_EDX_2; 

// Times `count` calls of strlen1 and then `count` calls of strlen4 with
// RDTSC at high process priority, divides each total by `count`, stores the
// quotients as doubles and prints them with %f. The timed regions include
// the loop and call overhead.
main() 
int i; 
double  reslt1, reslt2; 
int count;  
{ 
_start:  

  count = 1000000;              // number of calls per timed loop

  SetPriorityClass( GetCurrentProcess(), HIGH_PRIORITY_CLASS);           
  // Start value for the strlen1 loop (RDTSC returns the counter in EDX:EAX).
  rdtsc 
  EAX_EDX_1 = EDX:EAX; 
   
     for(i=0; i < count; i++) 
      { 
          strlen1( testStr ); 
      }       
  // Elapsed ticks for the strlen1 loop = end value - start value.
  rdtsc 
  EAX_EDX_1 = EDX:EAX - EAX_EDX_1; 
   
  // Start value for the strlen4 loop.
  rdtsc 
  EAX_EDX_2 = EDX:EAX; 
   
     for(i=0; i < count; i++) 
      { 
          strlen4( testStr ); 
      }       
  // Elapsed ticks for the strlen4 loop.
  rdtsc 
  EAX_EDX_2 = EDX:EAX - EAX_EDX_2; 
   
  SetPriorityClass(GetCurrentProcess(), NORMAL_PRIORITY_CLASS); 
  
  // Average ticks per call, computed on the FPU stack and stored as a
  // floating-point value (fstp stores ST(0) and pops it).
  ST(0) = EAX_EDX_1 / count;  
  fstp   reslt1 
  ST(0) = EAX_EDX_2 / count;  
  fstp   reslt2 
  printf("strlen1 is %f\nstrlen2 is %f",   reslt1 , reslt2 ); 
  // NOTE(review): presumably the message box is here to keep the program open
  // until the user dismisses it - confirm.
  MessageBox(0,"","",0);    
} 
    
Post 30 Sep 2011, 18:05
View user's profile Send private message Reply with quote
ronware



Joined: 08 Jan 2004
Posts: 179
Location: Israel
ronware 02 Oct 2011, 04:26
Good, Emil --

Sorry, I was unavailable for the past several days. Glad you were able to work it out!

Best regards,
Ron
Post 02 Oct 2011, 04:26
View user's profile Send private message Visit poster's website AIM Address Yahoo Messenger Reply with quote
Matrix



Joined: 04 Sep 2004
Posts: 1164
Location: Overflow
Matrix 04 Nov 2011, 16:23
sorry wrong thread, never written windows program
Post 04 Nov 2011, 16:23
View user's profile Send private message Visit poster's website Reply with quote
Display posts from previous:
Post new topic Reply to topic

Jump to:  


< Last Thread | Next Thread >
Forum Rules:
You cannot post new topics in this forum
You cannot reply to topics in this forum
You cannot edit your posts in this forum
You cannot delete your posts in this forum
You cannot vote in polls in this forum
You cannot attach files in this forum
You can download files in this forum


Copyright © 1999-2025, Tomasz Grysztar. Also on GitHub, YouTube.

Website powered by rwasa.