windows schneller als linux stdlib ...

A

anhadikal

hallo,

ich habe etwas Seltsames erlebt:

ergebnisse am amd athlon x2 4800+:

im linux(2 builds ...):
------------------------
Code:
cmp_i686 
-------------- 
C strcpy:                         3764.7 MB/second (2560.0 MB in 680000 clocks) 
our strcpy1:                      2976.7 MB/second (2560.0 MB in 860000 clocks) 
our strcpy2:                      3084.3 MB/second (2560.0 MB in 830000 clocks) 
C memcpy:                         3084.3 MB/second (2560.0 MB in 830000 clocks) 
our memcpy:                       1969.2 MB/second (2560.0 MB in 1300000 clocks) 

cmp_athlon 
----------------- 
C strcpy:                         3764.7 MB/second (2560.0 MB in 680000 clocks) 
our strcpy1:                      4000.0 MB/second (2560.0 MB in 640000 clocks) 
our strcpy2:                      3122.0 MB/second (2560.0 MB in 820000 clocks) 
C memcpy:                         3122.0 MB/second (2560.0 MB in 820000 clocks) 
our memcpy:                       1984.5 MB/second (2560.0 MB in 1290000 clocks)

nun Windows (2 Builds)
---------------------
Code:
C:\DundE\anh\Eigene Dateien\downloads>cmp_athlon.exe 
C strcpy:                4821.1 MB/second (2560.0 MB in 531 clocks) 
our strcpy1:             4196.7 MB/second (2560.0 MB in 610 clocks) 
our strcpy2:             3148.8 MB/second (2560.0 MB in 813 clocks) 
C memcpy:                2873.2 MB/second (2560.0 MB in 891 clocks) 
our memcpy:              1973.8 MB/second (2560.0 MB in 1297 clocks) 

C:\DundE\anh\Eigene Dateien\downloads>cmp_i686.exe 
C strcpy:                4812.0 MB/second (2560.0 MB in 532 clocks) 
our strcpy1:             2976.7 MB/second (2560.0 MB in 860 clocks) 
our strcpy2:             3091.8 MB/second (2560.0 MB in 828 clocks) 
C memcpy:                2825.6 MB/second (2560.0 MB in 906 clocks) 
our memcpy:              1949.7 MB/second (2560.0 MB in 1313 clocks)

nun noch krasser auf meinem i5 430m:

linux(2 builds):
-------------------
Code:
[dd@lappy Downloads]$ ./cmp_686 
C strcpy:                1113.0 MB/second (2560.0 MB in 2300000 clocks) 
our strcpy1:             1497.1 MB/second (2560.0 MB in 1710000 clocks) 
our strcpy2:             1523.8 MB/second (2560.0 MB in 1680000 clocks) 
C memcpy:                1630.6 MB/second (2560.0 MB in 1570000 clocks) 
our memcpy:              1207.5 MB/second (2560.0 MB in 2120000 clocks) 

[dd@lappy Downloads]$ ./cmp_core2 
C strcpy:                1075.6 MB/second (2560.0 MB in 2380000 clocks) 
our strcpy1:             1741.5 MB/second (2560.0 MB in 1470000 clocks) 
our strcpy2:             1706.7 MB/second (2560.0 MB in 1500000 clocks) 
C memcpy:                1600.0 MB/second (2560.0 MB in 1600000 clocks) 
our memcpy:              1213.3 MB/second (2560.0 MB in 2110000 clocks)

windows(2builds)
-------------------
Code:
C:\Users\Lappi\Desktop\xcp>cmp_686.exe 
C strcpy:                3731.8 MB/second (2560.0 MB in 686 clocks) 
our strcpy1:             3417.9 MB/second (2560.0 MB in 749 clocks) 
our strcpy2:             3417.9 MB/second (2560.0 MB in 749 clocks) 
C memcpy:                2562.6 MB/second (2560.0 MB in 999 clocks) 
our memcpy:              1823.4 MB/second (2560.0 MB in 1404 clocks) 

C:\Users\Lappi\Desktop\xcp>cmp_core2.exe 
C strcpy:                4238.4 MB/second (2560.0 MB in 604 clocks) 
our strcpy1:             3699.4 MB/second (2560.0 MB in 692 clocks) 
our strcpy2:             3459.5 MB/second (2560.0 MB in 740 clocks) 
C memcpy:                2552.3 MB/second (2560.0 MB in 1003 clocks) 
our memcpy:              2051.3 MB/second (2560.0 MB in 1248 clocks)

Kompiliert wurde überall mit gcc 4.5.1 -O2 -s -march=i686 bzw. core2/athlon

Wie kann man sich dieses Ergebnis erklären? Es kann doch nicht sein, dass Linux so schlecht abschneidet ... das macht mich schon skeptisch gegenüber Linux ...


hier noch der Code vom benchmark:
Code (C++):
/**** 
 * 
 * modified version from Preston L. Bannister 
 * 
 **/ 

#include <stdlib.h> 
#include <stdio.h> 
#include <time.h> 
#include <string.h> 


/******************************************************************************* 
 ******************************************************************************* 
 * configs 
 ******************************************************************************* 
 ******************************************************************************/ 
/* Number of iterations time_function() runs for each benchmarked routine. */
#define LOOPS 10000000 
/* Two 64-character source strings; time_function() passes pointers at
 * varying offsets into them to the routine under test. */
static const char sOut1[] = "QBTnetfnh8TpTWvPzARBNWr2gMFofe3AzwMXVOGbdL2xOOACwMefrMxpxZ62qakW"; 
static const char sOut2[] = "ct6V7lZ42RoryDlvM1EzT54T5qV3DGUA4UIIhVv0TSK0lTx0TKIFc4E4YIdfjfKp"; 





/******************************************************************************* 
 ******************************************************************************* 
 * engine 
 ******************************************************************************* 
 ******************************************************************************/ 
/* Length of sOut1 without its terminator. */
unsigned int nLength = ::strlen(sOut1); 
/* Tick count subtracted from every measurement in time_function();
 * main() sets it from a run with do_total. */
unsigned int dtLoop = 0; 
/* Byte counter incremented by do_total(); report_times() converts it to MB. */
unsigned int nTotal = 0; 
/* Destination buffer shared by all copy routines under test. */
char sWork[256]; 
/* Signature of a benchmark body accepted by time_function(). */
typedef void (*doit)(const char*, const char*); 

/*
 * Print one result line for a benchmark.
 *
 *   s  - label shown at the start of the line
 *   dt - elapsed time in clock() ticks
 *
 * The data volume comes from the global nTotal (bytes, shown as MB with
 * 1 MB = 1000000 bytes). Throughput uses dt converted to seconds through
 * CLOCKS_PER_SEC, whereas the trailing "clocks" figure is the raw tick count
 * and therefore scales with whatever CLOCKS_PER_SEC is on the platform.
 */
void report_times(const char* s, unsigned int dt)
{
    const double megabytes = static_cast<double>(nTotal) / 1000000;
    const double seconds = static_cast<double>(dt) / CLOCKS_PER_SEC;

    printf("%s:\t\t %0.1f MB/second (%0.1f MB in %u clocks)\n",
           s, megabytes / seconds, megabytes, dt);
}

/*
 * Call fn LOOPS times and return the elapsed clock() ticks minus dtLoop.
 *
 * On each pass the low four bits of the loop counter select an offset k
 * (0..15): the first argument starts k characters into sOut1, the second
 * starts nLength - k characters into sOut2.
 */
int time_function(doit fn)
{
    const clock_t started = ::clock();

    int pass = 0;
    while (pass < LOOPS) {
        const int offset = pass & 15;
        const char* const first = sOut1 + offset;
        const char* const second = sOut2 + nLength - offset;
        fn(first, second);
        ++pass;
    }

    const int elapsed = static_cast<int>(::clock() - started);
    return elapsed - dtLoop;
}

/*
 * Benchmark body that copies nothing: each call only adds 4 * nLength to the
 * global nTotal. Both arguments are ignored. main() times this function to
 * obtain the dtLoop baseline.
 */
void do_total(const char* s1,const char* s2)
{
    (void)s1;
    (void)s2;
    nTotal = nTotal + nLength * 4;
}
/******************************************************************************* 
 * end engine 
 ******************************************************************************/ 





/******************************************************************************* 
 ******************************************************************************* 
 * benchmark 
 ******************************************************************************* 
 ******************************************************************************/ 
/* Benchmark body: copy s1, then s2, into sWork with the C library strcpy.
 * The second copy overwrites the first; only the copying itself matters. */
void do_c_strcpy(const char* s1,const char* s2) 
{ 
    ::strcpy(sWork,s1); 
    ::strcpy(sWork,s2); 
} 


/*
 * Pointer-walking string copy: copies characters from s2 to s1 up to and
 * including the terminating '\0'. Note the argument order: s1 is the
 * destination, s2 the source.
 */
void our_strcpy1(char* s1,const char* s2)
{
    while ((*s1++ = *s2++) != '\0') {
        /* all work happens in the loop condition */
    }
}

/*
 * Index-based string copy: copies characters from s2 to s1 until the
 * terminator is found in s2, then writes the terminator to s1.
 * s1 is the destination, s2 the source.
 *
 * The former `register` specifier on the index was dropped: it was removed
 * from the language in C++17 and had no effect on optimised builds.
 */
void our_strcpy2(char* s1,const char* s2)
{
    unsigned int i;

    for (i = 0; s2[i] != 0; ++i)
        s1[i] = s2[i];
    s1[i] = 0;
}

/* Benchmark body: copy s1, then s2, into sWork using our_strcpy1. */
void do_our_strcpy1(const char* s1,const char* s2) 
{ 
    our_strcpy1(sWork,s1); 
    our_strcpy1(sWork,s2); 
} 
/* Benchmark body: copy s1, then s2, into sWork using our_strcpy2. */
void do_our_strcpy2(const char* s1,const char* s2) 
{ 
    our_strcpy2(sWork,s1); 
    our_strcpy2(sWork,s2); 
} 

/* Benchmark body: copy s1, then s2, into sWork with the C library memcpy.
 * Both strlen calls run inside this function, so they are part of the timed
 * work. Exactly strlen bytes are copied, i.e. the terminator is not. */
void do_c_memcpy(const char* s1, const char* s2) 
{ 
    int l1 = strlen(s1); 
    int l2 = strlen(s2); 
    ::memcpy(sWork, s1, l1); 
    ::memcpy(sWork, s2, l2); 
} 

/*
 * Byte-by-byte copy of `size` chars from src to dest, in ascending index
 * order. Copies nothing when size is zero or negative.
 */
void our_memcpy(char* dest, const char* src, int size)
{
    int pos = 0;
    while (pos < size) {
        dest[pos] = src[pos];
        ++pos;
    }
}


/* Benchmark body: copy s1, then s2, into sWork using our_memcpy.
 * Both strlen calls run inside this function, so they are part of the timed
 * work. Exactly strlen bytes are copied, i.e. the terminator is not. */
void do_our_memcpy(const char* s1, const char* s2) 
{ 
    int l1 = strlen(s1); 
    int l2 = strlen(s2); 
    our_memcpy(sWork, s1, l1); 
    our_memcpy(sWork, s2, l2); 
} 

/******************************************************************************* 
 * end benchmark 
 ******************************************************************************/ 





/******************************************************************************* 
 ******************************************************************************* 
 * main programm 
 ******************************************************************************* 
 ******************************************************************************/ 
/*
 * Entry point. First times do_total to establish the dtLoop baseline (that
 * run also fills nTotal), then times each copy routine and prints one result
 * line per routine. Command-line arguments are not used.
 */
int main(int ac,char** av)
{
    (void)ac;
    (void)av;

    dtLoop = time_function(do_total);

    const int ticksCStrcpy = time_function(do_c_strcpy);
    report_times("C strcpy", ticksCStrcpy);

    const int ticksOurStrcpy1 = time_function(do_our_strcpy1);
    report_times("our strcpy1", ticksOurStrcpy1);

    const int ticksOurStrcpy2 = time_function(do_our_strcpy2);
    report_times("our strcpy2", ticksOurStrcpy2);

    const int ticksCMemcpy = time_function(do_c_memcpy);
    report_times("C memcpy", ticksCMemcpy);

    const int ticksOurMemcpy = time_function(do_our_memcpy);
    report_times("our memcpy", ticksOurMemcpy);

    return 0;
}

MfG
 
/* Quoted from the benchmark above: prints throughput, data volume and the raw
 * tick count dt. Only ts is normalised by CLOCKS_PER_SEC; dt is printed as is. */
void report_times(const char* s, unsigned int dt)
{
double ts = (double)dt / CLOCKS_PER_SEC;
double mb = (double)(nTotal) / 1000000;
double rate = mb / ts;
printf("%s:\t\t %0.1f MB/second (%0.1f MB in %u clocks)\n", s, rate, mb, dt);
}

Du gibst hier nur die Clock-Ticks aus (dt). Je nachdem, wie viele CLOCKS_PER_SEC vom System generiert werden, sind es mehr oder weniger. Deswegen wird ts auch mit dt/CLOCKS_PER_SEC berechnet. Wenn du also als Berechnungszeit ts anstelle von dt ausgibst, lässt das schon genauere Rückschlüsse zu.
 

Ähnliche Themen

Xubuntu - AVR32-linux crosscompile sqlite

Displayport + externer Monitor zeigt bei startx nichts erst bei DVI

sem_open - Problem mit Semaphoren

Modulfehler?

Festplatte friert ein nach suspend/resume

Zurück
Oben