Bench results:
original:
-> FLOPS 3.00
GCC version: 4.3.3
Ops count: 1073217024
Time spent: 27.768 sec
MFLOPS: 38.65
FLOPS 3.00
GCC version: 4.3.3
Ops count: 1073217024
Time spent: 28.359 sec
MFLOPS: 37.84
soft-hard-float:
GCC version: 4.3.3
Ops count: 1073217024
Time spent: 14.874 sec
MFLOPS: 72.15
FLOPS 3.00
GCC version: 4.3.3
Ops count: 1073217024
Time spent: 14.249 sec
MFLOPS: 75.32
direct-hard-float:
-> FLOPS 3.00
GCC version: 4.3.3
Ops count: 1073217024
Time spent: 13.021 sec
MFLOPS: 82.42
FLOPS 3.00
GCC version: 4.3.3
Ops count: 1073217024
Time spent: 12.472 sec
MFLOPS: 86.05
FLOPS 3.00
GCC version: 4.3.3
Ops count: 1073217024
Time spent: 11.803 sec
MFLOPS: 90.93
FLOPS 3.00
GCC version: 4.3.3
Ops count: 1073217024
Time spent: 11.945 sec
MFLOPS: 89.85
bench program:
```
#include <stdio.h>
#include <stdlib.h>
#ifdef __vxworks
#include <sys/resource.h>
#include <vxworks.h>
#include <timers.h>
#include <time.h>
#elif defined(_MSC_VER)
#include <Windows.h>
#include <time.h>
#else
#include <time.h>
#endif
/*
cl -O2 test_flops.c
gcc -O2 test_flops.c -o test_flops
*/
/*
 * Build-time configuration of the matrix size and the operation counter.
 *
 * Default build (DIM not supplied by the build): DIM is 1024 and nop is a
 * constant holding the precomputed number of operations for one dge() pass.
 *
 * Custom build (DIM supplied by the build, e.g. -DDIM=256): COUNT is
 * defined, nop starts at 0 and dge() increments it at run time.
 */
#ifndef DIM
#define DIM 1024
const long long int nop = 1073217024;
#else
#define COUNT
long long int nop = 0;
#endif
/*
 * Write the DIM x DIM matrix A to stdout, one matrix row per output line.
 * Every element is formatted with "%6.3f"; no separator is emitted between
 * consecutive elements.
 */
void printm(double A[DIM][DIM])
{
    int row = 0;

    while (row < DIM) {
        const double *cell = A[row];
        const double *row_end = cell + DIM;

        for (; cell != row_end; ++cell) {
            printf("%6.3f", *cell);
        }
        printf("\n");
        ++row;
    }
}
/*
 * Fill the DIM x DIM matrix A with pseudo-random values.
 *
 * The generator is re-seeded with the fixed seed 38741 on every call, so
 * each call draws the same rand() sequence. Elements are assigned in
 * row-major order as rand() / RAND_MAX - 0.5.
 */
void initm(double A[DIM][DIM])
{
    const double rand_max = (double)RAND_MAX;
    int row, col;

    srand(38741);
    for (row = 0; row < DIM; ++row) {
        double *dst = A[row];

        for (col = 0; col < DIM; ++col) {
            dst[col] = (double)rand() / rand_max - 0.5;
        }
    }
}
/*
 * Benchmark kernel: in-place Gaussian elimination on the DIM x DIM matrix A
 * without pivoting.
 *
 * For every pivot row p, each row below it has factor * (row p) subtracted,
 * where factor = A[row][p] / A[p][p]. The subtraction deliberately runs over
 * the whole row (columns 0 .. DIM-1), not just the columns right of the
 * pivot: the precomputed operation count used for DIM == 1024 relies on it
 * (1 division + 2 * DIM multiply/subtract operations per eliminated row).
 *
 * When COUNT is defined, the global nop is incremented by exactly those
 * amounts.
 */
void dge(double A[DIM][DIM])
{
    int pivot, row, col;
    double factor;

    for (pivot = 0; pivot + 1 < DIM; ++pivot) {
        for (row = pivot + 1; row < DIM; ++row) {
            /* The pivot row is never modified while it is the pivot. */
            factor = A[row][pivot] / A[pivot][pivot];
#ifdef COUNT
            nop += 1;
#endif
            for (col = 0; col < DIM; ++col) {
                A[row][col] -= factor * A[pivot][col];
#ifdef COUNT
                nop += 2;
#endif
            }
        }
    }
}
/*
 * The benchmark matrix: main() fills it with initm() and then runs dge() on
 * it. Defined at file scope; with the default DIM of 1024 it holds
 * 1024 * 1024 doubles.
 */
double X[DIM][DIM];
/*
 * Return a timestamp in seconds with sub-second precision.
 *
 * The value has an undefined starting point; only the difference between
 * two calls is meaningful.
 *
 * - MSVC: QueryPerformanceCounter scaled by QueryPerformanceFrequency. The
 *   frequency is queried once and cached in a static.
 * - Elsewhere: clock_gettime(CLOCK_MONOTONIC) when that clock id is
 *   available, so that wall-clock adjustments during the measurement cannot
 *   distort (or make negative) the measured interval. If the monotonic clock
 *   is missing or fails at run time, the wall clock is used instead
 *   (CLOCK_REALTIME, or C11 timespec_get when the POSIX clock ids are not
 *   visible, e.g. in a strict ISO C build).
 */
double get_seconds(void)
{
#ifdef _MSC_VER
    /* Declarations first: older MSVC compiles .c files as C89. */
    static LARGE_INTEGER frequency;
    LARGE_INTEGER now;

    if (frequency.QuadPart == 0)
        QueryPerformanceFrequency(&frequency);
    QueryPerformanceCounter(&now);
    return (now.QuadPart * 1.0) / frequency.QuadPart;
#else
    struct timespec now;

#ifdef CLOCK_MONOTONIC
    if (clock_gettime(CLOCK_MONOTONIC, &now) == 0)
        return now.tv_sec + now.tv_nsec * 1e-9;
#endif
    /* Wall-clock fallback; may jump if the system time is changed. */
#if !defined(CLOCK_REALTIME) && defined(TIME_UTC)
    timespec_get(&now, TIME_UTC);
#else
    clock_gettime(CLOCK_REALTIME, &now);
#endif
    return now.tv_sec + now.tv_nsec * 1e-9;
#endif
}
/*
 * Benchmark driver.
 *
 * Prints a small floating-point sanity line and the compiler version, fills
 * X with initm(), runs dge(X) `count` times and reports the operation count,
 * the elapsed time and the resulting MFLOPS.
 *
 * argv[1] (ignored on VxWorks): optional repeat count, a positive integer.
 * An unparsable or non-positive value is reported on stderr and replaced by
 * the default of 1.
 *
 * Returns 0.
 */
int main (int argc, char **argv)
{
    double a = 1.0;
    double b = 2.0;
    double c = a + b;
    double t;
    long long int total_ops;
    int count = 1;
    int i;

    printf("FLOPS %.2lf\n", c);
#ifdef _MSC_VER
    printf("MSC_VER version: %d\n", _MSC_VER);
#else
    printf("GCC version: " __VERSION__ "\n");
#endif

    /* Parse the arguments before starting the timer so that only the
     * dge() passes are measured. */
#ifndef __vxworks
    if (argc > 1) {
        if (sscanf(argv[1], "%d", &count) != 1 || count < 1) {
            fprintf(stderr, "invalid repeat count '%s', using 1\n", argv[1]);
            count = 1;
        }
    }
#endif

    initm(X);

    t = get_seconds();
    for (i = 0; i < count; i += 1) {
        dge(X);
    }
    t = get_seconds() - t;

#ifdef COUNT
    /* nop was incremented by every dge() pass, so it already is the total. */
    total_ops = nop;
#else
    /* nop is the precomputed cost of a single dge() pass. */
    total_ops = nop * count;
#endif

    printf("Ops count: %lld\n", total_ops);
    printf("Time spent: %.3lf sec\n", t);
    printf("MFLOPS: %.2f\n", 1e-6 * (double)total_ops / t );
#ifdef PRINTM
    printm(X);
#endif
    return 0;
}
```