1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
// Copyright 2005 Nanorex, Inc. See LICENSE file for details.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <sys/vtimes.h>
#include "interp_sqrt.c"
static inline double
extendSqrt(double x)
{
// this adds 4 nanoseconds
// extend range to [0.01,10000]
if (x < 1.0) {
return 0.1 * interpolator_sqrt(100.0 * x);
} else if (x > 100.0) {
return 10.0 * interpolator_sqrt(0.01 * x);
} else {
return interpolator_sqrt(x);
}
}
#define TIMEDIFF(late,early) ((late).tv_sec - (early).tv_sec + \
1.0e-9 * ((late).tv_nsec - (early).tv_nsec))
#define ELAPSED_NANOSECONDS() (1.e9 * TIMEDIFF(ts2, ts1) / N)
struct timespec ts1, ts2, before, after;
volatile double x, xsq, y;
#define THOUSAND 1000
#define MILLION (THOUSAND * THOUSAND)
#define N (100 * MILLION)
//#define N (100 * THOUSAND)
/*
* 0.6 to 0.7 nanoseconds for a floating-point comparison, 14
* nanoseconds for library sqrt
*
* 4 nanoseconds for just the polynomial
*
* It takes four comparisons to get down to the polynomial, so I'd
* assume it would take 4*0.7 + 4 nsecs = ~7 nsec for the lookup sqrt,
* but instead it takes 11 or 12 nsecs. Why?
*
* The library sqrt takes 14 nsecs.
*/
int main(int argc, char **argv)
{
int i;
double tbaseline;
x = 25.0;
xsq = x * x;
/* Reference: a loop that just does an assignment */
clock_gettime(CLOCK_REALTIME, &before);
clock_gettime(CLOCK_REALTIME, &ts1);
for (i = 0; i < N; i++) {
y = x;
}
clock_gettime(CLOCK_REALTIME, &ts2);
tbaseline = ELAPSED_NANOSECONDS();
/* Floating-point comparisons take 1.8 nsecs */
clock_gettime(CLOCK_REALTIME, &ts1);
for (i = 0; i < N; i++) {
if (x > M_PI)
y = 0.0;
else
y = 1.0;
}
clock_gettime(CLOCK_REALTIME, &ts2);
printf("%f nanoseconds for float comparison\n",
ELAPSED_NANOSECONDS() - tbaseline);
/* The library sqrt() function takes 14 nsecs */
clock_gettime(CLOCK_REALTIME, &ts1);
for (i = 0; i < N; i++) {
y = sqrt(x);
}
clock_gettime(CLOCK_REALTIME, &ts2);
printf("%f nanoseconds for library sqrt\n",
ELAPSED_NANOSECONDS() - tbaseline);
printf("sqrt(%f) = %f\n", x, y);
/* Tinker with the lookup-table sqrt until it's fast */
clock_gettime(CLOCK_REALTIME, &ts1);
for (i = 0; i < N; i++) {
//y = extendSqrt(xsq);
y = interpolator_sqrt(xsq);
}
clock_gettime(CLOCK_REALTIME, &ts2);
printf("%f nanoseconds for lookup-table sqrt\n",
ELAPSED_NANOSECONDS() - tbaseline);
clock_gettime(CLOCK_REALTIME, &after);
printf("sqrtlut(%f) = %f\n", x, y);
printf("%f seconds for the whole thing\n", TIMEDIFF(after, before));
return 0;
}
|