/* * Compile with: gcc -o rse rse.c * and run without any arguments * */ #include /* These functions have been borrowed from OpenOffice.org code */ int approxEqual(double a, double b) { if ( a == b ) return 1; double x = a - b; return ((x < 0.0 ? -x : x) < ((a < 0.0 ? -a : a) * (1.0 / (16777216.0 * 16777216.0))) ? 1 : 0); } double approxAdd(double a, double b) { if ( ((a < 0.0 && b > 0.0) || (b < 0.0 && a > 0.0)) && approxEqual( a, -b ) ) return 0.0; return a + b; } double approxSub(double a, double b) { if ( ((a < 0.0 && b < 0.0) || (a > 0.0 && b > 0.0)) && approxEqual( a, b ) ) return 0.0; return a - b; } /* The following main() function has three different sections, where the N - 1 variance is calculated three times using different methods. First duplicates the OpenOffice.org default behaviour. The second duplicates the Gnumeric behavior. The third is a fixed OpenOffice.org behaviour. The OpenOffice.org loss of precision happens, probably intentionally due to the approxAdd(), approxEqual(), approxSub() and similar functions which drop precision. The fixed OpenOffice.org behaviour generates results which are slightly different from Gnumeric results. This is due to the formula used. There is loss of precision in both the Gnumeric way and the OpenOffice way, due to the inherent nature of floating point architecture. They both produce slightly different results. Gnumeric uses the following formula to find the N - 1 variance: s^2 = (1 / (N - 1)) * Sum1..N((Xi - M)^2) where N is the count of the set, Xi is an element of the set, M is the mean of the set. Due to the stack based parsing nature of the OpenOffice.org code, it uses a different formula derived from the above formula, as it does not know the mean M in advance: s^2 = (1 / (N - 1)) * [Sum1..N(Xi^2) - {(Sum1..N(Xi))^2 / N}] */ int main(int argc, char *argv[]) { double values[2] = {30000, 30000.001}; double count = 2; double sum, sumsqr; double val, var, vartemp, mean; int i; /* Default OpenOffice.org behaviour */ sum = 0; sumsqr = 0; val = 0; for (i = 0; i < count; i++) { sum += values[i]; sumsqr += values[i] * values[i]; } val = approxSub(sumsqr, ((sum * sum) / count)); printf("Default OpenOffice.org behaviour\n"); printf("\t%20.10g\n\n", val); /* Gnumeric behaviour */ mean = sum / count; vartemp = 0; var = 0; for (i = 0; i < count; i++) { vartemp = (values[i] - mean); var += vartemp * vartemp; } var = var * (1.0 / (count - 1)); printf("Gnumeric behaviour\n"); printf("\t%20.10g\n\n", var); /* Fixed OpenOffice.org behaviour */ sum = 0; sumsqr = 0; val = 0; for (i = 0; i < count; i++) { sum += values[i]; sumsqr += values[i] * values[i]; } // printf("sumsquared: %30.15f\n", sumsqr); // printf("sum: %30.15f\n", sum); // printf("count: %30.15f\n", count); val = (sumsqr - ((sum * sum) / count)); printf("Fixed OpenOffice.org behaviour\n"); printf("\t%20.10g\n\n", val); return 0; }