// This application tests the ANOVA code independently of OOo.
// Run it and compare the printed F-statistic (the last value printed by
// each routine) with the value computed by gnumeric for the same data.
//
// Originally compiled with MS VC 6.0 (1998); the code sticks to C++98.

//#include "stdafx.h"
#include <iostream>
#include <vector>

///////////////////
// We emulate some of the OOo routines and functions

typedef unsigned int SCSIZE;

// Emulated result channel: reports that no value can be computed.
void SetNoValue()
{
    std::wcout << L"No Value";
}

// Emulated result channel: reports an invalid argument.
void SetIllegalParameter()
{
    std::wcout << L"Illegal Parameter";
}

// Emulated result channel: prints the value handed back by a routine.
void PushDouble(double i)
{
    std::wcout << L"\nF-Statistic: " << i;
}

///////////////////////////////////
// More emulation

// Minimal stand-in for the matrix type consumed by the ANOVA routines.
// It stores only its two dimensions; cell values are synthesized on demand.
class ScMatrixRef
{
private:
    const SCSIZE ic;
    const SCSIZE jc;

public:
    ScMatrixRef(SCSIZE i, SCSIZE j) : ic(i), jc(j) {}

    // Returns the two dimensions in the order they were given to the constructor.
    void GetDimensions(SCSIZE &iR, SCSIZE &jR) const
    {
        iR = ic;
        jR = jc;
    }

    // The emulated matrix never contains strings.
    bool IsString(SCSIZE /* i */, SCSIZE /* j */) const { return false; }

    bool IsMatrix() const { return true; }

    // Always false here, because IsMatrix() always returns true.
    bool operator ! () const { return !IsMatrix(); }

    // Synthetic test data: j * (i-1)/(i+1), with i replaced by 1.1 when
    // i <= 1. Consequently i == 0 and i == 1 produce identical values.
    double GetDouble(double i, double j) const
    {
        i = (i > 1) ? i : 1.1;
        return j * (i - 1) / (i + 1);
    }
};

// Matrix handed to the routines under test; set via GetMatrix(ScMatrixRef&).
ScMatrixRef* pMatrix = 0;

// Registers the matrix that the next GetMatrix() calls will return.
void GetMatrix(ScMatrixRef &MatrixClass)
{
    pMatrix = &MatrixClass;
}

// Returns the registered matrix. A matrix must have been registered first,
// otherwise this dereferences a null pointer.
ScMatrixRef &GetMatrix()
{
    return *pMatrix;
}

///////////////////////////////////
// New OOo CALC FUNCTIONS
//
// THIS IS THE IMPORTANT CODE

// One-way ANOVA over iVarNr groups, one matrix per group.
//
// iVarNr is the number of variables (groups). Every numeric cell of a
// group's matrix is one observation. The F-statistic MSB/MSE is handed to
// PushDouble(). SetIllegalParameter() is called when a matrix is missing.
// SetNoValue() is called when the statistic is undefined: fewer than two
// groups, a group without numeric values, or no error degrees of freedom.
void ScANOVAMultiple(SCSIZE iVarNr)
{
    if (iVarNr < 2)
    {
        // ANOVA needs at least two groups; this also keeps iVarNr - 1
        // from wrapping around for iVarNr == 0.
        SetNoValue();
        return;
    }

    std::vector<SCSIZE> nVar(iVarNr);        // number of data values per group
    std::vector<double> fSumX(iVarNr, 0.0);  // per-group sum, then per-group mean
    SCSIZE N = 0;                            // total number of data values
    SCSIZE nR = 0, nC = 0;                   // matrix dimensions
    double fSumM = 0.0;                      // grand total, then grand mean

    // First pass: group means and the grand total.
    for (SCSIZE iCount = 0; iCount < iVarNr; iCount++)
    {
        ScMatrixRef pMat = GetMatrix();
        if (!pMat)
        {
            // No data matrix - invalid parameters
            SetIllegalParameter();
            return;
        }
        pMat.GetDimensions(nC, nR);

        SCSIZE jCount = 0;  // number of values in the current group
        for (SCSIZE i = 0; i < nC; i++)
            for (SCSIZE j = 0; j < nR; j++)
            {
                if (!pMat.IsString(i, j))
                {
                    // FOR TESTING PURPOSES: iCount is passed instead of i so
                    // that every group yields different emulated data.
                    fSumX[iCount] += pMat.GetDouble(/* i */ iCount, j);
                    jCount++;
                }
            }

        if (jCount == 0)
        {
            // An empty group has no mean (0/0).
            SetNoValue();
            return;
        }

        fSumM += fSumX[iCount];
        fSumX[iCount] = fSumX[iCount] / jCount;  // group mean
        nVar[iCount] = jCount;
        N += jCount;
    }

    const SCSIZE dfB = iVarNr - 1;  // degrees of freedom between groups
    const SCSIZE dfE = N - iVarNr;  // degrees of freedom within groups (error)
    if (dfE == 0)
    {
        // One value per group: the within-group variance is undefined.
        SetNoValue();
        return;
    }
    fSumM = fSumM / N;  // grand mean

    double fMSB = 0.0;  // inter-group variance
    double fMSE = 0.0;  // intra-group variance (due to error)

    // Second pass: sums of squares between and within groups.
    for (SCSIZE iCount = 0; iCount < iVarNr; iCount++)
    {
        ScMatrixRef pMat = GetMatrix();
        if (!pMat)
        {
            SetIllegalParameter();
            return;
        }
        pMat.GetDimensions(nC, nR);

        for (SCSIZE i = 0; i < nC; i++)
            for (SCSIZE j = 0; j < nR; j++)
            {
                if (!pMat.IsString(i, j))
                {
                    // FOR TESTING PURPOSES: see the first pass.
                    const double fValX = pMat.GetDouble(/* i */ iCount, j);
                    fMSE += (fValX - fSumX[iCount]) * (fValX - fSumX[iCount]);
                }
            }

        fMSB += nVar[iCount] * (fSumM - fSumX[iCount]) * (fSumM - fSumX[iCount]);
    }

    fMSB = fMSB / dfB;
    fMSE = fMSE / dfE;

    PushDouble(fMSB / fMSE);
    // TODO:
    // - We still need to interpret fMSB/fMSE using the F statistics
    // - This is done using: =FDIST(fMSB/fMSE; dfB; dfE)
}

/////////////////////////////

// One-way ANOVA over a single matrix where every column is a separate
// variable (group).
// Disadvantage: only one column per variable,
// but it is easier to use and it is not limited to 30 variables.
//
// Results are reported the same way as in ScANOVAMultiple(). In addition
// each group's mean is printed as a test aid.
void ScANOVAMono()
{
    ScMatrixRef pMat = GetMatrix();
    if (!pMat)
    {
        // No data matrix - invalid parameters
        SetIllegalParameter();
        return;
    }

    // We have only one matrix and consider every column a separate data set.
    SCSIZE iVarNr = 0 /* nC */, nRMax = 0;
    pMat.GetDimensions(iVarNr /* nC */, nRMax);

    if (iVarNr < 2)
    {
        // Zero or one variable - ANOVA not possible
        SetNoValue();
        return;
    }

    std::vector<SCSIZE> nR(iVarNr);          // number of data values per group
    std::vector<double> fSumX(iVarNr, 0.0);  // per-group sum, then per-group mean
    SCSIZE N = 0;                            // total number of data values
    double fSumM = 0.0;                      // grand total, then grand mean

    // First pass: group means and the grand total.
    for (SCSIZE iCount = 0; iCount < iVarNr; iCount++)
    {
        SCSIZE jCount = 0;  // number of values in the current group
        for (SCSIZE j = 0; j < nRMax; j++)
        {
            if (!pMat.IsString(iCount, j))
            {
                fSumX[iCount] += pMat.GetDouble(iCount, j);
                jCount++;
            }
        }

        if (jCount == 0)
        {
            // An empty group has no mean (0/0).
            SetNoValue();
            return;
        }

        fSumM += fSumX[iCount];                  // grand total
        fSumX[iCount] = fSumX[iCount] / jCount;  // group mean
        nR[iCount] = jCount;
        N += jCount;

        // TEST output; note that the value printed is the group mean.
        std::wcout << L"Partial Sum: " << fSumX[iCount] << L"\n";
    }

    const SCSIZE dfB = iVarNr - 1;  // degrees of freedom between groups
    const SCSIZE dfE = N - iVarNr;  // degrees of freedom within groups (error)
    if (dfE == 0)
    {
        // One value per group: the within-group variance is undefined.
        SetNoValue();
        return;
    }
    fSumM = fSumM / N;  // grand mean

    double fMSB = 0.0;  // inter-group variance
    double fMSE = 0.0;  // intra-group variance (due to error)

    // Second pass: sums of squares between and within groups.
    for (SCSIZE iCount = 0; iCount < iVarNr; iCount++)
    {
        for (SCSIZE jCount = 0; jCount < nRMax; jCount++)
        {
            if (!pMat.IsString(iCount, jCount))
            {
                const double fValX = pMat.GetDouble(iCount, jCount);
                fMSE += (fValX - fSumX[iCount]) * (fValX - fSumX[iCount]);
            }
        }
        fMSB += nR[iCount] * (fSumM - fSumX[iCount]) * (fSumM - fSumX[iCount]);
        std::wcout << L"\n";
    }

    fMSB = fMSB / dfB;
    fMSE = fMSE / dfE;

    PushDouble(fMSB / fMSE);
    // TODO:
    // - We still need to interpret fMSB/fMSE using the F statistics
    // - This is done using: =FDIST(fMSB/fMSE; dfB; dfE)
}

/////////////////////////////

// Runs both routines on the emulated data. ScANOVAMono() gets one 5 x 10
// matrix (five variables), ScANOVAMultiple() gets five variables that are
// each read from a 1 x 10 matrix.
int main(int /* argc */, char* /* argv */[])
{
    // TEST THE FUNCTIONS
    ScMatrixRef MatrixClass(5, 10);
    GetMatrix(MatrixClass);
    ScANOVAMono();

    ScMatrixRef MatrixClassA(1, 10);
    GetMatrix(MatrixClassA);
    ScANOVAMultiple(5);

    // {wprintf(L"\n\nHit ENTER to terminate the program ");_flushall();getchar();}
    return 0;
}