MODULE BITCorrelation EXPORTS Main; (* Last edited on 1999-12-22 21:10:56 by hcgl *) (* Computes the covariances between elements of a bunch of real chains. *) (* Given a set of chains "p_k", all with the same number "N" of elements, this program prints the covariance and correlation matrices for pairs of elements. The covariance "cov[i,j]" of elements "i" and "j" is defined as the average of the products "(p_k[i]-m[i])*(p_k[j]-m[j])", over all chains "p_k", where "m[i]" is a user-provided average chain (all zero by default). The correlation "r[i,j]" between elements "i" and "j" is defined as "cov[i,j]/sqrt(cov[i,i]*cov[j,j])". It can be thought of as the co-sine of the angle between the vector "u", consisting of the "i"th elements of all chains "p_k", and the vector "v" consisting of the "j"th elements of all those chains. Due to memory and time limitations, it is usually necessary to consider only a subset of the elements present in the input chains. Thus the program considers only elements "p_k[iMin..iMax]" where "iMin" and "iMax" are specified by the user. The element indices are taken modulo "N"; thus if the input chains are Fourier transforms in the `smart' basis (see PZFourier.i3), one would usually want to specify "iMin = -iMax" in order to get all Fourier components with frequency up to iMax. *) IMPORT ParseParams, Process, Text, Wr, Fmt, FileRd; IMPORT OSError, Thread; IMPORT PZLRChain; FROM Math IMPORT sqrt; FROM Stdio IMPORT stderr, stdout; FROM PZTypes IMPORT LONG, LONGS, NAT, INT; <* FATAL Wr.Failure, Thread.Alerted, OSError.E *> TYPE Options = RECORD avgFile: TEXT; (* Input average file name (WITH extension) or "". *) iMin: INT; (* First sample to consider in each chain. *) iMax: INT; (* Last sample to consider in each chain. *) inFile: REF FileNames; (* Input file names (WITH extension). *) END; FileNames = ARRAY OF TEXT; Matrix = ARRAY OF LONGS; PROCEDURE Main() = BEGIN WITH o = GetOptions(), iMin = o.iMin, iMax = o.iMax, n = iMax - iMin + 1, nFiles = NUMBER(o.inFile^), p = NEW(REF PZLRChain.T, n)^, m = NEW(REF PZLRChain.T, n)^, cov = NEW(REF Matrix, n, n)^ DO (* Read averages *) IF Text.Empty(o.avgFile) THEN Wr.PutText(stderr, "assuming zero averages.\n"); FOR i := 0 TO n-1 DO m[i] := 0.0d0 END ELSE Wr.PutText(stderr, o.avgFile & " ..."); WITH rd = PZLRChain.Read(FileRd.Open(o.avgFile), headerOnly := FALSE), c = rd.c^ DO <* ASSERT NUMBER(c) >= o.iMax *> ExtractElements(c, iMin, iMax, m) END; Wr.PutText(stderr, "\n"); END; (* Initialize covariance matrix *) FOR i := 0 TO n-1 DO FOR j := 0 TO n-1 DO cov[i,j] := 0.0d0 END END; (* Read strings and accumulate products: *) FOR k := 0 TO nFiles-1 DO Wr.PutText(stderr, o.inFile[k] & " ..."); WITH rp = PZLRChain.Read(FileRd.Open(o.inFile[k]), headerOnly := FALSE) DO <* ASSERT NUMBER(rp.c^) >= n *> ExtractElements(rp.c^, iMin, iMax, p) END; FOR i := 0 TO n-1 DO p[i] := p[i] - m[i] END; FOR i := 0 TO n-1 DO FOR j := 0 TO n-1 DO WITH pipj = p[i]*p[j] DO cov[i,j] := cov[i,j] + pipj END END END; Wr.PutText(stderr, "\n"); END; (* Compute variances and covariances: *) FOR i := 0 TO n-1 DO FOR j := 0 TO n-1 DO WITH cij = cov[i,j] DO cij := cij/FLOAT(nFiles, LONG) END END END; (* Print results: *) PrintCovariances(cov, iMin, iMax); PrintCorrelations(cov, iMin, iMax); END END Main; PROCEDURE ExtractElements(READONLY p: PZLRChain.T; iMin, iMax: INT; VAR x: PZLRChain.T) = BEGIN WITH np = NUMBER(p), nx = NUMBER(x) DO <* ASSERT nx = iMax - iMin + 1 *> FOR j := iMin TO iMax DO x[j - iMin] := p[j MOD np] END END END ExtractElements; PROCEDURE PrintCovariances(READONLY cov: Matrix; iMin, iMax: INT) = VAR style: Fmt.Style; width, prec: NAT; BEGIN ChooseElementFormat(cov, style, width, prec); WITH n = NUMBER(cov) DO <* ASSERT NUMBER(cov[0]) = n *> PrintHeaders("Covariance matrix", iMin, iMax, width); (* Print matrix: *) FOR i := 0 TO n-1 DO Wr.PutText(stdout, Fmt.Pad(Fmt.Int(i + iMin), 3)); Wr.PutText(stdout, " "); FOR j := 0 TO n-1 DO WITH cij = cov[i,j] DO Wr.PutText(stdout, " "); Wr.PutText(stdout, Fmt.Pad(Fmt.LongReal(cij, style, prec), width)); END END; Wr.PutText(stdout, "\n"); END; END END PrintCovariances; PROCEDURE PrintCorrelations(READONLY cov: Matrix; iMin, iMax: INT) = CONST prec = 3; width = prec + 3; BEGIN WITH n = NUMBER(cov) DO <* ASSERT NUMBER(cov[0]) = n *> PrintHeaders("Correlation matrix", iMin, iMax, width); (* Print matrix: *) FOR i := 0 TO n-1 DO Wr.PutText(stdout, Fmt.Pad(Fmt.Int(i + iMin), 3)); Wr.PutText(stdout, " "); FOR j := 0 TO n-1 DO WITH rij = cov[i,j]/sqrt(cov[i,i]*cov[j,j] + 1.0d-100) DO Wr.PutText(stdout, " "); Wr.PutText(stdout, Fmt.Pad(Fmt.LongReal(rij, Fmt.Style.Fix, prec), width)); END END; Wr.PutText(stdout, "\n"); END; END END PrintCorrelations; PROCEDURE PrintHeaders(title: TEXT; iMin, iMax: INT; width: NAT) = BEGIN Wr.PutText(stdout, "\n\n" & title & "\n\n"); (* Print column headers *) Wr.PutText(stdout, " "); FOR j := iMin TO iMax DO Wr.PutText(stdout, " "); Wr.PutText(stdout, Fmt.Pad(Fmt.Int(j), width)); END; Wr.PutText(stdout, "\n"); (* Print dashes *) Wr.PutText(stdout, "--- "); FOR j := iMin TO iMax DO Wr.PutText(stdout, " "); Wr.PutText(stdout, Fmt.Pad("", width, '-')); END; Wr.PutText(stdout, "\n"); END PrintHeaders; PROCEDURE ChooseElementFormat( READONLY m: Matrix; VAR style: Fmt.Style; VAR width: NAT; VAR prec: NAT; ) = CONST sigDigits = 4; minWidth = 3; maxWidth = sigDigits+7; (* Allowing for sign, period, "d", and exponent *) maxPrec = maxWidth - 3; (* Allowing for sign, period, and one integer digit *) (* These values must be consistent with teh above: *) maxFixVal = 9999999999.0d0; (* = 10^maxWidth - 1 *) minFixVal = 0.0001000d0; (* = 10^-(maxPrec - sigDigits) *) minRound = 0.000000005d0; (* = 0.5 * 10^-maxPrec *) maxRound = 0.5d0; VAR maxVal: LONG := 0.0d0; BEGIN (* Find maximum entry: *) FOR i := 0 TO LAST(m) DO FOR j := 0 TO LAST(m[i]) DO maxVal := MAX(maxVal, ABS(m[i,j])) END; END; IF maxVal = 0.0d0 THEN style := Fmt.Style.Fix; width := minWidth; prec := 0; ELSIF maxVal < minFixVal OR maxVal + maxRound > maxFixVal THEN style := Fmt.Style.Sci; width := sigDigits + 7; prec := sigDigits - 1; ELSE style := Fmt.Style.Fix; prec := maxPrec; width := 3 + prec; WHILE maxVal/10.0d0 + minRound > minFixVal DO maxVal := maxVal/10.0d0; IF prec >= sigDigits THEN prec := prec - 1; width := width -1 ELSIF prec > 0 THEN prec := prec - 1 ELSE width := width + 1 END END; END END ChooseElementFormat; PROCEDURE GetOptions(): Options = VAR o: Options; BEGIN WITH pp = NEW(ParseParams.T).init(stderr) DO TRY pp.getKeyword("-iMin"); o.iMin := pp.getNextInt(FIRST(INT),LAST(INT)); pp.getKeyword("-iMax"); o.iMax := pp.getNextInt(o.iMin,o.iMin+30); IF pp.keywordPresent("-avgFile") THEN o.avgFile := pp.getNext() ELSE o.avgFile := "" END; pp.skipParsed(); WITH nFiles = NUMBER(pp.arg^) - pp.next DO IF nFiles = 0 THEN pp.error("no files specified") END; o.inFile := NEW(REF ARRAY OF TEXT, nFiles); FOR i := 0 TO nFiles-1 DO o.inFile[i] := pp.getNext() END END; pp.finish(); EXCEPT | ParseParams.Error => Wr.PutText(stderr, "Usage: BITCorrelation \\\n"); Wr.PutText(stderr, " -iMin INT -iMax INT \\\n"); Wr.PutText(stderr, " [ -avgFile FILE ] \\\n"); Wr.PutText(stderr, " FILE1 FILE2 ... FILEn\n"); Process.Exit(1); END; END; RETURN o END GetOptions; BEGIN Main() END BITCorrelation.