/* Plots graphs of symbol chain mismatch as a function of chain length */ /* Last edited on 2015-01-20 16:47:10 by stolfilocal */ #include #include #include #include #include #include #include #include #include TYPE MismatchOptions == pz_mismatch.Options; Options = struct ??? { char *input; /* Input candidate file (without extensions). */ unsigned maxCands; /* Max number of candidates to consider. */ char *chainDir; /* Directory where chains reside. */ char *chainPrefix; /* Input chain file name prefix. */ unsigned band; /* Nominal band width (used for chain file names). */ double step; /* Nominal sampling step. */ char *output; /* Output plot file name (without extensions). */ double perturb; /* Amount of perturbation to second chain. */ unsigned shift; /* Amount of shifting to apply to second chain. */ double maxShift; /* Max shift to try in alignment. */ MismatchOptions mp; /* Mismatch formula parameters. */ bool_t dontReverse; /* TRUE will not reverse the sense of chain 2. */ bool_t printMatches; /* TRUE to print matches. */ } ???; TYPE VTable == double_vec_t; int main(int argc, char **argv ) { { /* with */ ??? o = pz_get_options(int argc, char **argv); /* do */ { /* with */ ??? candData = pz_candidate_read(open_read(o.input & ".can", TRUE)); ??? cand = candData.c^; ??? nCands = (cand.ne); ??? chainUsed = pz_candidate_chains_used(cand)^; chAllData = pz_symbol_chain_read_all( o.chainPrefix, o.band, sel := chainUsed, extension := ".cvc"; dir := o.chainDir, header_only := FALSE ); ??? plotWr = open_write(o.output & ".plt", TRUE); ??? dch = pz_symbol_make_decode_table(chAllData.sigmaMin)^; ??? ech = pz_symbol_make_error_var_table(chAllData.sigmaMin)^; /* do */ affirm(chAllData.sigmaMin == chAllData.sigmaMax ); fprintf(stderr, Fmt.Int(nCands) & " candidates\n"); for (i = 0; i <= min(o.maxCands, nCands) - 1 ; i++){ fprintf(stderr, "candidate %s\n", Fmt.Int(i) ); ProcessCandidate(plotWr, cand[i], chAllData.chData^, dch, ech, o); }; }; } } /* Main */ void ProcessCandidate( FILE *plotWr, pz_candidate_t *c, ARRAY *OF pz_symbol_chain_read_data chain, VTable *dch, VTable *ech, Options *o ) VAR minAvgDist: REF double_vec_t; { { /* with */ ??? ka = c.seg[0].cvx; ??? ca = chain[ka].c^; ??? ia = c.seg[0].ini; ??? na = c.seg[0].ns; ??? fa = ia + na - 1; ??? kb = c.seg[1].cvx; ??? cb = chain[kb].c^; ??? ib = c.seg[1].ini; ??? nb = c.seg[1].ns; ??? fb = ib + nb - 1; ??? maxSteps = (na-1) + (nb-1); /* do */ pz_candidate_print(stderr, c); if (( na > 3 ) ANDAND ( nb > 3 )){ affirm(ia < fa ); affirm(ib < fb ); affirm(o.mp.maxDistSqr < (double.ne - 1) ); /* Compute best matches per length for all shifts: */ MatchSymbolicChains(o, ca, ia, fa, cb, ib, fb, dch = dch, ech = ech, /*OUT*/ minAvgDist = minAvgDist ); for ( nSteps = 2 TO maxSteps BY 2 ){ WritePlotEntry(plotWr, ka, ia, fa, kb, ib, fb, o.step, nSteps, minAvgDist[nSteps] ); }; WritePlotBreak(plotWr) }else{ fprintf(stderr, "too small, skipped\n");; }; } } /* ProcessCandidate */ void WritePlotEntry( FILE *plotWr, unsigned ka, unsigned ia, unsigned fa, unsigned kb, unsigned ib, unsigned fb, double step, unsigned nSteps, double minAvgDist ) { { /* with */ ??? length = step * ((double)nSteps) / 2.0e0; /* do */ fprintf(plotWr, Fmt.Pad(Fmt.Int(ka), 4, '0')); fprintf(plotWr, " "); fprintf(plotWr, Fmt.Pad(Fmt.Int(ia), 4)); fprintf(plotWr, " "); fprintf(plotWr, Fmt.Pad(Fmt.Int(fa), 4)); fprintf(plotWr, " "); fprintf(plotWr, Fmt.Pad(Fmt.Int(kb), 4, '0')); fprintf(plotWr, " "); fprintf(plotWr, Fmt.Pad(Fmt.Int(ib), 4)); fprintf(plotWr, " "); fprintf(plotWr, Fmt.Pad(Fmt.Int(fb), 4)); fprintf(plotWr, " "); fprintf(plotWr, FLR(length, 8,2)); fprintf(plotWr, " "); fprintf(plotWr, Fmt.Pad(Fmt.Int(nSteps), 8)); fprintf(plotWr, " "); fprintf(plotWr, FLR(minAvgDist, 8,6)); fprintf(plotWr, "\n");; }; fflush(plotWr); } /* WritePlotEntry */ void WritePlotBreak( FILE *plotWr ) { fprintf(plotWr, "\n"); } /* WritePlotBreak */ void MatchSymbolicChains( Options *o, pz_symbol_chain_t *ca, unsigned ia, unsigned fa, pz_symbol_chain_t *cb, unsigned ib, unsigned fb, VTable *dch, VTable *ech, REF *double_vec_t minAvgDist /* (OUT) */ ) VAR mismatch: double; matchedLength: double; length: double; mt: REF pz_match_t; rCost: REF pz_match_cost_matrix_t = NULL; { { /* with */ ??? na = fa - ia + 1; ??? a = pz_symbol_chain_cut(ca, ia, na)^; ??? ma = (a.ne); ??? nb = fb - ib + 1; ??? b = pz_symbol_chain_cut(cb, ib, nb)^; ??? mb = (b.ne); ??? ac = (na-1) DIV 2; ??? bc = (nb-1) DIV 2; ??? rnd = NEW(Random.Default).init(TRUE); ??? mtOld = pz_match_t{pz_match_pair{ac,bc}}; /* do */ affirm(na >= 2 ); affirm(nb >= 2 ); if (( NOT o.dontReverse )){ pz_symbol_chain_reverse_and_complement(b) ;}; if (( o.perturb != 0.0e0 )){ for (i = 0; i <= nb-1 ; i++){ { /* with */ ??? perturb = ROUND(o.perturb * rnd.longreal(-1.0e0, +1.0e0)); /* do */ b[i] = pz_symbol_from_int(pz_symbol_to_int(b[i]) + perturb); }; };; }; if (( o.shift != 0 )){ for (i = 0; i <= nb-1-o.shift ; i++){ b[i] = b[i+o.shift] ;}; for (i = nb-o.shift; i <= nb-1 ; i++){ b[i] = b[i-1] ;};; }; mt = NULL; if (( minAvgDist == NULL ) || ( (minAvgDist^.ne) < ma+mb-1 )){ minAvgDist = double_vec_new(ma+mb-1); }; pz_symbol_chain_match_refine( a, b, decode = dch, errorVar = ech, step = o.step, mtOld = mtOld, maxAdjust = CEILING(o.maxShift / o.step), maxDistSqr = o.mp.maxDistSqr, critDistSqr = o.mp.critDistSqr, skipDistSqr = o.mp.skipDistSqr, /*OUT*/ mt = mt, mismatch = mismatch, length = length, matchedLength = matchedLength, minAvgDisc = SUBARRAY(minAvgDist^, 0, ma+mb-1), /*WORK*/ rCost = rCost ); if (( o.printMatches )){ PrintSymbolicMatch(a, ia, fa, b, ib, fb, mt^, mismatch, length, matchedLength); };; }; } /* MatchSymbolicChains */ void PrintSymbolicMatch( pz_symbol_chain_t *a, unsigned ia, unsigned fa, pz_symbol_chain_t *b, unsigned ib, unsigned fb, pz_match_t *mt, double mismatch, double length, double matchedLength ) CHAR Chain( unsigned j, unsigned i ) { if (( j == 0 )){ return a[i] }else{ return b[i] ;} } /* Chain */ { { /* with */ ??? nm = (mt.ne); /* do */ fprintf(stderr, "\n"); /* Write pairing portions of given chains, unaligned: */ for (j = 0; j <= 1 ; j++){ { /* with */ ??? ig = ARRAY [0..1] OF unsigned{ia, ib}[j]; ??? fg = ARRAY [0..1] OF unsigned{fa, fb}[j]; ??? im = mt[0][j], fm = mt[nm-1][j]; ??? nLetters = fm - im + 1; /* do */ fprintf(stderr, "chain %s:", Fmt.Int(j) ); fprintf(stderr, " given [" & FI(ig,4) & ".." & FI(fg,4) & "]"); fprintf(stderr, " (%s)", FI(fg - ig, 4) ); fprintf(stderr, " matched [" & FI(im, 4) & ".." & FI(fm, 4) & "]"); fprintf(stderr, " (%s)", FI(fm - im, 4) ); fprintf(stderr, " "); for (r = 0; r <= nLetters-1 ; r++){ { /* with */ ??? ch = Chain(j, im + r); /* do */ fprintf(stderr, Fmt.Char(ch)); };; }; fprintf(stderr, "\n");; };; }; fprintf(stderr, "\n"); /* Write matched pairing portions of given chains, aligned: */ for (j = 0; j <= 1 ; j++){ for (r = 0; r <= nm-1 ; r++){ { /* with */ ??? ch = Chain(j, mt[r][j]); /* do */ fprintf(stderr, Fmt.Char(ch)); };; }; fprintf(stderr, "\n");; }; /* Write letter distances for each matched pair: */ for (r = 0; r <= nm-1 ; r++){ { /* with */ ??? cha = Chain(0, mt[r][0]); ??? chb = Chain(1, mt[r][1]); ??? d = ABS(pz_symbol_to_int(cha) - pz_symbol_to_int(chb)); /* do */ if (( d > 9 )){ Wr.PutChar(stderr, '*') }else{ Wr.PutChar(stderr, VAL(ORD('0') + d, CHAR)); }; };; }; fprintf(stderr, "\n"); /* Print statistics: */ fprintf(stderr, "mismatch == %s\n", FLR(mismatch, 8,4) ); fprintf(stderr, "length == %s\n", FLR(length, 8,4) ); fprintf(stderr, "matched == %s\n", FLR(matchedLength, 8,4) ); fprintf(stderr, "\n");; } } /* PrintSymbolicMatch */ Options pz_get_options(int argc, char **argv ) VAR o: Options; { argparser_t *pp = argparser_new(stderr, argc, argv); argparser_set_help(pp, PROG_NAME " version " PROG_VERS ", usage:\n" PROG_HELP); argparser_set_info(pp, PROG_INFO); argparser_process_help_info_options(pp); { /* with */ /* do */ TRY argparser_get_keyword(pp, "-input"); o.input = argparser_get_next(pp); if (( argparser_keyword_present(pp, "-maxCands") )){ o.maxCands = argparser_get_next_int(pp, 1, (unsigned.ne - 1)) }else{ o.maxCands = (unsigned.ne - 1); }; if (( argparser_keyword_present(pp, "-chainDir") )){ o.chainDir = argparser_get_next(pp) }else{ o.chainDir = "."; }; argparser_get_keyword(pp, "-chainPrefix"); o.chainPrefix = argparser_get_next(pp); if (( argparser_keyword_present(pp, "-perturb") )){ o.perturb = argparser_get_next_double(pp, 0.0e0); }else{ o.perturb = 0.0e0;; }; if (( argparser_keyword_present(pp, "-shift") )){ o.shift = argparser_get_next_int(pp, 0, 999); }else{ o.shift = 0; }; argparser_get_keyword(pp, "-output"); o.output = argparser_get_next(pp); argparser_get_keyword(pp, "-band"); o.band = argparser_get_next_int(pp); argparser_get_keyword(pp, "-step"); o.step = argparser_get_next_double(pp, 0.0e0); o.dontReverse = argparser_keyword_present(pp, "-dontReverse"); o.mp = pz_mismatch.ParseOptions(pp); argparser_get_keyword(pp, "-maxShift"); o.maxShift = argparser_get_next_double(pp, 0.0e0); o.printMatches = argparser_keyword_present(pp, "-printMatches"); argparser_finish(pp); EXCEPT | ParseParams.Error ==> fprintf(stderr, "Usage: pz_plot_match_cost \\\n"); fprintf(stderr, " -input FNAME \\\n"); fprintf(stderr, " -output FNAME \\\n"); fprintf(stderr, " [ -chainDir DIR ] -chainPrefix NAME \\\n"); fprintf(stderr, " -band unsigned \\\n"); fprintf(stderr, " -maxShift NSTEPS \\\n"); fprintf(stderr, " %s \\\n", pz_mismatch.OptionsHelp ); fprintf(stderr, " [ -perturb DIST ] [ -shift NUM ] \\\n"); fprintf(stderr, " [ -printMatches ] \n"); Process.Exit(1);; };; }; return o; } GetOptions; <*UNUSED); void DebugInt( char *text, int x, int y, int z ) { fprintf(stderr, text); fprintf(stderr, " " & Fmt.Int(x) & " " & Fmt.Int(y)& " " & Fmt.Int(z)); fprintf(stderr, "\n"); } /* DebugInt */ char *FLR( double x, unsigned w, unsigned p ) { return Fmt.Pad(Fmt.LongReal(x, Fmt.Style.Fix, prec = p), w) } /* FLR */ char *FI( int x, unsigned w ) { return Fmt.Pad(Fmt.Int(x), w) } /* FI */ { /* Copyright © 2001 Universidade Estadual de Campinas (UNICAMP). Authors: Helena C. G. Leitão and Jorge Stolfi. This file can be freely distributed, used, and modified, provided that this copyright and authorship notice is preserved, and that any modified versions are clearly marked as such. This software has NO WARRANTY of correctness or applicability for any purpose. Neither the authors nor their employers chall be held responsible for any losses or damages that may result from its use. */