/* Generate random candidates from a set of contours. */
/* Last edited on 2015-01-20 16:55:48 by stolfilocal */

/*
  Generates a set of random candidates from a given set
  of segments (or their pieces).

  Each candidate will consist of two segments of approximately
  the same length, on two distinct contours, the second one being
  implicitly reversed.
  
  The length of the two segments is restricted to a range
  specified by the user.  (Actually, the specified lengths are 
  automatically reduced to account corner blurring.)

  IMPORTANT: The input segments should preferably not include
  pieces of straight segments, or of blurred corners.
  Check the parameters of "pz_map_segs".
*/
#include <stdio.h>
#include <string.h>

#include <argparser.h>
#include <jsrandom.h>

#include <pz_symbol_chain.h>
#include <pz_symbol.h>
#include <pz_proc.h>
#include <pz_candidate.h>
#include <pz_segment.h>
#include <pz_types.h>


TYPE

  Options = struct ??? {
    char *segments;        /* Filename of given segments. */
    char *chainDir;        /* Directory where to find chain data. */
    char *chainPrefix;     /* Invariant chain file name prefix. */
    unsigned nChains;          /* Number of chains. */
    char *output;          /* Output candidate file name. */
    unsigned band;             /* Filtering "band" (wavelength parameter). */
    double step;            /* Sampling step. */
    unsigned maxCands;         /* Max number of candidates to generate */
    double minLength;       /* Min length of matched segments. */
    double maxLength;       /* Max length of matched segments. */
    double blurFactor;      /* Corner broadening factor. */
    unsigned printCands; /* Max candidates to print. */
  } ???;

TYPE
  Segments == pz_segment_vec_t;
  SegmentPair == ARRAY [0..1] OF pz_segment_t;
  SegData == pz_segment_read_data;
  
  SegmentPairs == ARRAY OF SegmentPair;

  Candidates == pz_candidate.List;

  Chains == ARRAY OF pz_symbol_chain_read_data;
  
  VTable == double_vec_t;


int main(int argc, char **argv 
  )
  {

    { /* with */
      ??? o = pz_get_options(int argc, char **argv);
      ??? band = o.band;
      ??? lambda = ((double)band);
      ??? blurAmount = o.blurFactor * lambda;
      ??? minLength = EnsurePositive(o.minLength - 2.0e0 * blurAmount);
      ??? maxLength = EnsurePositive(o.maxLength - 2.0e0 * blurAmount);
      ??? segData = ReadSegments(o.segments);
      ??? chAllData = ReadChains(o.chainDir, o.chainPrefix, o.nChains, band);
      ??? sigma = GetSigma(chAllData);
      cand = GenerateRandomCandidates(
        seg := segData.s^;
        ch := chAllData.chData^;
        sigma := sigma;
        step := o.step;
        minLength := minLength;
        maxLength := maxLength;
        maxCands := o.maxCands
      )^;
      /* do */

      { /* with */
        ??? wr = open_write(o.output & ".can", TRUE);
        /* do */
        pz_candidate_write(wr, CandComment(o, segData.cmt, chAllData.cmt), cand, lambda);
      };
      PrintCandidates(cand, o.printCands);;
    };
  } /* Main */


double  EnsurePositive(
    double length 
  )
  {
    if (( length < 0.0e0 )){
      fprintf(stderr, "warning: requested candidate length too small for this scale\n");;
    };
    return max(1.0e0, length)
  } /* EnsurePositive */
  

double  GetSigma(
    pz_symbol_chain_read_all_data *chAllData 
  )
  {
    if (( chAllData.sigmaMax > 0.0e0 )){ 
      affirm(chAllData.sigmaMax == chAllData.sigmaMin );
      return chAllData.sigmaMax
    }else{
      /* No chains, return any sensible value: */
      return 1.0e0;
    }
  } /* GetSigma */


Options  pz_get_options(int argc, char **argv 
  )
  VAR o: Options;
  {
    argparser_t *pp = argparser_new(stderr, argc, argv);
    argparser_set_help(pp, PROG_NAME " version " PROG_VERS ", usage:\n" PROG_HELP);
    argparser_set_info(pp, PROG_INFO);
    argparser_process_help_info_options(pp);
    

    { /* with */
      /* do */
      TRY
        argparser_get_keyword(pp, "-segments");
        o.segments = argparser_get_next(pp);
        
        argparser_get_keyword(pp, "-chainPrefix");
        o.chainPrefix = argparser_get_next(pp);
        if (( argparser_keyword_present(pp, "-chainDir") )){
          o.chainDir = argparser_get_next(pp)
        }else{
          o.chainDir = ".";
        };

        argparser_get_keyword(pp, "-nChains");
        o.nChains = argparser_get_next_int(pp);

        argparser_get_keyword(pp, "-band");
        o.band = argparser_get_next_int(pp);

        argparser_get_keyword(pp, "-step");
        o.step = argparser_get_next_double(pp, 0.0e0);

        argparser_get_keyword(pp, "-output");
        o.output = argparser_get_next(pp);

        argparser_get_keyword(pp, "-minLength");
        o.minLength = argparser_get_next_double(pp, 0.0e0);

        argparser_get_keyword(pp, "-maxLength");
        o.maxLength = argparser_get_next_double(pp, o.minLength);

        argparser_get_keyword(pp, "-blurFactor");
        o.blurFactor = argparser_get_next_double(pp, 0.0e0);

        argparser_get_keyword(pp, "-maxCands");
        o.maxCands = argparser_get_next_int(pp, 1);

        if (( argparser_keyword_present(pp, "-printCands") )){
          o.printCands = argparser_get_next_int(pp, 1)
        }else{
          o.printCands = 1000;
        };

        argparser_finish(pp);
      EXCEPT
      | ParseParams.Error ==>
          fprintf(stderr, "Usage: pz_get_random_cands \\\n");
          fprintf(stderr, "  -segments FILE \\\n");
          fprintf(stderr, "  [ -chainDir DIR ] -chainPrefix NAME \\\n");
          fprintf(stderr, "  -nChains NUMBER \\\n");
          fprintf(stderr, "  -band NUMBER -step NUMBER \\\n");
          fprintf(stderr, "  -output NAME \\\n");
          fprintf(stderr, "  -minLength NUMBER  -maxlength NUMBER \\\n");
          fprintf(stderr, "  -blurFactor NUMBER \\\n");
          fprintf(stderr, "  -maxCands NUMBER \\\n");
          fprintf(stderr, "  [ -printCands NUMBER ]\n");
        Process.Exit(1);;
      };;
    };
    return o
  } /* GetOptions */


Candidates *GenerateRandomCandidates(
    Segments *seg   /* Segments to sample & match. */
    Chains *ch,     /* Chain data. */
    double sigma,             /* Encoding parameter for "ch". */
    double step,              /* Sampling step. */
    double minLength,         /* Min length of candidate. */
    double maxLength,         /* Max length of candidate. */
    unsigned maxCands, /* Max number of candidates desired. */
  )
  {

    { /* with */
      ??? rnd = NEW(Random.Default).init(fixed := TRUE);
      ??? dch = pz_symbol_make_decode_table(sigma)^;
      ??? ech = pz_symbol_make_error_var_table(sigma)^;
      ??? minSamples = CEILING(minLength/step) + 1;
      ??? maxSamples = MAX(minSamples, FLOOR(maxLength/step) + 1);
      ??? sp = EnumSegmentPairs(rnd, seg, minSamples, maxSamples)^;
      ??? nCands = MIN(maxCands, (sp.ne));
      ??? rCand = NEW(REF Candidates, nCands), cand = rCand^;
      /* do */
      for (i = 0; i <= nCands-1 ; i++){
        cand[i] = PickCandidateInSegmentPair(
          rnd, sp[i],
          ch, dch, ech,
          minSamples, maxSamples, step
        );
      };
      pz_candidate_sort(cand, pz_candidate_lexically_better);
      return rCand;
    }
  } /* GenerateRandomCandidates */


SegmentPairs *EnumSegmentPairs(
    Random.T rnd            /* Fountain of surprises. */
    Segments *seg,  /* Segments to pick. */
    unsigned minSamples,         /* Min number of steps per candidate. */
    unsigned maxSamples, /* Max number of steps per candidate. */
  )
  /*
    Returns a list of all candidates (actually, segment pairs) that
    can be formed from the segments "seg" that have "nSteps" or more
    steps on both sides.  Each qualifying candidate is chopped into
    half-overlapping pieces with "maxSteps" steps, and the list is
    then scrambled. */
  {

    { /* with */
      ??? avgSamples = (minSamples + maxSamples) DIV 2;
      /* do */
      fprintf(stderr, "generating list of all basic cands...\n");

      { /* with */ 
        ??? sp = EnumPairsFromSegments(seg, minSamples := avgSamples)^;
        ??? rsc = ChopSegmentPairs(sp, maxSamples);
        ??? sc = rsc^;
        /* do */
        RandomizeSegmentPairs(rnd, sc);
        return rsc;
      };
    };
  } /* EnumSegmentPairs */
  

SegmentPairs *EnumPairsFromSegments(
    Segments *seg    /* Segments to pick. */
    unsigned minSamples, /* Min samples in candidate */
  )
  /*
    Selects all segments from "seg" that have at least "minSamples"
    samples, and returns a list of all pairs from the selected set 
    that belong to different chains.  The two segments are returned
    with the same direction ("rev == FALSE"). */
  VAR r: REF SegmentPairs := NIL;
      nPairs: unsigned = 0;
  {
    for (i = 0; i < (seg.ne ) ; i++){

      { /* with */
        ??? si = seg[i];
        /* do */
        if (( si.ns >= minSamples )){
          for (j = 0; j <= i-1 ; j++){

            { /* with */
              ??? sj = seg[j];
              /* do */
              if (( sj.ns >= minSamples ) ANDAND ( si.cvx != sj.cvx )){
                if (( r == NULL ) || ( nPairs >= (r^.ne) )){

                  { /* with */
                    ??? s = NEW(REF SegmentPairs, 2*nPairs + 100);
                    /* do */
                    if (( r != NULL )){ SUBARRAY(s^,0,nPairs) = r^ ;};
                    r = s;
                  };
                };

                { /* with */
                  ??? p = r^[nPairs];
                  /* do */
                  p = SegmentPair{si, sj};
                  p[0].rev = FALSE;
                  p[1].rev = FALSE;;
                };
                nPairs++;
              };
            };
          };
        };
      };
    };
    fprintf(stderr, "found %s segment pairs of sufficient size\n", FI(nPairs,1) );

    { /* with */
      ??? s = NEW(REF SegmentPairs, nPairs);
      /* do */
      if (( r != NULL )){ s^ = SUBARRAY(r^,0,nPairs) ;};
      return s;
    }
  } /* EnumPairsFromSegments */


SegmentPairs *ChopSegmentPairs(
    SegmentPairs *sp, 
    unsigned nSamples 
  )
  /*
    
    Breaks each pair from the list "sp" into half-overlapping pieces of
    size "nSamples".
    
    Ideally, should break each segment of the pair into as many pieces
    as would fit, and pair the pieces in all possible ways.  However
    this may generate far too many segments, and would allow the same
    segment to appear several times in the output.  Moreover, the
    probability of an original candidate to be sampled would be
    proportional to the product of its segment's lengths, which may
    not be a good idea. Thus instead we break the two segments into
    the same number of pieces, and pair them sequentially. */
  
  VAR 
    skip: ARRAY [0..1] OF unsigned;
    base:  ARRAY [0..1] OF unsigned;
    r: REF SegmentPairs = NULL;
    nPairs: unsigned = 0;
  {
    fprintf(stderr, "chopping candidates into pieces with %s samples...\n", FI(nSamples,1) );
    for (i = 0; i < (sp.ne ) ; i++){

      { /* with */ 
        ??? sBig = sp[i];
        ??? minSkip = (nSamples + 1) DIV 2;
        ??? candSamples = MIN(sBig[0].ns, sBig[1].ns);
        /* do */
        if (( candSamples >= nSamples )){

          { /* with */
            ??? nPieces = (candSamples - nSamples) DIV minSkip + 1;
            /* do */
            affirm(nPieces >= 1 );
            for (j = 0; j <= 1 ; j++){

              { /* with */
                ??? sCj = sChop[j], sBj = sBig[j];
                /* do */
                sCj = sBj;
                skip[j] = (sBj.ns - nSamples) DIV max(1, nPieces-1);
                sCj.ns = nSamples;
                base[j] = (sBj.ns - sCj.ns - (nPieces-1)*skip[j]) DIV 2;;
              };
              affirm(nPieces == 1 ) || ( skip[j] >= minSkip );;
            };
            for (k = 0; k <= nPieces-1 ; k++){
              for (j = 0; j <= 1 ; j++){

                { /* with */
                  ??? sCj = sChop[j], sBj = sBig[j];
                  /* do */
                  sCj.ini = sBj.ini + base[j] + k*skip[j];
                  affirm(sCj.ini >= sBj.ini );
                  affirm(sCj.ini + sCj.ns <= sBj.ini + sBj.ns );;
                };
              };
              if (( r == NULL ) || ( nPairs == (r^.ne) )){

                { /* with */
                  ??? s = NEW(REF SegmentPairs, 2*nPairs+100);
                  /* do */
                  if (( r != NULL )){ SUBARRAY(s^, 0, (r^.ne)) = r^ ;};
                  r = s;
                };
              };
              r[nPairs] = sChop; nPairs++;
            };
          };
        };
      };
    };
    fprintf(stderr, "chopped them into %s pieces\n", FI(nPairs,1) );

    { /* with */
      ??? s = NEW(REF SegmentPairs, nPairs);
      /* do */
      if (( r != NULL )){ s^ = SUBARRAY(r^,0,nPairs) ;};
      return s;
    }
  } /* ChopSegmentPairs */
  

void RandomizeSegmentPairs(
    Random.T rnd, 
    SegmentPairs *sp 
  )
  {
    fprintf(stderr, "randomizing list...\n");
    for (i = 0; i < (sp.ne ) - 1 ; i++){

      { /* with */
        ??? j = rnd.integer(i, (sp.ne - 1));
        /* do */
        if (( i != j )){
          VAR t := sp[i]; BEGIN sp[i] := sp[j]; sp[j] := t END;
        };
      };
    }
  } /* RandomizeSegmentPairs */


pz_candidate_t  PickCandidateInSegmentPair(
    Random.T rnd,
    SegmentPair *sBig  /* Parent segments. */
    Chains *ch,        /* Chain table. */
    VTable *dch,       /* Curvature decoding table. */
    VTable *ech,       /* Quantization noise variances. */
    unsigned minSamples,            /* Minimum samples in candidate. */
    unsigned maxSamples,            /* Maximum samples in candidate. */
    double step, /* Sampling step. */
  )
  VAR sCut: SegmentPair;
      d, n: unsigned;
  {
    affirm(sBig[0].ns >= minSamples ) ANDAND ( sBig[1].ns >= minSamples );
    for (j = 0; j <= 1 ; j++){
      sCut[j] = sBig[j];

      { /* with */
        ??? sj = sCut[j];
        ??? u = minSamples;
        ??? v = MIN(sj.ns, maxSamples);
        ??? a = sj.ns - v;
        ??? b = sj.ns - u;
        ??? c = a + b;
        /* do */
        affirm(c >= 0 );
        affirm(u <= v );
        d = rnd.integer(0, c);
        n = rnd.integer(u, v);
        if (( d + n > sj.ns )){ 
          d = c - d; 
          n = u + (v - n);
        };
        affirm(d + n <= sj.ns );
        affirm(n >= minSamples );
        affirm(n <= maxSamples );
        sj.ini = sj.ini + d;
        sj.ns = n;
        sj.rev = (j == 1);
      };
    };

    { /* with */
      avgDistSqr = AverageDistSqr(
        ch[sCut[0].cvx].c^, sCut[0];
        ch[sCut[1].cvx].c^, sCut[1];
        dch, ech
      );
      ??? length = 0.5e0 * step * ((double)sCut[0].ns + sCut[1].ns - 2);
      /* do */
      return pz_candidate_t {
        seg = sCut,
        mismatch = avgDistSqr,
        length = length,
        matchedLength = length,
        pm = NULL
      };
    }
  } /* PickCandidateInSegmentPair */


double  AverageDistSqr(
    pz_symbol_chain_t *a,
    pz_segment_t *sa,
    pz_symbol_chain_t *b,
    pz_segment_t *sb,
    VTable *dch   /* Curvature decoding table. */
    VTable *ech, /* Quantization noise variances. */
  )
  /*
    Computes the average distance squared between segments "sa" and "sb"
    of the chains "a" and "b", by the trapezoid rule.  Assumes the most
    perfect match possible (Bresenham's path in the pairing grid).
  */
  VAR sum: double := 0.0e0;
      h: double;
  {

    { /* with */
      ??? nPairs = MAX(sa.ns, sb.ns);
      ??? fp = ((double)nPairs-1);

      ??? ca = pz_symbol_chain_extract_segment(a, sa)^;
      ??? fa = ((double)sa.ns-1);

      ??? cb = pz_symbol_chain_extract_segment(b, sb)^;
      ??? fb = ((double)sb.ns-1);
      /* do */
      if (( nPairs == 1 )){ 
        return pz_symbol_dist_sqr(
          ca[0], cb[0], 
          complement = FALSE, decode = dch, errorVar = ech
        )
      }else{
        for (k = 0; k <= nPairs-1 ; k++){
          if (( k == 0 ) || ( k == nPairs-1 )){ h = 0.5e0 }else{ h = 1.0e0 ;};

          { /* with */
            ??? fk = ((double)k);
            ??? ia = ROUND(fk*fa/fp);
            ??? ib = ROUND(fk*fb/fp);
            /* do */
            sum = sum + h * pz_symbol_dist_sqr(
              ca[ia], cb[ib], 
              complement = FALSE, decode = dch, errorVar = ech
            );
          };
        };
        return sum/fp;
      };
    }
  } /* AverageDistSqr */


void PrintCandidates (
    Candidates *cand,
    unsigned maxCands 
  )
  {
    if (( maxCands == 0 )){ return ;};
    fprintf(stderr, "\n");
    for (i = 0; i <= min(maxCands, (cand.ne))-1 ; i++){

      { /* with */
        ??? c = cand[i];
        /* do */
        fprintf(stderr, "cand[%s]\n", Fmt.Int(i) );
        pz_candidate_print(stderr, c, pairing = FALSE);
      };
    };
    fflush(stderr);
  } /* PrintCandidates */


pz_symbol_chain_read_all_data  ReadChains(
    char *chainDir,
    char *chainPrefix,
    unsigned nChains,
    unsigned band 
  )
  {
    affirm(NOT Text.Empty(chainPrefix) );

    { /* with */ 
      ??? sel = pz_proc.SelectAll(nChains)^;
      /* do */
      return pz_symbol_chain_read_all(
        prefix = chainPrefix, band = band, extension = ".cvc",
        sel = sel, dir = chainDir, header_only = FALSE
      ) ;
    }
  } /* ReadChains */


SegData  ReadSegments(
    char *segName 
  )
  {
    affirm(NOT Text.Empty(segName) );

    { /* with */
      ??? fileName = txtcat(segName , ".seg");
      /* do */
      fprintf(stderr, fileName & "\n");

      { /* with */
        ??? rd = open_read(fileName, TRUE);
        ??? sData = pz_segment_read(rd);
        /* do */
        return sData;
      };
    }
  } /* ReadSegments */


char *CandComment(
    Options *o, 
    char *segCmt,
    char *chCmt 
  )
  {
    return
      "PZGetRandomCandidates\n" &
      "  segments " & o.segments & "\n" &
      "  chainDir " & o.chainDir & "\n" &
      "  chainPrefix " & o.chainPrefix & "\n" &
      "  nChains == " & Fmt.Int(o.nChains) & "\n" &
      "  band == " & Fmt.Int(o.band) & "\n" &
      "  step == " & Fmt.LongReal(o.step) & "\n" &
      "  maxCands == " & Fmt.Int(o.maxCands) & "\n" &
      "  minLength == " & Fmt.LongReal(o.minLength) & "\n" &
      "  maxLength == " & Fmt.LongReal(o.maxLength) & "\n" &
      "  blurFactor == " & Fmt.LongReal(o.blurFactor) & "\n" &
      "  \n" &
      "  Segments:\n" & segCmt &
      "  Chains:\n" & chCmt &
      "  \n"
  } /* CandComment */


char *FI(
    unsigned x, 
    unsigned w 
  )
  {
    return Fmt.Pad(Fmt.Int(x), w)
  } /* FI */

{

/*
  Copyright � 2001 Universidade Estadual de Campinas (UNICAMP).
  Authors: Helena C. G. Leit�o and Jorge Stolfi.
  
  This file can be freely distributed, used, and modified, provided
  that this copyright and authorship notice is preserved, and that any
  modified versions are clearly marked as such.
  
  This software has NO WARRANTY of correctness or applicability for
  any purpose. Neither the authors nor their employers chall be held
  responsible for any losses or damages that may result from its use.
*/