#! /usr/bin/gawk -f
# Last edited on 2015-02-27 23:51:56 by stolfilocal

# Reads from {stdin} a "data" file with merged price series from various exchanges.
# Outputs a file with currency conversion factors to use for each exchange.
#
# The "data" file must have lines in the format "{DATE} {TIME} |
# {VBTC[kd,kf]} | {VCUR[kd,kf]} | {PMD[kd,kf]}" where {TIME} is always "00:00:00",
# and the fields "| {VBTC[kd,kf]} | {VCUR[kd,kf]} | {PMD[kd,kf]}" are repeated
# for each exchange. The field {PMD[kd,kf]} is an average price in
# a neighborhood of each date, computed with some smoothing window;
# it needs to be nonzero only when the volumes are nonzero.
#
# The program first computes a smoothed reference USD price
# {PRF[kd]} for each line that is the {PMD[kd]} of "MGOX" at first,
# transitioning smoothly to that of "BSTP".
#
# Then it computes the currency conversion factor for each exchange
# and day as the {PMD[kd]} of that exchange divided by the {PRF[kd]}.
#
# The user must define (with {export}) the environment variable {TZ="UTC"}
#
# The user must define (with "-v") the program variables
#
#   {indexFile}, the name of the index file (that specifies the exchanges in the join file).
#
# See {join_smoothed_price_files.gawk} for the format of the index file.
#
# Writes to standard output a single file, with one line per day in the format
# "{DATE} {TIME} | {PRF[kd]} | {CCF[kd,kf]}" where {PRF[kd]} is the reference price
# for the {DATE}, the field "| {CCF[kd,kf]}" is repeated for each exchange,
# and:
#
#   {DATE} spans the union of the date ranges in the input files;
#   {TIME} is always "00:00:00";
#   {PRF[kd]} is a smoothed reference price used to compute the currency conversion
#     factors for {DATE};
#   {CCF[kd,kf]} is the estimated currency conversion factor for exchange number {kf}
#     around the {DATE}.
BEGIN \
{
  abort = -1;  # Exit status; presumably set >= 0 by {arg_error}/{data_error} to abort — confirm in their definitions.
  if (indexFile == "") { arg_error(("must define {indexFile}")); }
  if (ENVIRON["TZ"] != "UTC") { arg_error(("must set TZ to 'UTC'")); }
  # Initialize global tables to be read from the index file:
  ixf_initialize_index_tables();
  # Read data from file {indexFile}, saves in tables {inidate_fi[0..nfiles-1],.. color_fi[0..nfiles-1]}:
  nfiles = ixf_read_index_file( \
    indexFile, \
    inidate_fi,findate_fi,extag_fi,crtag_fi,exname_fi,rate_fi,rlodate_fi,rhidate_fi,color_fi \
  );
  ndays = 0;   # Number of data lines.
  dy_ini = ""; # First date seen.
  dy_fin = ""; # Last date seen.
  # Previous date (used to check that input dates are consecutive):
  ody = "";
}

# Once an error has been flagged, stop processing input:
(abort >= 0) { exit(abort); }

# ----------------------------------------------------------------------
# PROCESSING STDIN - THE JOINED SERIES FILE

# Ignore blank lines, comment lines, and any line containing '!':
/(^[ ]*([#]|$))|[!]/ \
{ next; }

# Remove inline comments, leading and trailing blanks:
// \
{ gsub(/[\#].*$/, "", $0); gsub(/^[ ]+/, "", $0); gsub(/[ ]+$/, "", $0); }

# Line with file data.  The year pattern accepts 2000-2099; the previous
# pattern {20[01][0-9]} rejected every date from 2020 on, which would have
# sent valid data lines to the "invalid line" rule below.  Full date
# validation is still delegated to {usf_check_date}.
/^20[0-9][0-9]-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) / \
{
  # Expect "{DATE} {TIME}" plus six fields ("| {VBTC} | {VCUR} | {PMD}") per exchange:
  if (NF != 2 + 6*nfiles) { data_error("invalid field count"); }
  dy = $1; tm = $2;
  dy = usf_check_date(FILENAME,FNR,dy);
  if ((ody != "") && (! usf_dates_are_consecutive(ody,dy)))
    { data_error(("non-consecutive dates \"" ody "\" \"" dy "\"")); }
  if (tm != "00:00:00") { data_error(("invalid time = \"" tm "\"")); }
  printf "%s %s", dy, tm;
  for (kf = 0; kf < nfiles; kf++)
    { # Parse the triple "| {VBTC} | {VCUR} | {PMD}" of exchange {kf}:
      j = 6*kf+3;
      if ($(j) != "|") { data_error(("missing '|' in column " j)); } j++;
      vbt = usf_check_num(FILENAME,FNR,$(j)); j++;  # BTC volume (validated but not otherwise used).
      if ($(j) != "|") { data_error(("missing '|' in column " j)); } j++;
      vcr = usf_check_num(FILENAME,FNR,$(j)); j++;  # Currency volume (validated but not otherwise used).
      if ($(j) != "|") { data_error(("missing '|' in column " j)); } j++;
      pmd = usf_check_num(FILENAME,FNR,$(j)); j++;  # Smoothed price of exchange {kf} on this date.
      if (kf == 0)
        { # Exchange 0 supplies the reference price for this date:
          prf = pmd;
          printf " | %18.5f", prf;
          if (prf == 0.0) { printf "!! reference price is zero for %s\n", dy > "/dev/stderr"; }
        }
      else
        { # Conversion factor = exchange price / reference price (0 when there is no reference):
          ccf = (prf == 0 ? 0.0 : pmd/prf);
          printf " | %12.5f", ccf;
        }
    }
  printf "\n";
  # Update the date range and consecutiveness bookkeeping:
  if (ndays == 0) { dy_ini = dy; }
  dy_fin = dy;
  ody = dy;
  ndays++;
  next;
}

# Any other line is malformed.  (Previous message said "invalid file list
# line format" — a copy-paste from an index-file reader; this rule fires on
# bad lines of the joined data series read from {stdin}.)
// \
{ data_error("invalid data line format"); }

END \
{ printf "%d days in output file, from %s to %s\n", ndays, dy_ini, dy_fin > "/dev/stderr"; }