#! /usr/bin/gawk -f
# Last edited on 2016-10-31 10:25:41 by stolfilocal

# Reads a daily bitcoin price series.
# Outputs a file with upper and lower estimates of the "Bitcoin National Debt"
# accumulated up to each day.
#
# The user must define (with {export}) the environment variable {TZ="UTC"}.
#
# The user must load (with "-f") the packages {useful_functions.gawk}
# and {price_series-functions.gawk}.
#
# The user must define (with "-v") the internal variables
#
#   {priceFile} the input file with daily price series (see {price_series_read_file} in {price_series_functions.gawk}).
#   {minedFile} the input file with the daily BTC mined.
#
# Writes to standard output a single file, with one line per day (line number {id}), in the format
# 
#   "{DATE[id]} | {PLO[id]} | {PHI[id]} | {PAV[id]} | {VBM[id]} | {DLO[id]} | {DHI[id]}" where
#
# {DATE[id],PLO[id],PHI[id],PAV[id]} are as in the input price file (the
# prices are zero on days before the price series starts),
# {VBM[id]} is the amount of BTC mined on that day, and {DLO[id],DHI[id]} are the 
# low and high estimates for the Bitcoin National Debt at the end of day {DATE[id]}.

BEGIN \
  { 
    # Main program.  Reads the daily price series {priceFile} and the daily
    # mined-BTC series {minedFile}, merges them by date, and writes to standard
    # output one line per merged day with the low and high bounds of the debt
    # accumulated up to that day.  Progress and summary messages go to
    # "/dev/stderr".  The rule ends with {exit}, so no input records are read.
  
    # NOTE(review): probably meant {abort}; nothing in this file reads either
    # name -- confirm against the packages loaded with "-f".
    abourt = -1;
    if (priceFile == "") { arg_error("must define {priceFile}"); }
    if (minedFile == "") { arg_error("must define {minedFile}"); }
    
    R = 1.10;  # Expected payoff factor per year.
    
    # Precision (unit-in-last-place) of input and output values.
    # NOTE(review): these are not read anywhere in this file; possibly they
    # are globals consulted by the loaded packages -- confirm.
    ulp_vbt = 0.0001;   # Unit in the last place of input {vbt}
    ulp_vcr = 0.0001;   # Unit in the last place of input {vcr}
    ulp_pav = 0.00001;  # Unit in the last place of input average price {pav}
    ulp_phl = 0.00001;  # Unit in the last place of {pop,phi,plo,pcl}.
    
    # Read the price series (indexed with {id} in {0..nd_price-1})
    split("", date_id);   # Date of start of interval.
    split("", time_id);   # Time of start of interval.
    split("", pop_id);    # Opening price.
    split("", phi_id);    # High price.
    split("", plo_id);    # Low price
    split("", pcl_id);    # Closing price.
    split("", vbt_id);    # BTC volume.
    split("", vcr_id);    # Currency volume.
    split("", pav_id);    # Average price.
    
    # The second argument is the number of seconds in one day.
    nd_price = price_series_read_file( \
      priceFile,24*60*60, \
      date_id,time_id,pop_id,phi_id,plo_id,pcl_id,vbt_id,vcr_id,pav_id \
    );
    
    # Read the miner output per day, indexed by {jd} in {0..nd_mined-1}:
    
    split("", date_jd);  # Date from BTC mined file.
    split("", vbm_jd);   # Amount of BTC mined on that day.

    nd_mined = read_btc_mined_per_day(minedFile, date_jd,vbm_jd);
    
    # Output data indexed by {kd} in {0..nd_debit-1}: 
    split("", date_kd);   # Date for output file.
    split("", plo_kd);    # Low price in day.
    split("", phi_kd);    # High price in day.
    split("", pav_kd);    # Average price in day.
    split("", vbm_kd);    # Mined bitcoin in day.
    split("", dhi_kd);    # High estimate of debt at end of {date_kd[kd]}.
    split("", dlo_kd);    # Low estimate of debt at end of {date_kd[kd]}.
    
    # When processing some date {kd}, for each {kb} in {0..kd},
    # {plo_after[kb]} is the minimum of {plo_kd[kk]*R^(kd-kk)} for {kk} in {kb..kd}.
    # Similarly for {phi_after}, but maximum.
    split("", plo_after);  # Min {plo} seen on or after each day. 
    split("", phi_after);  # Max {phi} seen on or after each day. 
    
    # Merge the two series, producing one output day {kd} per mined-file day {jd}.
    # Assumes that the mined data is more complete: it must start no later
    # than the price data, and every price date must also be a mined date.
    # The merge stops as soon as either series is exhausted.
    id = 0; 
    jd = 0;
    kd = 0;
    while ((id < nd_price) && (jd < nd_mined))
      { # Copy data from mined BTC file:
        date_kd[kd] = date_jd[jd];
        vbm_kd[kd] = vbm_jd[jd];
        jd++;
        
        if (date_id[id] > date_kd[kd])
          { # No price, early date.  Only allowed before the first price entry:
            if (id > 0) { prog_error(("gap in price dates?")); }
            plo_kd[kd] = 0;
            phi_kd[kd] = 0;
            pav_kd[kd] = 0;
          }
        else
          { # Copy data from price file:
            # NOTE(review): the other checks call {prog_error}/{arg_error}/{file_error};
            # confirm that {price_error} is defined in the loaded packages.
            if (date_id[id] != date_kd[kd]) 
              { price_error(("date mismatch \"" date_id[id] "\" \"" date_kd[kd] "\"")); }
            plo_kd[kd] = plo_id[id];
            phi_kd[kd] = phi_id[id];
            pav_kd[kd] = pav_id[id]; 
            id++;
          }
        
        # Compute the price interval {plo_after[kb],phi_after[kb]} from each date {kb} to {kd}.
        # The whole table is rebuilt from scratch for every {kd}, scanning backwards.
        if ((plo_kd[kd] <= 0) && (phi_kd[kd] > 0)) { prog_error(("invalid price interval")); }
        plo_after[kd] = plo_kd[kd]; # Min debt on {kd} for 1 BTC bought on {kd}; 0 if unknown.
        phi_after[kd] = phi_kd[kd]; # Max debt on {kd} for 1 BTC bought on {kd}; 0 if unknown.
        for (kb = kd-1; kb >= 0; kb--)
          { # Maybe the min,max debt per BTC are achieved by buying after {kb}:
            plo_after[kb] = plo_after[kb+1];
            phi_after[kb] = phi_after[kb+1];
            unk = (phi_after[kb] <= 0); # True if the min,max debt per BTC after {kb} is unknown:
            if (phi_kd[kb] > 0)
              { # Or maybe they are achieved by buying on {kb}:
                Rpow = exp((kd-kb)*log(R)/365.25);  # Interest factor: {R} per year, compounded over {kd-kb} days.
                if (unk || (plo_kd[kb]*Rpow < plo_after[kb])) { plo_after[kb] = plo_kd[kb]*Rpow; }
                if (unk || (phi_kd[kb]*Rpow > phi_after[kb])) { phi_after[kb] = phi_kd[kb]*Rpow; }
              }
          }
        
        # Compute the total debt bounds, skipping days whose price bounds are unknown:
        slo = 0;
        shi = 0;
        for (kb = 0; kb <= kd; kb++)
          { if (phi_after[kb] > 0)
              { slo += vbm_kd[kb]*plo_after[kb];
                shi += vbm_kd[kb]*phi_after[kb];
              }
          }
        dlo_kd[kd] = slo;
        dhi_kd[kd] = shi;
        
        # Write out:
        printf "%s", date_kd[kd];
        printf " | %12.5f | %12.5f | %12.5f", plo_kd[kd], phi_kd[kd], pav_kd[kd];
        # NOTE(review): "%6d" drops any fractional part of the mined amount in
        # the output (the debt sums above use the full value) -- confirm intended.
        printf " | %6d", vbm_kd[kd];
        printf " | %18.2f | %18.2f", dlo_kd[kd], dhi_kd[kd];
        printf "\n";
        
        # Progress report to the terminal every 100 days:
        if ((kd % 100) == 0)
          { printf "%s %18.2f \n", date_kd[kd], dhi_kd[kd] > "/dev/stderr"; }

        kd++;
      }
    nd_debit = kd;
    fflush("/dev/stdout");
    if (nd_debit == 0)
      { # No day was merged (one of the series had no data), so there is no
        # first/last date and no final debt; do not index the arrays with {-1}.
        printf "%6d debit lines written\n", nd_debit > "/dev/stderr";
      }
    else
      { printf "%6d debit lines written (%s -- %s)\n", \
          nd_debit, date_kd[0], date_kd[nd_debit-1] > "/dev/stderr";
        printf "current debit in [ %18.5f _ %18.5f ] USD\n", dlo_kd[nd_debit-1], dhi_kd[nd_debit-1]  > "/dev/stderr";
      }
    exit(0);
  }
  
END \
  { 
    # Intentionally empty.  In gawk, an {exit} executed in the BEGIN rule
    # skips the reading of input records but still runs the END rules, so
    # this rule is executed after the {exit(0)} above and does nothing.
  }

function read_btc_mined_per_day \
  ( fname, date_ix,vbm_ix, \
    \
    nlin,lin,ndata,fld,nfld,dy,tm, \
    vbm,ody,dy_ini,dy_fin \
  )
  {
    # Reads a file "{fname}" with amounts of BTC mined in each day.
    # Stores the data in {date_ix[ix]}, {vbm_ix[ix]}
    # where {ix} is the index of the data line, counting from 0.
    # Returns the number of data lines read.
    #
    # In each line, tabs are turned into blanks, anything from "#" on is
    # discarded, and blanks are trimmed and squeezed.  Lines that are then
    # empty, or that contain "!", are skipped.  Every other line is a data
    # line and must have exactly three blank-separated fields: date, time,
    # and amount mined.  The time must be "18:15:05" or "00:00:00", and the
    # dates of successive data lines must be consecutive.
    #
    # Format violations are reported through {file_error}; a file with no
    # lines at all is reported through {arg_error}.  Progress messages are
    # written to "/dev/stderr".
    
    printf "reading file %s ...\n", fname > "/dev/stderr";
    ERRNO = "";  # So that a stale error message is not blamed on this file.

    # Read the file:
    nlin = 0;   # Number of lines read.
    ndata = 0;  # Number of non-blank, non-header, non-comment lines.
    ody = ""; # Date on previous data line.
    while((getline lin < fname) > 0) { 
      nlin++;
      # Remove tabs, inline comments, spurious blanks
      gsub(/[\011]/, " ", lin);
      gsub(/[\#].*$/, "", lin);
      gsub(/^[ ]+/, "", lin); 
      gsub(/[ ]+$/, "", lin); 
      gsub(/[ ][ ]+/, " ", lin); 
      if ((lin != "") && (! match(lin, /[!]/)))
        { # Data line:
          nfld = split(lin, fld, " ");
          if (nfld != 3) { file_error(fname, nlin, ("wrong field count = \"" lin "\"")); }
          # Get the input fields:
          dy = usf_check_date(fname,nlin,fld[1]);
          tm = usf_check_time(fname,nlin,fld[2]);
          vbm = usf_check_num(fname, nlin, fld[3]);
          
          # Consistency checks:
          if ((tm != "18:15:05") && (tm != "00:00:00"))
            { file_error(fname,nlin, ("unexpected time \"" tm "\"")); }
          if ((ody != "") && (! usf_dates_are_consecutive(ody,dy)))
            { file_error(fname,nlin, ("non-consecutive dates \"" ody "\" \"" dy "\"")); }
          ody = dy;
          
          # Save in arrays:
          date_ix[ndata] = dy;
          vbm_ix[ndata] = vbm;
          ndata++;
        }
    }
    # A failed {getline} ends the loop just like end-of-file; tell them apart by {ERRNO}:
    if ((ERRNO != "0") && (ERRNO != "")) { file_error(fname, nlin, ERRNO); }
    close (fname);
    if (nlin == 0) { arg_error(("file \"" fname "\" empty or missing")); }
    printf "%6d lines read\n", nlin > "/dev/stderr";

    # First and last dates for the summary.  When there are no data lines they
    # are left empty, instead of referencing {date_ix[0]} and {date_ix[-1]},
    # which would create spurious elements in the caller's array.
    dy_ini = "";
    dy_fin = "";
    if (ndata > 0) { dy_ini = date_ix[0]; dy_fin = date_ix[ndata-1]; }
    printf "%6d data lines found (%s -- %s)\n", \
      ndata, dy_ini, dy_fin > "/dev/stderr";
    return ndata;
  }