#! /usr/bin/gawk -f
# Last edited on 2015-02-27 23:51:56 by stolfilocal

# Reads from {stdin} a "data" file with merged price series from various exchanges.
# Outputs a file with currency conversion factors to use for each exchange.
#
# The "data" file must have lines in the format "{DATE} {TIME} |
# {VBTC[kd,kf]} | {VCUR[kd,kf]} | {PMD[kd,kf]}" where {TIME} is always "00:00:00",
# and the fields "| {VBTC[kd,kf]} | {VCUR[kd,kf]} | {PMD[kd,kf]}" are repeated
# for each exchange. The field {PMD[kd,kf]} is an average price in
# a neighborhood of each date, computed with some smoothing window;
# it needs to be nonzero only when the volumes are nonzero.
#
# The program first computes a smoothed reference USD price
# {PRF[kd]} for each line that is the {PMD[kd]} of "MGOX" at first,
# transitioning smoothly to that of "BSTP".
#
# Then it computes the currency conversion factor for each exchange
# and day as the {PMD[kd]} of that exchange divided by the {PRF[kd]}.
#
# The user must define (with {export}) the environment variable {TZ="UTC"}
#
# The user must define (with "-v") the program variables
#
#   {indexFile}, the name of the index file (that specifies the exchanges in the join file).
#
# See {join_smoothed_price_files.gawk} for the format of the index file.
#
# Writes to standard output a single file, with one line per day in the format
# "{DATE} {TIME} | {PRF[kd]} | {CCF[kd,kf]}" where {PRF[kd]} is the reference price
# for the {DATE}, the field "| {CCF[kd,kf]}" is repeated for each exchange,
# and:
#
#   {DATE} spans the union of the date ranges in the input files;
#   {TIME} is always "00:00:00";
#   {PRF[kd]} is a smoothed reference price used to compute the currency conversion
#     factors for {DATE};
#   {CCF[kd,kf]} is the estimated currency conversion factor for exchange number {kf}
#     around the {DATE}.
BEGIN \
{
  abort = -1;  # Exit status; presumably set >= 0 by {arg_error}/{data_error} to abort — confirm in their definitions.
  if (indexFile == "") { arg_error(("must define {indexFile}")); }
  if (ENVIRON["TZ"] != "UTC") { arg_error(("must set TZ to 'UTC'")); }
  # Initialize global tables to be read from the index file:
  ixf_initialize_index_tables();
  # Read data from file {indexFile}, saves in tables {inidate_fi[0..nfiles-1],.. color_fi[0..nfiles-1]}:
  nfiles = ixf_read_index_file( \
    indexFile, \
    inidate_fi,findate_fi,extag_fi,crtag_fi,exname_fi,rate_fi,rlodate_fi,rhidate_fi,color_fi \
  );
  ndays = 0;   # Number of data lines.
  dy_ini = ""; # First date seen.
  dy_fin = ""; # Last date seen.
  # Previous date (used to check that input dates are consecutive):
  ody = "";
}

# Once an error has been flagged, stop processing input:
(abort >= 0) { exit(abort); }

# ----------------------------------------------------------------------
# PROCESSING STDIN - THE JOINED SERIES FILE

# Ignore blank lines, comment lines, and any line containing '!':
/(^[ ]*([#]|$))|[!]/ \
{ next; }

# Remove inline comments, leading and trailing blanks:
// \
{ gsub(/[\#].*$/, "", $0); gsub(/^[ ]+/, "", $0); gsub(/[ ]+$/, "", $0); }

# Line with file data.  The year pattern accepts 2000-2099; the previous
# pattern {20[01][0-9]} rejected every date from 2020 on, which would have
# sent valid data lines to the "invalid line" rule below.  Full date
# validation is still delegated to {usf_check_date}.
/^20[0-9][0-9]-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]) / \
{
  # Expect "{DATE} {TIME}" plus six fields ("| {VBTC} | {VCUR} | {PMD}") per exchange:
  if (NF != 2 + 6*nfiles) { data_error("invalid field count"); }
  dy = $1; tm = $2;
  dy = usf_check_date(FILENAME,FNR,dy);
  if ((ody != "") && (! usf_dates_are_consecutive(ody,dy)))
    { data_error(("non-consecutive dates \"" ody "\" \"" dy "\"")); }
  if (tm != "00:00:00") { data_error(("invalid time = \"" tm "\"")); }
  printf "%s %s", dy, tm;
  for (kf = 0; kf < nfiles; kf++)
    { # Parse the triple "| {VBTC} | {VCUR} | {PMD}" of exchange {kf}:
      j = 6*kf+3;
      if ($(j) != "|") { data_error(("missing '|' in column " j)); } j++;
      vbt = usf_check_num(FILENAME,FNR,$(j)); j++;  # BTC volume (validated but not otherwise used).
      if ($(j) != "|") { data_error(("missing '|' in column " j)); } j++;
      vcr = usf_check_num(FILENAME,FNR,$(j)); j++;  # Currency volume (validated but not otherwise used).
      if ($(j) != "|") { data_error(("missing '|' in column " j)); } j++;
      pmd = usf_check_num(FILENAME,FNR,$(j)); j++;  # Smoothed price of exchange {kf} on this date.
      if (kf == 0)
        { # Exchange 0 supplies the reference price for this date:
          prf = pmd;
          printf " | %18.5f", prf;
          if (prf == 0.0) { printf "!! reference price is zero for %s\n", dy > "/dev/stderr"; }
        }
      else
        { # Conversion factor = exchange price / reference price (0 when there is no reference):
          ccf = (prf == 0 ? 0.0 : pmd/prf);
          printf " | %12.5f", ccf;
        }
    }
  printf "\n";
  # Update the date range and consecutiveness bookkeeping:
  if (ndays == 0) { dy_ini = dy; }
  dy_fin = dy;
  ody = dy;
  ndays++;
  next;
}

# Any other line is malformed.  (Previous message said "invalid file list
# line format" — a copy-paste from an index-file reader; this rule fires on
# bad lines of the joined data series read from {stdin}.)
// \
{ data_error("invalid data line format"); }

END \
{ printf "%d days in output file, from %s to %s\n", ndays, dy_ini, dy_fin > "/dev/stderr"; }