#! /usr/bin/gawk -f
# Last edited on 2015-06-17 23:47:32 by stolfilocal
# Reads a bunch of data files with daily volumes and smoothed prices.
# Merges them into a single file.
#
# The user must define (with {export}) the environment variable {TZ="UTC"}.
# The user must load (with "-f") the library {index-file_functions.gawk}, and
# define (with "-v") the variables
#
#   {indexFile}, the name of the index file;
#   {inDir}, the name of the directory where the smoothed price files reside;
#   {hrad}, the half-width of the smoothing window.
#
# The files to be joined are specified in the index file.
# The format of the index file is described in the file "index_file_functions.gawk",
# function {ixf_read_index_file}.
#
# From each line of the index file the
# program gets the nominal date range {INIDATE .. FINDATE}, the exchange tag
# {EXTAG}, the currency tag {CRTAG}, and the date range to use {RLODATE .. RHIDATE}.
#
# Those parameters specify the file "{inDir}/{INIDATE}--{FINDATE}-{EXTAG}-{CRTAG}-01d-sm{hrad}.txt"
# which must contain daily price and volume data for the exchange {EXTAG} in currency {CRTAG}
# ("USD", "CNY", etc.), between dates {INIDATE} and {FINDATE} inclusive.
# The program will use only the entries of that file
# whose date is between {RLODATE} and {RHIDATE}, both inclusive.
#
# Each of the named files must contain lines in the format
#
#   "{DAY} {TIME} | {VBT} | {VCR} | {PMD}"
#
# where {DAY} is the line's date (UTC), {TIME} must be "00:00:00",
# {VBT} is the total btc volume traded in that day, {VCR} the total
# volume in the exchange currency, and {PMD} is the smoothly averaged price
# around {DAY}.
#
# Writes to standard output a single file, with one line per day, in the format
#
#   "{DAY} {TIME} | {VBT[k]} | {VCR[k]} | {PMD[k]}" where
#
# {DAY} and {TIME} are as in the input,
# and the fields "| {VBT[k]} | {VCR[k]} | {PMD[k]}" are repeated for
# each input file {k}.
# There will be one output line for each date in any input file.
# Missing data will be indicated with {VBT[k]=VCR[k]=PMD[k]=0}.

BEGIN \
  {
    # Check the parameters that must be given with "-v":
    if (indexFile == "") { arg_error("must define {indexFile}"); }
    if (inDir == "") { arg_error("must define {inDir}"); }
    if (hrad == "") { arg_error("must define {hrad}"); }
    if (! (hrad ~ /^[0-9]+$/)) { arg_error("invalid {hrad}"); }

    pi = 3.1415926;

    nfiles = 0; # Number of input files.

    # Set up the global tables that will receive the index file contents:
    ixf_initialize_index_tables();

    # Read {indexFile}; its data goes into the tables
    # {inidate_fi[0..nfiles-1],.. color_fi[0..nfiles-1]}:
    nfiles = ixf_read_index_file( \
      indexFile, \
      inidate_fi, findate_fi, extag_fi, crtag_fi, exname_fi, \
      rate_fi, rlodate_fi, rhidate_fi, color_fi \
    );

    # Set up the tables that will hold the joined series:
    spf_initialize_smoothed_price_tables()

    # Read each smoothed price file named by the index into the series tables:
    kf = 0;
    while (kf < nfiles)
      {
        inidate = inidate_fi[kf];
        findate = findate_fi[kf];
        extag = extag_fi[kf];
        crtag = crtag_fi[kf];
        rlodate = rlodate_fi[kf];
        rhidate = rhidate_fi[kf];

        # File name is "{inDir}/{INIDATE}--{FINDATE}-{EXTAG}-{CRTAG}-01d-sm{hrad}.txt",
        # with {hrad} zero-padded to at least two digits:
        fname = sprintf( \
          "%s/%s--%s-%s-%s-01d-sm%02d.txt", \
          inDir, inidate, findate, extag, crtag, hrad \
        );

        spf_read_smoothed_price_file( \
          fname, kf, rlodate, rhidate, \
          date_dy, vbt_dy, vcr_dy, pmd_dy \
        );
        kf++;
      }
    printf("done reading %d smoothed price files\n", nfiles) > "/dev/stderr";

    # Collect the indices of {date_dy} (the dates seen), sorted, into
    # {date_kd[1..ndays]}:
    ndays = asorti(date_dy, date_kd);
    printf("%d days in merged file, from %s to %s\n", \
      ndays, date_kd[1], date_kd[ndays]) > "/dev/stderr";

    write_joined_price_file( \
      nfiles, extag_fi, crtag_fi, \
      ndays, date_dy, vbt_dy, vcr_dy, pmd_dy, date_kd \
    );
    printf("done writing the joined file\n") > "/dev/stderr";
    exit(0);
  }

function write_joined_price_file \
  ( nfiles,extag_fi,crtag_fi, \
    ndays,date_dy,vbt_dy,vcr_dy,pmd_dy,date_kd, \
    iday,ifile,day \
  )
  {
    # Writes the joined price file to standard output.
    #
    # Expects {date_kd[1..ndays]} to be the merged dates in output order, and
    # {date_dy[d]} to be 1 for each of those dates {d}; calls {prog_error}
    # otherwise. The tables {vbt_dy,vcr_dy,pmd_dy} are indexed by {[d,k]} for
    # date {d} and file index {k} in {0..nfiles-1}. The tables
    # {extag_fi,crtag_fi} are indexed by {k} and are used only in the header.
    #
    # The parameters {iday,ifile,day} are local work variables.

    # Header lines, followed by a blank line:
    printf("# Created by {join_smoothed_price_files.gawk}\n");
    printf("# Exchanges and currencies:");
    for (ifile = 0; ifile < nfiles; ifile++)
      { printf(" %s.%s", extag_fi[ifile], crtag_fi[ifile]); }
    printf("\n\n");

    # One line per date, with one "| VBT | VCR | PMD" group per input file:
    iday = 1;
    while (iday <= ndays)
      {
        day = date_kd[iday];
        if (date_dy[day] != 1) { prog_error("inconsistent {date_dy,date_kd}"); }
        printf("%s %s", day, "00:00:00");
        for (ifile = 0; ifile < nfiles; ifile++)
          {
            # Table entries that were never set are formatted as zero.
            printf(" | %.4f | %.4f | %.5f", \
              vbt_dy[day,ifile], vcr_dy[day,ifile], pmd_dy[day,ifile]);
          }
        printf("\n");
        iday++;
      }
    fflush("/dev/stdout");
  }