#! /usr/bin/gawk -f # Last edited on 2015-08-25 13:08:19 by stolfilocal # Functions for reading a daily smoothed price and volume file # To be loaded with "-f" into other gawk programs. function spf_initialize_smoothed_price_tables() { # Initializes global tables that contain the price and volume data. # Namely: split("", date_dy); # Set to "1" for dates that exist, undef otherwise, indexed by {DAY}. split("", vbt_dy); # BTC volume, indexed with [{DAY},0..nfiles-1]. split("", vcr_dy); # Currency volume, indexed with [{DAY},0..nfiles-1]. split("", pmd_dy); # Smoothed price around date, indexed with [{DAY},0..nfiles-1]. } function spf_read_smoothed_price_file \ ( fname,kf,rlodate,rhidate, \ date_dy,vbt_dy,vcr_dy,pmd_dy, \ nlin,lin,ndays,nsave,fld,nfld,dy,tm,vbt,vcr,pmd,j,ody,ulp_vbt,ulp_vcr,ulp_pmd \ ) { # Reads a file "{fname}" with total BTC and currency volumes, and smoothed price, in 1 day intervals. # Stores the data in {date_dy[dy],vbt_dy[dy,kf],vcr_dy[dy,kf],pmd_dy[dy,kf]}, # for each date {dy} present in the file that lies in the range {rlodate..rhidate} inclusive. printf "reading file %s ...\n", fname > "/dev/stderr"; ERRNO = ""; ulp_vbt = 0.0001; # Unit in the last place of input {vbt} ulp_vcr = 0.0001; # Unit in the last place of input {vcr} ulp_pmd = 0.00001; # Unit in the last place of input {pmd} # Read the file: nlin = 0; # Number of lines read. ndays = 0; # Number of non-blank, non-header, non-comment lines. nsave = 0; # Number of data lines saved in the output arrays. ody = ""; # Date on previous data line. while((getline lin < fname) > 0) { nlin++; # Remove tabs, inline comments, spurious blanks gsub(/[\011]/, " ", lin); gsub(/[\#].*$/, "", lin); gsub(/^[ ]+/, "", lin); gsub(/[ ]+$/, "", lin); gsub(/[ ][ ]+/, " ", lin); if ((lin != "") && (! match(lin, /[!]/))) { /* Data line: */ nfld = split(lin, fld, " "); if (nfld != 8) { file_error(fname, nlin, ("wrong field count = \"" lin "\"")); } for (j = 3; j <= NF; j = j + 2) { if (fld[j] != "|") { file_error(fname, nlin, ("missing '|' in column " j ", line = \"" lin "\"")); } } dy = usf_check_date(fname,nlin,fld[1]); tm = fld[2]; if (tm != "00:00:00") { file_error(fname, nlin, ("invalid time = \"" tm "\"")); } vbt = usf_check_num(fname, nlin, fld[4]); vcr = usf_check_num(fname, nlin, fld[6]); pmd = usf_check_num(fname, nlin, fld[8]); if ((dy,kf) in vbt_dy) { file_error(fname, nlin, ("repeated date = \"" dy "\"")); } if ((ody != "") && (! usf_dates_are_consecutive(ody,dy))) { file_error(fname,nlin, ("non-consecutive dates \"" ody "\" \"" dy "\"")); } ody = dy; # Volumes mustbe zero, or unit-in-last-place: if (vbt != 0) { usf_check_min_value(fname,nlin, vbt,ulp_vbt, "BTC volume"); } if (vcr != 0) { usf_check_min_value(fname,nlin, vcr,ulp_vcr, "currency volume"); } if (pmd != 0) { # Check that {pmd} is at least a unit in last place: usf_check_min_value(fname,nlin, pmd,ulp_pmd, "smoothed price"); if ((dy >= rlodate) && (dy <= rhidate)) { # Save in arrays: date_dy[dy] = 1; vbt_dy[dy,kf] = vbt; vcr_dy[dy,kf] = vcr; pmd_dy[dy,kf] = pmd; nsave++; } } ndays++; } } if ((ERRNO != "0") && (ERRNO != "")) { file_error(fname, nlin, ERRNO); } close (fname); if (nlin == 0) { arg_error(("file \"" fname "\" empty or missing")); } printf "%6d lines read\n", nlin > "/dev/stderr" printf "%6d data lines found\n", ndays > "/dev/stderr" printf "%6d significant data lines\n", nsave > "/dev/stderr" } function spf_smooth_series(idmin,idmax,val_id,hrad,smo_id, id,smo) { # Computes {smo_id[id]} as the average of {val_id} in a window around {id}, # for {id} in {idmin..idmax}. # Sets {smo_id[id]} to zero if {val_id[id]} is missing. for (id = idmin; id <= idmax; id++) { smo_id[id] = spf_compute_local_average(idmin,idmax,val_id,id,hrad); } } function spf_compute_local_average \ ( idmin,idmax,val_id,kd,hrad, \ rad,jd,id,w,val,val_jd,wht_jd,smo,pi \ ) { # Assumes {val_id} is indexed with day index {id} in {idmin..idmax}. # Computes the average of prices in a soft window # around day with index {kd}. The window spans {2*hrad+1} days. # The procedure fits a polynomial by least squares to the # logs of the daily mean prices in the window, weighted by a Hann-like window weight. # Then it evaluates the fitted line # at the current date. Returns 0.0 if there is not enough # data to fit a line. if (hrad == 0) { return val_id[kd]; } pi = 3.141592653589793238; # Collect the data points in window, indexed {-hrad..+hrad}: split("", val_jd); # Mean price per day. split("", wht_jd); # Weight (including currency volume) per day. for (jd = -hrad; jd <= hrad; jd++) { val_jd[jd] = 0.0; # By default. wht_jd[jd] = 0.0; # By default. id = kd + jd; if ((id >= idmin) && (id <= idmax)) { val = val_id[id]; if (val > 0) { w = 0.5*(1 + cos(pi*jd/(hrad + 0.5))); val_jd[jd] = val; wht_jd[jd] = w; # printf " %4d %18.5f %10.7f\n", jd, val_jd[jd], wht_jd[jd] > "/dev/stderr"; } } } # Fit polynomial and evaluate at center: smo = spf_fit_and_eval(1,hrad,val_jd,wht_jd); # printf " smo = %18.5f\n", smo > "/dev/stderr"; return smo; } function spf_fit_and_eval \ ( deg,hrad,val_jd,wht_jd, \ j,w,val,A00,A01,A11,A0p,A1p,M00,M01,M11,c0,c1 \ ) { # Fits a polynomial {P(j)} of degree {deg} to the data points {(j,log(val_jd[j]))} with # weight {wht_jd[j]}, for {j} in {-hrad..+hrad}. Then evaluates {exp(P(0))}. # Returns 0.0 if there is not enough data to fit the polynomial. if (deg > 1) { prog_error(("degree not implemented")); } A00 = 0; # { = SUM{wht_jd[j]} A01 = 0; # { = SUM{wht_jd[j]*j} A11 = 0; # { = SUM{wht_jd[j]*j*j} A0p = 0; # { = SUM{wht_jd[j]*val_jd[j]} A1p = 0; # { = SUM{wht_jd[j]*val_jd[j]*j} for (j = -hrad; j <= hrad; j++) { w = wht_jd[j]; val = val_jd[j]; if (w > 0.0) { if (val <= 0.0) { prog_error(("non-positive price")); } val = log(val); A00 += w; A01 += w*j; A11 += w*j*j; A0p += w*val; A1p += w*val*j; } } if (A00 < 0.000001) { return 0.0; } # Solve the system {A*c = b} to det = A00*A11 - A01*A01; if ((det >= 0 ? det : -det) < 1.0e-6) { return 0.0; } M00 = A11; M01 = -A01; M11 = A00; # Adjoint {A00,A01,A11}. c0 = (M00*A0p + M01*A1p)/det; c1 = (M01*A0p + M11*A1p)/det; # printf "(%8.5f) + (%8.5f)*j\n", c0, c1 > "/dev/stderr"; # for (j = -hrad; j <= +hrad; j++) # { w = wht[j]; # val = pav[j]; # if (w > 0.0) # { val = log(val); # printf " %+4d %8.5f %8.5f %8.5f\n", j, w, val, c0+c1*j > "/dev/stderr"; # } # } # Evaluate at {j = 0}: return exp(c0); }