#! /bin/bash -eu
# Last edited on 2026-01-19 19:39:26 by stolfi

# Reads an EVT file from standard input, with lines like "{LOC} {DATA}"
# where {LOC} is "<{FNUM}.{LSEQ};{TRANS}>" except that the ";{TRANS}"
# part may be missing.
# 
# Removes inline comments ("<!...>") from each line before splitting it
# into {LOC} and {DATA}, since such comments may contain blanks.
# 
# Writes to stdout a file with lines "{SEC} {TXTY} {LOC} {DATA}" where
# {SEC} is the section ("hea", "bio", etc.) and {TXTY} is the type of
# text ("parags", "labels", "trings", etc.)
#

# Make the strict-mode options effective even when the script is run as
# "bash {SCRIPT}" (which ignores the options on the "#!" line), and let a
# failure in any stage of the pipeline fail the whole script.
set -eu -o pipefail

# Pipeline stages (input is read from stdin by the first gawk):
#
#   1. gawk: skips blank lines and lines starting with "#", deletes inline
#      "<!...>" comments, and writes "{FNUM} {FS} {LOC} {DATA}", where
#      {FNUM} is {LOC} cut at the first "." (without the "<") and {FS} is
#      {LOC} without the "<>" delimiters and without the ";{TRANS}" part.
#
#   2. map_field.gawk: maps field 2 ({FS}) through the text type table.
#
#   3. map_field.gawk: maps field 3 through the section table.
#
#   4. gawk: keeps only fields 1, 2, 5 and 6 of the resulting 6-field lines.
#
# NOTE(review): judging from the field counts checked in stage 4, each
# map_field.gawk call inserts its result as a new field 1, so that field 3
# in stage 3 is still {FS} -- confirm against map_field.gawk.

gawk \
    ' # Reports {msg} and the offending record on stderr, then aborts.
      function data_error(msg) {
        printf "** line %d: %s\n", FNR, msg > "/dev/stderr";
        printf "   [%s]\n", $0 > "/dev/stderr";
        exit 1;
      }

      # Ignore blank lines and "#" comment lines:
      /^ *([#]|$)/{ next }

      {
        # Delete inline comments first, because they may contain blanks
        # that would otherwise change the field count:
        gsub(/[<][!][^<>]*[>]/, "", $0);
        if (NF != 2) { data_error("BUG NF"); }
        loc = $1; dat = $2;

        # Page (folio) part of the locator: text between "<" and first ".":
        fnum = loc; gsub(/[<]/, "", fnum); gsub(/[.].*[>]/, "", fnum);

        # Locator without delimiters and without the transcriber code:
        fs = loc; gsub(/[<>]/, "", fs); gsub(/[;][A-Z]/, "", fs);

        print fnum, fs, loc, dat;
      }
    ' \
  | map_field.gawk \
      -v inField=2 -v outField=1 -v defSubst='t???' \
      -v table=work/ivt_loc_to_type.tbl \
  | map_field.gawk \
      -v inField=3 -v outField=1 -v defSubst='s???' \
      -v table=work/ivt_loc_to_sec.tbl \
  | gawk \
      ' # Reports {msg} and the offending record on stderr, then aborts.
        function data_error(msg) {
          printf "** line %d: %s\n", FNR, msg > "/dev/stderr";
          printf "   [%s]\n", $0 > "/dev/stderr";
          exit 1;
        }

        {
          # Expect two mapped fields followed by the four fields of stage 1:
          if (NF != 6) { data_error("BUG NF B"); }
          print $1, $2, $5, $6;
        }
      '
      
