#! /bin/bash -e
# Last edited on 2026-01-16 18:47:38 by stolfi

# ??? Update to the new IVTFF-like format.

# Lists all loci from the full EVT file ${full_ivt} that have one-leg gallows 
# (EVA @f, @p, @z, @w) that are not in the Stolfi-only file ${stolfi_ivt}.
# Writes a list ".fplocs" with the loci that
# are in the former but not in the latter.

# Ignores entries from the Currier ";C" and Friedman ";F" transcriptions since 
# they had too many gallows mistakes.

# Ignores all loci with folio number less than or equal to
# ${fskip}.  

# Command-line arguments (all three are required; extra ones are ignored):
#   $1  full_ivt    the full transcription file.
#   $2  stolfi_ivt  the Stolfi-only transcription file.
#   $3  fskip       loci with folio number <= this value are ignored.
if [[ $# -lt 3 ]]; then
  # Without this check the first failing "shift" would end the script
  # with no message at all, since it runs under "bash -e".
  printf 'usage: %s FULL_IVT STOLFI_IVT FSKIP\n' "${0##*/}" >&2
  exit 1
fi
full_ivt="$1"; shift
stolfi_ivt="$1"; shift
fskip="$1"; shift

# Writes to stdout the cleaned-up locus lines of a transcription file.
#   $1  input file name.
#   $2  "1" to keep only the lines that still have one of [fpzw] somewhere
#       after a ">" once the comments are removed; anything else keeps all.
# A line is kept if it begins with "<" and then has two "."s followed by
# a ">".  Lines whose leading "<...>" tag ends in ";C>" or ";F>" are dropped,
# and every "{...}" group is deleted from the lines that remain.
locus_lines() {
  local file="$1" gallows_only="$2"
  # The gallows selection is done by "sed" rather than by a trailing "grep":
  # a "grep" that selects nothing exits with status 1, and as the last stage
  # of the pipeline that would abort the whole script under "bash -e".
  local keep_cmd='p'
  if [[ "${gallows_only}" == 1 ]]; then keep_cmd='/[>].*[fpzw]/p'; fi
  cat "${file}" \
    | grep -E -e '^[<].*[.].*[.].*[>]' \
    | grep -E -v -e '^[<][^<>]*[;][CF][>]' \
    | sed -n -e 's:[{][^{}]*[}]::g' -e "${keep_cmd}"
}

# Reads locus lines from stdin and writes to stdout, sorted, one record
# "KEY FOLIO UNIT LINE" for each distinct locus.
#   $1  folio number threshold; loci with folio number <= $1 are omitted.
# The locus is what lies between the leading "<" and the first ";", split
# at the "."s into three fields.
locus_keys() {
  local skip="$1"
  sed -e 's:^[<]::g' -e 's:[;].*[>].*$::g' -e 's:[.]: :g' \
    | sort | uniq \
    | gawk \
        -v fskip="${skip}" \
        ' // {
            fn = $1; un = $2; ln = $3;
            # Folio number: the digits between the leading "f" and the r/v.
            pn = fn; gsub(/^f/,"",pn); gsub(/[rv].*$/,"",pn);
            pn += 0; if (pn <= fskip) { next; }
            # Side tag: whatever follows "f" and the digits (r, v, r2, v3, ...).
            sn = fn; gsub(/^f[0-9]+/,"",sn); gsub(/[.].*$/,"",sn);
            # Number after the r/v letter; counts as 0 when absent.
            gn = substr(sn, 2);
            # Sort key: 20*folio + number for "r" sides, 20*folio + 10 - number
            # for "v" sides, so numbered "v" sides sort in decreasing order.
            if (sn ~ /^v/) { pn = 20*pn + 10 - gn; } else { pn = 20*pn + gn; }
            printf "%03d %s %s %s\n", pn, fn, un, ln;
          }
        ' \
    | sort
}

# Pass 0 handles the full file (gallows lines only), pass 1 the Stolfi-only
# file (all lines).  Each pass leaves the cleaned lines in ".aa-N" and the
# keyed locus list in ".fplocs-N".
ivts=( "${full_ivt}" "${stolfi_ivt}" )
for vs in 0 1 ; do
  ifile="${ivts[${vs}]}"
  ofile=".fplocs-${vs}"
  if [[ ${vs} -eq 0 ]]; then gallows_only=1; else gallows_only=0; fi
  locus_lines "${ifile}" "${gallows_only}" > ".aa-${vs}"
  locus_keys "${fskip}" < ".aa-${vs}" > "${ofile}"
done

# Combine the two keyed lists and write the final report ".fplocs".
# "bool" is an external command, not defined in this file; presumably
# "1-2" selects the lines of the first file that are absent from the
# second, as the header comment describes -- confirm against that tool.
# The result is ordered numerically on the sort key (field 1), then by
# fields 2-3 as text, then numerically from field 4 on.  The final gawk
# step prints an empty line ahead of every run of records that share the
# same (numeric field 1, field 3) pair, the first run included.
bool 1-2 .fplocs-0 .fplocs-1 \
  | sort -k1n -k2,3 -k4n \
  | gawk '
      {
        group = (($1 + 0) "." $3)
        if (group != last_group) { print ""; last_group = group }
        print
      }
    ' \
  > .fplocs
   
