#! /bin/bash -ue
# Last edited on 2025-05-04 22:58:32 by stolfi

# Writes to disk the list of tags of the sections that occur in
# ${lang}/${book}, and the list of the subset of those sections that is
# worth analyzing. To be called after the EVT file specific to ${lang}
# and ${book} has been created.

# Command-line arguments: the language tag and the book tag, in that order.
# The ${N:-} defaults keep the "-u" option from aborting the script with a
# bare "unbound variable" error before the specific messages below can be
# printed when an argument is missing.
lang="${1:-}"
book="${2:-}"

if [[ -z "${lang}" ]]; then echo "** lang not specified" 1>&2; exit 1; fi
if [[ -z "${book}" ]]; then echo "** book not specified" 1>&2; exit 1; fi

# Both arguments are known to be present here, so dropping them cannot fail:
shift 2

# Only the "voyn" language is handled. Reject anything else before touching
# the disk, so that a bad argument does not leave empty folders behind:
if [[ "${lang}" != "voyn" ]]; then echo "** cannot handle language ${lang}" 1>&2; exit 1; fi

# Top folders for output data and output TeX tables of this note:
gen_book_dir="gen/${lang}/${book}"; mkdir -p "${gen_book_dir}"
tex_book_dir="tex/${lang}/${book}"; mkdir -p "${tex_book_dir}"

# The EVT file for this language and book, read by the steps below:
raw_evt="${gen_book_dir}/raw.evt"

# File that receives the page numbers found in the EVT file:
fnums_list="${gen_book_dir}/fnums.txt"
echo "extracting non-empty page numbers ${fnums_list} for ${lang}/${book} ..." 1>&2

# Fail early with a clear message if the EVT file is not there; otherwise
# the pipeline below would just write an empty page list and carry on.
if [[ ! -r "${raw_evt}" ]]; then echo "** missing EVT file ${raw_evt}" 1>&2; exit 1; fi

# Keep only the lines that start with a locator such as "<f1r.P1.1;H>",
# cut each one down to the part between the "<" and the first "."
# (e.g. "f1r"), and collapse runs of consecutive identical page numbers.
# "grep -E" replaces the deprecated "egrep".
grep -E -e '^<f[0-9]+[a-z]+[0-9]*[.][A-Za-z][0-9]*[a-z]*[.][0-9a-z]+[;][A-Za-z]>' "${raw_evt}" \
  | sed -e 's:[.].*$::g' -e 's:^[<]::g' \
  | uniq \
  > "${fnums_list}"
./vms_wc.sh "${fnums_list}"
# ./show_first_last_lines.sh 10 5 ${fnums_list}

# Input tables for this language. Each one must exist and be non-empty,
# otherwise the script stops here.

# Table that maps page numbers to sections:
fnum_to_sec_tbl="inp/${lang}/fnum-to-subsec.tbl"
printf '%s\n' "checking for table ${fnum_to_sec_tbl} that maps page nums to sections ..." >&2
[[ -s "${fnum_to_sec_tbl}" ]] || {
  printf '%s\n' "** missing table ${fnum_to_sec_tbl}" >&2
  exit 1
}
# ./show_first_last_lines.sh 20 3 ${fnum_to_sec_tbl}

# Table that lists all sections in publishing order:
all_sec_list="inp/${lang}/sections-all.tags"
printf '%s\n' "checking for table ${all_sec_list} that lists all sections in publishing order ..." >&2
[[ -s "${all_sec_list}" ]] || {
  printf '%s\n' "** missing table ${all_sec_list}" >&2
  exit 1
}
# ./show_first_last_lines.sh 20 20 ${all_sec_list}

# File that receives the tags of the sections that occur in this book:
occ_sec_list="${gen_book_dir}/sections-occ.tags"
echo "making list ${occ_sec_list} if all subsections present in ${lang}/${book} ..." 1>&2

# Select from ${all_sec_list}, keeping its order, the lines that contain
# one of the section tags obtained from the page numbers in ${fnums_list}.
#
# The inner pipeline feeds the page numbers to map_field.gawk with
# ${fnum_to_sec_tbl} as the table (presumably replacing field 1 by the
# section tag; confirm against map_field.gawk), keeps the first field,
# and removes duplicates. Its output is handed to grep through a process
# substitution, so no scratch file is left behind in the current folder
# and concurrent runs for different books cannot clobber each other.
#
# NOTE(review): "grep -F" (formerly "fgrep") matches a tag anywhere in a
# line, so a tag that is a substring of another tag would select both.
# Consider adding "-x" if ${all_sec_list} has exactly one tag per line.
#
# grep returns non-zero when nothing matches, which under "-e" would end
# the script without a word; report the failure explicitly instead.
if ! grep -F \
    -f <( map_field.gawk \
            -v inField=1 \
            -v outField=1 \
            -v table="${fnum_to_sec_tbl}" \
            < "${fnums_list}" \
          | gawk '//{ print $1 }' \
          | sort -u ) \
    "${all_sec_list}" \
    > "${occ_sec_list}"
then
  echo "** failed to find any section of ${all_sec_list} present in ${lang}/${book}" 1>&2
  exit 1
fi
./vms_wc.sh "${occ_sec_list}"

# Load the selected tags, one array element per whitespace-separated word:
occ_secs=( $(cat "${occ_sec_list}") )
echo "    sections that occur = ${occ_secs[*]-}" 1>&2

# File that receives the tags of the sections to be analyzed separately:
use_sec_list="${gen_book_dir}/sections-use.tags"
echo "making list ${use_sec_list} if subsections of ${lang}/${book} worth analyzing ..." 1>&2
echo "    !!! MUST CHOOSE SPECIFIC SECTION LIST DEPENDING ON BOOK, IN PUBLISHABLE ORDER !!!" 1>&2
case "${book}" in
  maj | prs | lab )
    # This book is worth analyzing separately by section (minus the
    # missing ones, of course): use every section that occurs in it.
    use_secs=( $(cat "${occ_sec_list}") )
    ;;
  * )
    # This book should be analyzed only as a whole:
    use_secs=( )
    ;;
esac

# The "-" and "+" forms below make the expansion of an empty array safe
# under "-u" in Bash releases older than 4.4, where a plain ${use_secs[*]}
# or "${use_secs[@]}" of an empty array is an "unbound variable" error.
# NOTE(review): the trailing "tot.1" is literal text of the message, not
# an element of ${use_secs[@]}; confirm whether that is intended.
echo "    sections to analyze = ${use_secs[*]-} tot.1" 1>&2

# Save list of use_secs to analyze, one tag per line. When the list is
# empty the file holds a single empty line, not zero bytes.
echo "writing ${use_sec_list} with list of sections to consider in ${lang}/${book} ..." 1>&2
printf '%s\n' ${use_secs[@]+"${use_secs[@]}"} > "${use_sec_list}"
# ./show_first_last_lines.sh 20 3 ${use_sec_list}
