# Last edited on 2022-06-16 08:00:41 by stolfi

GOAL

  Analyze the letters sent to the SEC regarding the conversion of GBTC to an ETF.

CREATING THE MAIN PAGE

  Date when the list of letters was last captured:

    # Snapshot date (YYYY-MM-DD); it prefixes the names of the snapshot files below.
    snap_date='2022-06-14'

  List of letters as posted by the SEC:
  
    # URL prefix handed to add_date_name_to_urls.gawk in the formatting step below.
    prefix='https://www.sec.gov/comments/sr-nysearca-2021-90/srnysearca202190-'
    # Local file that receives the SEC index page exactly as downloaded.
    rawposted="${snap_date}-posted-raw.html"
    wget -nv -O "${rawposted}" \
      'https://www.sec.gov/comments/sr-nysearca-2021-90/srnysearca202190.htm'

  Manually edited the ${rawposted} file, producing the ${posted} file:

    # Name of the hand-edited copy of the downloaded list.
    posted=${snap_date}-posted.html

  Extracted by hand the table mapping number+extension to date and name:

    # File holding the hand-extracted table; read by the formatting and
    # statistics steps below.
    table=${snap_date}-url-date-name.tbl

  Formatting the selected entries in the public webpage:

    # Page source (input) and finished public page (output).
    psrc="${snap_date}-grayscale-gbtc-to-etf-spam.hsrc"
    page="${snap_date}-grayscale-gbtc-to-etf-spam.html"

    # The source is fed on stdin; the script receives the table file name and
    # the URL prefix as gawk variables.
    cat "${psrc}" \
      | add_date_name_to_urls.gawk \
          -v table="${table}" \
          -v prefix="${prefix}" \
      > "${page}"

LETTER STATISTICS

  Letters received by date:

    ctbyday="${snap_date}-letters-by-day"
    # Take field 2 of every table line that starts with a digit, then count
    # how many times each distinct value occurs.
    gawk '/^[0-9]/ { print $2 }' < "${table}" \
      | sort \
      | uniq -c \
      > "${ctbyday}.txt"
    plot_letters_by_day.sh "${ctbyday}.txt" > "${ctbyday}.png"
  

>>> TO FIX >>>


Counting letters by first name of sender:

  sec_byfname="${snap_date}-letters-by-first-name"
  # NOTE(review): ${sec_raw_file} is not defined in the steps above. It must
  # name a file whose second whitespace-separated field is the sender's first
  # name -- confirm which file that is before running this step.
  # The readability check keeps an unset or wrong name from silently turning
  # into "cat" reading standard input and producing an empty count file.
  if [ -r "${sec_raw_file:-}" ]; then
    # Field 2 is lowercased and stripped of periods and commas. It is kept only
    # if longer than one character and containing a vowel (presumably to drop
    # bare initials and abbreviations). The result is "count name" lines,
    # highest count first, ties ordered by name.
    gawk \
      ' {
          f = tolower($2);
          gsub(/[.,]/, "", f);
          if ((length(f) > 1) && (f ~ /[aeiouy]/)) { print f }
        }
      ' < "${sec_raw_file}" \
      | sort | uniq -c | sort -k1nr -k2 \
      > "${sec_byfname}.txt"
  else
    printf '%s\n' "cannot read input file '${sec_raw_file:-}' (is sec_raw_file set?)" >&2
  fi

Got the frequencies of male first names from the 1990 US census:

    # Base name (without ".txt") of the census name-frequency list.
    usa_byfname='1990-first-name-freq-US'

Making Zipf plots of the two lists:

    # Arguments: plot title, then the base name of the frequency list.
    plot_zipf_names.sh 'Male first names - 1990 US Census' "${usa_byfname}"
    plot_zipf_names.sh 'First names in SEC letters' "${sec_byfname}"

Comparing the lists:

    sec_usa_byfname='first-name-USA-SEC-freq'

    # Normalize each list: strip "#" comments, skip blank lines, lowercase the
    # name in field 2, drop "anonymous", and sort by name so that "join" can
    # merge the two results.
    for name in "${usa_byfname}" "${sec_byfname}"; do
      gawk \
          ' {
              gsub(/[ ]*[#].*$/, "", $0)
              if ($0 ~ /^[ ]*$/) { next }
              who = tolower($2)
              if (who != "anonymous") { print $1, who }
            }
          ' < "${name}.txt" \
        | sort -k2 -k1n \
        > ".${name}-s.txt"
    done

    # Full outer join on the name (field 2 of both inputs). Output columns are:
    # name, value from the census list, value from the SEC list; "0" fills in
    # the side where the name is missing.
    join -1 2 -2 2 -a 1 -a 2 -e 0 -o 0,1.1,2.1 \
        ".${usa_byfname}-s.txt" ".${sec_byfname}-s.txt" \
      > "${sec_usa_byfname}.txt"

    # Names found in only one of the two lists, largest value first.
    gawk '($3 == 0)' "${sec_usa_byfname}.txt" | sort -k2nr > .only-usa.txt
    gawk '($2 == 0)' "${sec_usa_byfname}.txt" | sort -k3nr > .only-sec.txt

    plot_both_freqs.sh \
      'Frequencies of first names' \
      'Male 1990 US Census' \
      'SEC letters' \
      "${sec_usa_byfname}"