# Last edited on 2022-06-16 08:00:41 by stolfi

# GOAL
#
#   Analyze the letters to the SEC re the conversion of GBTC to an ETF.
#
# NOTE: these are working notes. Some steps are manual (marked below), so
# the commands are meant to be run section by section, not blindly top to
# bottom.

# ----------------------------------------------------------------------
# CREATING THE MAIN PAGE

# Date when the list of letters was last captured:
snap_date="2022-06-14"

# List of letters as posted by the SEC:
# (${prefix} is handed to add_date_name_to_urls.gawk below; presumably it is
# prepended to each table key to form a letter URL -- confirm in that script.)
prefix="https://www.sec.gov/comments/sr-nysearca-2021-90/srnysearca202190-"
rawposted="${snap_date}-posted-raw.html"
wget -nv 'https://www.sec.gov/comments/sr-nysearca-2021-90/srnysearca202190.htm' -O "${rawposted}"

# MANUAL STEP: edited the ${rawposted} file by hand, producing the ${posted} one:
posted="${snap_date}-posted.html"

# MANUAL STEP: extracted by hand the table mapping number+extension to date
# and name:
table="${snap_date}-url-date-name.tbl"

# Formatting the selected entries in the public webpage.
# The page source is fed on stdin; the finished HTML is written to ${page}.
psrc="${snap_date}-grayscale-gbtc-to-etf-spam.hsrc"
page="${snap_date}-grayscale-gbtc-to-etf-spam.html"
add_date_name_to_urls.gawk -v table="${table}" -v prefix="${prefix}" \
  < "${psrc}" \
  > "${page}"

# ----------------------------------------------------------------------
# LETTER STATISTICS

# Letters received by date.
# Takes field 2 of every table line that starts with a digit (the date,
# per the table description above) and counts the occurrences of each value.
ctbyday="${snap_date}-letters-by-day"
gawk '/^[0-9]/{print $2}' "${table}" \
  | sort | uniq -c \
  > "${ctbyday}.txt"
plot_letters_by_day.sh "${ctbyday}.txt" > "${ctbyday}.png"

# >>> TO FIX >>> ${sec_raw_file} is not defined anywhere in these notes.
# Until it is, the guard below stops this step with a clear message; without
# it the input would silently fall back to stdin and hang.
#
# Counting letters by first name of sender.
# The name is taken from field 2 of each input line, lowercased, with periods
# and commas stripped. Only tokens longer than one character that contain at
# least one of a/e/i/o/u/y are kept (presumably to drop initials and
# abbreviations). The output is "count name", highest count first.
sec_byfname="${snap_date}-letters-by-first-name"
: "${sec_raw_file:?not set - define the raw SEC letter list file first, see TO FIX note}"
gawk '
    {
      f = tolower($2)
      gsub(/[.,]/, "", f)
      if ((length(f) > 1) && (f ~ /[aeiouy]/)) { print f }
    }
  ' "${sec_raw_file}" \
  | sort | uniq -c | sort -k1nr -k2 \
  > "${sec_byfname}.txt"

# Got frequencies of male first names from 1990 US census.
usa_byfname="1990-first-name-freq-US"

# ${sec_byfname} is assigned in the "Counting letters by first name of
# sender" step; stop here with a clear message if that step was skipped,
# instead of operating on files named ".txt" / ".-s.txt".
: "${sec_byfname:?not set - run the first-name counting step first}"

# Making Zipf plots of the two lists:
plot_zipf_names.sh "Male first names - 1990 US Census" "${usa_byfname}"
plot_zipf_names.sh "First names in SEC letters" "${sec_byfname}"

# Comparing the lists.
#
# Each "${name}.txt" list is normalised into a hidden ".${name}-s.txt" file:
#   * trailing "# ..." comments and blank lines are dropped;
#   * each remaining line is rewritten as "COUNT name" with the name
#     lowercased;
#   * entries whose name is "anonymous" are discarded;
#   * lines are sorted on the name (field 2) because join(1) requires its
#     inputs sorted on the join field. The key is bounded and uses "b"
#     (-k2b,2) so the leading blank is not part of the key, matching how
#     join delimits fields.
sec_usa_byfname="first-name-USA-SEC-freq"

for name in "${usa_byfname}" "${sec_byfname}" ; do
  gawk '
      { gsub(/[ ]*[#].*$/, "", $0) }
      /^[ ]*$/ { next }
      {
        c = $1
        f = tolower($2)
        if (f != "anonymous") { print c, f }
      }
    ' "${name}.txt" \
    | sort -k2b,2 -k1,1n \
    > ".${name}-s.txt"
done

# Full outer join on the name. Output columns: name, USA count, SEC count.
# A name missing from one of the lists gets count 0 on that side (-e '0').
join -j 2 -a1 -a2 -e '0' -o0,1.1,2.1 \
    ".${usa_byfname}-s.txt" ".${sec_byfname}-s.txt" \
  > "${sec_usa_byfname}.txt"

# Names present in only one of the two lists, most frequent first:
gawk '($3 == 0){ print }' "${sec_usa_byfname}.txt" | sort -k2nr > .only-usa.txt
gawk '($2 == 0){ print }' "${sec_usa_byfname}.txt" | sort -k3nr > .only-sec.txt

plot_both_freqs.sh \
  "Frequencies of first names" \
  "Male 1990 US Census" \
  "SEC letters" \
  "${sec_usa_byfname}"