# Last edited on 2014-09-26 11:26:09 by stolfilocal ANALYZING THE BITCOINTALK POSTS BY TOPIC Copy-paste the "recent posts" page to a file: https://bitcointalk.org/index.php?action=unread Manually remove the header and trailer lines (egrep won't work with UTF-8 Chinese, unfortunately) Use Emacs replace to (in order) remove lines '\012by [- ._A-Za-z0-9]* *' replace '|' by '-' replace '#' by '%' replace TAB by ' | ' replace Chinese text with '[CN]' replace '!' by '.' replace ' in \(.*\) in ' by ' i@n \1 in ' (repeat) replace 'New «[ .0-9]*[ All]*»' by ' | ' replace ' New in ' by ' | in ' delete ' [|] *Last post [A-Za-z0-9: ]*' delete '^ | | ' replace '| *in \([^|]*\) *|' by '| \1 |' Replace funny UTF-8 characters with 2 or 3 ASCII chars change type to iso-latin-1, replace bad chars, save. Pipe through 'txtable-reformat | sort' replace "i@n" by "in" Remove the last two columns (numbers): replace '|[ 0-9]+|[ 0-9]+$' by '' Rearrange columns Add column with category in front Sort and count TO SORT UTF-8: C-x RET c utf-8 RET C-u M-| 2014-06-21 ! 2014-07-25 ! 2014-09-25 ! --------------+---------------+---------------+ Posts ! % ! Posts ! % ! Posts ! % ! 
Category ------+-------+-------+-------+-------+-------+-------------------------------- 8 | 6.7 | 14 | 12.6 | 28 | 11.7 | Off-topic 4 | 3.3 | 7 | 6.3 | 15 | 6.3 | Gambling 18 | 15.0 | 12 | 10.8 | 58 | 24.3 | Bitcoin mining 37 | 30.8 | 22 | 19.8 | 23 | 9.6 | Bitcoin non-mining 53 | 44.2 | 56 | 50.5 | 115 | 48.1 | Altcoins (including mining) ------+-------+-------+-------+-------+-------+-------------------------------- 120 | 100.0 | 111 | 100.0 | 239 | 100.0 | TOTAL To (re)compute percentages, make a scratch table with lines in the format "{COUNT} | 0.0 | {CATEGORY}" and pipe through the following command (with the proper value of {ntot}): gawk -v ntot=120 'BEGIN { FS = "|"; nt = 0; } /^ *[0-9]/ { n = xb($1); t = xb($3); printf "%4d | %4.1f | %s\n", n, 100.0*n/ntot, t; nt += n; next; } //{ print; next; } END { printf "total %d posts (should be %d)\n", nt, ntot > "/dev/stderr"; } function xb(x){ gsub(/^[ ]+/,"",x); gsub(/[ ]+$/, "", x); return x; }'