# Last edited on 2016-05-10 11:56:32 by stolfilocal
#
# Command notes for fetching Bitcointalk contributions and for extracting
# Gavin Andresen's posts.
#
# The fetch commands are kept below as a commented log.  The three extraction
# steps are wrapped in functions, so loading this file (bash: `source`) runs
# nothing by itself; call the functions in the order they are defined.
#
# The helper programs named here (fetch-posts.sh, fetch-inbox.sh,
# fetch-outbox.sh, do-split-posts.sh, remove-quoted-text.gawk,
# extract-post-text.gawk) are not defined in this file and must be on PATH.
# NOTE(review): the meaning of the numeric arguments of the fetch scripts is
# not visible from this file -- confirm against the scripts themselves.

# ----------------------------------------------------------------------
# FETCHING BITCOINTALK CONTRIBUTIONS
#
#   fetch-posts.sh 183158 JorgeStolfi 1 52
#   fetch-posts.sh 183158 JorgeStolfi 1 3
#   fetch-posts.sh 183158 JorgeStolfi 1 40
#   fetch-posts.sh 183158 JorgeStolfi 1 109
#   fetch-posts.sh 183158 JorgeStolfi 1 97
#
# Private messages:
#
#   fetch-inbox.sh 1 2
#   fetch-outbox.sh 1 3

# ----------------------------------------------------------------------
# FETCHING GAVIN ANDRESEN'S POSTS
#
#   fetch-posts.sh 224 Gavin_Andresen 1 114

# ----------------------------------------------------------------------
# EXTRACTING GAVIN'S POSTS

#######################################
# Splits pages into individual posts with minimal HTML header: every
# "<dir>/*.html" file is fed on stdin to `do-split-posts.sh "<dir>/split"`.
# Arguments: $1 - directory holding the pages (default: Gavin_Andresen)
# Outputs:   the name of each page on stdout; failures on stderr
# Returns:   0 if every page was processed, 1 if any invocation failed
#######################################
split_posts() {
  local dir=${1:-Gavin_Andresen}
  local page status=0
  mkdir -p -- "${dir}/split" || return
  # Glob instead of parsing `ls`, so names with blanks stay in one piece.
  for page in "${dir}"/*.html; do
    [[ -e "${page}" ]] || continue  # pattern matched nothing
    printf '%s\n' "${page##*/}"
    if ! do-split-posts.sh "${dir}/split" < "${page}"; then
      printf 'split_posts: failed on %s\n' "${page}" >&2
      status=1  # keep going with the remaining pages
    fi
  done
  return "${status}"
}

#######################################
# Removes quoted texts: every "<dir>/split/*.html" file is filtered through
# remove-quoted-text.gawk into "<dir>/noquotes/" under the same file name.
# Arguments: $1 - directory holding "split/" (default: Gavin_Andresen)
# Outputs:   the name of each post file on stdout; failures on stderr
# Returns:   0 if every file was processed, 1 if any invocation failed
#######################################
remove_quoted_text() {
  local dir=${1:-Gavin_Andresen}
  local post name status=0
  # The redirect below cannot create the directory itself.
  mkdir -p -- "${dir}/noquotes" || return
  for post in "${dir}/split"/*.html; do
    [[ -e "${post}" ]] || continue  # pattern matched nothing
    name=${post##*/}
    printf '%s\n' "${name}"
    if ! remove-quoted-text.gawk < "${post}" > "${dir}/noquotes/${name}"; then
      printf 'remove_quoted_text: failed on %s\n' "${post}" >&2
      status=1
    fi
  done
  return "${status}"
}

#######################################
# Extracts the text without HTML markup: every "<dir>/noquotes/*.html" file
# is filtered through extract-post-text.gawk into "<dir>/text/<name>.txt",
# where <name> is the file name minus its ".html" suffix.
# Arguments: $1 - directory holding "noquotes/" (default: Gavin_Andresen)
# Outputs:   <name> of each post on stdout; failures on stderr
# Returns:   0 if every file was processed, 1 if any invocation failed
#######################################
extract_post_text() {
  local dir=${1:-Gavin_Andresen}
  local post name status=0
  mkdir -p -- "${dir}/text" || return
  for post in "${dir}/noquotes"/*.html; do
    [[ -e "${post}" ]] || continue  # pattern matched nothing
    name=${post##*/}
    # Strip only the ".html" suffix; cutting at the first dot would map
    # "a.1.html" and "a.2.html" onto the same output file.
    name=${name%.html}
    printf '%s\n' "${name}"
    if ! extract-post-text.gawk < "${post}" > "${dir}/text/${name}.txt"; then
      printf 'extract_post_text: failed on %s\n' "${post}" >&2
      status=1
    fi
  done
  return "${status}"
}