#! /usr/bin/gawk -f
# Last edited on 2000-09-21 03:02:05 by stolfi

# Breaks each line of the input file into runs of equal characters and
# prints one run per output line.
#
# Runs are normally delimited by transitions between two different
# characters, and by line boundaries.
#
# If "sticky_qmarks" is true, a transition between two different
# characters is a run boundary only if both characters are different
# from "?".  So a "?" is glued to the run that precedes it, and the
# character that follows a "?" is glued to that same run.  Line
# boundaries always end a run.
#
# Everything from the first "#" to the end of the line is treated as a
# comment and discarded before the line is processed.
#
# If "min_line_length" is set, lines shorter than that (after comment
# removal) are eliminated.  The default, and the minimum, is 1.
#
# After the runs of each line, also writes a string with one dot for
# each non-commented character in the line.

BEGIN {
  abort = -1;
  usage = ( ARGV[0] " \\\n" \
    " -v sticky_qmarks=BOOL \\\n" \
    " [ -v min_line_length=NUM ] \\\n" \
    " < INFILE > OUTFILE" \
  );

  if (sticky_qmarks == "") { arg_error("must define sticky_qmarks"); }
  if (min_line_length == "") { min_line_length = 1; }

  # A value given with "-v" that does not look like a number stays a
  # string, and would make the length test below a string comparison
  # (silently dropping every line).  For such a value the comparison
  # here is done on strings and fails; for numeric values it is done
  # on numbers and succeeds.
  if ((min_line_length + 0) != min_line_length) {
    arg_error("min_line_length must be a number");
  }
  min_line_length += 0;
  if (min_line_length < 1) { min_line_length = 1; }
}

# Stop reading input once an error has been flagged.
(abort >= 0) { exit abort; }

# Strip the comment, if any.  The pattern is anchored at end of line,
# so it can match at most once.
/[#]/ {
  sub(/#.*$/, "", $0);
}

# Drop lines that are too short (in particular, empty ones).
(length($0) < min_line_length) { next; }

# Split the line into runs.
{
  lin = $0;
  n = length(lin);
  run = "";
  dots = "";
  # "#" cannot occur in the line after comment removal, so it is a
  # safe "no previous character" sentinel.
  last_c = "#";
  # Index the line in place instead of repeatedly chopping off its
  # first character, which would copy the remaining text every time.
  for (i = 1; i <= n; i++) {
    c = substr(lin, i, 1);
    if (sticky_qmarks) {
      # A "?" on either side of the transition suppresses the boundary.
      diff = ((c != last_c) && (c != "?") && (last_c != "?"));
    } else {
      diff = (c != last_c);
    }
    if (diff && (run != "")) { print run; run = ""; }
    run = ( run c );
    dots = ( dots "." );
    last_c = c;
  }
  # The end of the line always closes the pending run.
  if (run != "") { print run; }
  print dots;
  next;
}

# Reports a command-line error "msg" together with the usage text on
# standard error, and terminates with exit status 1.
function arg_error(msg)
{
  printf "%s\n", msg > "/dev/stderr";
  printf "usage: %s\n", usage > "/dev/stderr";
  abort = 1;
  exit abort;
}

# Reports an input data error "msg", tagged with the current file name
# and line number, on standard error, and terminates with exit status 1.
function data_error(msg)
{
  printf "file %s, line %s: %s\n", FILENAME, FNR, msg > "/dev/stderr";
  abort = 1;
  exit abort;
}