#! /usr/bin/gawk -f
# Last edited on 2003-10-29 23:41:27 by stolfi

BEGIN {
  # This script reads a list of paper citations, obtained from the ISI
  # WebOfScience or from the NEC CiteSeer, and brings it closer to the
  # Bibtex format.
  #
  # The input file must be a sequence of bibliography entries. Each
  # entry must have been broken up into at least three major chunks:
  # author names, title, and publication data, each on a line by
  # itself.
  #
  # Each entry starts with a line "@TYPE{KEY," where TYPE is any
  # Bibtex entry type, or "isiitem", or "necitem"; and KEY is a
  # bibtex-like citation key, or "??". The final comma is optional.
  # Each entry ends with a line containing a single brace "}".
  #
  # Within an entry there may be one or more field lines, of the form
  # " FIELD = {VALUE},", where FIELD is an alpha string (field name)
  # and VALUE is anything. The final comma is optional.
  #
  # Each field FIELD may be any Bibtex field name, or one of several
  # special field names starting with "isi" for information still in
  # the ISI format. The main fields in this class are
  #
  #   (isi|nec)author = {Author list in the ISI/NEC format}
  #   (isi|nec)where = {Journal,volume,dat,pages,etc.}
  #   (isi|nec)title = {Raw title, possibly with wrong capitalization.}
  #
  # Between the entries there may be lines of the form "! FIELD = {VALUE}"
  # Each of these lines is converted into a comment, and all subsequent
  # entries will be provided with a new field "FIELD = {VALUE}". A line
  # "! FIELD" (with empty value) cancels the previous "! FIELD" directive.
  #
  # The output should be closer to the Bibtex format, except that
  # there are no commas between the entry fields.

  # Exit status requested by {data_error}; negative means "no error so far".
  abort = -1;

  # Fields to be added to each entry:
  split("", addfield); # Indexed by active field names
}

# Stop processing input as soon as an error has been flagged:
(abort >= 0) { exit abort; }

/^[ ]*([#%]|$)/ {
  # Comment or blank line: turn the first "#" into "%" and copy through.
  lin = $0; sub(/[#]/, "%", lin);
  print lin; next;
}

/^[@][a-zA-Z]+[ ]*[{]/ {
  # Beginning-of-entry:
  lin = $0;
  print lin; next;
}

/^[!][ ]*[a-z]+[ ]*[=][ ]*[{].*[}][, ]*$/ {
  # Adds field to all subsequent entries:
  lin = $0;
  if (! match(lin, /^[!][ ]*[a-z]+[ ]*[=]/)) { data_error(("prog error")); }
  fname = substr(lin,RSTART,RLENGTH-1);
  fval = substr(lin,RSTART+RLENGTH);
  gsub(/^[!][ ]*/, "", fname);
  gsub(/[ ]*[=][ ]*$/, "", fname);
  gsub(/^[ ]*[{][ ]*/, "", fval);
  gsub(/[ ]*[}][, ]*$/, "", fval);
  addfield[fname] = fval;
  data_warning(("start adding \"" fname " = {" fval "}\"..."));
  print ("% " $0);
  next;
}

/^[!][ ]*[a-z]+[ ]*$/ {
  # Cancels addition of field to entries:
  fname = $0;
  gsub(/^[!][ ]*/, "", fname);
  gsub(/[ ]*$/, "", fname);
  if (fname in addfield) {
    data_warning(("stop adding \"" fname " = {" addfield[fname] "}\"."));
    delete addfield[fname];
  } else {
    data_warning(("field \"" fname "\" is not currently being added"));
  }
  print ("% " $0);
  next;
}

/^[ ]*[a-z]+[ ]*[=][ ]*{.*}[, ]*$/ {
  # Generic field entry, remove final commas.
  # No "next" here: the cleaned-up line falls through to the rules below.
  gsub(/[ ,]+$/, "", $0);
}

/^[ ]*isiauthor[ ]*[=][ ]*{/ {
  # Author list in ISI format, convert to Bibtex:
  lin = $0;
  lin = convert_isi_authors(lin);
  if (lin ~ /[,]/) { data_warning(("commas not removed from authors")); }
  print lin; next;
}

/^[ ]*necauthor[ ]*[=][ ]*{/ {
  # Author list in NEC format, try to convert to Bibtex:
  lin = $0;
  lin = convert_nec_authors(lin);
  if (lin ~ /[,]/) { data_warning(("commas not removed from authors")); }
  print lin; next;
}

# WAS: /isiwhere[ ]*[=][ ]*{.* +[-A-Z0-9()]*[0-9][-0-9A-Z()]*[ ]*[:]/
/^[ ]*isiwhere[ ]*[=][ ]*{/ {
  # Journal with volume number, possibly issue number:
  lin = $0;
  lin = split_isi_where(lin);
  if (lin ~ /isiwhere[ ]*[=]/) { data_warning(("split_isi_where failed")); }
  print lin; next;
}

/^[ ]*necwhere[ ]*[=][ ]*{/ {
  # Journal with volume number, possibly issue number:
  lin = $0;
  lin = split_nec_where(lin);
  if (lin ~ /necwhere[ ]*[=]/) { data_warning(("split_nec_where failed\n lin = \"" lin "\"")); }
  print lin; next;
}

/^[ ]*necvolpagedate *[=]/ {
  # Volume (possibly number), page range and date:
  lin = $0;
  # To be handled...
  print lin; next;
}

/^[ ]*isijournal[ ]*[=]/ {
  # Rewrite journal names
  lin = fix_isi_journal_name($0);
  print lin; next;
}

/^[ ]*isipagedate *[=]/ {
  # Page range and date:
  lin = $0;
  lin = split_isi_pagedate(lin);
  if (lin ~ /isipagedate[ ]*[=]/) { data_warning(("split_isi_pagedate failed")); }
  print lin; next;
}

/^[ ]*type[ ]*[=][ ]*[a-z]+[ ]*$/ {
  # Entry type (without braces).
  print; next;
}

/^[ ]*month[ ]*[=][ ]*[jfmasond][a-z][a-z]([-][-][jfmasond][a-z][a-z]|)[ ]*$/ {
  # Month(s) without braces (predefined string).
  print; next;
}

/^[ ]*(isi|nec|)(author|title|journal|day|year|volume|number|pages)[ ]*[=][ ]*[{].*[}][ ]*$/ {
  # Another valid field:
  print; next;
}

/^[ ]*(isi|nec|)(publisher|note|comment|abstract|booktitle|series|school)[ ]*[=][ ]*[{].*[}][ ]*$/ {
  # Another valid field:
  print; next;
}

/^[ ]*(isi|nec|)(institution|howpublished|edition|editor|cites|citations|url)[ ]*[=][ ]*[{].*[}][ ]*$/ {
  # Another valid field:
  print; next;
}

/^[ ]*(isi|nec|)(where|pagedate|instdate|volpagedate|key|ctxurl|docurl|misc)[ ]*[=][ ]*[{].*[}][ ]*$/ {
  # A field still partially in ISI or NEC format:
  print; next;
}

/^[ ]*[}][ ]*$/ {
  # End of entry
  # Add new fields:
  for (fname in addfield) {
    printf " %s = {%s}\n", fname, addfield[fname];
  }
  # Print end of entry:
  print; next;
}

// {
  # Anything that reached this point is malformed.
  # {data_error} terminates the program, so nothing can follow it.
  data_error(("unrecognized line format"));
}

END {
  # Propagate the status set by {data_error}, if any:
  if (abort >= 0) { exit abort; }
}

function convert_isi_authors(lin,   aui,aub,res)
{
  # Convert author list from ISI to BibTex format.
  # {lin} is the whole "isiauthor = {...}" line; returns an "author = {...}"
  # line with the names joined by " and ".

  # Remove field name and braces:
  gsub(/^[ ]*isiauthor[ ]*[=][ ]*{[ ]*/, "", lin);
  gsub(/[ ]*}[ ]*$/, "", lin);
  # Fix "et al."
  gsub(/[ ]*et +al[.]?[ ]*/, "??", lin);
  # Break into authors (comma-separated) and convert each one:
  res = "";
  while (match(lin, /[ ]*[,][ ]*/)) {
    aui = substr(lin,1,RSTART-1);
    lin = substr(lin,RSTART+RLENGTH);
    aub = convert_one_isi_author(aui);
    res = ( res aub " and " );
  }
  aub = convert_one_isi_author(lin);
  res = ( res aub );
  return ( " author = {" res "}" );
}

function convert_one_isi_author(aui,   lst,inis,tmp,pre,suf,aub)
{
  # Convert one author name from ISI ("LASTNAME XY") to BibTex ("X. Y. Lastname").
  # Names that cannot be understood are returned unchanged, with a warning.

  # Remove extraneous spaces:
  gsub(/^[ ]*/, "", aui);
  gsub(/[ ]*$/, "", aui);
  gsub(/[ ][ ][ ]*/, " ", aui);
  if (match(aui, /^[?]+$/)) {
    return "??";
  } else if (match(aui, /^[A-Za-z?]+$/)) {
    data_warning(("single-word author name \"" aui "\""));
    return aui;
  } else if (match(aui, /[ ][A-Z]+$/)) {
    # Separate last name from initials:
    lst = substr(aui,1,RSTART-1);
    inis = substr(aui,RSTART);
    # Remove surrounding spaces:
    gsub(/^[ ]*/, "", lst);
    gsub(/[ ]*$/, "", lst);
    gsub(/^[ ]*/, "", inis);
    gsub(/[ ]*$/, "", inis);
    # Try to fix last name capitalization:
    if (match(lst, /^[A-Z][-'A-Z]+$/)) {
      # Last name is all caps, try to fix capitalization:
      # Watch out for some special patterns:
      if (match(lst, /^[M][C][A-Z][-A-Za-z]+$/)) {
        tmp = ("Mc" substr(lst,3,1) tolower(substr(lst,4)));
      } else if (match(lst, /^[O]['][A-Z][-A-Za-z]+$/)) {
        tmp = ("O'" substr(lst,3,1) tolower(substr(lst,4)));
      } else if (match(lst, /^[D]['][A-Z][-A-Za-z]+$/)) {
        tmp = ("D'" substr(lst,3,1) tolower(substr(lst,4)));
      } else {
        tmp = "";
      }
      if (tmp != "") {
        data_warning(("last name \"" lst "\" changed to \"" tmp "\""));
        lst = tmp;
      } else {
        lst = (substr(lst,1,1) tolower(substr(lst,2)));
      }
    } else if (match(lst, /^([Dd][aeiou']|[Vv][ao]n([ ]*[Dd]e[rn]|))[ ]*[A-Z][a-z]/)) {
      # Name with detachable prefix, normalize spacing.
      # The match ends on the first two letters of the main name,
      # hence the "-2" / "-1" below.
      pre = substr(lst,1,RLENGTH-2);
      suf = substr(lst,RLENGTH-1);
      if (match(pre, /^[Vv][ao]n[Dd]e[rn]$/)) {
        pre = (substr(pre,1,3) " " substr(pre,4,3));
      }
      tmp = (pre " " suf);
      gsub(/[ ][ ][ ]*/, " ", tmp);
      gsub(/['][ ][ ]*/, "'", tmp);
      if (! match(suf, /^[A-Z][a-z]+$/)) {
        data_warning(("strange last name \"" lst "\""));
      } else if (tmp != lst) {
        data_warning(("last name \"" lst "\" changed to \"" tmp "\""));
        lst = tmp;
      }
    } else if (match(lst, /^([O])[ ]*[A-Z][a-z]/)) {
      # Irish last name without the "'":
      suf = substr(lst,2);
      # The pattern above allows blanks after the "O" ("O Brien");
      # drop them so that the suffix test below can succeed.
      gsub(/^[ ]+/, "", suf);
      tmp = ("O'" suf);
      if (! match(suf, /^[A-Z][a-z]+$/)) {
        data_warning(("strange Irish name \"" lst "\""));
      } else if (tmp != lst) {
        data_warning(("last name \"" lst "\" changed to \"" tmp "\""));
        lst = tmp;
      }
    } else if (match(lst, /^(La|Mc|O[']|)[A-Z][a-z]+([-][A-Z][a-z]+|)$/)) {
      # OK.
    } else {
      data_warning(("strange last name \"" lst "\""));
    }
    # Insert periods after each initial:
    inis = gensub(/([A-Z])/, "\\1. ", "g", inis);
    # Put back initials before last name:
    aub = (inis " " lst);
    # Remove extraneous spaces:
    gsub(/^[ ]*/, "", aub);
    gsub(/[ ]*$/, "", aub);
    gsub(/[ ][ ][ ]*/, " ", aub);
    return aub;
  } else {
    data_warning(("author name \"" aui "\" garbled"));
    return aui;
  }
}

function convert_nec_authors(lin,   aui,aub,res,p1,p2,inverted)
{
  # Convert author list from NEC to BibTex format.
  # {lin} is the whole "necauthor = {...}" line; returns an "author = {...}"
  # line with the names joined by " and ".

  # printf " lin = [%s]\n", lin > "/dev/stderr";
  # Remove field name and braces:
  gsub(/^[ ]*necauthor[ ]*[=][ ]*{[ ]*/, "", lin);
  gsub(/[ ]*}[ ]*$/, "", lin);
  # Fix "et al."
  gsub(/[ ]*et +al[.]?[ ]*/, "??", lin);
  # Try to guess whether the format is "Stolfi, J." or "J. Stolfi"
  p1 = match(lin, / and +.*[,]/);
  p2 = match(lin, /^[^,]*[,][^,]*$/);
  inverted = (p1 || p2);
  # Replace " and " by semicolon:
  gsub(/ and /, ";", lin);
  # Remove comma before semicolon:
  gsub(/[,][ ]*[;]/, ";", lin);
  # Append a final semicolon to simplify parsing:
  lin = ( lin ";" );
  # Break into authors and convert each one:
  res = "";
  while (lin != "") {
    aui = "";
    if (inverted) {
      # Presumably the format is "Guibas, L.J., Stolfi, J., ..."
      # Grab the next two things separated by a comma
      # and terminated by comma or semicolon.
      if (match(lin, /^[ ]*([A-Za-z][-'A-Za-zçãéá ]*[,][ ]*[A-Z][A-Z. ]*[.][,;])[ ]*/)) {
        aui = substr(lin,RSTART,RLENGTH);
        lin = substr(lin,RSTART+RLENGTH);
        # Remove name delimiter at end of name:
        gsub(/[ ]*[,;]+[ ]*$/, "", aui);
        # Bring initials to front of name:
        if (! match(aui, /[,]/)) { data_error(("duh?")); }
        aui = ( substr(aui,RSTART+1) " " substr(aui,1,RSTART-1) );
      } else {
        aui = "";
      }
    }
    if (aui == "") {
      # Presumably the format is "L.J. Guibas, J. Stolfi, , ..."
      if (match(lin, /^[ ]*([A-Za-z][-.'A-Za-zçãéá ]*[,;])[ ]*/)) {
        aui = substr(lin,RSTART,RLENGTH);
        lin = substr(lin,RSTART+RLENGTH);
        # Remove name delimiter at end of name:
        gsub(/[ ]*[,;]+[ ]*$/, "", aui);
      } else {
        aui = "";
      }
    }
    if (aui == "") {
      # Cannot split name, use whole line:
      data_warning(("cannot parse name \"" lin "\""));
      aui = lin; lin = "";
      # Remove name delimiter at end of name:
      gsub(/[ ]*[,;]+[ ]*$/, "", aui);
    }
    # printf " aui = [%s]", aui > "/dev/stderr";
    aub = convert_one_nec_author(aui);
    # printf " -> %s\n", aub > "/dev/stderr";
    # Append the name to the result:
    res = ( res ( res == "" ? "" : " and ") aub );
  }
  return ( " author = {" res "}" );
}

function convert_one_nec_author(aui,   lst,ini,tmp,pre,suf,aub)
{
  # Try to convert one author name from NEC to BibTex format:
  # any initials found at the end of the name are moved to the front.

  # Spread out initials:
  gsub(/[.]/, ". ", aui);
  # Remove extraneous spaces:
  gsub(/^[ ]+/, "", aui);
  gsub(/[ ]+$/, "", aui);
  gsub(/[ ][ ]+/, " ", aui);
  # Single word name, or "??":
  if (match(aui, /^[?]+$/)) {
    return "??";
  } else if (match(aui, /^[A-Za-z?]+$/)) {
    data_warning(("single-word author name \"" aui "\""));
    return aui;
  }
  # Try to bring all initials to the front:
  aui = ( "@@ " aui ); # Safety marker to detect loop.
  while (match(aui, /[A-Z][.][ ]*$/)) {
    # Bring postfixed initial to front:
    lst = substr(aui,1,RSTART-1);
    ini = substr(aui,RSTART);
    # Remove surrounding spaces:
    gsub(/^[ ]+/, "", lst);
    gsub(/[ ]+$/, "", lst);
    gsub(/^[ ]+/, "", ini);
    gsub(/[ ]+$/, "", ini);
    aui = (ini " " lst);
    # Prevent infinite loop (the marker has reached the end of the name):
    if (match(lst, /[@]$/)) { break; }
  }
  gsub(/[ ]*[@]+[ ]*/, " ", aui);
  gsub(/^[ ]+/, "", aui);
  gsub(/[ ]+$/, "", aui);
  return aui;
}

function split_isi_where(lin,   res,fld,sep)
{
  # Splits "isiwhere = {...}" field into "isijournal" and "isipagedate"
  # (plus "volume" and "number" when present). If the contents cannot be
  # parsed, returns an "isiwhere = {...}" line again.

  # Strip initial "[ ]*isiwhere = {" and final "}":
  gsub(/^[ ]*isiwhere[ ]*[=][ ]*{[ ]*/, "", lin);
  gsub(/[ ]*}[ ]*$/, "", lin);
  # Prepare for parsing:
  split("", fld);
  res = ""; sep = "";
  # Journal with volume and issue numbers:
  if (match(lin, \
      /^([^{}]*) +([A-Z]*[0-9]+[A-Z]*) +[(]([-0-9]+)[)][ ]*[:][ ]*([^{}]*)$/, \
      fld)) {
    res = ( res sep \
      " isijournal = {" fld[1] "}\n" \
      " volume = {" fld[2] "}\n" \
      " number = {" fld[3] "}\n" \
      " isipagedate = {" fld[4] "}" \
    );
    sep = "\n";
    return res;
  }
  # Journal with volume number only:
  if (match(lin, \
      /^([^{}]*) +([A-Z]*[0-9]+[A-Z]*)[ ]*[:][ ]*([^{}]*)$/, \
      fld)) {
    res = ( res sep \
      " isijournal = {" fld[1] "}\n" \
      " volume = {" fld[2] "}\n" \
      " isipagedate = {" fld[3] "}" \
    );
    sep = "\n";
    return res;
  }
  # Give up:
  if (lin != "") {
    res = ( res sep " isiwhere = {" lin "}" );
    sep = "\n"; lin = "";
  }
  return res;
}

function split_nec_where(lin,   res,fld,sep)
{
  # Splits "necwhere = " field into "necjournal" and "necvolpagedate",
  # after pulling out any URL, year, month, volume/number, tech report
  # number and page range that can be recognized. Each piece removed is
  # temporarily replaced by a marker "@XXX@" so that the surrounding
  # punctuation can be normalized. Whatever cannot be parsed is returned
  # as a "necwhere = {...}" line.

  # Strip initial "[ ]*necwhere = {" and final "}":
  gsub(/^[ ]*necwhere[ ]*[=][ ]*{[ ]*/, "", lin);
  gsub(/[ ]*}[ ]*$/, "", lin);
  # Prepare for parsing:
  split("", fld);
  res = ""; sep = "";
  # Yank out any URLs (before fiddling with puncts!):
  while (match(lin, \
      /^([^{}]*)(http[:]|)([\/]*www[.][^ ,{}]*)[ ]*([^{}]*)$/, \
      fld)) {
    res = ( res sep " url = {{\\url{" fld[2] fld[3] "}}}" );
    sep = "\n";
    lin = ( fld[1] ". " fld[4] );
  }
  # Normalize punctuation:
  lin = remove_extra_punctuation(lin);
  # Yank out the year in parenthesis:
  if (match(lin, \
      /^([^{}]*)[(]([12][09][0-36-9][0-9])[)][ ]*([^{}]*)$/, \
      fld)) {
    res = ( res sep " year = {" fld[2] "}" );
    sep = "\n";
    lin = ( fld[1] ", @YEAR@, " fld[3] );
    lin = remove_extra_punctuation(lin);
  }
  # Yank out the month (in full):
  # NOTE(review): "May" is not recognized here nor in the abbreviated
  # list below -- presumably to avoid confusion with the ordinary word; confirm.
  if (match(lin, \
      /^([^{}]*)[ ,;:.]([Jj]anuary|[Ff]ebruary|[Mm]arch|[Aa]pril|[Jj]une|[Jj]uly|[Aa]ugust|[Ss]eptember|[Oo]ctober|[Nn]ovember|[Dd]ecember)[ ]*[ ,;:.]([^{}]*)$/, \
      fld)) {
    res = ( res sep " month = {" fld[2] "}" );
    sep = "\n";
    lin = ( fld[1] ", @MTH@, " fld[3] );
    lin = remove_extra_punctuation(lin);
  }
  # Yank out the month (abbreviated).
  # "Oct" was missing from this list although "October" is in the full one.
  if (match(lin, \
      /^([^{}]*)[ ,;:.]([Jj]an|[Ff]eb|[Mm]ar|[Aa]pr|[Jj]un|[Jj]ul|[Aa]ug|[Ss]ep[t]*|[Oo]ct|[Nn]ov|[Dd]ec)[ ]*[ ,;:.]([^{}]*)$/, \
      fld)) {
    res = ( res sep " month = {" fld[2] "}" );
    sep = "\n";
    lin = ( fld[1] ", @MTH@, " fld[3] );
    lin = remove_extra_punctuation(lin);
  }
  # Yank out year at end of field:
  if (match(lin, \
      /^([^{}]*)[ ,;:.][ ]*([12][09][0-36-9][0-9])[ ,;.]*$/, \
      fld)) {
    res = ( res sep " year = {" fld[2] "}" );
    sep = "\n";
    lin = ( fld[1] ", @YEAR@, " );
    lin = remove_extra_punctuation(lin);
  }
  # Yank out volume number and issue number:
  if (match(lin, \
      /^([^{}]*)[ ]+([0-9]+)[ ]*[(]([0-9]+)[)][ ]*([^{}]*)$/, \
      fld)) {
    res = ( res sep \
      " volume = {" fld[2] "}\n" \
      " number = {" fld[3] "}" \
    );
    sep = "\n";
    lin = ( fld[1] ", @VOL@, " fld[4] );
    lin = remove_extra_punctuation(lin);
  }
  # Try to recognize technical reports (TR numbers look like page ranges):
  gsub(/ *[Rr]elat[']*[ óo]rio[ ]*[Tt][']*[ ée]c[h]*nico */, " Technical Report ", lin);
  gsub(/ *[Tt]ech(nical|[.]*)[ ]*[Rr]ep(ort|t|[.]*) */, " Technical Report ", lin);
  gsub(/ *[Rr]es(earch|[.]*)[ ]*[Rr]ep(ort|t|[.]*) */, " Research Report ", lin);
  # Drop "No."/"N."/"Num."/"#" after "Report". (Was "[.]**": a doubled "*"
  # is undefined in POSIX EREs, and the trailing blanks were not consumed
  # as they are in the three substitutions above.)
  gsub(/ *Report[ ]+([Nn]o|[Nn]|[Nn]um|[#])[.]*[ ]*/, " Report ", lin);
  if (match(lin, \
      /^[ ]*(Technical[ ]*|Research[ ]*|)Report[ ]+([-A-Z]*[-0-9]+)[ ]*([^{}]*)$/, \
      fld)) {
    res = ( res sep \
      " type = techreport\n" \
      " number = {" fld[2] "}\n" \
      " necinstdate = {" fld[3] "}" \
    );
    sep = "\n";
    lin = "";
  }
  # Yank out page range:
  if (match(lin, \
      /^([^{}]*)(pages|pp[.]*) *([0-9]+)[-]+([0-9]+)[ ]*([^{}]*)$/, \
      fld)) {
    res = ( res sep " pages = {" fld[3] "--" fld[4] "}" );
    sep = "\n";
    lin = ( fld[1] ", @PGS@ , " fld[5] );
    lin = remove_extra_punctuation(lin);
  } else if (match(lin, \
      /^([^{}]*)[ ]+([0-9]+)[-][-]([0-9]+)[ ]*([^{}]*)$/, \
      fld)) {
    res = ( res sep " pages = {" fld[2] "--" fld[3] "}" );
    sep = "\n";
    lin = ( fld[1] ", @PGS@ " fld[4] );
    lin = remove_extra_punctuation(lin);
  } else if (match(lin, \
      /^([^{}]*)[ ]+([0-9]+)[-]([0-9]+)[ ]*$/, \
      fld)) {
    res = ( res sep " pages = {" fld[2] "--" fld[3] "}" );
    sep = "\n";
    lin = ( fld[1] ", @PGS@ " );
    lin = remove_extra_punctuation(lin);
  }
  # No internal delimiters, assume it is a journal/proceedings
  if (match(lin, \
      /^[ .,;:]*([^.,;:(){}]*)[ .,;:]*$/, \
      fld)) {
    res = ( res sep " necjournal = {" fld[1] "}" );
    sep = "\n";
    lin = "";
  }
  # First phrase delimited by comma or semicolon is probably journal name:
  if (match(lin, \
      /^([^,;{}]*[a-zA-Z][.() ]*)[,;][ ]*([^{}]*)$/, \
      fld)) {
    res = ( res sep \
      " necjournal = {" fld[1] "}\n" \
      " necvolpagedate = {" fld[2] "}" \
    );
    sep = "\n";
    lin = "";
  }
  # Remove markers of removed fields:
  gsub(/[@][A-Z]+[@]/, " ", lin);
  lin = remove_extra_punctuation(lin);
  # Give up:
  if (lin != "") {
    res = ( res sep " necwhere = {" lin "}" );
    sep = "\n"; lin = "";
  }
  return res;
}

function remove_extra_punctuation(lin)
{
  # Remove multiple punctuation, leave only the strongest:
  # However, leave a comma after a period:
  gsub(/[ ,;:]*[.][ ,;:.]*[.]/, ". ", lin);
  gsub(/[ ,;:]*[.][ ,;:]*[:][ ,;]*/, ".: ", lin);
  gsub(/[ ,;:]*[.][ ,;]*[;][ ,]*/, ".; ", lin);
  gsub(/[ ,;:]*[.][ ,]*[,][ ]*/, "., ", lin);
  gsub(/[ ,;:]*[.][ ]*/, ". ", lin);
  gsub(/[ ,;]*[:][ ,;:]*/, ": ", lin);
  gsub(/[ ,]*[;][ ,;]*/, "; ", lin);
  gsub(/[ ]*[,][ ,]*/, ", ", lin);
  # Remove puncts after bol, open brace, parenthesis:
  lin = gensub(/(^|[{(])[- ,;:.]+/, "\\1", "g", lin);
  # Remove puncts (except ".") before close brace, parenthesis, eol:
  lin = gensub(/[- ,;:]+([)}]|$)/, "\\1", "g", lin);
  return lin;
}

function fix_isi_journal_name(lin,   tmp,hits)
{
  # Expands an abbreviated ISI journal name in an "isijournal = {...}" line.
  # Lecture-notes series are turned into "booktitle"/"series"/"publisher"
  # lines; any other recognized journal becomes a "journal = {...}" line.
  # Unrecognized journals are returned unchanged (still "isijournal").

  if (match(lin,/{LECT NOTES ARTIF INT}/)) {
    # Things in LNAI are usually proceedings
    lin = \
      ( " booktitle = {??}\n" \
        " series = {Lecture Notes in Artificial Intelligence}\n" \
        " publisher = {Springer}" \
      );
  } else if (match(lin,/{LECT NOTES COMPUT SC}/)) {
    # Things in LNCS are usually proceedings
    lin = \
      ( " booktitle = {??}\n" \
        " series = {Lecture Notes in Computer Science}\n" \
        " publisher = {Springer}" \
      );
  } else {
    tmp = lin;
    # Map journal names. {hits} counts the patterns that matched, so that
    # names which map to themselves (e.g. "BIT") are still recognized as
    # known journals; comparing {tmp} with {lin} would miss them.
    hits = 0;
    hits += sub(/{ACM COMPUT SURV}/, "{ACM Computing Surveys}", tmp);
    hits += sub(/{ACM SIGPLAN NOTICES}/, "{ACM SIGPLAN Notices}", tmp);
    hits += sub(/{ACM T GRAPHIC}/, "{ACM Transactions on Graphics}", tmp);
    hits += sub(/{ACM T MATH SOFTWARE}/, "{ACM Transactions on Mathematical Software}", tmp);
    hits += sub(/{ACTA INFORM}/, "{Acta Informatica}", tmp);
    hits += sub(/{ALGORITHMICA}/, "{Algorithmica}", tmp);
    hits += sub(/{ANN OPER RES}/, "{Annals of Operations Research}", tmp);
    hits += sub(/{ARS COMBINATORIA}/, "{Ars Combinatoria}", tmp);
    hits += sub(/{ARTIF INTELL}/, "{Artificial Intelligence}", tmp);
    hits += sub(/{ASTRON ASTROPHYS}/, "{Astronomy and Astrophysics}", tmp);
    hits += sub(/{BIT}/, "{BIT}", tmp);
    hits += sub(/{COMP GEOM-THEOR APPL}/, "{Computational Geometry - Theory and Applications}", tmp);
    hits += sub(/{COMPUT AIDED DESIGN}/, "{Computer Aided Design}", tmp);
    hits += sub(/{COMPUT AIDED GEOM D}/, "{Computer Aided Geometric Design}", tmp);
    hits += sub(/{COMPUT GRAPH FORUM}/, "{Computer Graphics Forum}", tmp);
    # NOTE(review): in a sub() replacement "\\&" yields a bare "&", so the
    # output is "{&}" rather than "{\&}" -- confirm which one is wanted.
    hits += sub(/{COMPUT GRAPH}/, "{Computers {\\&} Graphics}", tmp);
    hits += sub(/{COMPUT VIS IMAGE UND}/, "{Computer Vision and Image Understanding}", tmp);
    hits += sub(/{COMPUT VISION GRAPH}/, "{Computer Vision and Graphics}", tmp);
    hits += sub(/{DISCRETE COMPUT GEOM}/, "{Discrete and Computational Geometry}", tmp);
    hits += sub(/{IEEE COMPUT GRAPH}/, "{IEEE Computer Graphics}", tmp);
    hits += sub(/{IEEE T CIRCUITS-I}/, "{IEEE Trans. on Circuits - I}", tmp);
    hits += sub(/{IEEE T COMPUT AID D}/, "{IEEE Trans. on Computer Aided Design}", tmp);
    hits += sub(/{IEEE T COMPUT}/, "{IEEE Trans. on Computers}", tmp);
    hits += sub(/{IEEE T EDUC}/, "{IEEE Trans. on Education}", tmp);
    hits += sub(/{IEEE T NEURAL NETWOR}/, "{IEEE Trans. on Neural Networks}", tmp);
    hits += sub(/{IEEE T PATTERN ANAL}/, "{IEEE Trans. on Pattern Analysis and Machine Intelligence}", tmp);
    hits += sub(/{INFORM PROCESS LETT}/, "{Information Processing Letters}", tmp);
    hits += sub(/{INT J COMPUT GEOM AP}/, "{Int. J. of Computational Geometry and Applications}", tmp);
    hits += sub(/{INT J ROBOT RES}/, "{Int. J. of Robot Research}", tmp);
    hits += sub(/{J ACM}/, "{J. of the ACM}", tmp);
    hits += sub(/{J ALGORITHM}/, "{J. of Algorithms}", tmp);
    hits += sub(/{J APPROX THEORY}/, "{J. of Approximation Theory}", tmp);
    hits += sub(/{J COMPUT PHYS}/, "{J. of Computational Physics}", tmp);
    hits += sub(/{J GRAPH THEOR}/, "{J. of Graph Theory}", tmp);
    hits += sub(/{J SYMB COMPUT}/, "{J. of Symbolic Computation}", tmp);
    hits += sub(/{KYBERNETES}/, "{Kybernetes}", tmp);
    hits += sub(/{Neural Networks}/, "{Neural Networks}", tmp);
    hits += sub(/{NUCL INSTRUM METH A}/, "{Nuclear Instrumentation Methods - A}", tmp);
    hits += sub(/{OPER RES}/, "{Operations Research}", tmp);
    hits += sub(/{P IEEE}/, "{Proceedings of the IEEE}", tmp);
    hits += sub(/{PATTERN RECOGN LETT}/, "{Pattern Recognition Letters}", tmp);
    hits += sub(/{PATTERN RECOGN}/, "{Pattern Recognition}", tmp);
    hits += sub(/{SIAM J COMPUT}/, "{SIAM J. on Computing}", tmp);
    hits += sub(/{SIAM J NUMER ANAL}/, "{SIAM J. on Numerical Analysis}", tmp);
    hits += sub(/{THEOR COMPUT SCI}/, "{Theoretical Computer Science}", tmp);
    hits += sub(/{VISUAL COMPUT}/, "{Visual Computer}", tmp);

    # To be checked:
    # ? sub(/{ACM T PROGR LANG SYS}/, "{ACM Transactions on Programming Languages and Systems}", tmp);
    # ? sub(/{ACTA APPL MATH}/, "{}", tmp);
    # ? sub(/{ADV ENG SOFTW}/, "{Advances in Engineering Software}", tmp);
    # ? sub(/{ADV IMAG ELECT PHYS}/, "{Advances in Imaging and Electron Physics}", tmp);
    # ? sub(/{ADV MATH}/, "{Advances in Mathematics}", tmp);
    # ? sub(/{ANNU REV COMPUT SCI}/, "{Annual Review of Computer Science}", tmp);
    # ? sub(/{APPL MATH LETT}/, "{Applied Mathematics Letters}", tmp);
    # ? sub(/{CELL PROLIFERAT}/, "{Cell Proliferation}", tmp);
    # ? sub(/{CEREB CORTEX}/, "{Cerebral Cortex}", tmp);
    # ? sub(/{COMMUN APPL NUMER M}/, "{Communications in Applied Numerical Methods}", tmp);
    # ? sub(/{COMPUT BIOMED RES}/, "{Computers and Biomedical Research}", tmp);
    # ? sub(/{COMPUT GEOSCI-UK}/, "{Computational Geosciences - UK}", tmp);
    # ? sub(/{COMPUT GEOSCI}/, "{Computational Geosciences}", tmp);
    # ? sub(/{COMPUT GRAPH-UK}/, "{Computers and Graphics - UK}", tmp);
    # ? sub(/{COMPUT GRAPH}/, "{Computer Graphics}", tmp);
    # ? sub(/{COMPUT IND}/, "{}", tmp);
    # ? sub(/{COMPUT MATH APPL}/, "{Computational Mathematics and Applications}", tmp);
    # ? sub(/{COMPUT NETWORKS ISDN}/, "{Computer Networks ISDN}", tmp);
    # ? sub(/{COMPUT STRUCT}/, "{Computer Structures}", tmp);
    # ? sub(/{COMPUTING}/, "{Computing}", tmp);
    # ? sub(/{CR ACAD SCI I-MATH}/, "{}", tmp);
    # ? sub(/{CVGIP-GRAPH MODEL IM}/, "{CVGIP - Graph Models in Image Processing}", tmp);
    # ? sub(/{DISCRETE APPL MATH}/, "{Discrete Applied Mathematics}", tmp);
    # ? sub(/{DISCRETE COMPUT GEOM}/, "{Discrete and Computational Geometry}", tmp);
    # ? sub(/{ECOL MODEL}/, "{Ecological Modeling}", tmp);
    # ? sub(/{ELECTROPHORESIS}/, "{Electrophoresis}", tmp);
    # ? sub(/{FRACTALS}/, "{Fractals}", tmp);
    # ? sub(/{FUNCT ANAL APPL+}/, "{Functional Analysis and Applications}", tmp);
    # ? sub(/{FUZZY SET SYST}/, "{Fuzzy Set Theory}", tmp);
    # ? sub(/{GRAPH MODEL IM PROC}/, "{Graph Models in Image Processing}", tmp);
    # ? sub(/{GRAPH MODELS}/, "{Graph Models}", tmp);
    # ? sub(/{IEEE T CIRC SYST VID}/, "{IEEE Trans. on Circuits and Systems - Video}", tmp);
    # ? sub(/{IEEE T DIELECT EL IN}/, "{IEEE Trans. on Dielectrics and Electrical Insulation}", tmp);
    # ? sub(/{IEEE T GEOSCI REMOTE}/, "{IEEE Trans. on Geoscience and Remote Sensing}", tmp);
    # ? sub(/{IEEE T PATTERN ANAL}/, "{IEEE Trans. on Pattern Analysis and Machine Intelligence}", tmp);
    # ? sub(/{IEEE T ROBOTIC AUTOM}/, "{IEEE Trans. on Robotics and Automation}", tmp);
    # ? sub(/{IEEE T VIS COMPUT GR}/, "{IEEE Trans. on Visualization and Computer Graphics}", tmp);
    # ? sub(/{IEICE T FUND ELECTR}/, "{IEICE Transactions on Fundamental Electronics}", tmp);
    # ? sub(/{IEICE T INF SYST}/, "{IEICE Transactions on Information Systems}", tmp);
    # ? sub(/{IETE TECH REV}/, "{IETE Technical Review}", tmp);
    # ? sub(/{INT J BIFURCAT CHAOS}/, "{Int. J. on Bifurcation and Chaos}", tmp);
    # ? sub(/{INT J COMPUT VISION}/, "{Int. J. of Computer Vision}", tmp);
    # ? sub(/{INT J GEOGR INF SCI}/, "{Int. J. on Geographical Information Science}", tmp);
    # ? sub(/{INT J NUMER METH ENG}/, "{Int. J. of Numerical Methods in Engineering}", tmp);
    # ? sub(/{INT J ROBUST NONLIN}/, "{Int. J. of Robust Nonlinear Systems}", tmp);
    # ? sub(/{ISPRS J PHOTOGRAMM}/, "{ISPRS J. of Photogrammetry}", tmp);
    # ? sub(/{J CHEM INF COMP SCI}/, "{J. of Chemical Information and Computation Science}", tmp);
    # ? sub(/{J COMPUT SYST SCI}/, "{J. of Computer and Systems Science}", tmp);
    # ? sub(/{J EXP THEOR ARTIF IN}/, "{J. of Experimental and Thoretical Artifical Intelligence}", tmp);
    # ? sub(/{J FUNCT PROGRAM}/, "{J. of Functional Programming}", tmp);
    # ? sub(/{J GUID CONTROL DYNAM}/, "{J. of Guidance and Control Dynamics}", tmp);
    # ? sub(/{J MATER PROCESS TECH}/, "{J. of Material Processing Techniques}", tmp);
    # ? sub(/{J NEUROSCI}/, "{J. of Neuroscience}", tmp);
    # ? sub(/{J SURV ENG-ASCE}/, "{}", tmp);
    # ? sub(/{J VIS COMMUN IMAGE R}/, "{J. of Visual Communication and Imaging Research}", tmp);
    # ? sub(/{J VISUAL COMP ANIMAT}/, "{J. of Visualization and Computing Animation}", tmp);
    # ? sub(/{J WUHAN UNIV TECHNOL}/, "{J. of the Wuhan Univ. of Technology}", tmp);
    # ? sub(/{MACH VISION APPL}/, "{Machine Vision and Applications}", tmp);
    # ? sub(/{MATER STRUCT}/, "{Material Structure}", tmp);
    # ? sub(/{MATH GEOL}/, "{Mathematical Geology}", tmp);
    # ? sub(/{NETWORKS}/, "{Networks}", tmp);
    # ? sub(/{NEURAL NETWORKS}/, "{Neural Networks}", tmp);
    # ? sub(/{P I MECH ENG B-J ENG}/, "{Proc. of the Inst. of Mechanical Engineers B - J. Engineering}", tmp);
    # ? sub(/{PHILOS MAG B}/, "{Philosophical Magazine - B}", tmp);
    # ? sub(/{PHILOS T ROY SOC B}/, "{Philosophical Trans. of the Royal Society - B}", tmp);
    # ? sub(/{PROG NAT SCI}/, "{Progress in Natural Sciences}", tmp);
    # ? sub(/{Physica D}/, "{Physica - D}", tmp);
    # ? sub(/{RAIRO-AUTOM PROD INF}/, "{RAIRO - Automation et Prod. Informatique}", tmp);
    # ? sub(/{RAIRO-INF THEOR APPL}/, "{RAIRO - Informatique Theorique et Appliquée}", tmp);
    # ? sub(/{RAPID PROTOTYPING J}/, "{Rapid Prototyping J.}", tmp);
    # ? sub(/{REAL-TIME IMAGING}/, "{Real-Time Imaging}", tmp);
    # ? sub(/{SIAM J SCI STAT COMP}/, "{SIAM J. on Scific Statistics and Computing}", tmp);
    # ? sub(/{SIGNAL PROCESS-IMAGE}/, "{Signal Processing - Image}", tmp);
    # ? sub(/{STAT COMPUT}/, "{Statistical Computing}", tmp);
    # ? sub(/{VISION RES}/, "{Vision Research}", tmp);

    if (hits > 0) {
      # Known journal: the field is now in Bibtex form.
      sub(/ isijournal[ ]*[=][ ]*/, " journal = ", tmp);
      lin = tmp;
    }
  }
  return lin;
}

function split_isi_pagedate(lin,   imo,res,fld,sep)
{
  # Splits "isipagedate" field into "month", "year", and "pages"
  # (and "day" when present). Months are emitted without braces, as
  # Bibtex predefined strings. If the contents cannot be parsed,
  # returns an "isipagedate = {...}" line again.

  # Strip initial "[ ]*isipagedate = {" and final "}":
  gsub(/^[ ]*isipagedate[ ]*[=][ ]*{[ ]*/, "", lin);
  gsub(/[ ]*}[ ]*$/, "", lin);
  # Fix ISI notation for unknown final page:
  lin = gensub(/[-][&][ ]/, "-?? ", "g", lin);
  # Prepare for parsing:
  split("", fld);
  res = ""; sep = "";
  # Variant with pages, month, day, and year:
  if (match(lin, \
      /^([A-Z]?[0-9?]+)[-]([A-Z]?[0-9?]+) +([JFMASOND][A-Z][A-Z]) +([0-3]?[0-9]) +([12][089][0-9][0-9])$/, \
      fld)) {
    res = ( res sep \
      " day = {" fld[4] "}\n" \
      " month = " convert_isi_month(fld[3]) "\n" \
      " year = {" fld[5] "}\n" \
      " pages = {" fld[1] "--" fld[2] "}" \
    );
    sep = "\n";
    return res;
  }
  # Variant with pages, month, year:
  if (match(lin, \
      /^([A-Z]?[0-9?]+)[-]([A-Z]?[0-9?]+) +([JFMASOND][A-Z][A-Z]) +([12][089][0-9][0-9])$/, \
      fld)) {
    res = ( res sep \
      " month = " convert_isi_month(fld[3]) "\n" \
      " year = {" fld[4] "}\n" \
      " pages = {" fld[1] "--" fld[2] "}" \
    );
    sep = "\n";
    return res;
  }
  # Variant with pages, month range, year:
  if (match(lin, \
      /^([A-Z]?[0-9?]+)[-]([A-Z]?[0-9?]+) +([JFMASOND][A-Z][A-Z][-][JFMASOND][A-Z][A-Z]) +([12][089][0-9][0-9])$/, \
      fld)) {
    gsub(/[-]/, "--", fld[3]);
    res = ( res sep \
      " month = " convert_isi_month(fld[3]) "\n" \
      " year = {" fld[4] "}\n" \
      " pages = {" fld[1] "--" fld[2] "}" \
    );
    sep = "\n";
    return res;
  }
  # Variant with pages and year:
  if (match(lin, \
      /^([A-Z]?[0-9?]+)[-]([A-Z]?[0-9?]+) +([12][089][0-9][0-9])$/, \
      fld)) {
    res = ( res sep \
      " year = {" fld[3] "}\n" \
      " pages = {" fld[1] "--" fld[2] "}" \
    );
    sep = "\n";
    return res;
  }
  # Variant with month, day, and year:
  if (match(lin, \
      /^([JFMASOND][A-Z][A-Z]) +([0-3]?[0-9]) +([12][089][0-9][0-9])$/, \
      fld)) {
    res = ( res sep \
      " day = {" fld[2] "}\n" \
      " month = " convert_isi_month(fld[1]) "\n" \
      " year = {" fld[3] "}" \
    );
    sep = "\n";
    return res;
  }
  # Variant with month and year:
  if (match(lin, \
      /^([JFMASOND][A-Z][A-Z]) +([12][089][0-9][0-9])$/, \
      fld)) {
    res = ( res sep \
      " month = " convert_isi_month(fld[1]) "\n" \
      " year = {" fld[2] "}" \
    );
    sep = "\n";
    return res;
  }
  # Variant with month range and year.
  # The month is emitted without braces, like in every other variant
  # (it used to be the only one written as "month = {...}").
  if (match(lin, \
      /^([JFMASOND][A-Z][A-Z][-][JFMASOND][A-Z][A-Z]) +([12][089][0-9][0-9])$/, \
      fld)) {
    gsub(/[-]/, "--", fld[1]);
    res = ( res sep \
      " month = " convert_isi_month(fld[1]) "\n" \
      " year = {" fld[2] "}" \
    );
    sep = "\n";
    return res;
  }
  # Variant with year only:
  if (match(lin, \
      /^([12][089][0-9][0-9])$/, \
      fld)) {
    res = ( res sep \
      " year = {" fld[1] "}" \
    );
    sep = "\n";
    return res;
  }
  # Give up:
  if (lin != "") {
    res = ( res sep " isipagedate = {" lin "}" );
    sep = "\n"; lin = "";
  }
  return res;
}

function convert_isi_month(m)
{
  # Converts month from ISI format to Bibtex format:
  return tolower(m);
}

function data_warning(msg)
{
  # Reports a non-fatal problem with the current input line on stderr.
  printf "%s:%d: ++ Warning: %s\n", FILENAME, FNR, msg > "/dev/stderr";
  printf " $0 = \"%s\"\n", $0 > "/dev/stderr";
}

function data_error(msg)
{
  # Reports a fatal problem with the current input line on stderr and
  # terminates the program with a nonzero status. Does not return.
  printf "%s:%d: ** %s\n", FILENAME, FNR, msg > "/dev/stderr";
  printf " $0 = \"%s\"\n", $0 > "/dev/stderr";
  # Must be non-negative, or the "abort >= 0" tests in the main rule and
  # in END never fire (it used to be -1, the "no error" value).
  abort = 1;
  exit abort;
}