#! /usr/bin/python -t
# _*_ coding: iso-8859-1 _*_
# Last edited on 2009-07-07 23:14:23 by stolfi

MODULE_NAME = "mformula_parse"
MODULE_DESC = "Functions to parse a molecular formula (an {MFormula}) from stdin"
MODULE_VERS = "1.0"

import sys
import re
import math
from math import sqrt,sin,cos

# sys.path[1:0] = [ sys.path[0] + '/../lib', os.path.expandvars('${STOLFIHOME}/lib'), '.' ]
# sys.stderr.write("%s.py: path = %r\n" % (MODULE_NAME, sys.path));

# {argparser} supplies the standard help-text fragments used in {MODULE_INFO}
# below; it was referenced without being imported.
import argparser
import mformula

# Non-ASCII characters in string literals are written as escapes so that the
# runtime text does not depend on the encoding this file is saved in:
# "\xa9" is the copyright sign, "\xab" and "\xbb" are the angle quotes.
MODULE_COPYRIGHT = "Copyright \xa9 2009-07-06 by the State University of Campinas (UNICAMP)"

MODULE_INFO = (
    "NAME\n"
    " " + MODULE_NAME + " - " + MODULE_DESC + ".\n"
    "\n"
    "DESCRIPTION\n"
    " " + MODULE_DESC + ".\n"
    "AUTHOR\n"
    " Created 2009-07-06 by Jorge Stolfi, IC-UNICAMP.\n"
    "\n"
    "MODIFICATION HISTORY\n"
    " 2009-07-06 by J. Stolfi, IC-UNICAMP: created.\n"
    "\n"
    "WARRANTY\n"
    " " + argparser.help_info_NO_WARRANTY + "\n"
    "\n"
    "RIGHTS\n"
    " " + MODULE_COPYRIGHT + ".\n"
    "\n"
    " " + argparser.help_info_STANDARD_RIGHTS
)

# Patterns for the lines of the input file.  Fields are separated by blanks
# (spaces only, not tabs).
_PARAM_LINE = re.compile(r"^[ ]*(\S+)[ ]*[=][ ]*(\S.*)$")
_ELEM_LINE = re.compile(r"^[ ]*(\d+)[ ]*(\S+)[ ]*([.0-9]+)[ ]*([.0-9]+)[ ]*([.0-9]+)[ ]*([.0-9]+)[ ]*([.0-9]+)[ ]*$")
_ATOM_LINE = re.compile(r"^[ ]*(\d+)[ ]*(\S+)[ ]*([-+.0-9]+)[ ]*([-+.0-9]+)[ ]*$")
_BOND_LINE = re.compile(r"^[ ]*(\d+)[ ]*([0-9]+)[ ]*([0-9]+)[ ]*([.0-9]+)[ ]*$")
_BLANK_OR_COMMENT = re.compile(r"^[ ]*([#].*|)$")

# Parameters whose value is a single float.
_FLOAT_PARAMS = ('fontheight', 'atomradius', 'bondlength', 'linewidth', 'ringwidth')

# Parameters that must all be present, in the order in which they are checked.
_REQUIRED_PARAMS = (
    'fontheight', 'bondlength', 'atomradius', 'linewidth', 'ringwidth',
    'nelems', 'natoms', 'nbonds',
)

# Number of lines read from stdin so far; used by {data_error} to locate errors.
_lines_read = 0


def _next_line():
    "Reads the next line from stdin ('' at end of file), counting it in {_lines_read}."
    global _lines_read
    lin = sys.stdin.readline()
    if lin != '':
        _lines_read = _lines_read + 1
    return lin

#----------------------------------------------------------------------

def parse_input():
    """Parses the input file (read from stdin).

    Returns a {MFormula} instance {fm}.  If the input is invalid, the
    procedure reports the problem through {data_error}, which aborts the
    program; so a returned {fm} always has {fm.err} unset.

    Every line read is echoed to stderr, prefixed with '| '.  Blank lines
    and lines whose first non-blank character is '#' are ignored.  Lines
    that are neither data lines nor of the form '{TAG} = {VALUE}' are
    ignored too.

    INPUT FILE FORMAT
      The input file describes the molecular diagram, as follows.
      First, some general parameters (all of them are mandatory):

        fontheight = {FONT_HEIGHT_PX}
        atomradius = {ATOM_RADIUS_PX}
        bondlength = {BOND_LENGTH_PX}
        linewidth = {LINE_WIDTH}
        ringwidth = {RING_WIDTH}

      Then, the number of chemical species (element) that will be used:

        nelems = {NUM_CHEMICAL_SPECIES}

      Then, one line for each chemical species, in the format:

        {IORD} {ELEM_SYMBOL} {RADIUS} {COLOR_R} {COLOR_G} {COLOR_B} {SHOW_SYMBOL}

      Then, the number of atoms in the formula:

        natoms = {NUM_ATOMS}

      Then, one line for each atom:

        {IORD} {ELEM_SYMBOL} {CTR_X} {CTR_Y}

      These lines are followed by the number of chemical bonds in the formula:

        nbonds = {NUM_BONDS}

      Then, one line for each chemical bond:

        {IORD} {ATOM_1_INDEX} {ATOM_2_INDEX} {VALENCY}

      In each group, {IORD} is the index of the line, counting from 0.

      The {RADIUS} will be multiplied by {ATOM_RADIUS_PX} to get
      the actual atom radius.  The {SHOW_SYMBOL} is a flag (0 or 1).

      The coordinates {CTR_X,CTR_Y} will be multiplied by the {BOND_LENGTH_PX}
      parameter.  The origin is assumed to be at the LOWER left, and the Y
      axis points up.

      The {VALENCY} is 1, 2, or 3 for covalent bonds (solid lines), and 0.5, 1.5, or
      2.5 for weak or aromatic bonds (the '.5' stands for a dashed line).
    """
    # The class lives in the {mformula} module; the bare name {MFormula}
    # is not in scope after 'import mformula'.
    fm = mformula.MFormula()

    lin = _next_line()
    while (lin != '') and (not fm.err):
        lin = strip_comment(lin)
        sys.stderr.write("| %s\n" % lin)
        if lin != '':
            mt = _PARAM_LINE.match(lin)
            if mt:
                _parse_param(fm, mt.group(1), mt.group(2))
        lin = _next_line()

    # Check for missing parameters only if parsing itself succeeded, so that
    # a specific parsing error is not replaced by a "missing" message.
    # NOTE(review): the truth test also rejects a parameter that was given
    # as 0 (e.g. 'nbonds = 0'); kept as in the original because the default
    # values set by {MFormula} are not visible here.
    if not fm.err:
        for tag in _REQUIRED_PARAMS:
            if not getattr(fm, tag):
                fm.err = "missing '%s =' " % tag
                break

    if fm.err:
        data_error(fm.err)
    return fm


def _parse_param(fm, tag, val):
    "Stores into {fm} the parameter {tag} whose value is the string {val}." \
    " For 'nelems', 'natoms' and 'nbonds', also reads the data lines that" \
    " follow.  Sets {fm.err} if anything goes wrong."
    section_readers = {
        'nelems': parse_input_elems,
        'natoms': parse_input_atoms,
        'nbonds': parse_input_bonds,
    }
    if tag in _FLOAT_PARAMS:
        convert = float
    elif tag in section_readers:
        convert = int
    else:
        fm.err = "unrecognized parameter " + tag
        return
    try:
        setattr(fm, tag, convert(val))
    except ValueError:
        # Report a bad number as a data error instead of a traceback.
        fm.err = "invalid value for '%s': %s" % (tag, val)
        return
    if tag in section_readers:
        section_readers[tag](fm)

#----------------------------------------------------------------------

def _parse_data_lines(fm, count, pattern, what, store):
    "Reads from stdin {count} data lines that match {pattern}, skipping" \
    " blank and comment lines.  Line number {i} (from 0) must have" \
    " {i} as its first field; it is handed to {store(i, mt)}, where {mt}" \
    " is the match object.  The string {what} names the kind of line in" \
    " error messages.  Sets {fm.err} on any failure."
    i = 0
    # A line is read only when one is still needed, so the line that
    # follows the last data line is left for the caller to read.
    while (i < count) and (not fm.err):
        lin = _next_line()
        if lin == '':
            break  # End of file.
        lin = strip_comment(lin)
        sys.stderr.write("| %s\n" % lin)
        if lin == '':
            continue
        mt = pattern.match(lin)
        if not mt:
            fm.err = "malformed %s data line \xab%s\xbb" % (what, lin)
        elif int(mt.group(1)) != i:
            fm.err = "%s data line sequence error: %s != %d" % (what, mt.group(1), i)
        else:
            try:
                store(i, mt)
            except ValueError:
                # The patterns accept strings such as '1.2.3' that are
                # not valid numbers.
                fm.err = "malformed %s data line \xab%s\xbb" % (what, lin)
            else:
                i = i + 1
    # Do not replace a more specific error message set above.
    if (i != count) and (not fm.err):
        fm.err = "not enough %ss" % what


def parse_input_elems(fm):
    "Reads the {fm.nelems} chemical species lines from stdin, storing them" \
    " in {fm.elem_symbol}, {fm.elem_radius}, {fm.elem_color} and" \
    " {fm.elem_show_symbol}.  Sets {fm.err} on failure."

    def store(i, mt):
        # Convert everything first, so that a bad number stores nothing.
        radius = float(mt.group(3))
        color = [ float(mt.group(4)), float(mt.group(5)), float(mt.group(6)) ]
        show_symbol = int(mt.group(7))
        fm.elem_symbol[i:i] = [ mt.group(2) ]
        fm.elem_radius[i:i] = [ radius ]
        fm.elem_color[i:i] = [ color ]
        fm.elem_show_symbol[i:i] = [ show_symbol ]

    _parse_data_lines(fm, fm.nelems, _ELEM_LINE, "elem", store)

#----------------------------------------------------------------------

def parse_input_atoms(fm):
    "Reads the {fm.natoms} atom lines from stdin, storing them in" \
    " {fm.atom_symbol} and {fm.atom_center}.  Sets {fm.err} on failure."

    def store(i, mt):
        center = [ float(mt.group(3)), float(mt.group(4)) ]
        fm.atom_symbol[i:i] = [ mt.group(2) ]
        fm.atom_center[i:i] = [ center ]

    _parse_data_lines(fm, fm.natoms, _ATOM_LINE, "atom", store)

#----------------------------------------------------------------------

def parse_input_bonds(fm):
    "Reads the {fm.nbonds} chemical bond lines from stdin, storing them in" \
    " {fm.bond_atom_1_index}, {fm.bond_atom_2_index} and {fm.bond_valency}." \
    " Sets {fm.err} on failure."

    def store(i, mt):
        atom_1 = int(mt.group(2))
        atom_2 = int(mt.group(3))
        valency = float(mt.group(4))
        fm.bond_atom_1_index[i:i] = [ atom_1 ]
        fm.bond_atom_2_index[i:i] = [ atom_2 ]
        fm.bond_valency[i:i] = [ valency ]

    _parse_data_lines(fm, fm.nbonds, _BOND_LINE, "bond", store)

#----------------------------------------------------------------------

def strip_comment(lin):
    "Removes trailing whitespace from {lin}.  Returns the empty string if" \
    " what remains is blank or is a whole-line comment (first non-blank" \
    " character is '#').  A '#' that follows data on the same line is" \
    " NOT treated as a comment; such a line is returned unchanged."
    lin = lin.rstrip()
    return _BLANK_OR_COMMENT.sub("", lin, count=1)

#----------------------------------------------------------------------

def data_error(msg):
    "Prints the error message {msg} about the input data file, prefixed" \
    " with the number of lines read from stdin so far, and aborts with" \
    " exit status 1."
    # The count of lines read is used instead of {sys.stdin.tell()}, which
    # is a byte offset and fails when stdin is a pipe or a terminal.
    sys.stderr.write("stdin:%d: %s\n" % (_lines_read, msg))
    sys.exit(1)

#----------------------------------------------------------------------