#! /usr/bin/python -t 
# _*_ coding: iso-8859-1 _*_
# Last edited on 2009-07-07 23:14:23 by stolfi

# Identification strings for this module.  {MODULE_NAME} and {MODULE_DESC}
# are interpolated into the help text {MODULE_INFO} further down;
# {MODULE_VERS} is not referenced anywhere else in this file.
MODULE_NAME = "mformula_parse"
MODULE_DESC = "Functions to parse a molecular formula (an {MFormula}) from stdin"
MODULE_VERS = "1.0"

import math
import re
import sys
from math import sqrt,sin,cos
# sys.path[1:0] = [ sys.path[0] + '/../lib', os.path.expandvars('${STOLFIHOME}/lib'), '.' ] 
# sys.stderr.write("%s.py: path = %r\n" % (MODULE_NAME, sys.path));
import mformula

# Copyright notice.  The copyright sign is written as the escape '\xa9' so
# that the string does not depend on how this source file is decoded.
MODULE_COPYRIGHT = "Copyright \xa9 2009-07-06 by the State University of Campinas (UNICAMP)"

# Help text for this module, laid out as a sequence of sections
# (NAME, DESCRIPTION, AUTHOR, ...) separated by blank lines.
#
# NOTE(review): {argparser} is used below for the standard WARRANTY and
# RIGHTS paragraphs, but no import of it is visible in this file; unless
# the name is provided some other way, this assignment raises {NameError}
# when the module is loaded.  Confirm and add the missing import.
MODULE_INFO = \
  "NAME\n" \
  "  " + MODULE_NAME + " - " + MODULE_DESC + ".\n" \
  "\n" \
  "DESCRIPTION\n" \
  "  " +MODULE_DESC+ ".\n" \
  "\n" \
  "AUTHOR\n" \
  "  Created 2009-07-06 by Jorge Stolfi, IC-UNICAMP.\n" \
  "\n" \
  "MODIFICATION HISTORY\n" \
  "  2009-07-06 by J. Stolfi, IC-UNICAMP: created.\n" \
  "\n" \
  "WARRANTY\n" \
  "  " +argparser.help_info_NO_WARRANTY+ "\n" \
  "\n" \
  "RIGHTS\n" \
  "  " +MODULE_COPYRIGHT+ ".\n" \
  "\n" \
  "  " +argparser.help_info_STANDARD_RIGHTS

def parse_input() :
  """Parses a molecular formula description from {sys.stdin}.

  Returns an {MFormula} instance {fm} filled with the data that was read.
  If any problem is found (bad line, bad value, missing parameter), the
  first error message is stored in {fm.err} and passed to {data_error},
  which prints it and aborts the program.

INPUT FILE FORMAT
  The input file describes the molecular diagram, as follows.  Blank
  lines, and lines whose first non-blank character is '#', are ignored.
  Parameter lines have the form '{TAG} = {VALUE}'; this parser accepts
  them in any order, but the conventional order is the one below.
  Other non-blank lines at this level are silently skipped.

  First, some general parameters:

    fontheight = {FONT_HEIGHT_PX}
    atomradius = {ATOM_RADIUS_PX}
    bondlength = {BOND_LENGTH_PX}
    linewidth = {LINE_WIDTH}
    ringwidth = {RING_WIDTH}

  Then, the number of chemical species (elements) that will be used:

    nelems = {NUM_CHEMICAL_SPECIES}

  Then, one line for each chemical species, in the format:

      {IORD} {ELEM_SYMBOL} {RADIUS} {COLOR_R} {COLOR_G} {COLOR_B} {SHOW_SYMBOL}

  Then, the number of atoms in the formula:

    natoms = {NUM_ATOMS}

  Then, one line for each atom:

      {IORD} {ELEM_SYMBOL} {CTR_X} {CTR_Y}

  These lines are followed by the number of chemical bonds in the formula:

    nbonds = {NUM_BONDS}

  Then, one line for each chemical bond:

      {IORD} {ATOM_1_INDEX} {ATOM_2_INDEX} {VALENCY}

  The {RADIUS} is meant to be multiplied by {ATOM_RADIUS_PX} to get
  the actual atom radius.  The {SHOW_SYMBOL} is a flag (0 or 1).

  The coordinates {CTR_X,CTR_Y} are meant to be multiplied by the
  {BOND_LENGTH_PX} parameter.  The origin is assumed to be at the LOWER
  left, and the Y axis points up.  (Neither scaling is done by this
  parser; the values are stored as read.)

  The {VALENCY} is 1, 2, or 3 for covalent bonds (solid lines), and 0.5,
  1.5, or 2.5 for weak or aromatic bonds (the '.5' stands for a dashed
  line)."""

  # NOTE(review): the original called a bare {MFormula()}, a name that is
  # never bound in this file; the class is presumably the one defined in
  # the imported {mformula} module -- confirm.
  fm = mformula.MFormula()
  PAR = re.compile(r"^[ ]*(\S+)[ ]*[=][ ]*(\S.*)$")

  # Tags whose value is a plain float stored in the attribute of the same name.
  float_tags = ('fontheight', 'atomradius', 'bondlength', 'linewidth', 'ringwidth')
  # Tags whose value is a record count, followed by that many data lines.
  table_parsers = {
    'nelems': parse_input_elems,
    'natoms': parse_input_atoms,
    'nbonds': parse_input_bonds,
  }

  while not fm.err :
    lin = sys.stdin.readline()
    if lin == '' :
      break  # End of file.
    lin = strip_comment(lin)
    sys.stderr.write("| %s\n" % lin)
    if lin == '' :
      continue
    mt = PAR.match(lin)
    if not mt :
      # NOTE(review): non-parameter lines are skipped without complaint,
      # as in the original; confirm that this leniency is intended.
      continue
    tag = mt.group(1)
    val = mt.group(2)
    if tag in float_tags :
      convert = float
    elif tag in table_parsers :
      convert = int
    else :
      fm.err = "unrecognized parameter " + tag
      continue
    try :
      num = convert(val)
    except ValueError :
      # Report a bad number as a data error instead of a traceback.
      fm.err = "invalid value for " + tag + ": " + val
      continue
    setattr(fm, tag, num)
    if tag in table_parsers :
      table_parsers[tag](fm)

  # Check that every required parameter was given, without clobbering an
  # error already found above: only the first problem is reported.
  # NOTE(review): the test is by truthiness, so an explicit value of zero
  # (e.g. 'nbonds = 0') is also reported as missing; changing that needs
  # knowledge of the defaults set by {MFormula}.
  required = (
    'fontheight', 'bondlength', 'atomradius', 'linewidth', 'ringwidth',
    'nelems', 'natoms', 'nbonds',
  )
  for tag in required :
    if fm.err :
      break
    if not getattr(fm, tag) :
      fm.err = "missing '%s =' " % tag
  if fm.err :
    data_error(fm.err)
  return fm
  #----------------------------------------------------------------------
  
def parse_input_elems(fm) :
  """Parses the {fm.nelems} chemical species lines from {sys.stdin}.

  Reads lines, skipping those that {strip_comment} reduces to the empty
  string, until {fm.nelems} records were parsed, the input ends, or an
  error is found.  No line beyond the last record is consumed.  Each
  record must have the form

      {IORD} {ELEM_SYMBOL} {RADIUS} {COLOR_R} {COLOR_G} {COLOR_B} {SHOW_SYMBOL}

  with fields separated by one or more blanks, and {IORD} counting
  0, 1, 2, ...  Record {i} is inserted at position {i} of the lists
  {fm.elem_symbol}, {fm.elem_radius}, {fm.elem_color} (as a list
  [R,G,B] of floats) and {fm.elem_show_symbol} (as an int).

  Returns nothing.  On failure, stores in {fm.err} a message about the
  first problem found."""
  # Fields must be separated by at least one blank; with optional blanks a
  # line lacking a field could still match by splitting a number in two.
  DAT = re.compile(r"^[ ]*(\d+)[ ]+(\S+)[ ]+([.0-9]+)[ ]+([.0-9]+)[ ]+([.0-9]+)[ ]+([.0-9]+)[ ]+([.0-9]+)[ ]*$")
  i = 0
  # Read a line only when another record is still needed, so that the line
  # after the last record is left for the caller.
  while (i < fm.nelems) and (not fm.err) :
    lin = sys.stdin.readline()
    if lin == '' :
      break  # End of file.
    lin = strip_comment(lin)
    sys.stderr.write("| %s\n" % lin)
    if lin == '' :
      continue
    mt = DAT.match(lin)
    if not mt :
      fm.err = "malformed elem data line \xab" + lin + "\xbb"
    elif int(mt.group(1)) != i :
      fm.err = "elem data line sequence error: " + mt.group(1) + " != " + str(i)
    else :
      try :
        radius = float(mt.group(3))
        color = [ float(mt.group(4)), float(mt.group(5)), float(mt.group(6)) ]
        show = int(mt.group(7))
      except ValueError :
        # E.g. '1.2.3', or a non-integer flag: the pattern alone cannot
        # guarantee that the fields are valid numbers.
        fm.err = "malformed elem data line \xab" + lin + "\xbb"
      else :
        fm.elem_symbol[i:i] = [ mt.group(2) ]
        fm.elem_radius[i:i] = [ radius ]
        fm.elem_color[i:i] = [ color ]
        fm.elem_show_symbol[i:i] = [ show ]
        i = i + 1
  # Keep a more specific message if one was already recorded.
  if (i != fm.nelems) and (not fm.err) :
    fm.err = "not enough elems"
  #----------------------------------------------------------------------
  
def parse_input_atoms(fm) :
  """Parses the {fm.natoms} atom lines from {sys.stdin}.

  Reads lines, skipping those that {strip_comment} reduces to the empty
  string, until {fm.natoms} records were parsed, the input ends, or an
  error is found.  No line beyond the last record is consumed.  Each
  record must have the form

      {IORD} {ELEM_SYMBOL} {CTR_X} {CTR_Y}

  with fields separated by one or more blanks, and {IORD} counting
  0, 1, 2, ...  Record {i} is inserted at position {i} of the lists
  {fm.atom_symbol} and {fm.atom_center} (as a list [X,Y] of floats).

  Returns nothing.  On failure, stores in {fm.err} a message about the
  first problem found."""
  # Fields must be separated by at least one blank; with optional blanks a
  # line lacking a field could still match by splitting a number in two.
  DAT = re.compile(r"^[ ]*(\d+)[ ]+(\S+)[ ]+([-+.0-9]+)[ ]+([-+.0-9]+)[ ]*$")
  i = 0
  # Read a line only when another record is still needed, so that the line
  # after the last record is left for the caller.
  while (i < fm.natoms) and (not fm.err) :
    lin = sys.stdin.readline()
    if lin == '' :
      break  # End of file.
    lin = strip_comment(lin)
    sys.stderr.write("| %s\n" % lin)
    if lin == '' :
      continue
    mt = DAT.match(lin)
    if not mt :
      fm.err = "malformed atom data line \xab" + lin + "\xbb"
    elif int(mt.group(1)) != i :
      fm.err = "atom data line sequence error: " + mt.group(1) + " != " + str(i)
    else :
      try :
        center = [ float(mt.group(3)), float(mt.group(4)) ]
      except ValueError :
        # E.g. '1-2' or '+.': the pattern alone cannot guarantee that
        # the coordinates are valid numbers.
        fm.err = "malformed atom data line \xab" + lin + "\xbb"
      else :
        fm.atom_symbol[i:i] = [ mt.group(2) ]
        fm.atom_center[i:i] = [ center ]
        i = i + 1
  # Keep a more specific message if one was already recorded.
  if (i != fm.natoms) and (not fm.err) :
    fm.err = "not enough atoms"
  #----------------------------------------------------------------------
  
def parse_input_bonds(fm) :
  """Parses the {fm.nbonds} chemical bond lines from {sys.stdin}.

  Reads lines, skipping those that {strip_comment} reduces to the empty
  string, until {fm.nbonds} records were parsed, the input ends, or an
  error is found.  No line beyond the last record is consumed.  Each
  record must have the form

      {IORD} {ATOM_1_INDEX} {ATOM_2_INDEX} {VALENCY}

  with fields separated by one or more blanks, and {IORD} counting
  0, 1, 2, ...  Record {i} is inserted at position {i} of the lists
  {fm.bond_atom_1_index}, {fm.bond_atom_2_index} (as ints) and
  {fm.bond_valency} (as a float).  The atom indices are not checked
  against the number of atoms.

  Returns nothing.  On failure, stores in {fm.err} a message about the
  first problem found."""
  # Fields must be separated by at least one blank; with optional blanks a
  # line such as '0 12 1' (one field missing) would match as 0, 1, 2, 1.
  DAT = re.compile(r"^[ ]*(\d+)[ ]+([0-9]+)[ ]+([0-9]+)[ ]+([.0-9]+)[ ]*$")
  i = 0
  # Read a line only when another record is still needed, so that the line
  # after the last record is left for the caller.
  while (i < fm.nbonds) and (not fm.err) :
    lin = sys.stdin.readline()
    if lin == '' :
      break  # End of file.
    lin = strip_comment(lin)
    sys.stderr.write("| %s\n" % lin)
    if lin == '' :
      continue
    mt = DAT.match(lin)
    if not mt :
      fm.err = "malformed bond data line \xab" + lin + "\xbb"
    elif int(mt.group(1)) != i :
      fm.err = "bond data line sequence error: " + mt.group(1) + " != " + str(i)
    else :
      try :
        valency = float(mt.group(4))
      except ValueError :
        # E.g. '1.5.': the pattern alone cannot guarantee a valid number.
        fm.err = "malformed bond data line \xab" + lin + "\xbb"
      else :
        fm.bond_atom_1_index[i:i] = [ int(mt.group(2)) ]
        fm.bond_atom_2_index[i:i] = [ int(mt.group(3)) ]
        fm.bond_valency[i:i] = [ valency ]
        i = i + 1
  # Keep a more specific message if one was already recorded.
  if (i != fm.nbonds) and (not fm.err) :
    fm.err = "not enough bonds"
  #----------------------------------------------------------------------
    
def strip_comment(lin) :
  """Cleans up one input line {lin} (as returned by {readline}).

  Trailing whitespace, including the end-of-line, is always removed.
  If what remains is empty, or consists only of blanks followed by a
  '#' comment, the result is the empty string.  Otherwise the line is
  returned with its leading blanks and contents intact; in particular,
  a '#' that follows other data on the same line is NOT removed."""
  lin = lin.rstrip()
  # Only blanks (not tabs) may precede the '#' of a comment-only line.
  body = lin.lstrip(' ')
  if (body == '') or body.startswith('#') :
    return ''
  return lin
  #----------------------------------------------------------------------

def data_error(msg):
  """Prints the error message {msg} about the input data file, and aborts.

  The message is written to {sys.stderr} prefixed with 'stdin:{POS}:',
  where {POS} is the value of {sys.stdin.tell()} -- a position in the
  stream, not a line number.  When the position cannot be obtained
  (e.g. the input is a pipe or a terminal), the prefix is just 'stdin:'.
  Then exits the program with status 1; never returns."""
  try :
    pos = sys.stdin.tell()
  except (AttributeError, IOError, OSError, ValueError) :
    # Unseekable input: still report the error rather than crash here.
    pos = None
  if pos is None :
    sys.stderr.write("stdin: %s\n" % msg)
  else :
    sys.stderr.write("stdin:%d: %s\n" % (pos, msg))
  sys.exit(1)
  #----------------------------------------------------------------------