#!/usr/bin/env python


# EDITABLE SECTIONS ARE MARKED WITH #@# 


version="1.1"
authors=["Tsjerk A. Wassenaar","Djurre de Jong", "Clement Arnarez"]

desc = ""

notes = [
    ("TAW110929","Modified Option class to handle boolean options nicely"),
    ("TAW110929","Modified position restraints to allow arbitrary atom name selections"),
    ("TAW110929","Enabled merging chains (option -merge)"),
    ("TAW110930","Changed order of fields in CG atom representation (name,resn,chain,resi)->(name,resn,resi,chain)"),
    ("TAW110930","Enabled cystine bridges, including automatic detection"),
    ("TAW111007","Added stuff to support multiscaling"),
    ("TAW111007","Added writing of an index file for multiscaling"),
    ("TAW120215","Started rearranging things and added documentation to options"),    
    ("TAW120314","Enabled elastic networks"),
    ("TAW120314","Moved mapping stuff to class CoarseGrained - Class needs to be made into generic converter"),
    ("TAW120329","Added support for residue insertion codes in PDB files"),
    ]

# 
# This program has grown to be pretty complete and complex. 
# The routines have been organized in sections, which are 
# tagged to make jumping to a particular section easy.
#
# Index of this file:
#
#   1. Options and documentation                             @DOC
#   2. Description, options and command line parsing         @CMD
#   3. Helper functions and macros                           @FUNC
#   4. Finegrained to coarsegrained mapping                  @MAP
#   5. Secondary structure determination and interpretation  @SS
#   6. Force field parameters (MARTINI/ELNEDYN)              @FF
#   7. Elastic network                                       @ELN
#   8. Structure I/O                                         @IO
#   9. Topology generation                                   @TOP
#  10. Main                                                  @MAIN
#


import math, os, sys, string, itertools, re, logging, random
import subprocess as subp


###################################
## 1 # OPTIONS AND DOCUMENTATION ##  -> @DOC <-
###################################


# This is a simple and versatily option class that allows easy
# definition and parsing of options. 
class Option:
    def __init__(self,func=str,num=1,default=None,description=""):
        self.func        = func
        self.num         = num
        self.value       = default
        self.description = description
    def __nonzero__(self): 
        if self.func == bool:
            return self.value != False
        return bool(self.value)
    def __str__(self):
        return self.value and str(self.value) or ""
    def setvalue(self,v):
        if len(v) == 1:
            self.value = self.func(v[0])
        else:
            self.value = [ self.func(i) for i in v ]


# Lists for gathering arguments to options that can be specified 
# multiple times on the command line.
cystines = []
merges   = []
links    = []
multi    = []


# List of 
options = [
#   NOTE: Options marked with (+) can be given multiple times on the command line
#   option              type number default description
    """
Primary input/output
--------------------
The input file (-f) should be a coordinate file in PDB or GROMOS
format. The format is inferred from the structure of the file, also
allowing reading MEAD/GRASP (.pqr) files. The input can also be
provided through stdin, allowing piping of structures, e.g. using grep
or sed to make a selection of atoms. This can be useful for removing
HETATM records: 

grep -v '^HETATM' input.pdb | martinize.py

The input structure can have multiple frames/models. If an output
structure file (-x) is given, each frame will be coarsegrained,
resulting in a multimodel output structure. Having multiple frames may
also affect the topology. If secondary structure is determined
internally, the structure will be averaged over the frames. Likewise,
interatomic distances, as used for backbone bond lengths in Elnedyn
and in elastic networks, are also averaged over the frames available.

If an output file (-o) is indicated for the topology, that file will
be used for the master topology, using #include statements to link the
moleculetype definitions, which are written to separate files. If no
output filename is given, the topology and the moleculetype
definitions are written to stdout.
""",
    ("-f",        Option(str,             1,     None, "Input file (PDB|GRO)")),
    ("-o",        Option(str,             1,     None, "Output topology (TOP)")),
    ("-x",        Option(str,             1,     None, "Output coarse grained structure (PDB)")),
    ("-n",        Option(str,             1,     None, "Output index file")),
    ("-v",        Option(bool,            0,    False, "Verbose. Be load and noisy.")), 
    ("-h",        Option(bool,            0,    False, "Display this help.")),
    """
Secondary structure
-------------------
The secondary structure plays a central role in the assignment of atom
types and bonded interactions in MARTINI. Martinize allows
specification of the secondary structure as a string (-ss), or as a
file containing a specification in GROMACS' ssdump format
(-ss). Alternatively, DSSP can be used for an on-the-fly assignment of
the secondary structure. For this, the option -dssp has to be used
giving the location of the executable as the argument. 
The option -collagen will set the whole structure to collagen.  With
multimodel input files, the secondary structure as determined with
DSSP or PyMOL will be averaged over the frames. In this case, a cutoff
can be specified (-ssc) indicating the fraction of frames to match a
certain secondary structure type for designation.
""",
    ("-ss",       Option(str,             1,     None, "Secondary structure (File or string)")),
    ("-ssc",      Option(float,           1,      0.5, "Cutoff fraction for ss in case of ambiguity (default: 0.5).")),
    ("-dssp",     Option(str,             1,     None, "DSSP executable for determining structure")),
#    ("-pymol",    Option(str,             1,     None, "PyMOL executable for determining structure")),
    ("-collagen", Option(bool,            0,    False, "Use collagen parameters")),
    """
Topology
--------

Several options are available to tune the resulting topology. By
default, termini are charged, and chain breaks are kept neutral. This
behaviour can be changed using -nt and -cb, respectively.

Disulphide bridges can be specified using -cys. This option can be
given multiple times on the command line. The argument is a pair of
cysteine residues, using the format
chain/resn/resi,chain/resn/resi. For disulphide bridges, the residue
name is not required, and the chain identifier is optional. If no
chain identifier is given, all matching residue pairs will be checked,
and pairs within the cutoff distance (0.22 nm) will be linked. It is
also possible to let martinize detect cysteine pairs based on this
cut-off distance, by giving the keyword 'auto' as argument to -cys.
Alternatively, a different cut-off distance can be specified, which
will also trigger a search of pairs satisfying the distance
criterion.

In addition to cystine bridges, links between other atoms can be
specified using -link. This requires specification of the atoms, using
the format
chain/resn/resi/atom,chain/resi/resn/atom,bondlength,forceconstant.
If only two atoms are given, a constraint will be added with length
equal to the (average) distance in the coordinate file. If a bond
length is added, but no force constant, then the bondlength will be
used to set a constraint.

Linking atoms requires that the atoms are part of the same
moleculetype. Therefore any link between chains will cause the chains
to be merged. Merges can also be specified explicitly, using the
option -merge with a comma-separated list of chain identifiers to be
joined into one moleculetype. The option -merge can be used several
times. Note that specifying a chain in several merge groups will cause
all chains involved to be merged into a single moleculetype.

The moleculetype definitions are written to topology include (.itp)
files, using a name consisting of the molecule class (e.g. Protein)
and the chain identifier. With -name a name can be specified instead.
By default, martinize only writes a moleculetype for each unique
molecule, inferred from the sequence and the secondary structure
definition. It is possible to force writing a moleculetype definition
for every single molecule, using -sep.

The option -p can be used to write position restraints, using the 
force constant specified with -pf, which is set to 1000 kJ/mol 
by default.

For stability, elastic bonds are used to retain the structure of 
extended strands. The option -ed causes dihedrals to be used 
instead.
""",
    ("-nt",       Option(bool,            0,    False, "Set neutral termini (charged is default)")), 
    ("-cb",       Option(bool,            0,    False, "Set charges at chain breaks (neutral is default)")), 
    ("-cys",      Option(cystines.append, 1,     None, "Disulphide bond (+)")),
    ("-link",     Option(links.append,    1,     None, "Link (+)")),
    ("-merge",    Option(merges.append,   1,     None, "Merge chains: e.g. -merge A,B,C (+)")),
#    ("-mixed",    Option(bool,            0,    False, "Allow chains of mixed type (default: False)")),
    ("-name",     Option(str,             1,     None, "Moleculetype name")),
    ("-p",        Option(str,             1,   'None', "Output position restraints (None/All/Backbone) (default: None)")),
    ("-pf",       Option(float,           1,     1000, "Position restraints force constant (default: 1000 kJ/mol/nm^2)")),
    ("-ed",       Option(bool,            0,    False, "Use dihedrals for extended regions rather than elastic bonds)")),
    ("-sep",      Option(bool,            0,    False, "Write separate topologies for identical chains.")),
    """
Elastic network
---------------

Martinize can write an elastic network for atom pairs within a cutoff
distance according to the Elnedyn definition. The force constant (-ef)
and the upper distance bound (-eu) can be speficied.
""",
# Fij = Fc exp( -a (rij - lo)**p )
    ("-elastic",  Option(bool,            0,    False, "Write elastic bonds")),
    ("-elnedyn",  Option(bool,            0,    False, "Use Elnedyn mapping and parameters with elastic network")),
    ("-ef",       Option(float,           1,      500, "Elastic bond force constant Fc")),
    ("-el",       Option(float,           1,        0, "Elastic bond lower cutoff: F = Fc if rij < lo")),
    ("-eu",       Option(float,           1,     0.90, "Elastic bond upper cutoff: F = 0  if rij > up")),
    ("-ea",       Option(float,           1,        0, "Elastic bond decay factor a")),
    ("-ep",       Option(float,           1,        1, "Elastic bond decay power p")),
    ("-em",       Option(float,           1,        0, "Remove elastic bonds with force constant lower than this")),
    ("-eb",       Option(str,             1,     'BB', "Comma separated list of bead names for elastic bonds")),
#    ("-cgo",      Option(str,             1,     None, "PyMOL CGO file for elastic network visualization")), 
#    ("-hetatm",   Option(bool,            0,    False, "Include HETATM records from PDB file (Use with care!)")),
    """
Multiscaling
------------

Martinize can process a structure to yield a multiscale system,
consisting of a coordinate file with atomistic parts and
corresponding, overlaid coarsegrained parts. For chains that are
multiscaled, rather than writing a full moleculetype definition, 
additional [atoms] and [virtual_sitesn] sections are written, to 
be appended to the atomistic moleculetype definitions. 
The option -multi can be specified multiple times, and takes a chain
identifier as argument. Alternatively, the keyword 'all' can be given
as argument, causing all chains to be multiscaled.
""",
    ("-multi",    Option(multi.append,    1,     None, "Chain to be set up for multiscaling (+)")),
"========================================================================\n"
    ]


## Martini Quotes
martiniq = [
    ("Robert Benchley",
     "Why don't you get out of that wet coat and into a dry martini?"),
    ("James Thurber",
     "One martini is all right, two is two many, three is not enough"),
    ("Philip Larkin",
     "The chromatic scale is what you use to give the effect of drinking a quinine martini and having an enema simultaneously."),
    ("William Emerson, Jr.",
     "And when that first martini hits the liver like a silver bullet, there is a sigh of contentment that can be heard in Dubuque."),
    ("Alec Waugh",
     "I am prepared to believe that a dry martini slightly impairs the palate, but think what it does for the soul."),
    ("Gerald R. Ford",
     "The three-martini lunch is the epitome of American efficiency. Where else can you get an earful, a bellyful and a snootful at the same time?"),
    ("P. G. Wodehouse",
     "He was white and shaken, like a dry martini."),
    ]

  
##############################
## 2 # COMMAND LINE PARSING ##  -> @CMD <-
##############################


args = sys.argv[1:]


# Check whether there is a request for help
if '-h' in args or '--help' in args:
    print "\n",__file__
    print desc or "\nClement, you ought to write a description for this script...\n"
    for item in options:
        if type(item) == str:
            print item
    for item in options:
        if type(item) != str:
            print "%10s  %s"%(item[0],item[1].description)
    print
    sys.exit()


# Convert the option list to a dictionary, discarding all comments
options = dict([i for i in options if not type(i) == str])


# Process the command line
while args:
    ar = args.pop(0)
    options[ar].setvalue([args.pop(0) for i in range(options[ar].num)])


# Boolean options are set to more intuitive variables
Collagen            = options['-collagen']
ChargesAtBreaks     = options['-cb']
NeutralTermini      = options['-nt']
ExtendedDihedrals   = options['-ed']
RetainHETATM        = False # options['-hetatm']
SeparateTop         = options['-sep']
MixedChains         = False # options['-mixed']
ElasticNetwork      = options['-elastic']
Elnedyn             = options['-elnedyn']


# Parsing of some other options into variables
ElasticMaximumForce = options['-ef'].value 
ElasticMinimumForce = options['-em'].value
ElasticLowerBound   = options['-el'].value
ElasticUpperBound   = options['-eu'].value
ElasticDecayFactor  = options['-ea'].value
ElasticDecayPower   = options['-ep'].value
ElasticBeads        = options['-eb'].value.split(',')
PosResForce         = options['-pf'].value


PosRes              = [i.lower() for i in options['-p'].value.split(",")]
if "none"     in PosRes: PosRes = []
if "backbone" in PosRes: PosRes.append("BB")


if Elnedyn:
    #
    # The option "-elnedyn" is foremost the way to choose the bead mapping and
    # parameters for Elnedyn and will set an elastic network using the default 
    # (Elnedyn) parameters, *unless* the user wants to override them by specifying
    # additional elastic network options.
    #
    # A warning should be issued if an elastic network is used that is not 
    # strictly compliant with Elnedyn. 
    #
    ElasticNetwork  = True
    # Unless explicitly told not to, using Elnedyn will 
    # merge all chains into a single moleculetype to 
    # allow a global Elnedyn network.
    if "no" in merges:
        merges.remove("no")
    else:
        merges = ["all"]


# Merges, links and cystines
mergeList = "all" in merges and ["all"] or [i.split(",") for i in merges]


# Helper function to parse atom strings given on the command line:
#   resid
#   resname/resid
#   chain/resname/resid
#   resname/resid/atom
#   chain/resname/resid/atom
#   chain//resid
#   chain/resname/atom
def str2atom(a):
    a = a.split("/")   
    if len(a) == 1: # Only a residue number:
        return (None,None,int(a[0]),None)
    if len(a) == 2: # Residue name and number (CYS/123):
        return (None,a[0],int(a[1]),None)
    if len(a) == 3:
        if a[2].isdigit(): # Chain, residue name, residue number
            return (None,a[1],int(a[2]),a[0])
        else: # Residue name, residue number, atom name
            return (a[2],a[0],int(a[1]),None)
    return (a[3],a[1],int(a[2]),a[0])


# Process links
linkList   = []
linkListCG = []
for i in links:
    ln     = i.split(",")
    a, b   = str2atom(ln[0]), str2atom(ln[1])
    if len(ln) > 3: # Bond with given length and force constant
        bl, fc = (ln[2] and float(ln[2]) or None, float(ln[3]))
    elif len(a) == 3: # Constraint at given distance
        bl, fc = float(a[2]), None
    else: # Constraint at distance in structure
        bl, fc = None, None
    # Store the link, but do not list the atom name in the
    # atomistic link list. Otherwise it will not get noticed 
    # as a valid link when checking for merging chains
    linkList.append(((None,a[1],a[2],a[3]),(None,b[1],b[2],b[3])))
    linkListCG.append((a,b,bl,fc))


# Cystines -- This should be done for all special bonds listed in the _special_ dictionary
CystineCheckBonds = False   # By default, do not detect cystine bridges
CystineMaxDist2   = (10*0.22)**2 # Maximum distance (A) for detection of SS bonds
for i in cystines:
    if i.lower() == "auto":
        CystineCheckBonds = True
    elif i.replace(".","").isdigit():
        CystineCheckBonds = True
        CystineMaxDist2   = (10*float(i))**2
    else:
        # This item should be a pair of cysteines
        cysA, cysB = [str2atom(j) for j in i.split(",")]
        linkList.append((("SG","CYS",cysA[2],cysA[3]),("SG","CYS",cysB[2],cysB[3])))
        linkListCG.append((("SC1","CYS",cysA[2],cysA[3]),("SC1","CYS",cysB[2],cysB[3]),-1,-1))


## LOGGING ##

# Set the log level and communicate which options are set and what is happening

# If 'Verbose' is set, change the logger level
logLevel = options["-v"] and logging.DEBUG or logging.INFO
logging.basicConfig(format='%(levelname)-7s    %(message)s',level=logLevel)


logging.info("Chain termini will%s be charged"%(NeutralTermini and " not" or ""))


logging.info("Residues at chain brakes will%s be charged"%((not ChargesAtBreaks) and " not" or ""))


if ExtendedDihedrals:  
    logging.info('Dihedrals will be used for extended regions. (Elastic bonds may be more stable)')
else:                  
    logging.info('Local elastic bonds will be used for extended regions.')


if PosRes:
    logging.info("Position restraints will be generated.")
    logging.warning("Position restraints are only enabled if -DPOSRES is set in the MDP file")


if MixedChains:
    logging.warning("So far no parameters for mixed chains are available. This might crash the program!")


if RetainHETATM:
    logging.warning("I don't know how to handle HETATMs. This will probably crash the program.")


#################################################
## 3 # HELPER FUNCTIONS, CLASSES AND SHORTCUTS ##  -> @FUNC <-
#################################################


#----+------------------+
## A | STRING FUNCTIONS |
#----+------------------+

# Split a string                                                              
def spl(x):                                                                   
    return x.split()                                                          


# Split each argument in a list                                               
def nsplit(*x):                                                               
    return [i.split() for i in x]                                             


# Make a dictionary from two lists                                            
def hash(x,y):                                                                
    return dict(zip(x,y))                                                     


# Function to reformat pattern strings                                        
def pat(x,c="."):                                                             
    return x.replace(c,"\x00").split()                                        


# Function to generate formatted strings according to the argument type       
def formatString(i):                                                          
    if type(i) == str:                                                        
        return i                                                              
    if type(i) == int:                                                        
        return "%5d"%i                                                        
    if type(i) == float:                                                      
        return "%8.5f"%i                                                      
    else:                                                                     
        return str(i)                                                         


#----+----------------+
## B | MATH FUNCTIONS |
#----+----------------+


def cos_angle(a,b):
    p = sum([i*j for i,j in zip(a,b)])
    q = math.sqrt(sum([i*i for i in a])*sum([j*j for j in b]))
    return min(max(-1,p/q),1)


def norm2(a):
    return sum([i*i for i in a])


def norm(a):
    return math.sqrt(norm2(a))


def distance2(a,b):
    return (a[0]-b[0])**2+(a[1]-b[1])**2+(a[2]-b[2])**2


##########################
## 4 # FG -> CG MAPPING ##  -> @MAP <-
##########################


# Amino acid codes:                                                                                  
AA3     = spl("TRP TYR PHE HIS ARG LYS CYS ASP GLU ILE LEU MET ASN PRO HYP GLN SER THR VAL ALA GLY") #@#
AA1     = spl("  W   Y   F   H   R   K   C   D   E   I   L   M   N   P   O   Q   S   T   V   A   G") #@#


# Dictionaries for conversion from one letter code to three letter code v.v.                         
AA123, AA321 = hash(AA1,AA3),hash(AA3,AA1)                                                           


# Residue classes:
protein = AA3
water   = spl("HOH SOL TIP")
lipids  = spl("DPP DHP DLP DMP DSP POP DOP DAP DUP DPP DHP DLP DMP DSP PPC DSM DSD DSS")
nucleic = spl("DAD DCY DGU DTH ADE CYT GUA THY URA")


residueTypes = dict(
    [(i,"Protein") for i in protein ]+
    [(i,"Water")   for i in water   ]+
    [(i,"Lipid")   for i in lipids  ]+
    [(i,"Nucleic") for i in nucleic ]
    )

class CoarseGrained:
    # Class for mapping an atomistic residue list to a coarsegrained one
    # Should get an __init__ function taking a residuelist, atomlist, Pymol selection or ChemPy model
    # The result should be stored in a list-type attribute
    # The class should have pdbstr and grostr methods

    # Standard mapping groups
    # Protein backbone
    bb        = "N CA C O H H1 H2 H3 O1 O2"                                                                    #@#  
    # Lipid tails
    palmitoyl1    = nsplit("C1B C1C C1D C1E","C1F C1G C1H C1I","C1J C1K C1L C1M","C1N C1O C1P")                #@#
    palmitoyl2    = nsplit("C2B C2C C2D C2E","C2F C2G C2H C2I","C2J C2K C2L C2M","C2N C2O C2P")                #@#
    oleyl1        = nsplit("C1B C1C C1D C1E","C1F C1G C1H","C1I C1J","C1K C1L C1M C1N","C1O C1P C1Q C1R")      #@#
    oleyl2        = nsplit("C2B C2C C2D C2E","C2F C2G C2H","C2I C2J","C2K C2L C2M C2N","C2O C2P C2Q C2R")      #@#
    #lauroyl1      = []
    #stearoyl1     = []
    #arachidonoyl1 = []
    #linoleyl1     = []
    #hexanoyl1     = []
    # Lipid head groups      
    #phoshpatidylcholine      = 
    phosphatydilethanolamine = nsplit("N H1 H2 H3 CA","CB P OA OB OC OD","CC CD OG C2A OH","CE OE C1A OF")     #@#
    phosphatidylglycerol     = nsplit("H1 O1 CA H2 O2 CB","CC P OA OB OC OD","CD CE OG C2A OH","CF OE C1A OF") #@#
    #phosphatidylserine       =

    # This is the mapping dictionary
    # For each residue it returns a list, each element of which
    # lists the atom names to be mapped to the corresponding bead.
    # The order should be the standard order of the coarse grained
    # beads for the residue. Only atom names matching with those 
    # present in the list of atoms for the residue will be used
    # to determine the bead position. This adds flexibility to the
    # approach, as a single definition can be used for different 
    # states of a residue (e.g., GLU/GLUH).
    # For convenience, the list can be specified as a set of strings,
    # converted into a list of lists by 'nsplit' defined above.
    mapping = {
        "ALA":  nsplit(bb + " CB"),
        "CYS":  nsplit(bb,"CB SG"),
        "ASP":  nsplit(bb,"CB CG OD1 OD2"),
        "GLU":  nsplit(bb,"CB CG CD OE1 OE2"),
        "PHE":  nsplit(bb,"CB CG CD1 HD1","CD2 HD2 CE2 HE2","CE1 HE1 CZ HZ"),
        "GLY":  nsplit(bb),
        "HIS":  nsplit(bb,"CB CG","CD2 HD2 NE2 HE2","ND1 HD1 CE1 HE1"),
        "ILE":  nsplit(bb,"CB CG1 CG2 CD CD1"),
        "LYS":  nsplit(bb,"CB CG CD","CE NZ HZ1 HZ2 HZ3"),
        "LEU":  nsplit(bb,"CB CG CD1 CD2"),
        "MET":  nsplit(bb,"CB CG SD CE"),
        "ASN":  nsplit(bb,"CB CG ND1 ND2 OD1 OD2 HD11 HD12 HD21 HD22"),
        "PRO":  nsplit(bb,"CB CG CD"),
        "GLN":  nsplit(bb,"CB CG CD OE1 OE2 NE1 NE2 HE11 HE12 HE21 HE22"),
        "ARG":  nsplit(bb,"CB CG CD","NE HE CZ NH1 NH2 HH11 HH12 HH21 HH22"),    
        "SER":  nsplit(bb,"CB OG HG"),
        "THR":  nsplit(bb,"CB OG1 HG1 CG2"),
        "VAL":  nsplit(bb,"CB CG1 CG2"),
        "TRP":  nsplit(bb,"CB CG CD2","CD1 HD1 NE1 HE1 CE2","CE3 HE3 CZ3 HZ3","CZ2 HZ2 CH2 HH2"),
        "TYR":  nsplit(bb,"CB CG CD1 HD1","CD2 HD2 CE2 HE2","CE1 HE1 CZ OH HH"),
        "POPE": phosphatydilethanolamine + palmitoyl1 + oleyl2,
        "DOPE": phosphatydilethanolamine + oleyl1     + oleyl2,
        "DPPE": phosphatydilethanolamine + palmitoyl1 + palmitoyl2,
        "POPG": phosphatidylglycerol     + palmitoyl1 + oleyl2,
        "DOPG": phosphatidylglycerol     + oleyl1     + oleyl2,
        "DPPG": phosphatidylglycerol     + palmitoyl1 + palmitoyl2,
        }

    # Generic names for side chain beads
    residue_bead_names = spl("BB SC1 SC2 SC3 SC4")

    # This dictionary contains the bead names for all residues,
    # following the order in 'mapping'
    names  = {
        "POPE": "NH3 PO4 GL1 GL2 C1A C2A C3A C4A C1B C2B D3B C4B C5B".split(),
        "POPG": "GLC PO4 GL1 GL2 C1A C2A C3A C4A C1B C2B D3B C4B C5B".split() 
        }
    # Add default bead names for all amino acids
    names.update([(i,("BB","SC1","SC2","SC3","SC4")) for i in AA3])

    # This dictionary allows determining four letter residue names
    # for ones specified with three letters, e.g., resulting from
    # truncation to adhere to the PDB format.
    # Each entry returns a prototypical test, given as a string,
    # and the residue name to be applied if eval(test) is True.
    # This is particularly handy to determine lipid types.
    # The test assumes there is a local or global array 'atoms'
    # containing the atom names of the residue in correct order.
    restest = {
        "POP": [('atoms[0] == "CA"', "POPG"),
                ('atoms[0] == "N"',  "POPE")]
        }

    # Crude mass for weighted average. No consideration of united atoms.
    # This will probably give only minor deviations, while also giving less headache
    mass = {'H': 1,'C': 12,'N': 14,'O': 16,'S': 32,'P': 31,'M': 0}

    # Determine average position for a set of weights and coordinates
    # This is a rather specific function that requires a list of items
    # [(m,(x,y,z),id),..] and returns the weighted average of the 
    # coordinates and the list of ids mapped to this bead
    def aver(self,b):
        mwx,ids = zip(*[((m*x,m*y,m*z),i) for m,(x,y,z),i in b])              # Weighted coordinates     
        tm  = sum(zip(*b)[0])                                                 # Sum of weights           
        return [sum(i)/tm for i in zip(*mwx)],ids                             # Centre of mass           

    # Return the CG beads for an atomistic residue, using the mapping specified above
    # The residue 'r' is simply a list of atoms, and each atom is a list:
    # [ name, resname, resid, chain, x, y, z ]
    def map(self,r,ca2bb=False):
        p = self.mapping[r[0][1]]                                             # Mapping for this residue 
        if ca2bb: p[0] = ["CA"]                                               # Elnedyn maps BB to CA    
        # Get the name, mass and coordinates for all atoms in the residue
        a = [(i[0],self.mass.get(i[0][0],0),i[4:]) for i in r]                    
        # Store weight, coordinate and index for atoms that match a bead
        q = [[(m,coord,a.index((atom,m,coord))) for atom,m,coord in a if atom in i] for i in p]

        # Bead positions      
        return zip(*[self.aver(i) for i in q])

CG = CoarseGrained()


#############################
## 5 # SECONDARY STRUCTURE ##  -> @SS <-
#############################


#----+--------------------------------------+
## A | SECONDARY STRUCTURE TYPE DEFINITIONS |
#----+--------------------------------------+

# This table lists all coarse grained secondary structure types
# The following are matched lists. Make sure they stay matched.
# The lists do not need to be of the same length. The longer list
# will be truncated when combined with a shorter list, e.g. with
# dihedral definitions, which are not present for coil and termini
#
ss_names = {
 "F": "Collagenous Fiber",                                                                  #@#
 "E": "Extended structure (beta sheet)",                                                    #@#
 "H": "Helix structure",                                                                    #@#
 "1": "Helix start (H-bond donor)",                                                         #@#
 "2": "Helix end (H-bond acceptor)",                                                        #@#
 "3": "Ambivalent helix type (short helices)",                                              #@#
 "T": "Turn",                                                                               #@#
 "S": "Bend",                                                                               #@#
 "C": "Coil",                                                                               #@#
}

bbss = ss_names.keys()
bbss     =    spl("  F     E     H     1     2     3     T     S     C")  # SS one letter 


# The following dictionary contains secondary structure types as assigned by
# different programs. The corresponding Martini secondary structure types are               
# listed in cgss                                                                            
#                                                                                           
# NOTE:                                                                                     
#  Each list of letters in the dictionary ss should exactly match the list                  
#  in cgss.                                                                                 
#                                                                                           
ssdefs = {
    "dssp":  list(".HGIBETSC~"),             # DSSP one letter secondary structure code     #@#
    "pymol": list(".H...S...L"),             # Pymol one letter secondary structure code    #@# 
    "gmx":   list(".H...ETS.C"),             # Gromacs secondary structure dump code        #@#    
    "self":  list("FHHHEETSCC")              # Internal CG secondary structure codes        #@#
}
cgss     =   list("FHHHEETSCC")              # Corresponding CG secondary structure types   #@#


#----+-------------------------------------------+
## B | SECONDARY STRUCTURE PATTERN SUBSTITUTIONS |
#----+-------------------------------------------+


# For all structure types specific dihedrals may be used if four or
# more consecutive residues are assigned that type.                

# Helix start and end regions are special and require assignment of
# specific types. The following pattern substitutions are applied 
# (in the given order). A dot matches any other type.             
# Patterns can be added to the dictionaries. This only makes sense
# if for each key in patterns there is a matching key in pattypes.
patterns = {
    "H": pat(".H. .HH. .HHH. .HHHH. .HHHHH. .HHHHHH. .HHHHHHH. .HHHH HHHH.")                #@#
}
pattypes = {
    "H": pat(".3. .33. .333. .3333. .13332. .113322. .1113222. .1111 2222.")                #@#
}

    
#----+----------+
## C | INTERNAL |
#----+----------+


# Pymol Colors
#           F   E   H   1   2   3   T   S   C
ssnum  = ( 13,  4,  2,  2,  2,  2,  6, 22,  0 )                                             #@#

# Dictionary returning a number for a given type of secondary structure
# This can be used for setting the b-factor field for coloring         
ss2num = hash(bbss,ssnum)                                                                   


# List of programs for which secondary structure definitions can be processed
programs = ssdefs.keys()                                                                    


# Dictionaries mapping ss types to the CG ss types                                          
ssd = dict([ (i, hash(ssdefs[i],cgss)) for i in programs ])                                 


# From the secondary structure dictionaries we create translation tables
# with which all secondary structure types can be processed. Anything
# not listed above will be mapped to C (coil).
# Note, a translation table is a list of 256 characters to map standard  
# ascii characters to.
def tt(program):                                                                            
    return  "".join([ssd[program].get(chr(i),"C") for i in range(256)])                     


# The translation table depends on the program used to obtain the 
# secondary structure definitions
sstt = dict([(i,tt(i)) for i in programs])                                                  


# The following translation tables are used to identify stretches of 
# a certain type of secondary structure. These translation tables have
# every character, except for the indicated secondary structure, set to
# \x00. This allows summing the sequences after processing to obtain
# a single sequence summarizing all the features.
null = "\x00"                                                                               
sstd = dict([ (i,ord(i)*null+i+(255-ord(i))*null) for i in cgss ])                          


# Pattern substitutions
def typesub(seq,patterns,types):                                                            
    for i,j in zip(patterns,types):                                                         
        seq = seq.replace(i,j)                                                              
    return seq                                                                              


# The following function translates a string encoding the secondary structure
# to a string of corresponding Martini types, taking the origin of the 
# secondary structure into account, and replacing termini if requested.
def ssClassification(ss,program="dssp"):                                                    
    # Translate dssp/pymol/gmx ss to Martini ss                                             
    ss  = ss.translate(sstt[program])                                                       
    # Separate the different secondary structure types                                      
    sep = dict([(i,ss.translate(sstd[i])) for i in sstd.keys()])                            
    # Do type substitutions based on patterns                                               
    # If the ss type is not in the patterns lists, do not substitute                        
    # (use empty lists for substitutions)                                                   
    typ = [ typesub(sep[i],patterns.get(i,[]),pattypes.get(i,[]))                           
            for i in sstd.keys()]                                                           
    # Translate all types to numerical values                                               
    typ = [ [ord(j) for j in list(i)] for i in typ ]                                        
    # Sum characters back to get a full typed sequence                                      
    typ = "".join([chr(sum(i)) for i in zip(*typ)])                                         
    # Return both the actual as well as the fully typed sequence                             
    return ss, typ                                                                          


# The following functions are for determination of secondary structure, 
# given a list of atoms. The atom format is generic and can be written out
# as PDB or GRO. The coordinates are in Angstrom.
# NOTE: There is the *OLD* DSSP and the *NEW* DSSP, which require 
# different calls. The old version uses '--' to indicate reading from stdin
# whereas the new version uses '-i /dev/stdin'
def call_dssp(chain,atomlist,executable='dsspcmbi'):
    '''Get the secondary structure, by calling to dssp'''
    ssdfile = 'chain_%s.ssd'%chain.id 

    try:
        if os.system(executable+" -V >/dev/null"):
            logging.debug("New version of DSSP; Executing '%s -i /dev/stdin -o %s'"%(executable,ssdfile))
            p = subp.Popen([executable,"-i","/dev/stdin","-o",ssdfile],stderr=subp.PIPE,stdout=subp.PIPE,stdin=subp.PIPE)
        else:
            logging.debug("Old version of DSSP; Executing '%s -- %s'"%(executable,ssdfile))
            p = subp.Popen([executable,"--",ssdfile],stderr=subp.PIPE,stdout=subp.PIPE,stdin=subp.PIPE)
    except OSError:
        logging.error("A problem occured calling %s."%executable)
        sys.exit(1)

    for atom in atomlist: 
        if atom[0][:2] == 'O1': atom=('O',)+atom[1:]
        if atom[0][0]!='H' and atom[0][:2]!='O2': p.stdin.write(pdbOut(atom))
    p.stdin.write('TER\n')
    data = p.communicate()
    p.wait()
    main,ss = 0,''
    for line in open(ssdfile).readlines(): 
      if main and not line[13] == "!": ss+=line[16]
      if line[:15] == '  #  RESIDUE AA': main=1
    return ss
     
ssDetermination = {
    "dssp": call_dssp
    }


################################
## 6 # FORCE FIELD PARAMETERS ##  -> @FF <-
################################


# Charged types:
charges = {"Qd":1, "Qa":-1, "RQd":1, "AQa":-1}                                                           #@#


#----+---------------------+
## A | BACKBONE PARAMETERS |
#----+---------------------+
#
# bbss  lists the one letter secondary structure code
# bbdef lists the corresponding default backbone beads
# bbtyp lists the corresponding residue specific backbone beads
#
# bbd   lists the structure specific backbone bond lengths
# bbkb  lists the corresponding bond force constants
#
# bba   lists the structure specific angles
# bbka  lists the corresponding angle force constants
#
# bbd   lists the structure specific dihedral angles
# bbkd  lists the corresponding force constants
#
# -=NOTE=- 
#  if the secondary structure types differ between bonded atoms
#  the bond is assigned the lowest corresponding force constant 
#
# -=NOTE=-
# if proline is anywhere in the helix, the BBB angle changes for 
# all residues
#
###############################################################################################
## BEADS ##                                                               #                 
#                    F     E     H     1     2     3     T     S     C    # SS one letter   
bbdef    =    spl(" P5   Nda    N0    Nd    Na   Nda   Nda    P5    P5")  # Default beads   #@#
bbtyp    = {                                                              #                 #@#
       "ALA": spl(" P4    N0    C5    N0    N0    N0    N0    P4    P4"), # ALA specific    #@#
       "PRO": spl(" Na    N0    C5    N0    Na    N0    N0    Na    Na"), # PRO specific    #@#
       "HYP": spl(" Na    N0    C5    C5    C5    C5    N0    Na    Na")  # HYP specific    #@#
}                                                                         #                 #@#
## BONDS ##                                                               #                 
bbldef   =       (.360, .350, .350, .350, .350, .350, .350, .350, .350)   # BB bond lengths #@#
bbkb     =       (1250, 1250, 1250, 1250, 1250, 1250,  500,  400,  400)   # BB bond kB      #@#
bbltyp   = {}                                                             #                 #@#
bbkbtyp  = {}                                                             #                 #@#
## ANGLES ##                                                              #                 
bbadef   =       ( 121,  134,   96,   96,   96,   96,  100,  130,  127)   # BBB angles      #@#
bbka     =       ( 150,   25,  700,  700,  700,  700,   25,   25,   25)   # BBB angle kB    #@#
bbatyp   = {                                                              #                 #@#
       "PRO":    ( 121,  134,   98,   98,   98,   98,  100,  130,  127)   # PRO specific    #@#
}                                                                         #                 #@#
bbkatyp  = {                                                              #                 #@#
       "PRO":    ( 150,   25,  100,  100,  100,  100,   25,   25,   25)   # PRO specific    #@#
}                                                                         #                 #@#
## DIHEDRALS ##                                                           #                 
bbddef   =       ( -89,    0, -120, -120, -120, -120)                     # BBBB dihedrals  #@#
bbkd     =       ( 100,   10,  400,  400,  400,  400)                     # BBBB kB         #@#
bbdmul   =       (   1,    1,    1,    1,    1,    1)                     # BBBB mltplcty   #@#
bbdtyp   = {}                                                             #                 #@#
bbkdtyp  = {}                                                             #                 #@#
                                                                          #                 
###############################################################################################               


# BBS angle, equal for all ss types                                                         
# Connects BB(i-1),BB(i),SC(i), except for first residue: BB(i+1),BB(i),SC(i)               
#                 ANGLE   Ka                                                                
bbsangle =      [   100,  25]                                                               #@#

# Bonds for extended structures (more stable than using dihedrals)                          
#               LENGTH FORCE                                                                
ebonds   = {                                                                                #@#
       'short': [ .640, 2500],                                                              #@#
       'long' : [ .970, 2500]                                                               #@#
}                                                                                           #@#


#----+-----------------------+
## B | SIDE CHAIN PARAMETERS |
#----+-----------------------+


# To be compatible with Elnedyn, all parameters are explicitly defined, even if they are double.
sidechains = {
    #RES#   BEADS                   BONDS                                                   ANGLES              DIHEDRALS
    #                               BB-SC          SC-SC                                        BB-SC-SC  SC-SC-SC
    "TRP": [spl("SC4 SP1 SC4 SC4"),[(0.300,5000)]+[(0.270,None) for i in range(5)],        [(210,50),(90,50),(90,50)], [(0,50),(0,200)]],
    "TYR": [spl("SC4 SC4 SP1"),    [(0.320,5000), (0.270,None), (0.270,None),(0.270,None)],[(150,50),(150,50)],        [(0,50)]],
    "PHE": [spl("SC4 SC4 SC4"),    [(0.310,7500), (0.270,None), (0.270,None),(0.270,None)],[(150,50),(150,50)],        [(0,50)]],
    "HIS": [spl("SC4 SP1 SP1"),    [(0.320,7500), (0.270,None), (0.270,None),(0.270,None)],[(150,50),(150,50)],        [(0,50)]],
    "ARG": [spl("N0 Qd"),          [(0.330,5000), (0.340,5000)],                           [(180,25)]],
    "LYS": [spl("C3 Qd"),          [(0.330,5000), (0.280,5000)],                           [(180,25)]],
    "CYS": [spl("C5"),             [(0.310,7500)]],
    "ASP": [spl("Qa"),             [(0.320,7500)]],
    "GLU": [spl("Qa"),             [(0.400,5000)]],
    "ILE": [spl("AC1"),            [(0.310,None)]],
    "LEU": [spl("AC1"),            [(0.330,7500)]],
    "MET": [spl("C5"),             [(0.400,2500)]],
    "ASN": [spl("P5"),             [(0.320,5000)]],
    "PRO": [spl("AC2"),            [(0.300,7500)]],
    "HYP": [spl("P1"),             [(0.300,7500)]],
    "GLN": [spl("P4"),             [(0.400,5000)]],
    "SER": [spl("P1"),             [(0.250,7500)]],
    "THR": [spl("P1"),             [(0.260,None)]],
    "VAL": [spl("AC2"),            [(0.265,None)]],
    "ALA": [],
    "GLY": [],
    }

# Sidechain parameters for Elnedyn. (read from cg-2.1.dat). For HIS the order of bonds is changed and a bond with fc=0 is added.
sidechainsElnedyn = {
#RES#   BEADS                      BONDS                                                                    ANGLES                          DIHEDRALS
'TRP': [spl("SC4 SP1 SC4 SC4"), [(0.255,73000), (0.220,None), (0.250,None), (0.280,None), (0.255,None)], [(142,30), (143,20), (104,50)], [(180,200)]],
'TYR': [spl("SC4 SC4 SP1"),     [(0.335, 6000), (0.335,6000), (0.240,None), (0.310,None), (0.310,None)], [(70,100), (130, 50)]],
'PHE': [spl("SC4 SC4 SC4"),     [(0.340, 7500), (0.340,7500), (0.240,None), (0.240,None), (0.240,None)], [(70,100), (125,100)]],
'HIS': [spl("SC4 SP1 SP1"),     [(0.195, None), (0.340,   0), (0.193,None), (0.295,None), (0.216,None)], [(135,100),(115, 50)]],
'ARG': [spl("C5 Qd"),           [(0.250,12500), (0.350,6200)],                                           [(150,15)]],
'LYS': [spl("C3 Qd"),           [(0.250,12500), (0.300,9700)],                                           [(150,20)]],
'CYS': [spl("C5"),              [(0.240,94000)]],
'ASP': [spl("Qa"),              [(0.255,65000)]],
'GLU': [spl("Qa"),              [(0.310, 2500)]],
'ILE': [spl("C1"),              [(0.225,13250)]],
'LEU': [spl("C1"),              [(0.265,81500)]],
'MET': [spl("C5"),              [(0.310, 2800)]],
'ASN': [spl("P5"),              [(0.250,61000)]],
'PRO': [spl("C2"),              [(0.190, None)]],
'GLN': [spl("P4"),              [(0.300, 2400)]],
'SER': [spl("P1"),              [(0.195, None)]],
'THR': [spl("P1"),              [(0.195, None)]],
'VAL': [spl("C2"),              [(0.200, None)]],
'GLY': [],
'ALA': [],
}

# Defines the connectivity between between beads
connectivity = {
#SC-BEADS  BONDS                                   ANGLES             DIHEDRALS
    0:    [],
    1:    [[(0,1)]],
    2:    [[(0,1),(1,2)],                         [(0,1,2)]],
    3:    [[(0,1),(1,2),(1,3),(2,3)],             [(0,1,2),(0,1,3)], [(0,2,3,1)]],
    4:    [[(0,1),(1,2),(1,3),(2,3),(2,4),(3,4)], [(0,1,2),(0,1,3)], [(0,2,3,1),(1,2,4,3)]],
    }

# Connectivity records for Elnedyn (read from cg-2.1.dat). For HIS the order of bonds is changed and a bond with fc=0 is added.
connectivityElnedyn = {
#SC-BEADS  BONDS                                    ANGLES                            DIHEDRALS
    0:    [],
    1:    [[(0, 1)]],
    2:    [[(0, 1), (1, 2)],                        [(0, 1, 2)]],
    3:    [[(0, 1), (0, 2), (1, 2), (1, 3), (2, 3)],[(0, 1, 2), (0, 1, 3)]],
    4:    [[(0, 1), (1, 2), (2, 4), (4, 3), (3, 1)],[(0, 1, 2), (0, 1, 4), (0, 1, 3)],[(1, 2, 3, 4)]],
    }


# This should later be replaced by using dictionairies for the sidechain parameters.
if Elnedyn:
    sidechains      = sidechainsElnedyn
    connectivity    = connectivityElnedyn


#----+----------------+
## C | SPECIAL BONDS  |
#----+----------------+


special = {
    # ATOM 1         ATOM 2          BOND LENGTH   FORCE CONSTANT
    (("SC1","CYS"), ("SC1","CYS")):     (0.39,         5000),
    }


#----+----------------+
## D | INTERNAL STUFF |
#----+----------------+


## BACKBONE BEAD TYPE ##                                                                    
# Dictionary of default bead types (*D)                                                     
bbBeadDictD  = hash(bbss,bbdef)                                                             
# Dictionary of dictionaries of types for specific residues (*S)                            
bbBeadDictS  = dict([(i,hash(bbss,bbtyp[i])) for i in bbtyp.keys()])                        


# The following function returns the backbone bead for a given residue and                   
# secondary structure type.                                                                 
# 1. Look up the proper dictionary for the residue                                          
# 2. Get the proper type from it for the secondary structure                                
# If the residue is not in the dictionary of specials, use the default                      
# If the secondary structure is not listed (in the residue specific                         
# dictionary) revert to the default.                                                        
def bbGetBead(r1,ss="C"):                                                                   
    return bbBeadDictS.get(r1,bbBeadDictD).get(ss,bbBeadDictD.get(ss))                      


## BB BOND TYPE ##                                                                          
# Dictionary of default abond types (*D)                                                    
bbBondDictD = hash(bbss,zip(bbldef,bbkb))                                                   
# Dictionary of dictionaries for specific types (*S)                                        
bbBondDictS = dict([(i,hash(bbss,zip(bbltyp[i],bbkbtyp[i]))) for i in bbltyp.keys()])       


## BBB ANGLE TYPE ##                                                                        
# Dictionary of default angle types (*D)                                                    
bbAngleDictD = hash(bbss,zip(bbadef,bbka))                                                  
# Dictionary of dictionaries for specific types (*S)                                        
bbAngleDictS = dict([(i,hash(bbss,zip(bbatyp[i],bbkatyp[i]))) for i in bbatyp.keys()])      


## BBBB DIHEDRAL TYPE ##                                                                    
# Dictionary of default dihedral types (*D)                                                 
bbDihedDictD = hash(bbss,zip(bbddef,bbkd,bbdmul))                                           
# Dictionary of dictionaries for specific types (*S)                                        
bbDihedDictS = dict([(i,hash(bbss,zip(bbdtyp[i],bbkdtyp[i]))) for i in bbdtyp.keys()])      


#########################
## 7 # ELASTIC NETWORK ##  -> @ELN <-
#########################


## ELASTIC NETWORK ##

# Only the decay function is defined here, the network 
# itself is set up through the Topology class

# The function to determine the decay scaling factor for the elastic network 
# force constant, based on the distance and the parameters provided.
# This function is very versatile and can be fitted to most commonly used 
# profiles, including a straight line (rate=0)
def decayFunction(distance,shift,rate,power):
    return math.exp(-rate*math.pow(distance-shift,power))

def rubberBands(atomList,lowerBound,upperBound,decayFactor,decayPower,forceConstant,minimumForce):
    out = []
    u2  = upperBound**2
    while len(atomList) > 3:
        bi,xi = atomList.pop(0)
        for bj,xj in atomList[2:]:
            # Mind the nm/A conversion -- This has to be standardized! Global use of nm?
            d2 = distance2(xi,xj)/100
            
            if d2 < u2:
                dij  = math.sqrt(d2)
                fscl = decayFunction(dij,lowerBound,decayFactor,decayPower)
                if fscl*forceConstant > minimumForce:
                    out.append({"atoms":(bi,bj),"parameters": (dij,"RUBBER_FC*%f"%fscl)})
    return out


## SOME OTHER STUFF ##


#######################
## 8 # STRUCTURE I/O ##  -> @IO <-
#######################

#----+---------+
## A | PDB I/O |
#----+---------+

d2r = 3.14159265358979323846264338327950288/180

# Reformatting of lines in structure file                                     
pdbAtomLine = "ATOM  %5d %4s%4s %1s%4d%1s   %8.3f%8.3f%8.3f%6.2f%6.2f\n"        
pdbBoxLine  = "CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1           1\n"        


def pdbBoxString(box):
    # Box vectors
    u, v, w  = box[0:3], box[3:6], box[6:9]

    # Box vector lengths
    nu,nv,nw = [math.sqrt(norm2(i)) for i in (u,v,w)]

    # Box vector angles
    alpha = nv*nw == 0 and 90 or math.acos(cos_angle(v,w))/d2r
    beta  = nu*nw == 0 and 90 or math.acos(cos_angle(u,w))/d2r
    gamma = nu*nv == 0 and 90 or math.acos(cos_angle(u,v))/d2r

    return pdbBoxLine % (10*norm(u),10*norm(v),10*norm(w),alpha,beta,gamma)


def pdbAtom(a):
    ##01234567890123456789012345678901234567890123456789012345678901234567890123456789
    ##ATOM   2155 HH11 ARG C 203     116.140  48.800   6.280  1.00  0.00
    if a.startswith("TER"):
        return 0
    # NOTE: The 27th field of an ATOM line in the PDB definition can contain an
    #       insertion code. We shift that 20 bits and add it to the residue number
    #       to ensure that residue numbers will be unique.
    ## ===> atom name,       res name,        res id,                        chain,
    return (a[12:16].strip(),a[17:20].strip(),int(a[22:26])+(ord(a[26])<<20),a[21],
    ##            x,              y,              z       
            float(a[30:38]),float(a[38:46]),float(a[46:54]))


def pdbOut(atom,i=1):
    insc = atom[2]>>20
    resi = atom[2]-(insc<<20)
    pdbline = "ATOM  %5i  %-3s %3s%2s%4i%1s   %8.3f%8.3f%8.3f%6.2f%6.2f           %1s  \n"
    return pdbline%((i,atom[0][:3],atom[1],atom[3],resi,chr(insc)) + atom[4:] + (1,40,atom[0][0])) 


def isPdbAtom(a):    
    return a.startswith("ATOM") or (options["-hetatm"] and a.startswith("HETATM")) or a.startswith("TER")


def pdbBoxRead(a):
    fa, fb, fc, aa, ab, ac = [float(i) for i in a.split()[1:7]]
    ca, cb, cg, sg         = math.cos(d2r*aa), math.cos(d2r*ab), math.cos(d2r*ac) , math.sin(d2r*ac)
    wx, wy                 = 0.1*fc*cb, 0.1*fc*(ca-cb*cg)/sg
    wz                     = math.sqrt(0.01*fc*fc - wx*wx - wy*wy)
    return [0.1*fa, 0, 0, 0.1*fb*cg, 0.1*fb*sg, 0, wx, wy, wz]


# Function for splitting a PDB file in chains, based
# on chain identifiers and TER statements
def pdbChains(pdbAtomList):
    chain = []
    for atom in pdbAtomList:
	if not atom: # Was a "TER" statement
	    if chain:
		yield chain
	    else:
		logging.info("Skipping empty chain definition")
	    chain = [] 
            continue
        if not chain or chain[-1][3] == atom[3]:	
            chain.append(atom)
        else:
            yield chain
            chain = [atom]
    if chain:
	yield chain


# Simple PDB iterator
def pdbFrameIterator(streamIterator):  
    title, atoms, box = [], [], []
    for i in streamIterator:
        if i.startswith("ENDMDL"):
            yield "".join(title), atoms, box
            title, atoms, box = [], [], []            
        elif i.startswith("TITLE"):
            title.append(i)
        elif i.startswith("CRYST1"):
            box = pdbBoxRead(i)
        elif i.startswith("ATOM") or i.startswith("HETATM"):
            atoms.append(pdbAtom(i))
    if atoms:
        yield "".join(title), atoms, box


#----+---------+
## B | GRO I/O |
#----+---------+

groline = "%5d%-5s%5s%5d%8.3f%8.3f%8.3f\n"                                    

def groBoxRead(a):    
    b = [float(i) for i in a.split()] + 6*[0]             # Padding for rectangular boxes
    return b[0],b[3],b[4],b[5],b[1],b[6],b[7],b[8],b[2]   # Return full definition xx,xy,xz,yx,yy,yz,zx,zy,zz

def groAtom(a):
    #012345678901234567890123456789012345678901234567890
    #    1PRN      N    1   4.168  11.132   5.291
    ## ===> atom name,        res name,          res id,    chain,
    return (a[10:15].strip(), a[5:10].strip(),   int(a[:5]), " ", 
    ##               x,                 y,                 z       
            10*float(a[20:28]),10*float(a[28:36]),10*float(a[36:44]))

# Simple GRO iterator
def groFrameIterator(streamIterator):
    while True:
        try:
            title = streamIterator.next()
        except StopIteration:
            break
        natoms = streamIterator.next().strip()
        if not natoms:
            break
        natoms = int(natoms)
        atoms  = [groAtom(streamIterator.next())  for i in range(natoms)] 
        box    = groBoxRead(streamIterator.next())
        yield title, atoms, box


#----+-------------+
## C | GENERAL I/O |
#----+-------------+


# *NOTE*: This should probably be a CheckableStream class that
# reads in lines until either of a set of specified conditions
# is met, then setting the type and from thereon functioning as
# a normal stream.
def streamTag(stream):
    # Tag the stream with the type of structure file
    # If necessary, open the stream, taking care of 
    # opening using gzip for gzipped files

    # First check whether we have have an open stream or a file
    # If it's a file, check whether it's zipped and open it
    if type(stream) == str:
        if stream.endswith("gz"):
            logging.info('Read input structure from zipped file.')
            s = gzip.open(stream)
        else:
            logging.info('Read input structure from file.')
            s = open(stream)
    else:
        logging.info('Read input structure from command-line')
        s = stream

    # Read a few lines, but save them
    x = [s.readline(), s.readline()]
    if x[-1].strip().isdigit():
        # Must be a GRO file
        logging.info("Input structure is a GRO file. Chains will be labeled consecutively.")
        yield "GRO"
    else:
        # Must be a PDB file then
        # Could wind further to see if we encounter an "ATOM" record
        logging.info("Input structure is a PDB file.")
        yield "PDB"
    
    # Hand over the lines that were stored
    for i in x:
        yield i

    # Now give the rest of the lines from the stream
    for i in s:
        yield i


#----+-----------------+
## D | STRUCTURE STUFF |
#----+-----------------+


# This list allows to retrieve atoms based on the name or the index
# If standard, dictionary type indexing is used, only exact matches are
# returned. Alternatively, partial matching can be achieved by setting
# a second 'True' argument. 
class Residue(list):
    def __getitem__(self,tag): 
        if type(tag) == int:
            # Call the parent class __getitem__
            return list.__getitem__(self,tag)
        if type(tag) == str:
            for i in self:
                if i[0] == tag:
                    return i
            else:
                return 
        if tag[1]:
            return [i for i in self if tag[0] in i[0]] # Return partial matches
        else:
            return [i for i in self if i[0] == tag[0]] # Return exact matches only


def residues(atomList):
    residue = [atomList[0]]
    for atom in atomList[1:]:
        if (atom[1] == residue[-1][1] and # Residue name check
            atom[2] == residue[-1][2] and # Residue id check
            atom[3] == residue[-1][3]):   # Chain id check
            residue.append(atom)
        else:
            yield Residue(residue)
            residue = [atom]
    yield Residue(residue)


def residueDistance2(r1,r2):
    return min([distance2(i,j) for i in r1 for j in r2])


def breaks(residuelist,selection=("N","CA","C"),cutoff=2.5):
    # Extract backbone atoms coordinates
    bb = [[atom[4:] for atom in residue if atom[0] in selection] for residue in residuelist]

    # We cannot rely on some standard order for the backbone atoms.
    # Therefore breaks are inferred from the minimal distance between
    # backbone atoms from adjacent residues.
    return [ i+1 for i in range(len(bb)-1) if residueDistance2(bb[i],bb[i+1]) > cutoff]


def contacts(atoms,cutoff=5):
    rla = range(len(atoms))
    crd = [atom[4:] for atom in atoms]
    return [(i,j) for i in rla[:-1] for j in rla[i+1:] 
            if distance2(crd[i],crd[j]) < cutoff]


def check_merge(chains, m_list=[], l_list=[], ss_cutoff=0):
    chainIndex = range(len(chains))

    if 'all' in m_list:
        logging.info("All chains will be merged in a single moleculetype.")
        return chainIndex, [chainIndex]

    chainID = [chain.id for chain in chains]

    # Mark the combinations of chains that need to be merged
    merges = []
    if m_list:
        # Build a dictionary of chain IDs versus index
        # To give higher priority to top chains the lists are reversed 
        # before building the dictionary
        chainIndex.reverse()
        chainID.reverse()
        dct = dict(zip(chainID,chainIndex))
        chainIndex.reverse()
        # Convert chains in the merge_list to numeric, if necessary
        # NOTE The internal numbering is zero-based, while the 
        # command line chain indexing is one-based. We have to add
        # one to the number in the dictionary to bring it on par with
        # the numbering from the command line, but then from the 
        # result we need to subtract one again to make indexing 
        # zero-based
        merges = [[(i.isdigit() and int(i) or dct[i]+1)-1 for i in j] for j in m_list]
        for i in merges:
            i.sort()

    # Rearrange merge list to a list of pairs
    pairs = [(i[j],i[k]) for i in merges for j in range(len(i)-1) for k in range(j+1,len(i))]

    # Check each combination of chains for connections based on
    # ss-bridges, links and distance restraints
    for i in chainIndex[:-1]:
        for j in chainIndex[i+1:]:           
            if (i,j) in pairs:
                continue
            # Check whether any link links these two groups
            for a,b in l_list:                
                if ((a in chains[i] and b in chains[j]) or 
                    (a in chains[j] and b in chains[i])):
                    logging.info("Merging chains %d and %d to allow link %s"%(i+1,j+1,str((a,b))))
                    pairs.append( i<j and (i,j) or (j,i) )
                    break
            if (i,j) in pairs:
                continue
            # Check whether any cystine bond given links these two groups
            #for a,b in s_list:
            #    if ((a in chains[i] and b in chains[j]) or 
            #        (a in chains[j] and b in chains[i])):
            #        logging.info("Merging chains %d and %d to allow cystine bridge"%(i+1,j+1))
            #        pairs.append( i<j and (i,j) or (j,i) )
            #        break
            #if (i,j) in pairs:
            #    continue
            # Check for cystine bridges based on distance
            if not ss_cutoff:
                continue
            # Get SG atoms from cysteines from either chain
            # Check this pair of chains
            for cysA in chains[i]["CYS"]:
                for cysB in chains[j]["CYS"]:
                    d2 = distance2(cysA["SG"][4:7],cysB["SG"][4:7]) 
                    if d2 <= ss_cutoff:
                        logging.info("Found SS contact linking chains %d and %d (%f nm)"%(i+1,j+1,math.sqrt(d2)/10))
                        pairs.append((i,j))
                    break
                if (i,j) in pairs:
                    break

    # Sort the combinations
    pairs.sort(reverse=True)

    merges = []
    while pairs:
        merges.append(set([pairs[-1][0]]))
        for i in range(len(pairs)-1,-1,-1):
            if pairs[i][0] in merges[-1]:
                merges[-1].add(pairs.pop(i)[1])
            elif pairs[i][1] in merges[-1]:
                merges[-1].add(pairs.pop(i)[0])
    merges = [list(i) for i in merges]
    for i in merges:
        i.sort()

    order = [j for i in merges for j in i]

    if merges:
        logging.warning("Merging chains.")
        logging.warning("This may change the order of atoms and will change the number of topology files.")
        logging.info("Merges: " + ", ".join([str([j+1 for j in i]) for i in merges]))

    if len(merges) == 1 and len(merges[0]) > 1 and set(merges[0]) == set(chainIndex):
        logging.info("All chains will be merged in a single moleculetype")

    # Determine the order for writing; merged chains go first
    merges.extend([[j] for j in chainIndex if not j in order])
    order.extend([j for j in chainIndex if not j in order])

    return order, merges


## !! NOTE !! ##
## The chain class needs to be simplified by extracting things to separate functions/classes

class Chain:
    # Attributes defining a chain
    # When copying a chain, or slicing, the attributes in this list have to
    # be handled accordingly.
    _attributes = ("residues","sequence","seq","ss","ssclass","sstypes")

    def __init__(self,residuelist=[],name=None,multiscale=False):
        self.residues   = residuelist
        self._atoms     = [atom[:3] for residue in residuelist for atom in residue]
        self.sequence   = [residue[0][1] for residue in residuelist]
        # *NOTE*: Check for unknown residues and remove them if requested
        #         before proceeding.
        self.seq        = "".join([AA321.get(i,"x") for i in self.sequence])
        self.ss         = ""
        self.ssclass    = ""
        self.sstypes    = ""
        self.mapping    = []
        self.multiscale = multiscale

        # Unknown residues
        self.unknowns   = "x" in self.seq

        # Determine the type of chain
        self._type      = ""
        self.type()

        # Determine number of atoms
        self.natoms     = len(self._atoms) 

        # BREAKS: List of indices of residues where a new fragment starts
        # Only when polymeric (protein, DNA, RNA, ...)
        self.breaks     = self.type() in ("Protein","Nucleic") and breaks(self.residues) or []

        # LINKS:  List of pairs of pairs of indices of linked residues/atoms
        # This list is used for cysteine bridges and peptide bonds involving side chains
        # The list has items like ((#resi1, #atid1), (#resi2, #atid2))
        # When merging chains, the residue number needs ot be update, but the atom id
        # remains unchanged.
        # For the coarse grained system, it needs to be checked which beads the respective
        # atoms fall in, and bonded terms need to be added there.
        self.links      = []                   

        # Chain identifier; try to read from residue definition if no name is given
        self.id         = name or residuelist and residuelist[0][0][3] or ""

        # Container for coarse grained beads
        self._cg        = None
        
    def __len__(self):
        # Return the number of residues
        return len(self.seq)

    def __add__(self,other):
        newchain = Chain(name=self.id+"+"+other.id)
        # Combine the chain items that can be simply added
        for attr in self._attributes:
            setattr(newchain, attr, getattr(self,attr) + getattr(other,attr))
        # Set chain items, shifting the residue numbers
        shift  = len(self)
        newchain.breaks     = self.breaks + [shift] + [i+shift for i in other.breaks]
        newchain.links      = self.links + [((i[0]+shift,i[1]),(j[0]+shift,j[1])) for i,j in other.links]
        newchain.natoms     = len(newchain.atoms())
        newchain.multiscale = self.multiscale or other.multiscale
        # Return the merged chain
        return newchain

    def __eq__(self,other):
        return (self.seq        == other.seq    and 
                self.ss         == other.ss     and
                self.breaks     == other.breaks and
                self.links      == other.links  and
                self.multiscale == other.multiscale)

    # Extract a residue by number or the list of residues of a given type
    # This facilitates selecting residues for links, like chain["CYS"]
    def __getitem__(self,other):
        if type(other) == str:
            if not other in self.sequence:
                return []
            return [i for i in self.residues if i[0][1] == other]
        elif type(other) == tuple:
            # This functionality is set up for links
            # between coarse grained beads. So these are
            # checked first,
            for i in self.cg():
                if other == i[:4]:
                    return i
            else:
                for i in self.atoms():
                    if other[:3] == i[:3]:
                        return i
                else:
                    return []
        return self.sequence[other]

    # Extract a piece of a chain as a new chain
    def __getslice__(self,i,j):
	newchain = Chain(name=self.id)        
        # Extract the slices from all lists
        for attr in self._attributes:           
            setattr(newchain, attr, getattr(self,attr)[i:j])
        newchain.multiscale = self.multiscale
        newchain.natoms     = len(newchain.atoms())
        newchain.type()
        # Return the chain slice
        return newchain

    def _contains(self,atomlist,atom):
        atnm,resn,resi,chn = atom
        
        # If the chain does not match, bail out
        if chn != self.id:
            return False

        # Check if the whole tuple is in
        if atnm and resn and resi:
            return (atnm,resn,resi) in self.atoms()

        # Fetch atoms with matching residue id
        match = (not resi) and atomlist or [j for j in atomlist if j[2] == resi]
        if not match:
            return False

        # Select atoms with matching residue name
        match = (not resn) and match or [j for j in match if j[1] == resn]
        if not match:
            return False

        # Check whether the atom is given and listed
        if not atnm or [j for j in match if j[0] == atnm]:
            return True

        # It just is not in the list!
        return False

    def __contains__(self,other):
        return self._contains(self.atoms(),other) or self._contains(self.cg(),other)

    def __hash__(self):
        return id(self)

    def atoms(self):
        if not self._atoms:
            self._atoms = [atom[:3] for residue in self.residues for atom in residue]
        return self._atoms

    # Split a chain based on residue types; each subchain can have only one type
    def split(self):
        chains = []
        chainStart = 0
        for i in range(len(self.sequence)-1):
            if residueTypes.get(self.sequence[i],"Unknown") != residueTypes.get(self.sequence[i+1],"Unknown"):
                chains.append(self[chainStart:i+1])
                chainStart = i+1
        if chains:
            logging.debug('Splitting chain %s in %s chains'%(self.id,len(chains)+1))
        return chains + [self[chainStart:]]

    def getname(self,basename=None):
        name = []
        if basename:                      name.append(basename)
        if self.type() and not basename:  name.append(self.type())
        if type(self.id) == int:
            name.append(chr(64+self.id))
        elif self.id.strip():               
            name.append(str(self.id))
        return "_".join(name)

    def set_ss(self,ss,source="self"):
        if len(ss) == 1:
            self.ss = len(self)*ss
        else:
            self.ss = ss
        # Infer the Martini backbone secondary structure types
        self.ssclass, self.sstypes = ssClassification(self.ss,source)

    def dss(self,method=None,executable=None):
        # The method should take a list of atoms and return a 
        # string of secondary structure classifications       
        if self.type() == "Protein":
            if method:
                atomlist = [atom for residue in self.residues for atom in residue]
                self.set_ss(ssDetermination[method](self,atomlist,executable),source=method)
            else:
                self.set_ss(len(self)*"C")
        else:
            self.ss = len(self.sequence)*"-" 
        return self.ss

    def type(self,other=None):
        if other:
            self._type = other
        elif not self._type and len(self):
            # Determine the type of chain
            self._type     = set([residueTypes.get(i,"Unknown") for i in set(self.sequence)])
            self._type     = len(self._type) > 1 and "Mixed" or list(self._type)[0]
        return self._type


    # The following (at least the greater part of it) should be made a separate function, put under "MAPPING"

    def cg(self,force=False):
        # Generate the coarse grained structure
        # Set the b-factor field to something that reflects the secondary structure
        # 
        # TODO: Add CONECT records (maybe add option -conect?)
        
        # If the coarse grained structure is set already, just return, 
        # unless regeneration is forced.
        if self._cg and not force:
            return self._cg

        self._cg = []
        atid     = 1
        bb       = [1]
        fail     = False
        for residue,rss in zip(self.residues,self.sstypes):
            if residue[0][1] in ("SOL","HOH","TIP"):
                continue
            if not residue[0][1] in CG.mapping.keys():
                logging.warning("Skipped unknown residue %s\n"%residue[0][1])
                continue
            # Get the mapping for this residue
            # CG.map returns bead coordinates and mapped atoms
            # This will fail if there are (too many) atoms missing, which is
            # only problematic if a mapped structure is written; the topology
            # is inferred from the sequence. So this is the best place to raise 
            # an error
            try:
                beads, ids = CG.map(residue,ca2bb=Elnedyn)
                beads      = zip(CG.names[residue[0][1]],beads,ids)
            except ValueError:
                logging.error("Too many atoms missing from residue %s %d%s:",residue[0][1],residue[0][2],residue[0][3])
                logging.error(repr([ i[0] for i in residue ]))
                fail = True

            for name,(x,y,z),ids in beads:                    
                # Add the bead with coordinates and secondary structure id to the list
                self._cg.append((name,residue[0][1][:3],residue[0][2],residue[0][3],x,y,z,ss2num[rss]))
                # Add the ids to the list, after converting them to indices to the list of atoms
                self.mapping.append([atid+i for i in ids])

            # Increment the atom id; This pertains to the atoms that are included in the output.
            atid += len(residue)

            # Keep track of the numbers for CONECTing
            bb.append(bb[-1]+len(beads))

        if fail:
            logging.error("Unable to generate coarse grained structure due to missing atoms.")
            sys.exit(1)

        return self._cg

    def conect(self):
        # Return pairs of numbers that should be CONECTed
        # First extract the backbone IDs
        cg = self.cg()
        bb = [i+1 for i,j in zip(range(len(cg)),cg) if j[0] == "BB"]
        bb = zip(bb,bb[1:]+[len(bb)])
        # Set the backbone CONECTs (check whether the distance is consistent with binding)        
        conect = [(i,j) for i,j in bb[:-1] if distance2(cg[i-1][4:7],cg[j-1][4:7]) < 14]
        # Now add CONECTs for sidechains
        for i,j in bb:
            nsc = j-i-1

            
##################
## 7 # TOPOLOGY ##  -> @TOP <-
##################


# This is a generic class for Topology Bonded Type definitions
# Clement, it'd be nice to have proper docstrings for classes :)
class Bonded:
    atoms, type, parameters, comments, category = [], -1, [], [], None 

    # The init method is generic to the bonded types,
    # but may call the set method if atoms are given
    # as (ID, ResidueName, SecondaryStructure) tuples
    # The set method is specific to the different types.
    def __init__(self,other=None,**kwargs):
        if other:
            # If other is given, then copy the attributes
            # if it is of the same class or set the 
            # attributes according to the key names if
            # it is a dictionary
            if other.__class__ == self.__class__:
                for attr in dir(other):
                    if not attr[0] == "_":
                        setattr(self,attr,getattr(other,attr))
            elif type(other) == dict:
                for attr in other.keys():
                    setattr(self,attr,other[attr])
            elif type(other) in (list,tuple):
                self.atoms = other

        # For every item in the kwargs keys, set the attribute
        # with the same name. This can be used to specify the 
        # attributes directly or to override attributes 
        # copied from the 'other' argument.
        for key in kwargs:
            setattr(self,key,kwargs[key])
        
        # If atoms are given as tuples of
        # (ID, ResidueName[, SecondaryStructure])
        # then determine the corresponding parameters 
        # from the lists above
        if self.atoms and type(self.atoms[0]) == tuple:
            self.set(self.atoms,**kwargs)          

    def __nonzero__(self):
        return bool(self.atoms) 

    def __str__(self):
        if not self.atoms or not self.parameters:
            return ""
        s = ["%5d" % i for i in self.atoms]
        s.append(" %5d " % self.type)
        # Print integers and floats in proper format and neglect None terms
        s.extend([formatString(i) for i in self.parameters if i != None])
        if self.comments:
            s.append(';')
            if type(self.comments) == str:
                s.append(self.comments)
            else:
                s.extend([str(i) for i in self.comments])
        return " ".join(s)

    def __iadd__(self,num):
        self.atoms = [i+int(num) for i in self.atoms]
        return self

    def __add__(self,num):
        out  = self.__class__(self)
        out += num
        return out

    def __eq__(self,other):
        if type(other) in (list,tuple):
            return self.atoms == other
        else:
            return self.atoms == other.atoms and self.type == other.type and self.parameters == other.parameters

    # This function needs to be overridden for descendents
    def set(self,atoms,**kwargs):
        pass


# The set method of this class will look up parameters for backbone beads
# Side chain bonds ought to be set directly, using the constructor
# providing atom numbers, bond type, and parameters
# Constraints are bonds with kb = None, which can be extracted 
# using the category
class Bond(Bonded):
    def set(self,atoms,**kwargs):
        ids,r,ss,ca     = zip(*atoms)     
        self.atoms      = ids
        self.type       = 1
        self.positionCa = ca
        self.comments   = "%s(%s)-%s(%s)" % (r[0],ss[0],r[1],ss[1])
        # The category can be used to keep bonds sorted
        self.category   = kwargs.get("category")

        # Retrieve parameters for each residue from table defined above
        b1 = bbBondDictS.get(r[0],bbBondDictD).get(ss[0],bbBondDictD.get(ss[0]))              
        b2 = bbBondDictS.get(r[1],bbBondDictD).get(ss[1],bbBondDictD.get(ss[1]))              
        # Determine which parameters to use for the bond
        self.parameters = ( (b1[0]+b2[0])/2, min(b1[1],b2[1]) )                               

        # Elnedyn takes bond length from structure.
        # Temperary construct, need something with dictionary like for choice of dssp
        if Elnedyn:
            self.parameters = ( math.sqrt(distance2(ca[0],ca[1]))/10., 150000 )

    # Overriding __str__ method to suppress printing of bonds with Fc of 0
    def __str__(self):
        if len(self.parameters) > 1 and self.parameters[1] == 0:
            return ""
        return Bonded.__str__(self)


# Similar to the preceding class
class Angle(Bonded):
    def set(self,atoms,**kwargs):
        ids,r,ss,ca     = zip(*atoms)
        self.atoms      = ids
        self.type       = 2
        self.positionCa = ca
        self.comments   = "%s(%s)-%s(%s)-%s(%s)" % (r[0],ss[0],r[1],ss[1],r[2],ss[2])
        self.category   = kwargs.get("category")
        
        # PRO in helices is dominant
        if r[1] == "PRO" and ss[1] in "H123":
            self.parameters = bbAngleDictS["PRO"].get(ss[1])
        else:
            # Retrieve parameters for each residue from table defined above
            a = [ bbAngleDictS.get(r[0],bbAngleDictD).get(ss[0],bbAngleDictD.get(ss[0])),        
                  bbAngleDictS.get(r[1],bbAngleDictD).get(ss[1],bbAngleDictD.get(ss[1])),        
                  bbAngleDictS.get(r[2],bbAngleDictD).get(ss[2],bbAngleDictD.get(ss[2])) ]       

            # Sort according to force constant
            a.sort(key=lambda i: (i[1],i[0]))

            # This selects the set with the smallest force constant and the smallest angle
            self.parameters = a[0]

            # To select the set with the smallest force constant having the largest angles,
            # uncomment the following lines
            #if a[0][1] != a[1][1]:
            #    self.parameters = a[0]
            #elif a[1][1] != a[2][1]:
            #    self.parameters = a[1]
            #else:
            #    self.parameters = a[2]
        # Elnedyn takes angles for structure, with fc=40
        # The way Elnedyn is selected has to change,
        # probably with a dictionary comparable to the dssp method is selected
        if Elnedyn:
            angle = math.acos(cos_angle([i-j for i,j in zip(ca[0],ca[1])],[i-j for i,j in zip(ca[2],ca[1])]))/d2r 
            self.parameters = (angle,40)
        

# Similar to the preceding class
class Dihedral(Bonded):
    def set(self,atoms,**kwargs):
        ids,r,ss,ca     = zip(*atoms)
        self.atoms      = ids
        self.type       = 1
        self.positionCa = ca
        self.comments   = "%s(%s)-%s(%s)-%s(%s)-%s(%s)" % (r[0],ss[0],r[1],ss[1],r[2],ss[2],r[3],ss[3])
        self.category   = kwargs.get("category")

        if ss == 'FFFF':
            # Collagen
            self.parameters = bbDihedDictD['F']
        elif ss == 'EEEE' and ExtendedDihedrals:
            # Use dihedrals
            self.parameters = bbDihedDictD['E']
        elif set(ss).issubset("H123"):
            # Helix
            self.parameters = bbDihedDictD['H']
        else:
            self.parameters = None


# This list allows to retrieve Bonded class items based on the category
# If standard, dictionary type indexing is used, only exact matches are
# returned. Alternatively, partial matching can be achieved by setting
# a second 'True' argument. 
class CategorizedList(list):
    def __getitem__(self,tag): 
        if type(tag) == int:
            # Call the parent class __getitem__
            return list.__getitem__(self,tag)

        if type(tag) == str:
            return [i for i in self if i.category == tag]

        if tag[1]:
            return [i for i in self if tag[0] in i.category]
        else:
            return [i for i in self if i.category == tag[0]]


class Topology:
    def __init__(self,other=None,name="",multi=False):
        self.name        = ''
        self.nrexcl      = 1
        self.atoms       = CategorizedList()
        self.bonds       = CategorizedList()
        self.angles      = CategorizedList()
        self.dihedrals   = CategorizedList()
        self.impropers   = CategorizedList()
        self.constraints = CategorizedList()
        self.posres      = CategorizedList()
        self.sequence    = []
        self.secstruc    = ""
        # Okay, this is sort of funny; we will add a 
        #   #define mapping virtual_sitesn
        # to the topology file, followed by a header
        #   [ mapping ]
        self.mapping     = []
        # For multiscaling we have to keep track of the number of 
        # real atoms that correspond to the beads in the topology
        self.natoms      = 0        
        self.multiscale  = multi

        if not other:
            # Returning an empty instance
            return
        elif isinstance(other,Topology):
            for attrib in ["atoms","bonds","angles","dihedrals","impropers","constraints","posres"]:
                setattr(self,attrib,getattr(other,attrib,[]))
        elif isinstance(other,Chain):
            if other.type() == "Protein":
                self.fromAminoAcidSequence(other)
            elif other.type == "Nucleic":
                # Currently there are no Martini Nucleic Acids
                self.fromNucleicAcidSequence(other)
            elif other.type == "Mixed":
                logging.warning('Mixed Amino Acid /Nucleic Acid chains are not yet implemented')
                # How can you have a mixed chain?
                # Well, you could get a covalently bound lipid or piece of DNA to a protein :S
                # But how to deal with that?
                # Probably one should separate the chains into blocks of specified type,
                # determine the locations of links, then construct the topologies for the 
                # blocks and combine them according to the links.
                pass
            else:
                # This chain should not be polymeric, but a collection of molecules
                # For each unique residue type fetch the proper moleculetype 
                self.fromMoleculeList(other)
        if name:
            self.name = name

    def __iadd__(self,other):
        if not isinstance(other,Topology):
            other = Topology(other)
        shift     = len(self.atoms)
        last      = self.atoms[-1]
        atoms     = zip(*other.atoms)
        atoms[0]  = [i+shift for i in atoms[0]]   # Update atom numbers
        atoms[2]  = [i+last[2] for i in atoms[2]] # Update residue numbers
        atoms[5]  = [i+last[5] for i in atoms[5]] # Update charge group numbers
        self.atoms.extend(zip(*atoms))
        for attrib in ["bonds","angles","dihedrals","impropers","constraints"]:
            getattr(self,attrib).extend([source+shift for source in getattr(other,attrib)])
        return self

    def __add__(self,other):
        out = Topology(self)
        if not isinstance(other,Topology):
            other = Topology(other)
        out += other
        return out

    def __str__(self):
	if self.multiscale:
             out  = [ '; MARTINI 2.1 Multiscale virtual sites topology section for "%s"' %self.name ]
        else:
             out  = [ '; MARTINI 2.1 Coarse Grained topology file for "%s"' %self.name ]
        if self.sequence:
            out += [
                '; Sequence:',
                '; ' + ''.join([ AA321.get(AA) for AA in self.sequence ]),
                '; Secondary Structure:',
                '; ' + self.secstruc,
                ]
        
        # Do not print a molecule name when multiscaling
        # In that case, the topology created here needs to be appended
        # at the end of an atomistic moleculetype
        if not self.multiscale:
            out += [ '\n[ moleculetype ]',
                     '; Name         Exclusions',  
                     '%-15s %3d' % (self.name,self.nrexcl)]

        out.append('\n[ atoms ]')

        out.extend(['%5d %5s %5d %5s %5s %5d %7.4f ; %s'%i for i in self.atoms])

        if self.multiscale:
            out += ['\n;\n; Coarse grained to atomistic mapping\n;',
                    '#define mapping virtual_sitesn',
                    '[ mapping ]']
            for i,j in self.mapping:
                out.append( ("%5d     2 "%i)+" ".join(["%5d"%k for k in j]) )
            
            logging.info('Created virtual sites section for multiscaled topology')
            return "\n".join(out)

        # Bonds in order: backbone, backbone-sidechain, sidechain, short elastic, long elastic        
        out.append("\n[ bonds ]")       
        # Backbone-backbone
        bonds = [str(i) for i in self.bonds["BB"]]
        if bonds:
            out.append("; Backbone bonds")
            out.extend(bonds)
        # Rubber Bands
        bonds = [str(i) for i in self.bonds["Rubber",True]]
        if bonds:
            # Add a CPP style directive to allow control over the elastic network
            out.append("#ifdef RUBBER_BANDS")
            out.append("#ifndef RUBBER_FC\n#define RUBBER_FC %f\n#endif"%ElasticMaximumForce)
            out.extend(bonds)
            out.append("#endif")
        # Backbone-Sidechain/Sidechain-Sidechain
        bonds = [str(i) for i in self.bonds["SC"]]
        if bonds:
            out.append("; Sidechain bonds")
            out.extend(bonds)
        # Short elastic/Long elastic
        bonds = [str(i) for i in self.bonds["Elastic short"]]
        if bonds:
            out.append("; Short elastic bonds for extended regions")
            out.extend(bonds)
        bonds = [str(i) for i in self.bonds["Elastic long"]]
        if bonds:
            out.append("; Long elastic bonds for extended regions")
            out.extend(bonds)
        # Cystine bridges
        bonds = [str(i) for i in self.bonds["Cystine"]]
        if bonds:
            out.append("; Cystine bridges")
            out.extend(bonds)
        # Other links
        bonds = [str(i) for i in self.bonds["Link"]]
        if bonds:
            out.append("; Link")
            out.extend(bonds)

        # Constraints
        out.append("\n[ constraints ]")
        out.extend([str(i) for i in self.bonds["Constraint"]])

        # Angles
        out.append("\n[ angles ]")
        out.append("; Backbone angles")
        out.extend([str(i) for i in self.angles["BBB"]])
        out.append("; Backbone-sidechain angles")
        out.extend([str(i) for i in self.angles["BBS"]])
        out.append("; Sidechain angles")
        out.extend([str(i) for i in self.angles["SC"]])

        # Dihedrals
        out.append("\n[ dihedrals ]")
        out.append("; Backbone dihedrals")
        out.extend([str(i) for i in self.dihedrals["BBBB"] if i.parameters])
        out.append("; Sidechain improper dihedrals")
        out.extend([str(i) for i in self.dihedrals["SC"] if i.parameters])

        # Postition Restraints
        if self.posres:
            out.append("\n#ifdef POSRES")
            out.append("#ifndef POSRES_FC\n#define POSRES_FC %.2f\n#endif"%PosResForce)
            out.append(" [ position_restraints ]")
            out.extend(['  %5d    1    POSRES_FC    POSRES_FC    POSRES_FC'%i for i in self.posres])
            out.append("#endif")

        logging.info('Created coarsegrained topology')
        return "\n".join(out)

  
    # The sequence function can be used to generate the topology for 
    # a sequence :) either given as sequence or as chain
    def fromAminoAcidSequence(self,sequence,secstruc=None,links=None,breaks=None,
                              mapping=None,rubber=False,multi=False):
        # Shift for the atom numbers of the atomistic part in a chain 
        # that is being multiscaled
        shift = 0
        # First check if we get a sequence or a Chain instance
        if isinstance(sequence, Chain):
            chain         = sequence
            links         = chain.links
            breaks        = chain.breaks
            # If the mapping is not specified, the actual mapping is taken,
            # used to construct the coarse grained system from the atomistic one.
            # The function argument "mapping" could be used to use a default 
            # mapping scheme in stead, like the mapping for the GROMOS96 force field.
            mapping = mapping or chain.mapping
            multi   = multi   or chain.multiscale
            self.secstruc = chain.sstypes or len(chain)*"C"
            self.sequence = chain.sequence
            # If anything hints towards multiscaling, do multiscaling
            self.multiscale = self.multiscale or chain.multiscale or multi
            if self.multiscale:
                shift        = self.natoms
                self.natoms += len(chain.atoms())
        elif not secstruc:
            # If no secondary structure is provided, set all to coil
            chain         = None
            self.secstruc = len(self.sequence)*"C"
        else:
            # If a secondary structure is provided, use that. chain is none.
            chain         = None
            self.secstruc = secstruc

        logging.debug(self.secstruc)
        logging.debug(self.sequence)

        # Fetch the sidechains
        # Pad with empty lists for atoms, bonds, angles 
        # and dihedrals, and take the first four lists out
        # This will avoid errors for residues for which 
        # these are not defined.
        sc = [(sidechains[res]+4*[[]])[:4] for res in self.sequence]

        # ID of the first atom/residue
        # The atom number and residue number follow from the last 
        # atom c.q. residue id in the list processed in the topology
        # thus far. In the case of multiscaling, the real atoms need 
        # also be accounted for.
        startAtom = self.natoms + 1 
        startResi = self.atoms and self.atoms[-1][2]+1 or 1

        # Backbone bead atom IDs
        bbid = [startAtom]
        for i in zip(*sc)[0]:
            bbid.append(bbid[-1]+len(i)+1)

        # Calpha positions, to get Elnedyn BBB-angles and BB-bond lengths
        positionCa = [residue[1][4:] for residue in chain.residues]

        # Residue numbers for this moleculetype topology
        resid = range(startResi,startResi+len(self.sequence))     
        
        # This contains the information for deriving backbone bead types,
        # bb bond types, bbb/bbs angle types, and bbbb dihedral types and
        # Elnedyn BB-bondlength BBB-angles
        seqss = zip(bbid,self.sequence,self.secstruc,positionCa)

        # Fetch the proper backbone beads          
        bb = [bbGetBead(res,typ) for num,res,typ,Ca in seqss]

        # If termini need to be charged, change the bead types
        if not NeutralTermini:
            bb[0]  ="Qd"
            bb[-1] = "Qa"

        # If breaks need to be charged, change the bead types 
        if ChargesAtBreaks:
            for i in breaks:
                bb[i]   = "Qd"
                bb[i-1] = "Qa"

        # For backbone parameters, iterate over fragments, inferred from breaks
        for i,j in zip([0]+breaks,breaks+[-1]):
            # Extract the fragment
            frg = j==-1 and seqss[i:] or seqss[i:j]

            # Iterate over backbone bonds
            # For Elnedyn it should get those from the structure (with fc=150000)
            self.bonds.extend([Bond(pair,category="BB") for pair in zip(frg,frg[1:])])

            # Iterate over backbone angles
            # Don't skip the first and last residue in the fragment
            # For Elnedyn it should get those from the structure (with fc=40)
            self.angles.extend([Angle(triple,category="BBB") for triple in zip(frg,frg[1:],frg[2:])])

            # Get backbone quadruples
            quadruples = zip(frg,frg[1:],frg[2:],frg[3:])

            # No i-1,i,i+1,i+2 interactions defined for Elnedyn
            # Once again, this construction works, but should be made in to a more sturdy construct,
            # in order to be more flexible
            if not Elnedyn:
                # Process dihedrals
                for q in quadruples:
                    id,rn,ss,ca = zip(*q)
                    # Maybe do local elastic networks
                    if ss == ("E","E","E","E") and not ExtendedDihedrals:
                        # This one may already be listed as the 2-4 bond of a previous one
                        if not (id[0],id[2]) in self.bonds:
                            self.bonds.append(Bond(atoms=(id[0],id[2]),parameters=ebonds['short'],type=1,
                                                   comments="%s(%s)-%s(%s) 1-3"%(rn[0],id[0],rn[2],id[2]),
                                                   category="Elastic short"))
                        self.bonds.append(Bond(atoms=(id[1],id[3]),parameters=ebonds['short'],type=1,
                                               comments="%s(%s)-%s(%s) 2-4"%(rn[1],id[1],rn[3],id[3]),
                                               category="Elastic short"))
                        self.bonds.append(Bond(atoms=(id[0],id[3]),parameters=ebonds['long'],type=1,
                                               comments="%s(%s)-%s(%s) 1-4"%(rn[0],id[0],rn[3],id[3]),
                                               category="Elastic long"))
                    else:
                        # Since dihedrals can return None, we first collect them separately and then
                        # add the non-None ones to the list
                        dihed = Dihedral(q,category="BBBB")
                        if dihed:
                            self.dihedrals.append(dihed)

            # Elnedyn does not use backbone-backbone-angles
            # This has to be put away in a nice way, so the script is more extendable to other FF's
            if not Elnedyn:
                # Backbone-Backbone-Sidechain angles
                # If the first residue has a sidechain, we take SBB, otherwise we skip it
                # For other sidechains, we 'just' take BBS
                if len(frg) > 1 and frg[1][0]-frg[0][0] > 1:
                    self.angles.append(Bond(atoms=(frg[0][0]+1,frg[0][0],frg[1][0]),parameters=bbsangle,type=2,
                                            comments="%s(%s)-%s(%s) SBB"%(frg[0][1],frg[0][2],frg[1][1],frg[1][2]),
                                            category="BBS"))
    
                # Start from first residue: connects sidechain of second residue
                for (ai,ni,si,ci),(aj,nj,sj,cj),s in zip(frg[0:],frg[1:],sc[1:]):
                    if s[0]:
                        self.angles.append(Bond(atoms=(ai,aj,aj+1),parameters=bbsangle,type=2,
                                                comments="%s(%s)-%s(%s) SBB"%(ni,si,nj,sj),
                                                category="BBS"))

        # Now do the atom list, and take the sidechains along
        #
        # AtomID AtomType ResidueID ResidueName AtomName ChargeGroup Charge ; Comments
        # 
        counter,atid  = 0,startAtom
        for resi,resname,bbb,sidechn,ss in zip(resid,self.sequence,bb,sc,self.secstruc):
            scatoms, bon_par, ang_par, dih_par = sidechn

            # Side chain bonded terms
            # Collect bond, angle and dihedral connectivity
            bon_con,ang_con,dih_con = (connectivity[len(scatoms)]+3*[[]])[:3]

            # Match connectivity with parameters (no need to do for dihedrals)
            # This might have to change if we incorporate Elnedyn: all parameters are given.
            #if bon_con: bon_par = [bon_par[0]] + [bon_par[1] for i in range(len(bon_con)) if i >0 ]
            #if ang_con: ang_par = (2*ang_par)[:len(ang_con)]

            # Side Chain Bonds/Constraints
            for atids,par in zip(bon_con,bon_par):
                if par[1] == None:
                    self.bonds.append(Bond(atoms=atids,parameters=[par[0]],type=1,
                                           comments=resname,category="Constraint"))
                else:
                    self.bonds.append(Bond(atoms=atids,parameters=par,type=1,
                                           comments=resname,category="SC"))
                # Shift the atom numbers
                self.bonds[-1] += atid

            # Side Chain Angles
            for atids,par in zip(ang_con,ang_par):
                self.angles.append(Angle(atoms=atids,parameters=par,type=2,
                                         comments=resname,category="SC"))
                # Shift the atom numbers
                self.angles[-1] += atid

            # Side Chain Dihedrals
            for atids,par in zip(dih_con,dih_par):
                self.dihedrals.append(Dihedral(atoms=atids,parameters=par,type=2,
                                               comments=resname,category="SC"))
                # Shift the atom numbers
                self.dihedrals[-1] += atid

            # All residue atoms
            for atype,aname in zip([bbb]+list(scatoms),CG.residue_bead_names):
                if self.multiscale:
                    atype,aname = "v"+atype,"v"+aname
                self.atoms.append((atid,atype,resi,resname,aname,atid,charges.get(atype,0),ss))
                # Doing this here save going over all the atoms onesmore.
                # Generate position restraints for all atoms or Backbone beads only.
                if 'all' in PosRes:
                    self.posres.append((atid)) 
                elif aname in PosRes:
                    self.posres.append((atid))
                if mapping:
                    self.mapping.append((atid,[i+shift for i in mapping[counter]]))
                atid    += 1
                counter += 1

        # The rubber bands are best applied outside of the chain class, as that gives
        # more control when chains need to be merged. The possibility to do it on the 
        # chain level is retained to allow building a complete chain topology in 
        # a straightforward manner after importing this script as module.
        if rubber and chain:
            rubberList = rubberBands(
                [(i[0],j[4:7]) for i,j in zip(self.atoms,chain.cg()) if i[4] in ElasticBeads],
                ElasticLowerBound,ElasticUpperBound,
                ElasticDecayFactor,ElasticDecayPower,
                ElasticMaximumForce,ElasticMinimumForce)
            self.bonds.extend([Bond(i,type=6,category="Rubber band") for i in rubberList])
        
        # Note the equivalent of atomistic atoms that have been processed 
        if chain and self.multiscale:
            self.natoms += len(chain.atoms())

    def fromNucleicAcidSequence(self,other):
        logging.warning('Nucleic Acid parameters are not available in MARTINI. Maybe *you* should create them?')
        pass

    def fromMoleculeList(self,other):
        pass

##  |||               |||  ##
##  ||| SERIOUS STUFF |||  ##
##  VVV               VVV  ##


#############
## 8 # MAIN #  -> @MAIN <-
#############


# Check whether to read from a gro/pdb file or from stdin
# We use an iterator to wrap around the stream to allow
# inferring the file type, without consuming lines already
inStream = streamTag(options["-f"] and options["-f"].value or sys.stdin)


# The streamTag iterator first yields the file type, which 
# is used to specify the function for reading frames
fileType = inStream.next()
if fileType == "GRO":
    frameIterator = groFrameIterator
else:
    frameIterator = pdbFrameIterator


## ITERATE OVER FRAMES IN STRUCTURE FILE ##

# Now iterate over the frames in the stream
# This should become a StructureFile class with a nice .next method
model     = 1
cgOutPDB  = None
ssTotal   = []
cysteines = []
for title,atoms,box in frameIterator(inStream):

    if fileType == "PDB":
        # The PDB file can have chains, in which case we list and process them specifically
        # TER statements are interpreted as chain separators
        # A chain may have breaks in which case the breaking residues are flagged
        chains = [ Chain([i for i in residues(chain)]) for chain in pdbChains(atoms) ]        
    else:
        # The GRO file does not define chains. Here breaks in the backbone are
        # interpreted as chain separators. 
        residuelist = [residue for residue in residues(atoms)]
        # The breaks are indices to residues
        broken = breaks(residuelist)
        # Reorder, such that each chain is specified with (i,j,k)
        # where i and j are the start and end of the chain, and 
        # k is a chain identifier
        chains = zip([0]+broken,broken+[len(residuelist)],range(len(broken)+1))
        chains = [ Chain(residuelist[i:j],name=chr(65+k)) for i,j,k in chains ]

    for chain in chains:
        chain.multiscale = "all" in multi or chain.id in multi

    # Check the chain identifiers
    if model == 1 and len(chains) != len(set([i.id for i in chains])):
        # Ending down here means that non-consecutive blocks of atoms in the 
        # PDB file have the same chain ID. The warning pertains to PDB files only, 
        # since chains from GRO files get a unique chain identifier assigned.
        logging.warning("Several chains have identical chain identifiers in the PDB file.")

    # Check if chains are of mixed type. If so, split them.
    # Note that in some cases HETATM residues are part of a 
    # chain. This will get problematic. But we cannot cover
    # all, probably.
    if not MixedChains:
        demixedChains = []
        for chain in chains:
            demixedChains.extend(chain.split())
        chains = demixedChains

    n = 1
    logging.info("Found %d chains:"%len(chains))
    for chain in chains:
        logging.info("  %2d:   %s (%s), %d atoms in %d residues."%(n,chain.id,chain.type(),chain.natoms,len(chain)))
        n += 1

    # Check all chains
    keep = []
    for chain in chains:
        if chain.type() == "Water":
            logging.info("Removing %d water molecules (chain %s)."%(len(chain),chain.id))
        elif chain.type() in ("Protein","Nucleic"):
            keep.append(chain)
        elif RetainHETATM:
            keep.append(chain)
        else:
            logging.info("Removing HETATM chain %s consisting of %d residues."%(chain.id,len(chain)))
    chains = keep

    
    # Check which chains need merging
    if model == 1:
        order, merge = check_merge(chains, mergeList, linkList, CystineCheckBonds and CystineMaxDist2)


    # Get the total length of the sequence
    seqlength = sum([len(chain) for chain in chains])
    logging.info('Total size of the system: %s residues.'%seqlength)


    ## SECONDARY STRUCTURE

    ss = '' 
    if Collagen:
        for chain in chains:
            chain.set_ss("F")
            ss += chain.ss
    elif options["-ss"]:
        # If the string given for the sequence consists strictly of upper case letters
        # and does not appear to be a file, assume it is the secondary structure
        # Is that safe or silly?
        if (options["-ss"].value.isalnum() and   
            options["-ss"].value.isupper() and  
            not os.path.exists(options["-ss"].value)):
            ss = options["-ss"].value
            logging.info('Secondary structure read from command-line:\n'+ss)
        else:
            # There ought to be a file with the name specified
            ssfile = [ i.strip() for i in open(options["-ss"].value) ]

            # Try to read the file as a Gromacs Secondary Structure Dump
            # Those have an integer as first line
            if ssfile[0].isdigit():
                logging.info('Will read secondary structure from file (assuming Gromacs ssdump).')
                ss = "".join([ i for i in ssfile[1:] ])
            else:
                # Get the secondary structure type from DSSP output
                logging.info('Will read secondary structure from file (assuming DSSP output).')
                pss = re.compile(r"^([ 0-9]{4}[0-9]){2}")
                ss  = "".join([i[16] for i in open(options["-ss"].value) if re.match(pss,i)])        
        
        # Now set the secondary structure for each of the chains
        sstmp = ss
        for chain in chains:
            ln = min(len(sstmp),len(chain)) 
            chain.set_ss(sstmp[:ln])
            sstmp = ss[:ln]                         
    else:
        if options["-dssp"]:
            method, executable = "dssp", options["-dssp"].value
        elif options["-pymol"]:
            method, executable = "pymol", options["-pymol"].value
        else:
            logging.warning("No secondary structure or determination method speficied. Protein chains will be set to 'COIL'.")
            method, executable = None, None

        for chain in chains:
            ss += chain.dss(method, executable)

        if method in ("dssp","pymol"):
            logging.debug('%s determined secondary structure:\n'%method.upper()+ss)

    # Collect the secondary structure classifications for different frames
    ssTotal.append(ss)    

    # Write the coarse grained structure if requested
    if options["-x"].value:
        logging.info("Writing coarse grained structure.")
        if cgOutPDB == None:
            cgOutPDB = open(options["-x"].value,"w")
        cgOutPDB.write("MODEL %8d\n"%model)
        cgOutPDB.write(title)
        cgOutPDB.write(pdbBoxString(box))
        atid = 1
        for i in order:
            ci = chains[i]
            if ci.multiscale:
                for r in ci.residues:
                    for name,resn,resi,chain,x,y,z in r:
                        insc  = resi>>20
                        resi -= insc<<20
                        cgOutPDB.write(pdbAtomLine%(atid,name,resn[:3],chain,resi,chr(insc),x,y,z,1,0))
                        atid += 1
            coarseGrained = ci.cg()
            if coarseGrained:
                for name,resn,resi,chain,x,y,z,ssid in coarseGrained:
                    insc  = resi>>20
                    resi -= insc<<20
                    if ci.multiscale:
                        name = "v"+name
                    cgOutPDB.write(pdbAtomLine%(atid,name,resn[:3],chain,resi,chr(insc),x,y,z,1,ssid))
                    atid += 1 
                cgOutPDB.write("TER\n")          
            else:
                logging.warning("No mapping for coarse graining chain %s (%s); chain is skipped."%(ci.id,ci.type()))
        cgOutPDB.write("ENDMDL\n")

    # Gather cysteine sulphur coordinates
    cyslist = [cys["SG"] for chain in chains for cys in chain["CYS"]]
    cysteines.append([cys for cys in cyslist if cys])

    model += 1


# Write the index file if requested
if options["-n"].value:
    logging.info("Writing index file.")
    NAA,NVZ,NCG = [],[],[]
    atid = 1
    for i in order:
        ci = chains[i]
        coarseGrained = ci.cg()
        if ci.multiscale:
            NAA.extend([" %5d"%(a+atid) for a in range(ci.natoms)]) 
            atid += ci.natoms
        if coarseGrained:
            if ci.multiscale:
                NVZ.extend([" %5d"%(a+atid) for a in range(len(coarseGrained))])
            else:
                NCG.extend([" %5d"%(a+atid) for a in range(len(coarseGrained))])
            atid += len(coarseGrained)               
    outNDX   = open(options["-n"].value,"w")
    outNDX.write("\n[ AA ]\n"+"\n".join([" ".join(NAA[i:i+15]) for i in range(0,len(NAA),15)]))
    outNDX.write("\n[ VZ ]\n"+"\n".join([" ".join(NVZ[i:i+15]) for i in range(0,len(NVZ),15)]))
    outNDX.write("\n[ CG ]\n"+"\n".join([" ".join(NCG[i:i+15]) for i in range(0,len(NCG),15)]))
    outNDX.close()


# Collect the secondary structure stuff and decide what to do with it
# First rearrange by the residue
ssTotal = zip(*ssTotal)
ssAver  = []
for i in ssTotal:
    si = list(set(i))
    if len(si) == 1:
        # Only one type -- consensus
        ssAver.append(si[0])
    else:
        # Transitions between secondary structure types
        i = list(i)
        si = [(1.0*i.count(j)/len(i),j) for j in si]
        si.sort()
        if si[-1][0] > options["-ssc"].value:
            ssAver.append(si[-1][1])
        else:
            ssAver.append(" ")

ssAver = "".join(ssAver)
logging.info('(Average) Secondary structure has been determined (see head of .itp-file).')


# Divide the secondary structure according to the division in chains
# This will set the secondary structure types to be used for the 
# topology.
for chain in chains:
    chain.set_ss(ssAver[:len(chain)])
    ss = ssAver[len(chain):]


# Now the chains are complete, each consisting of a residuelist, 
# and a secondary structure designation if the chain is of type 'Protein'.
# There may be mixed chains, there may be HETATM things. 
# Water has been discarded. Maybe this has to be changed at some point.
# The order in the coarse grained files matches the order in the set of chains.
#
# If there are no merges to be done, i.e. no global Elnedyn network, no 
# disulphide bridges, no links, no distance restraints and no explicit merges,
# then we can write out the topology, which will match the coarse grained file.
#
# If there are merges to be done, the order of things may be changed, in which
# case the coarse grained structure will not match with the topology...


## CYSTINE BRIDGES ##

# Extract the cysteine coordinates (for all frames) and the cysteine identifiers
if CystineCheckBonds:
    logging.info("Checking for cystine bridges, based on sulphur (SG) atoms lying closer than %.4f nm"%math.sqrt(CystineMaxDist2/100))

    cyscoord  = zip(*[[j[4:7] for j in i] for i in cysteines])
    cysteines = [i[:4] for i in cysteines[0]]

    bl, kb    = special[(("SC1","CYS"),("SC1","CYS"))]

    # Check the distances and add the cysteines to the link list if the 
    # SG atoms have a distance smaller than the cutoff.
    rlc = range(len(cysteines))
    for i in rlc[:-1]:
        for j in rlc[i+1:]:
            # Checking the minimum distance over all frames
            # But we could also take the maximum, or the mean
            d2 = min([distance2(a,b) for a,b in zip(cyscoord[i],cyscoord[j])])
            if d2 <= CystineMaxDist2:
                a, b = cysteines[i], cysteines[j]
                logging.info("Detected SS bridge between %s and %s (%f nm)"%(a,b,math.sqrt(d2)/10))
                linkListCG.append((("SC1","CYS",a[2],a[3]),("SC1","CYS",b[2],b[3]),bl,kb))


## REAL ITP STUFF ##

# Check whether we have identical chains, in which case we 
# only write the ITP for one...
# This means making a distinction between chains and 
# moleculetypes.

molecules = [tuple([chains[i] for i in j]) for j in merge]

# At this point we should have a list or dictionary of chains
# Each chain should be given a unique name, based on the value
# of options["-o"] combined with the chain identifier and possibly
# a number if there are chains with identical identifiers.
# For each chain we then write an ITP file using the name for 
# moleculetype and name + ".itp" for the topology include file.
# In addition we write a master topology file, using the value of
# options["-o"], with an added extension ".top" if not given.

# *NOTE*: This should probably be gathered in a 'Universe' class
itp = 0
moleculeTypes = {}
for mi in range(len(molecules)):
    mol = molecules[mi]
    # Check if the moleculetype is already listed
    # If not, generate the topology from the chain definition
    if not mol in moleculeTypes or SeparateTop:
        # Name of the moleculetype
        # NOTE: The naming should be changed; now it becomes Protein_X+Protein_Y+...
        name = "+".join([chain.getname(options['-name'].value) for chain in mol])
        moleculeTypes[mol] = name

        # Write the molecule type topology
        top = Topology(mol[0],name=name)
        for m in mol[1:]:
            top += Topology(m)

        # Have to add the connections, like the connecting network
        # Gather coordinates
        mcg, coords = zip(*[(j[:4],j[4:7]) for m in mol for j in m.cg()])
        mcg         = list(mcg)

        # Run through the link list and add connections
        for atomA,atomB,bondlength,forceconst in linkListCG:
            if bondlength == -1 and forceconst == -1:
                bondlength, forceconst = special[(atomA[:2],atomB[:2])]
            # Check whether this link applies to this group
            atomA = atomA in mcg and mcg.index(atomA)+1
            atomB = atomB in mcg and mcg.index(atomB)+1
            if atomA and atomB:
                cat = (mcg[atomA][1] == "CYS" and mcg[atomB][1] == "CYS") and "Cystine" or "Link"
                top.bonds.append(Bond((atomA,atomB),type=1,parameters=(bondlength,forceconst),category=cat))

        # Elastic Network
        # The elastic network is added after the topology is constructed, since that
        # is where the correct atom list with numbering and the full set of 
        # coordinates for the merged chains are available. 
        if ElasticNetwork:
	    rubberType = Elnedyn and 1 or 6
            rubberList = rubberBands(
                [(i[0],j) for i,j in zip(top.atoms,coords) if i[4] in ElasticBeads],
                ElasticLowerBound,ElasticUpperBound,
                ElasticDecayFactor,ElasticDecayPower,
                ElasticMaximumForce,ElasticMinimumForce)
            top.bonds.extend([Bond(i,type=rubberType,category="Rubber band") for i in rubberList])

        # Write out the MoleculeType topology
        destination = options["-o"] and open(moleculeTypes[mol]+".itp",'w') or sys.stdout
        destination.write(str(top))        

        itp += 1

    # Check whether other chains are equal to this one 
    # Skip this step if we are to write all chains to separate moleculetypes
    if not SeparateTop:
        for j in range(mi+1,len(molecules)):
            if not molecules[j] in moleculeTypes and mol == molecules[j]:
                # Molecule j is equal to a molecule mi
                # Set the name of the moleculetype to the one of that molecule
                moleculeTypes[molecules[j]] = moleculeTypes[mol]

logging.info('Written %d ITP file%s'%(itp,itp>1 and "s" or ""))
        

#----+--------------------------------------------
## B # MORE WORK -- WRITING THE MASTER TOPOLOGY ##
#----+--------------------------------------------


# Processing stuff

# Output stream
top  = options["-o"] and open(options['-o'].value,'w') or sys.stdout

# ITP file listing
itps = '\n'.join(['#include "%s.itp"'%molecule for molecule in set(moleculeTypes.values())])

# Molecule listing
logging.info("Output contains %d molecules:"%len(molecules))
n = 1
for molecule in molecules:
    stuff = (n, moleculeTypes[molecule], len(molecule)>1 and "s" or " ", " ".join([i.id for i in molecule]))
    logging.info("  %2d->  %s (chain%s %s)"%stuff)
    n += 1
molecules   = '\n'.join(['%s \t 1'%moleculeTypes[molecule] for molecule in molecules])

# Set a define if we are to use rubber bands
useRubber   = ElasticNetwork and "#define RUBBER_BANDS" or ""

# Do not set a define for position restrains here, as people are more used to do it in mdp file?

top.write('''
#include "martini.itp"
; #include "martini_v2.0_lipids.itp"
; #include "W.itp"

%s

%s

[ system ]
; name
Martini system from %s

[ molecules ]
; name        number
%s
''' % (useRubber, itps, options["-f"] and options["-f"].value or "stdin", molecules))

logging.info('Written topology files')

print "\n\tThere you are. One MARTINI. Shaken, not stirred.\n"

Q = martiniq.pop(random.randint(0,len(martiniq)-1))
print "\n", Q[1], "\n%80s"%("--"+Q[0]), "\n"