chapter 11


3-Dimensional Molecular Structure Tables

In this chapter, various ways are discussed in which tables might be used to store 3-dimensional molecular structures. In these tables, each row represents a structure. The columns contain molecular properties, which may consist of arrays of atom properties. In previous chapters, the use of new data types was introduced to improve the way some data are stored and searched. Array data types were suggested as a way of storing atomic coordinates for a molecule. In this chapter, other ways will be shown in which molecular structures can be stored and searched in a RDBMS. These include the use of SMILES and entire files from external sources, such as molfiles or SDF files. Methods are shown to input, output, convert and search molecular structures from within the database. The SQL statements shown are valid in PostgreSQL, but may not be valid using other RDBMS. For example, the array data type is implemented differently in Oracle compared to PostgreSQL. [view wordle] [hide wordle]

Page  
126
Create Schema vla4;
Create Table vla4.sdf (id Integer, molfile Text);
Create Table vla4.structure (id Integer, name Text, cansmiles Text, coord Float[][3], atom Integer[]);
Create Table vla4.property (id Integer, name Text, tvalue Text, nvalue Numeric);
127
print <<EOSQL;
  Create Schema vla4;
  Create Sequence vla4.structure_id_seq;
  Create Table vla4.sdf (id Integer
  Default Nextval('vla4.structure_id_seq'), molfile Text);
  Create Table vla4.structure (id Integer Primary Key
  Default Nextval('vla4.structure_id_seq'), name Text, cansmiles Text coord Numeric[][3], atom Integer[]);
  Create Table vla4.property (id Integer References vla4.structure (id), name Text, tvalue Text, nvalue Numeric);
  Copy vla4.sdf (molfile) From Stdin;
EOSQL
while (<stdin>) {
  if (/\$\$\$\$/) {
    print;
  } else {
    s/\r//; chomp; print; print "\\n";
  }
}
perl loader <vla-4.sdf | psql mydb
128
Create Type openbabel.mol As (name Text, cansmiles Text, coords Float[][3], atoms Integer[]); 
Insert Into vla4.structure (id, name, cansmiles, coords, atoms)
 Select id, (openbabel.molfile_mol(molfile)).* from vla4.sdf;
       
129
Alter Table vla4.sdf Add Constraint sdf_id_fk Foreign Key (id) References vla4.structure (id); 
Create Type openbabel.named_property As (name Text, value Text); 
130
Insert Into vla4.property (id, name, tvalue)
Select id, (molfile_properties).name, (molfile_properties).value From 
  (Select id, openbabel.molfile_properties(molfile) From vla4.sdf) atmp
Update vla4.property Set nvalue = tvalue::numeric
  Where tvalue ~ E'^[+-]?[0-9]+(\\\\.[0-9]*)?([Ee][+-]?[0-9]+)?\$';
131
Select smiles,smiles_to_symbols(smiles), smiles_to_bonds(smiles)
  from nci.structure where cas = '1467-70-5';
132
def getdata(self, name, table): 
  _insql = "select smiles_to_symbols(cansmiles) as symbols, coords from %s where name=''%s''" % (table, name) 
  _sql = "select * from symbol_coords('%s') as (symbol text, x numeric, y numeric, z numeric)" % (_insql) 
  for (_sym, _x, _y, _z) in ((self.conn).query(_sql).getresult()): 
    (self.symbols).append(_sym) 
    (self.coords).append( (_x, _y, _z) ) 
    self.natoms = len(self.symbols) 
  return 
select * from
 symbol_coords('select openbabel.smiles_to_symbols(cansmiles) as symbols,
 coords from vla4.structure where name=''BMCL-1051-38''')
 as (symbol text, x numeric, y numeric, z numeric);
133
Create Or Replace Function align(amol float[][3], float[][3]) Returns float[3] As $$ 
 Select difference(center($1), center($2)); 
$$ Language SQL Immutable;        
Create Or Replace Function center(amol float[][3]) Returns float[3] As $$ 
  Declare 
    centrum Float[3] := Array[0., 0., 0.]; 
    natoms Integer := array_upper(amol, 1); 
    i Integer; 
  Begin 
    For i In 1 .. natoms Loop 
      centrum[1] = centrum[1] + amol[i][1]; 
      centrum[2] = centrum[2] + amol[i][2]; 
      centrum[3] = centrum[3] + amol[i][3];
    End Loop; 
    centrum[1] = centrum[1] / natoms; 
    centrum[2] = centrum[2] / natoms; 
    centrum[3] = centrum[3] / natoms; 
    Return centrum; 
  End; 
$$ Language plpgsql Immutable; 
134
Create Or Replace Function difference(float[3], float[3]) Returns float[3] As $$ 
  Select ARRAY[$2[1] - $1[1], $2[2] - $1[2], $2[3] - $1[3]]; 
$$ Language SQL Immutable; 
Select ref.name as ref, others.name, align(ref.coords, others.coords) from
  (Select name, coords from vla4.structure where name = 'BMCL-805-1') ref,
  (Select name, coords from vla4.structure) others;
Select ref.name as ref, others.name, align(ref.coords, others.coords) from
  (Select name, subset(cansmiles,'CSC', coords) as coords
     from vla4.structure where name = 'BMCL-805-1') ref,
  (Select name, subset(cansmiles, 'CSC', coords) as coords
     from vla4.structure Where matches(cansmiles, 'CSC')) others
Create Or Replace Function subset(smiles text, smarts text, coords float[][3]) Returns float[][3] As $$ 
  Declare 
    scoords Float[][3]; 
    atoms Integer[]; 
    i Integer; 
    j Integer; 
    nmatch Integer; 
  Begin 
    Select gnova.list_matches($1, $2, 1, 1) into atoms; 
    If atoms Is Null Then 
      Return Null; 
    End If; 
    nmatch = array_upper(atoms, 1); 
    i = atoms[1]; 
    scoords = coords[i:i][1:3]; 
    For j In 2 .. nmatch Loop 
      i = atoms[j]; 
      scoords = scoords || coords[i:i][1:3]; 
    End Loop; 
    --Return scoords[2:nmatch+1]; 
    Return scoords; 
  End; 
$$ Language plpgsql Strict Immutable; 
Comment On Function subset(text, text, float[]) Is 
  'Return subset of molecule''s atomic coordinates for atoms matching SMARTS';
135  
136
Select structure.cansmiles, structure.name, conformation.energy, conformation.coords
  From vla4.structure Join vla4.conformation Using (id) Where name = 'BMCL-1051-14';