chapter 11

3-Dimensional Molecular Structure Tables

In this chapter, various ways are discussed in which tables might be used to store 3-dimensional molecular structures. In these tables, each row represents a structure. The columns contain molecular properties, which may consist of arrays of atom properties. In previous chapters, the use of new data types was introduced to improve the way some data are stored and searched. Array data types were suggested as a way of storing atomic coordinates for a molecule. In this chapter, other ways will be shown in which molecular structures can be stored and searched in a RDBMS. These include the use of SMILES and entire files from external sources, such as molfiles or SDF files. Methods are shown to input, output, convert and search molecular structures from within the database. The SQL statements shown are valid in PostgreSQL, but may not be valid using other RDBMS. For example, the array data type is implemented differently in Oracle compared to PostgreSQL. [view wordle] [hide wordle]

Create Schema vla4;
Create Table vla4.sdf (id Integer, molfile Text);
Create Table vla4.structure (id Integer, name Text, cansmiles Text, coord Float[][3], atom Integer[]);
Create Table (id Integer, name Text, tvalue Text, nvalue Numeric);
print <<EOSQL;
  Create Schema vla4;
  Create Sequence vla4.structure_id_seq;
  Create Table vla4.sdf (id Integer
  Default Nextval('vla4.structure_id_seq'), molfile Text);
  Create Table vla4.structure (id Integer Primary Key
  Default Nextval('vla4.structure_id_seq'), name Text, cansmiles Text coord Numeric[][3], atom Integer[]);
  Create Table (id Integer References vla4.structure (id), name Text, tvalue Text, nvalue Numeric);
  Copy vla4.sdf (molfile) From Stdin;
while (<stdin>) {
  if (/\$\$\$\$/) {
  } else {
    s/\r//; chomp; print; print "\\n";
perl loader <vla-4.sdf | psql mydb
Create Type openbabel.mol As (name Text, cansmiles Text, coords Float[][3], atoms Integer[]); 
Insert Into vla4.structure (id, name, cansmiles, coords, atoms)
 Select id, (openbabel.molfile_mol(molfile)).* from vla4.sdf;
Alter Table vla4.sdf Add Constraint sdf_id_fk Foreign Key (id) References vla4.structure (id); 
Create Type openbabel.named_property As (name Text, value Text); 
Insert Into (id, name, tvalue)
Select id, (molfile_properties).name, (molfile_properties).value From 
  (Select id, openbabel.molfile_properties(molfile) From vla4.sdf) atmp
Update Set nvalue = tvalue::numeric
  Where tvalue ~ E'^[+-]?[0-9]+(\\\\.[0-9]*)?([Ee][+-]?[0-9]+)?\$';
Select smiles,smiles_to_symbols(smiles), smiles_to_bonds(smiles)
  from nci.structure where cas = '1467-70-5';
def getdata(self, name, table): 
  _insql = "select smiles_to_symbols(cansmiles) as symbols, coords from %s where name=''%s''" % (table, name) 
  _sql = "select * from symbol_coords('%s') as (symbol text, x numeric, y numeric, z numeric)" % (_insql) 
  for (_sym, _x, _y, _z) in ((self.conn).query(_sql).getresult()): 
    (self.coords).append( (_x, _y, _z) ) 
    self.natoms = len(self.symbols) 
select * from
 symbol_coords('select openbabel.smiles_to_symbols(cansmiles) as symbols,
 coords from vla4.structure where name=''BMCL-1051-38''')
 as (symbol text, x numeric, y numeric, z numeric);
Create Or Replace Function align(amol float[][3], float[][3]) Returns float[3] As $$ 
 Select difference(center($1), center($2)); 
$$ Language SQL Immutable;        
Create Or Replace Function center(amol float[][3]) Returns float[3] As $$ 
    centrum Float[3] := Array[0., 0., 0.]; 
    natoms Integer := array_upper(amol, 1); 
    i Integer; 
    For i In 1 .. natoms Loop 
      centrum[1] = centrum[1] + amol[i][1]; 
      centrum[2] = centrum[2] + amol[i][2]; 
      centrum[3] = centrum[3] + amol[i][3];
    End Loop; 
    centrum[1] = centrum[1] / natoms; 
    centrum[2] = centrum[2] / natoms; 
    centrum[3] = centrum[3] / natoms; 
    Return centrum; 
$$ Language plpgsql Immutable; 
Create Or Replace Function difference(float[3], float[3]) Returns float[3] As $$ 
  Select ARRAY[$2[1] - $1[1], $2[2] - $1[2], $2[3] - $1[3]]; 
$$ Language SQL Immutable; 
Select as ref,, align(ref.coords, others.coords) from
  (Select name, coords from vla4.structure where name = 'BMCL-805-1') ref,
  (Select name, coords from vla4.structure) others;
Select as ref,, align(ref.coords, others.coords) from
  (Select name, subset(cansmiles,'CSC', coords) as coords
     from vla4.structure where name = 'BMCL-805-1') ref,
  (Select name, subset(cansmiles, 'CSC', coords) as coords
     from vla4.structure Where matches(cansmiles, 'CSC')) others
Create Or Replace Function subset(smiles text, smarts text, coords float[][3]) Returns float[][3] As $$ 
    scoords Float[][3]; 
    atoms Integer[]; 
    i Integer; 
    j Integer; 
    nmatch Integer; 
    Select gnova.list_matches($1, $2, 1, 1) into atoms; 
    If atoms Is Null Then 
      Return Null; 
    End If; 
    nmatch = array_upper(atoms, 1); 
    i = atoms[1]; 
    scoords = coords[i:i][1:3]; 
    For j In 2 .. nmatch Loop 
      i = atoms[j]; 
      scoords = scoords || coords[i:i][1:3]; 
    End Loop; 
    --Return scoords[2:nmatch+1]; 
    Return scoords; 
$$ Language plpgsql Strict Immutable; 
Comment On Function subset(text, text, float[]) Is 
  'Return subset of molecule''s atomic coordinates for atoms matching SMARTS';
Select structure.cansmiles,,, conformation.coords
  From vla4.structure Join vla4.conformation Using (id) Where name = 'BMCL-1051-14';