In this chapter, various ways are discussed in which tables might be used to
store 3-dimensional molecular structures. In these tables, each row represents
a structure. The columns contain molecular properties, which may consist of
arrays of atom properties. In previous chapters, the use of new data types was
introduced to improve the way some data are stored and searched. Array data
types were suggested as a way of storing atomic coordinates for a molecule.
In this chapter, other ways will be shown in which molecular structures can
be stored and searched in a RDBMS. These include the use of SMILES and entire
files from external sources, such as molfiles or SDF files. Methods are shown
to input, output, convert and search molecular structures from within the database.
The SQL statements shown are valid in PostgreSQL, but may not be valid using
other RDBMS. For example, the array data type is implemented differently in
Oracle compared to PostgreSQL. [view
wordle] [hide wordle]
| Page |
|
| 126 |
Create Schema vla4;
Create Table vla4.sdf (id Integer, molfile Text);
Create Table vla4.structure (id Integer, name Text, cansmiles Text, coord Float[][3], atom Integer[]);
Create Table vla4.property (id Integer, name Text, tvalue Text, nvalue Numeric); |
| 127 |
print <<EOSQL;
Create Schema vla4;
Create Sequence vla4.structure_id_seq;
Create Table vla4.sdf (id Integer
Default Nextval('vla4.structure_id_seq'), molfile Text);
Create Table vla4.structure (id Integer Primary Key
Default Nextval('vla4.structure_id_seq'), name Text, cansmiles Text coord Numeric[][3], atom Integer[]);
Create Table vla4.property (id Integer References vla4.structure (id), name Text, tvalue Text, nvalue Numeric);
Copy vla4.sdf (molfile) From Stdin;
EOSQL
while (<stdin>) {
if (/\$\$\$\$/) {
print;
} else {
s/\r//; chomp; print; print "\\n";
}
}
perl loader <vla-4.sdf | psql mydb |
| 128 |
Create Type openbabel.mol As (name Text, cansmiles Text, coords Float[][3], atoms Integer[]);
Insert Into vla4.structure (id, name, cansmiles, coords, atoms)
Select id, (openbabel.molfile_mol(molfile)).* from vla4.sdf;
|
| 129 |
Alter Table vla4.sdf Add Constraint sdf_id_fk Foreign Key (id) References vla4.structure (id);
Create Type openbabel.named_property As (name Text, value Text); |
| 130 |
Insert Into vla4.property (id, name, tvalue)
Select id, (molfile_properties).name, (molfile_properties).value From
(Select id, openbabel.molfile_properties(molfile) From vla4.sdf) atmp
Update vla4.property Set nvalue = tvalue::numeric
Where tvalue ~ E'^[+-]?[0-9]+(\\\\.[0-9]*)?([Ee][+-]?[0-9]+)?\$'; |
| 131 |
Select smiles,smiles_to_symbols(smiles), smiles_to_bonds(smiles)
from nci.structure where cas = '1467-70-5'; |
| 132 |
def getdata(self, name, table):
_insql = "select smiles_to_symbols(cansmiles) as symbols, coords from %s where name=''%s''" % (table, name)
_sql = "select * from symbol_coords('%s') as (symbol text, x numeric, y numeric, z numeric)" % (_insql)
for (_sym, _x, _y, _z) in ((self.conn).query(_sql).getresult()):
(self.symbols).append(_sym)
(self.coords).append( (_x, _y, _z) )
self.natoms = len(self.symbols)
return
select * from
symbol_coords('select openbabel.smiles_to_symbols(cansmiles) as symbols,
coords from vla4.structure where name=''BMCL-1051-38''')
as (symbol text, x numeric, y numeric, z numeric); |
| 133 |
Create Or Replace Function align(amol float[][3], float[][3]) Returns float[3] As $$
Select difference(center($1), center($2));
$$ Language SQL Immutable;
Create Or Replace Function center(amol float[][3]) Returns float[3] As $$
Declare
centrum Float[3] := Array[0., 0., 0.];
natoms Integer := array_upper(amol, 1);
i Integer;
Begin
For i In 1 .. natoms Loop
centrum[1] = centrum[1] + amol[i][1];
centrum[2] = centrum[2] + amol[i][2];
centrum[3] = centrum[3] + amol[i][3];
End Loop;
centrum[1] = centrum[1] / natoms;
centrum[2] = centrum[2] / natoms;
centrum[3] = centrum[3] / natoms;
Return centrum;
End;
$$ Language plpgsql Immutable; |
| 134 |
Create Or Replace Function difference(float[3], float[3]) Returns float[3] As $$
Select ARRAY[$2[1] - $1[1], $2[2] - $1[2], $2[3] - $1[3]];
$$ Language SQL Immutable;
Select ref.name as ref, others.name, align(ref.coords, others.coords) from
(Select name, coords from vla4.structure where name = 'BMCL-805-1') ref,
(Select name, coords from vla4.structure) others;
Select ref.name as ref, others.name, align(ref.coords, others.coords) from
(Select name, subset(cansmiles,'CSC', coords) as coords
from vla4.structure where name = 'BMCL-805-1') ref,
(Select name, subset(cansmiles, 'CSC', coords) as coords
from vla4.structure Where matches(cansmiles, 'CSC')) others
Create Or Replace Function subset(smiles text, smarts text, coords float[][3]) Returns float[][3] As $$
Declare
scoords Float[][3];
atoms Integer[];
i Integer;
j Integer;
nmatch Integer;
Begin
Select gnova.list_matches($1, $2, 1, 1) into atoms;
If atoms Is Null Then
Return Null;
End If;
nmatch = array_upper(atoms, 1);
i = atoms[1];
scoords = coords[i:i][1:3];
For j In 2 .. nmatch Loop
i = atoms[j];
scoords = scoords || coords[i:i][1:3];
End Loop;
--Return scoords[2:nmatch+1];
Return scoords;
End;
$$ Language plpgsql Strict Immutable;
Comment On Function subset(text, text, float[]) Is
'Return subset of molecule''s atomic coordinates for atoms matching SMARTS';
|
| 135 |
|
| 136 |
Select structure.cansmiles, structure.name, conformation.energy, conformation.coords
From vla4.structure Join vla4.conformation Using (id) Where name = 'BMCL-1051-14'; |