Appendix
This Appendix contains SQL functions and tables too large or complex for the explanatory nature of the earlier chapters. These functions and tables are practical, rather than explanatory. They all follow PostgreSQL syntax. Some of them require the core functions described in Chapter 7 of this book, e.g. matches, cansmiles and count_matches. Those functions are available in the CHORD product from gNova, Inc. This Appendix also contains a PerlMol implementation, a FROWNS implementation and an OpenBabel implementation of the core functions for PostgreSQL. [view wordle] [hide wordle]
| Page |
|
|---|---|
| 174 | Create or Replace Function ctable(text) Returns Setof Record As $EOSQL$
-- Called with $1 as SQL selecting integer[] as bonds
-- Example caller:
-- select * from
-- ctable('select smiles_to_bonds(cansmiles) as bonds
-- from vla4.structure where name=''BMCL-1051-38''')
-- as (atom1 integer, atom2 integer, bond_order integer);
Declare
bonds Record;
b Record;
i Integer;
Begin
For b In Execute $1 Loop
For i in 1 .. array_upper(b.bonds,1) Loop
Select b.bonds[i][1], b.bonds[i][2], b.bonds[i][3] Into bonds;
Return Next bonds;
End Loop;
End Loop;
End;
$EOSQL$ Language plpgsql;
Create or Replace Function symbol_coords(text) Returns Setof Record As $EOSQL$
-- Called with $1 as SQL selecting text[] as symbols, numeric[] as coords
-- Example caller:
-- select * from
-- symbol_coords('select smiles_to_symbols(cansmiles) as symbols,
-- coords from vla4.structure where name=''BMCL-1051-38''')
-- as (symbol text, x numeric, y numeric, z numeric);
Declare
sym_coord Record;
sc Record;
i Integer;
Begin
For sc In Execute $1 Loop
For i in 1 .. array_upper(sc.symbols,1) Loop
Select sc.symbols[i], sc.coords[i][1], sc.coords[i][2], sc.coords[i][3] Into sym_coord;
Return Next sym_coord;
End Loop;
End Loop;
End;
$EOSQL$ Language plpgsql;
|
| 175 | -- New table to hold all references identified by unique id Create Table literature_refs (refid Serial, reference Text); -- Populate table with unique references found in logp table Insert Into literature_refs (reference) Select Distinct ref From logp Group By ref; -- Create column in logp to hold reference id instead of reference Alter Table logp Add Column refid integer; -- Populate logp table's reference id column with appropriate values Update logp Set refid = (Select refid From literature_refs Where ref=reference); -- No need for reference column anymore Alter Table logp Drop Column ref; Select cas, reference From logp Join literature_refs Using (refid); |
| 176 | Create Function public166keys(character varying) Returns bit(166) As $EOSQL$
Select orsum(bit_set(0::Bit(166), bit)) From public166keys Where openbabel.count_matches($1,smarts) > count;
$EOSQL$ Language SQL;
Create Aggregate orsum ( Basetype = bit, Sfunc = bitor, Stype = bit ); |
| 177 | Create Function tanimoto(bit, bit) Returns Real As $EOSQL$ Select nbits_set($1 & $2)::real / (nbits_set($1) + nbits_set($2) - nbits_set($1 & $2))::real; $EOSQL$ Language SQL; Create Function euclid(bit, bit) Returns Real As $EOSQL$
Select sqrt((nbits_set($1 & $2) + nbits_set(~$1 & ~$2))::real /length($1))::real;
$EOSQL$ Language SQL;
Create Function hamming(bit, bit) Returns Real As $EOSQL$
Select ((nbits_set($1 & ~$2) + nbits_set(~$1 & $2))::real / length($1))::real;
$EOSQL$ Language SQL;
Create Or Replace Function nbits_set(bits bit) Returns Integer As $EOPY$
return bits.count('1');
$EOPY$ Language plpythonu Immutable; |
| 178 | download average molecular weight table A.2 |
| 179 | |
| 180 | |
| 181 | Create Function gnova.tpsa(character varying) Returns Numeric As $EOSQL$ Select sum(psa*count_matches($1,smarts)) From tpsa; $EOSQL$ Language SQL; |
| 182 | |
| 183 | download public166keys table A.4 |
| 184 | |
| 185 | |
| 186 | |
| 187 | |
| 188 | createlang plperl createlang plpythonu download PerlMol core function definitions sudo -u postgres psql mydb <perlmol-core.sql |
| 189 | |
| 190 | |
| 191 | |
| 192 | download FROWNS core function definitions sudo -u postgres psql mydb <frowns-core.sql |
| 193 | |
| 194 | |
| 195 | |
| 196 | |
| 197 | download OpenBabel core function definitions |
| 198 | |
| 199 | |
| 200 | |
| 201 | |
| 202 | |
| 203 | sudo -u postgres psql mydb <openbabel-core.sql |
| 204 | #include "postgres.h"
/* general Postgres declarations */
#include "fmgr.h" /* for argument/result macros */
#include "executor/executor.h" /* for GetAttributeByName() */
#include "utils/varbit.h" /* These prototypes just prevent possible warnings from gcc. */
Datum nbits_set(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(nbits_set);
Datum nbits_set(PG_FUNCTION_ARGS) {
/* how many bits are set in a bitstring? */
VarBit *a = PG_GETARG_VARBIT_P(0);
int n=0;
/*
* VARBITLENTOTAL is the total size of the struct in bytes.
* VARBITLEN is the size of the string in bits.
* VARBITBYTES is the size of the string in bytes.
* VARBITHDRSZ is the total size of the header in bytes.
* VARBITS is a pointer to the data region of the struct.
*/
int i;
unsigned char *ap = VARBITS(a);
unsigned char aval;
for (i=0; i < VARBITBYTES(a); ++i) {
aval = *ap; ++ap;
if (aval == 0) continue;
if (aval & 1) ++n;
if (aval & 2) ++n;
if (aval & 4) ++n;
if (aval & 8) ++n;
if (aval & 16) ++n;
if (aval & 32) ++n;
if (aval & 64) ++n;
if (aval & 128) ++n;
}
PG_RETURN_INT32(n);
}
gcc -shared -o bits.so -I/usr/include/postgresql/8.2/server bits.c cp bits.so /usr/lib/postgresql/8.2/lib/bits.so |
| 205 | Create Or Replace Function nbits_set(bit) Returns integer AS 'bits', 'nbits_set' Language c Immutable Strict; Comment On Function nbits_set(bit) Is 'number of bits set'; |
| 206 | |
| 207 | #! /usr/bin/perl
$schema = $ARGV[0];
die "Schema name required\nusage: loader schema\n" unless ($schema);
print <<EOSQL;
Drop Schema If Exists $schema Cascade;
Create Schema $schema;
Create Sequence $schema.structure_id_seq;
Create Table $schema.structure (
id Integer Primary Key Default Nextval('$schema.structure_id_seq'),
name Text,
smiles Text,
isosmiles Text,
fp Bit Varying);
Copy $schema.structure (smiles,name) From Stdin;
EOSQL
while (<stdin>) {
s/\r//; chomp;
($smi,$name) = split;
print "$smi\t$name\n";
}
print <<EOSQL;
\\.
set search_path=openbabel;
Update $schema.structure Set fp=fp(smiles), isosmiles=isosmiles(smiles) Where valid(smiles);
EOSQL
perl smiloader drugs <drugs.smi | psql mydb |
| 208 | #! /usr/bin/perl$schema = $ARGV[0];
die "Schema name required\nusage: loader schema\n" unless ($schema);
print <<EOSQL;
Drop Schema If Exists $schema Cascade;
Create Schema $schema;
Create Sequence $schema.structure_id_seq;
Create Table $schema.sdf (id Integer Default Nextval('$schema.structure_id_seq'), molfile Text);
Create Table $schema.structure (
id Integer Primary Key Default Nextval('$schema.structure_id_seq'),
name Text,
isosmiles Text,
cansmiles Text,
fp Bit Varying, coords Numeric[][3],
atoms Integer[]);
Create Table $schema.property (
id Integer References $schema.structure (id),
name Text,
tvalue Text,
nvalue Numeric);
Copy $schema.sdf (molfile) From Stdin;
EOSQL
while (<stdin>) {
if (/\$\$\$\$/) {
print;
} else {
s/\r//; chomp; print; print "\\n";
}
}
print <<EOSQL;
\\.
set search_path=openbabel;
Insert Into $schema.structure (id, name, isosmiles, coords, atoms)
Select id, (molfile_mol(molfile)).* from $schema.sdf;
Update $schema.structure Set cansmiles=cansmiles(isosmiles) Where valid(isosmiles);
Update $schema.structure Set fp=fp(cansmiles) Where valid(cansmiles);
Alter Table $schema.sdf Add Constraint sdf_id_fk Foreign Key (id) References $schema.structure (id);
Insert into $schema.property (id, name, tvalue) select id, (p).name, (p).value from
(select id, molfile_properties(molfile) as p from $schema.sdf) atmp;
-- This regexp may not catch all numeric values
Update $schema.property Set nvalue = tvalue::numeric
Where tvalue ~ E'^[+-]?[0-9]+(\\\\.[0-9]*)?([Ee][+-]?[0-9]+)?\$';
-- You may choose to name colums to be converted to numeric
--Update $schema.property Set nvalue = tvalue::numeric
-- Where Name = 'IC50_uM';
EOSQL
perl sdfloader vla4 <vla-4.sdf | psql mydb |
| 209 | download drugs.smi |