Appendix

This Appendix contains SQL functions and tables too large or complex for the explanatory nature of the earlier chapters. These functions and tables are practical, rather than explanatory. They all follow PostgreSQL syntax. Some of them require the core functions described in Chapter 7 of this book, e.g. matches, cansmiles and count_matches. Those functions are available in the CHORD product from gNova, Inc. This Appendix also contains a PerlMol implementation, a FROWNS implementation and an OpenBabel implementation of the core functions for PostgreSQL. [view wordle] [hide wordle]

Page

 

174
Create or Replace Function ctable(text) Returns Setof Record As $EOSQL$        
-- Called with $1 as SQL selecting integer[] as bonds 
-- Example caller: 
-- select * from 
-- ctable('select smiles_to_bonds(cansmiles) as bonds 
-- from vla4.structure where name=''BMCL-1051-38''') 
-- as (atom1 integer, atom2 integer, bond_order integer);

  Declare 
    bonds Record; 
    b Record; 
    i Integer; 
  Begin 
    For b In Execute $1 Loop 
      For i in 1 .. array_upper(b.bonds,1) Loop 
        Select b.bonds[i][1], b.bonds[i][2], b.bonds[i][3] Into bonds; 
        Return Next bonds; 
      End Loop; 
    End Loop; 
  End; 
$EOSQL$ Language plpgsql;

Create or Replace Function symbol_coords(text) Returns Setof Record As $EOSQL$ 
-- Called with $1 as SQL selecting text[] as symbols, numeric[] as coords          
-- Example caller: 
-- select * from 
-- symbol_coords('select smiles_to_symbols(cansmiles) as symbols, 
-- coords from vla4.structure where name=''BMCL-1051-38''') 
-- as (symbol text, x numeric, y numeric, z numeric);
  Declare 
    sym_coord Record; 
    sc Record; 
    i Integer; 
  Begin 
    For sc In Execute $1 Loop 
      For i in 1 .. array_upper(sc.symbols,1) Loop 
        Select sc.symbols[i], sc.coords[i][1], sc.coords[i][2], sc.coords[i][3] Into sym_coord; 
        Return Next sym_coord; 
      End Loop; 
    End Loop; 
  End; 
$EOSQL$ Language plpgsql; 
175
-- New table to hold all references identified by unique id
Create Table literature_refs (refid Serial, reference Text);
-- Populate table with unique references found in logp table
Insert Into literature_refs (reference) Select Distinct ref From logp Group By ref;
-- Create column in logp to hold reference id instead of reference
Alter Table logp Add Column refid integer;
-- Populate logp table's reference id column with appropriate values
Update logp Set refid = (Select refid From literature_refs Where ref=reference);
-- No need for reference column anymore
Alter Table logp Drop Column ref;
Select cas, reference From logp Join literature_refs Using (refid);
176
Create Function public166keys(character varying) Returns bit(166) As $EOSQL$
 Select orsum(bit_set(0::Bit(166), bit)) From public166keys Where openbabel.count_matches($1,smarts) > count;
$EOSQL$ Language SQL;
Create Aggregate orsum ( 
  Basetype = bit, 
  Sfunc = bitor, 
  Stype = bit 
);
177
Create Function tanimoto(bit, bit) Returns Real As $EOSQL$
  Select nbits_set($1 & $2)::real / (nbits_set($1) + nbits_set($2) - nbits_set($1 & $2))::real;
$EOSQL$ Language SQL;
Create Function euclid(bit, bit)  Returns Real As $EOSQL$
  Select sqrt((nbits_set($1 & $2) + nbits_set(~$1 & ~$2))::real /length($1))::real;
$EOSQL$ Language SQL;
Create Function hamming(bit, bit) Returns Real As $EOSQL$
  Select ((nbits_set($1 & ~$2) + nbits_set(~$1 & $2))::real / length($1))::real;
$EOSQL$ Language SQL;
Create Or Replace Function nbits_set(bits bit) Returns Integer As $EOPY$          
         return bits.count('1'); 
         $EOPY$ Language plpythonu Immutable; 
178 download average molecular weight table A.2
179  
180  
181
Create Function gnova.tpsa(character varying) Returns Numeric As $EOSQL$ 
  Select sum(psa*count_matches($1,smarts)) From tpsa; 
$EOSQL$ Language SQL; 

download TPSA table A.3

182  
183 download public166keys table A.4
184  
185  
186  
187  
188
createlang plperl
createlang plpythonu

download PerlMol core function definitions

sudo -u postgres psql mydb <perlmol-core.sql
189  
190  
191  
192

download FROWNS core function definitions

sudo -u postgres psql mydb <frowns-core.sql
193  
194  
195  
196  
197 download OpenBabel core function definitions
198  
199  
200  
201  
202  
203
sudo -u postgres psql mydb <openbabel-core.sql
204
#include "postgres.h"

/* general Postgres declarations */ 
#include "fmgr.h"              /* for argument/result macros */ 
#include "executor/executor.h" /* for GetAttributeByName() */        
#include "utils/varbit.h"      /* These prototypes just prevent possible warnings from gcc. */

Datum nbits_set(PG_FUNCTION_ARGS); 
PG_FUNCTION_INFO_V1(nbits_set); 

Datum  nbits_set(PG_FUNCTION_ARGS) { 
/* how many bits are set in a bitstring? */
  VarBit *a = PG_GETARG_VARBIT_P(0); 
  int n=0;
 /* 
  * VARBITLENTOTAL is the total size of the struct in bytes. 
  * VARBITLEN is the size of the string in bits. 
  * VARBITBYTES is the size of the string in bytes. 
  * VARBITHDRSZ is the total size of the header in bytes. 
  * VARBITS is a pointer to the data region of the struct. 
  */ 
  int i; 
  unsigned char *ap = VARBITS(a); 
  unsigned char aval; 
  for (i=0; i < VARBITBYTES(a); ++i) { 
    aval = *ap; ++ap; 
    if (aval == 0) continue; 
    if (aval & 1) ++n; 
    if (aval & 2) ++n; 
    if (aval & 4) ++n; 
    if (aval & 8) ++n; 
    if (aval & 16) ++n; 
    if (aval & 32) ++n; 
    if (aval & 64) ++n; 
    if (aval & 128) ++n; 
  } 
  PG_RETURN_INT32(n); 
}
gcc -shared -o bits.so -I/usr/include/postgresql/8.2/server bits.c
cp bits.so /usr/lib/postgresql/8.2/lib/bits.so
205
Create Or Replace Function nbits_set(bit) Returns integer AS 'bits', 'nbits_set' Language c Immutable Strict; 
Comment On Function nbits_set(bit) Is 'number of bits set'; 

download dbutils functions

206  
207
#! /usr/bin/perl
$schema = $ARGV[0];
die "Schema name required\nusage: loader schema\n" unless ($schema);

print <<EOSQL;
Drop Schema If Exists $schema Cascade;
Create Schema $schema;
Create Sequence $schema.structure_id_seq;
Create Table $schema.structure (
 id Integer Primary Key Default Nextval('$schema.structure_id_seq'),
 name Text,
 smiles Text,
 isosmiles Text,
 fp Bit Varying);
Copy $schema.structure (smiles,name) From Stdin;
EOSQL

while (<stdin>) {
  s/\r//; chomp;
  ($smi,$name) = split;
  print "$smi\t$name\n";
}

print <<EOSQL;
\\.
set search_path=openbabel;
Update $schema.structure Set fp=fp(smiles), isosmiles=isosmiles(smiles) Where valid(smiles);
EOSQL
perl smiloader drugs <drugs.smi | psql mydb
208
#! /usr/bin/perl$schema = $ARGV[0];
die "Schema name required\nusage: loader schema\n" unless ($schema);

print <<EOSQL;
Drop Schema If Exists $schema Cascade;
Create Schema $schema;
Create Sequence $schema.structure_id_seq;
Create Table $schema.sdf (id Integer Default Nextval('$schema.structure_id_seq'), molfile Text); 
Create Table $schema.structure (
 id Integer Primary Key Default Nextval('$schema.structure_id_seq'),
 name Text,
 isosmiles Text,
 cansmiles Text,
 fp Bit Varying, coords Numeric[][3],
 atoms Integer[]);
Create Table $schema.property (
 id Integer References $schema.structure (id),
 name Text,
 tvalue Text,
 nvalue Numeric);
Copy $schema.sdf (molfile) From Stdin;
EOSQL

while (<stdin>) {
  if (/\$\$\$\$/) {
    print;
  } else {
    s/\r//; chomp; print; print "\\n";
  }
}

print <<EOSQL;
\\.
set search_path=openbabel;
Insert Into $schema.structure (id, name, isosmiles, coords, atoms)
   Select id, (molfile_mol(molfile)).* from $schema.sdf;

Update $schema.structure Set cansmiles=cansmiles(isosmiles) Where valid(isosmiles);
Update $schema.structure Set fp=fp(cansmiles) Where valid(cansmiles);

Alter Table $schema.sdf Add Constraint sdf_id_fk Foreign Key (id) References $schema.structure (id);

Insert into $schema.property (id, name, tvalue) select id, (p).name, (p).value from 
  (select id, molfile_properties(molfile) as p from $schema.sdf) atmp;

-- This regexp may not catch all numeric values
Update $schema.property Set nvalue = tvalue::numeric
  Where tvalue ~ E'^[+-]?[0-9]+(\\\\.[0-9]*)?([Ee][+-]?[0-9]+)?\$';

-- You may choose to name colums to be converted to numeric
--Update $schema.property Set nvalue = tvalue::numeric
-- Where Name = 'IC50_uM';
EOSQL
perl sdfloader vla4 <vla-4.sdf | psql mydb
209 download drugs.smi