chapter 13


Applications

There are many uses for a chemical relational database. Some of these have been mentioned in earlier chapters. In this chapter, three general types applications will be discussed. The purpose is not to present complete working applications, but to indicate important issues to consider when designing such applications. Sample schemas are proposed. The use within each application of the core functions described earlier is discussed. Each of these applications might be developed as a web application or a client application on a user's desktop. Any computer language might be used, although the ability to connect to a RDBMS is essential. [view wordle] [hide wordle]

Page  
155
Drop Schema registration Cascade;
Create Schema registration;
Set search_path=registration;
-- search_path directs following into the registration schema
Create Table structure ( 
  smi Text Unique Not Null, 
  cansmi Text Not Null, 
  id Serial Primary Key, 
  fp Bit Varying); 
Create Function add_new_structure() Returns Trigger As $EOSQL$
  Begin
    NEW.smi = isosmiles(NEW.smi);
    NEW.cansmi = cansmiles(NEW.smi);
    NEW.fp = fp(NEW.smi);
    Return NEW;
  End;
$EOSQL$ Language plpgsql;
Create Trigger add_new_structure Before Insert Or Update Onstructure
  For Each Row Execute Procedure add_new_structure();
Create Index cansmi_index On structure (cansmi); 
156  
157
Create Table error_log (smi text,
  attempt timestamp(0) Default current_timestamp ); 
Create Function add_new_structure() Returns Trigger As $EOSQL$ 
  Begin 
    NEW.smi = isosmiles(NEW.smi); 
    NEW.cansmi = cansmiles(NEW.smi); 
    NEW.fp = fp(NEW.smi); 
    Return NEW; 
    Exception
      When OTHERS Then 
        Insert Into error_log (smi) Values (NEW.smi); 
      Return Null; 
  End; 
$EOSQL$ Language plpgsql; 
Select id, smi from structure Where cansmi=cansmiles('c1ccccc1C(=O)NC'));
Select id, smi From structure Where smi=isosmiles('F[C@H](C)Cl'));
158
Select id,smi From structure Where
  contains(fp, fp('c1ccccc1C(=O)NC')) And
  matches(smi, 'c1ccccc1C(=O)NC'));
Create Table names (cid integer References structure (id), name text);
Select smi, name From structure Join names On (id=cid); 
159  
160
Create Function add_new_structure() Returns Trigger As $EOSQL$ 
  Declare 
    std_smiles Text; 
    smirks Text; 
    std Record; 
  Begin 
    For std In Select * from std_smirks Loop 
      std_smiles = xform(NEW.smi, std.smirks); 
      If std_smiles != NEW.smi Then 
        NEW.smi = std_smiles; 
      End If; 
    End Loop; 
    NEW.smi = isosmiles(NEW.smi); 
    NEW.cansmi = cansmiles(NEW.smi);
    NEW.fp = fp(NEW.smi); 
    Return NEW; 
    Exception
      When OTHERS Then 
        Insert Into error_log (smi) Values (NEW.smi); 
      Return Null; 
  End; 
$EOSQL$ Language plpgsql; 
161
Select graph('Oc1ccc(O)c2ccccc12') = graph('O=C3CCC(=O)c4ccccc34');
Create Table structure ( 
   smi Text Unique Not Null, 
   cansmi Text Not Null, 
   grf Text Not Null, 
   id Serial Primary Key, 
   fp Bit Varying);
Create Index grf_index On structure (grf); 
Create Function add_new_structure() Returns Trigger As $EOSQL$ 
   Declare 
     std_smiles Text; 
     smirks Text; 
     std Record; 
   Begin 
     For std In Select * from std_smirks Loop 
       std_smiles = xform(NEW.smi, std.smirks); 
       If std_smiles != NEW.smi Then 
         NEW.smi = std_smiles; 
       End If; 
     End Loop; 
     NEW.smi = isosmiles(NEW.smi); 
     NEW.cansmi = cansmiles(NEW.smi);
     NEW.grf = graph(NEW.smi);
     NEW.fp = fp(NEW.smi); 
     Return NEW; 
     Exception 
       When OTHERS Then 
         Insert Into error_log (smi) Values (NEW.smi); 
         Return Null; 
   End; 
$EOSQL$ Language plpgsql; 
162
Select isosmiles, count(isosmiles) from structure
  group by isosmiles having count(isosmiles) > 1;
163
Create Schema hiv;
Create Table hiv.protease (id Integer References registry.structure,
   ic50 Float, ec50 Float, updated Timestamp Default current_timestamp);
164
Create Sequence uniq_samples; 
Create Table sample (id Integer Default Nextval('uniq_samples'),
    cid Integer References registry.structure (id),
    prepared Timestamp(0) Default current_timestamp,
    Unique (id, cid)); 
Insert into sample (id, cid) Values (currval('uniq_samples'), 55);
165
Create Or Replace Function registry.compound_id (Text) Returns Integer As $EOSQL $ 
  Declare 
    cid Integer; 
  Begin 
    Select id Into cid from structure Where smi = isosmiles($1); 
    If cid Is Null Then 
      Insert into structure (smi) Values ($1); 
      Select id Into cid From structure Where smi = isosmiles($1); 
    End If; 
    Return cid; 
  End; 
$EOSQL$ Language plpgsql; 
Select registry.compound_id('SCCS');
166
Create Table list_description (id Serial, details Text); 
Create Table lists (id Integer References list_description (id), cid Integer References strcture (id)); 
167
perl smiloader drugs <drugs.smi | psql mydb
perl sdfloader vla4 <vla-4.sdf | psql mydb
168
molgrep drugs 'c1ccccc1C(=O)NC'
molcat vla4
169
molview vla4 'c1ccccc1C(=O)' >mtest.html
170
molarb vla4 5 2
molarb vla4 5 8
molrandom vla4 5
171
molnear vla4 'c1ccccc1C(=O)NC' 0.2 
molsame drugs pubtest