1
2
3
4
5
6 from rdkit import Chem
7 from rdkit.Chem import AllChem
8 from rdkit.Chem import Lipinski,Descriptors,Crippen
9 from rdkit.Dbase.DbConnection import DbConnect
10 from rdkit.Dbase import DbModule
11 import re
12
13
14 import rdkit.RDLogger as logging
15 logger = logging.logger()
16 logger.setLevel(logging.INFO)
17
18 -def ProcessMol(mol,typeConversions,globalProps,nDone,nameProp='_Name',nameCol='compound_id',
19 redraw=False,keepHs=False,
20 skipProps=False,addComputedProps=False,
21 skipSmiles=False,
22 uniqNames=None,namesSeen=None):
78
def ConvertRows(rows,globalProps,defaultVal,skipSmiles):
    """Flatten every entry of `rows`, in place, into a list of column values.

    Each incoming row is read positionally: element 0 comes first in the
    output, the last element is a dict of per-row properties, element 1 (and
    element 2 unless `skipSmiles` is true) are appended after the properties.

    Arguments:
      rows: mutable sequence of rows; each slot is overwritten with its
        flattened replacement.
      globalProps: iterable of property names; one output value is emitted
        per name, in iteration order.
      defaultVal: value used when a row's property dict lacks a name.
      skipSmiles: when true, element 2 of the incoming row is not copied.

    Returns None; the result is delivered by mutating `rows`.
    """
    for idx, rawRow in enumerate(rows):
        flat = [rawRow[0]]
        propDict = rawRow[-1]
        # one value per known property, padded with the default where missing
        flat.extend(propDict.get(propName, defaultVal) for propName in globalProps)
        flat.append(rawRow[1])
        if not skipSmiles:
            flat.append(rawRow[2])
        rows[idx] = flat
91
def _ReportFailedMol(suppl, idx, errorsTo):
    """Record the supplier entry at position `idx` that yielded no molecule.

    Does nothing when `errorsTo` is falsy.  Otherwise the raw text of the
    entry is written to `errorsTo` if the supplier offers GetItemText();
    if it does not, a warning is logged instead.
    """
    if not errorsTo:
        return
    if hasattr(suppl, 'GetItemText'):
        errorsTo.write(suppl.GetItemText(idx))
    else:
        logger.warning('full error file support not complete')


def _IterMolRows(suppl, errorsTo, silent, typeConversions, globalProps, molArgs):
    """Lazily yield the ProcessMol() row of each usable molecule in `suppl`.

    A single iterator over `suppl` is created and advanced with next(), so a
    caller that stops consuming this generator and comes back later resumes
    exactly where it left off.  Entries that are falsy are reported through
    _ReportFailedMol() and skipped; molecules for which ProcessMol() returns
    nothing are skipped as well.  Progress is logged every 100 entries unless
    `silent` is true.
    """
    molIter = iter(suppl)
    nDone = 0
    while True:
        try:
            mol = next(molIter)
        except StopIteration:
            return
        nDone += 1
        if not mol:
            # nDone is 1-based, GetItemText() is addressed 0-based
            _ReportFailedMol(suppl, nDone - 1, errorsTo)
            continue
        row = ProcessMol(mol, typeConversions, globalProps, nDone, **molArgs)
        if not row:
            continue
        if not silent and not nDone % 100:
            logger.info(' done %d'%nDone)
        yield row


def _StoreRows(conn, curs, insertCmd, rows, globalProps, defaultVal, skipSmiles):
    """Flatten `rows` with ConvertRows(), insert them and commit."""
    ConvertRows(rows, globalProps, defaultVal, skipSmiles)
    curs.executemany(insertCmd, rows)
    conn.Commit()


def LoadDb(suppl, dbName, nameProp='_Name', nameCol='compound_id', silent=False,
           redraw=False, errorsTo=None, keepHs=False, defaultVal='N/A', skipProps=False,
           regName='molecules', skipSmiles=False, maxRowsCached=-1,
           uniqNames=False, addComputedProps=False, lazySupplier=False):
    """Load the molecules provided by `suppl` into table `regName` of `dbName`.

    Loading happens in two phases.  First, molecules are processed until the
    supplier is exhausted or `maxRowsCached` rows have been collected; the
    properties registered in the shared property dict by then define the
    table's columns.  Any existing table named `regName` is dropped, the new
    table is created, and the collected rows are inserted.  Second, the
    remaining molecules are processed and inserted in batches of
    `maxRowsCached` rows.

    Arguments:
      suppl: iterable of molecules.  Must support len() unless `lazySupplier`
        is true.  Falsy entries are treated as failed reads.
      dbName: database name handed to DbConnect().
      nameProp, redraw, keepHs, skipProps, addComputedProps: forwarded
        unchanged to ProcessMol().
      nameCol: name of the first (not null) column; also forwarded to
        ProcessMol().
      silent: when true, no progress messages are logged.
      errorsTo: optional object with a write() method that receives the
        text of failed entries (needs suppl.GetItemText()).
      defaultVal: value stored for a property that a molecule does not have.
      regName: name of the table to (re)create.
      skipSmiles: when true, no SMILES column is created or filled.
      maxRowsCached: number of rows to accumulate before writing.  With the
        default of -1 the size test never matches, so every row is collected
        before the single insert.
      uniqNames: when true, the name column gets a unique constraint; also
        forwarded to ProcessMol().
      lazySupplier: when true, len(suppl) is not evaluated.

    Returns None.
    """
    nMols = -1 if lazySupplier else len(suppl)
    if not silent:
        logger.info("Generating molecular database in file %s"%dbName)
        if not lazySupplier:
            logger.info(" Processing %d molecules"%nMols)

    globalProps = {}
    namesSeen = set()
    typeConversions = {0: ('varchar', str), 1: ('float', float), 2: ('int', int)}
    molArgs = dict(nameProp=nameProp, nameCol=nameCol, redraw=redraw,
                   keepHs=keepHs, skipProps=skipProps,
                   addComputedProps=addComputedProps, skipSmiles=skipSmiles,
                   uniqNames=uniqNames, namesSeen=namesSeen)
    rowStream = _IterMolRows(suppl, errorsTo, silent, typeConversions,
                             globalProps, molArgs)

    # Phase 1: collect the rows whose properties will define the schema.
    rows = []
    for row in rowStream:
        rows.append(row)
        if len(rows) == maxRowsCached:
            break

    nameDef = '%s varchar not null'%nameCol
    if uniqNames:
        nameDef += ' unique'
    typs = [nameDef]
    pns = []
    for pn, v in globalProps.items():
        # column names may only contain word characters
        addNm = re.sub(r'[\W]', '_', pn)
        typs.append('%s %s'%(addNm, typeConversions[v][0]))
        pns.append(pn.lower())

    if not skipSmiles:
        # avoid clashing with a molecule property that is itself called "smiles"
        if 'smiles' not in pns:
            typs.append('smiles varchar')
        else:
            typs.append('cansmiles varchar')
    typs.append('molpkl %s'%(DbModule.binaryTypeName))

    conn = DbConnect(dbName)
    curs = conn.GetCursor()
    try:
        curs.execute('drop table %s'%regName)
    except Exception:
        # Best effort: a failing drop (most likely because the table does not
        # exist yet) must not stop the load.  Interpreter-exit signals such as
        # KeyboardInterrupt are deliberately not swallowed.
        pass
    curs.execute('create table %s (%s)'%(regName, ','.join(typs)))
    qs = ','.join([DbModule.placeHolder] * len(typs))
    insertCmd = 'insert into %s values (%s)'%(regName, qs)

    _StoreRows(conn, curs, insertCmd, rows, globalProps, defaultVal, skipSmiles)

    # Phase 2: stream whatever the supplier still holds, batch by batch.
    # NOTE(review): the table layout is fixed at this point.  If ProcessMol()
    # registers additional properties in globalProps for later molecules, the
    # flattened rows would no longer match the table -- confirm against
    # ProcessMol().
    rows = []
    for row in rowStream:
        rows.append(row)
        if len(rows) == maxRowsCached:
            _StoreRows(conn, curs, insertCmd, rows, globalProps, defaultVal, skipSmiles)
            rows = []
    if rows:
        _StoreRows(conn, curs, insertCmd, rows, globalProps, defaultVal, skipSmiles)
196