1
2
3
4
5
6
7 """ class definitions for similarity screening
8
9 See _SimilarityScreener_ for overview of required API
10
11 """
12 from rdkit import DataStructs
13 from rdkit.DataStructs import TopNContainer
14 from rdkit import RDConfig
15
17 """ base class
18
19 important attributes:
20 probe: the probe fingerprint against which we screen.
21
22 metric: a function that takes two arguments and returns a similarity
23 measure between them
24
25 dataSource: the source pool from which to draw, needs to support
26 a next() method
27
28 fingerprinter: a function that takes a molecule and returns a
29 fingerprint of the appropriate format
30
31
32 **Notes**
33 subclasses must support either an iterator interface
34 or __len__ and __getitem__
35 """
# Constructor: stores the four arguments on the instance exactly as passed
# (no validation, no copying). Every argument defaults to None.
# NOTE(review): nothing here checks that a fingerprinter or metric was
# actually supplied, yet code further down this file calls
# self.fingerprinter(...) directly — confirm callers always provide them.
36 - def __init__(self,probe=None,metric=None,dataSource=None,fingerprinter=None):
37 self.metric = metric
38 self.dataSource = dataSource
39 self.fingerprinter = fingerprinter
40 self.probe = probe
41
43 """ used to reset screeners that behave as iterators
44 """
# No-op placeholder: nothing is reset here.
45 pass
46
47
49 """ sets our probe fingerprint """
# Replaces the stored probe with the given fingerprint as-is (no copy, no
# validation).
50 self.probe = probeFingerprint
51
53 """ returns a fingerprint for a single probe object
54
55 This is potentially useful in initializing our internal
56 probe object.
57
58 """
# Delegates straight to the configured fingerprinter callable; the result is
# handed back to the caller and is not stored on self.
59 return self.fingerprinter(probe)
60
62 """ Used to return all compounds that have a similarity
63 to the probe beyond a threshold value
64
65 **Notes**:
66
67 - This is as lazy as possible, so the data source isn't
68 queried until the client asks for a hit.
69
70 - In addition to being lazy, this class is as thin as possible.
71 (Who'd have thought it was possible!)
72 Hits are *not* stored locally, so if a client resets
73 the iteration and starts over, the same amount of work must
74 be done to retrieve the hits.
75
76 - The thinness and laziness forces us to support only forward
77 iteration (not random access)
78
79 """
84
85
87 """ *Internal use only* """
# Pulls objects from self.dataIter one at a time, fingerprints each, and
# scores it against self.probe with self.metric; the first object whose
# similarity is >= self.threshold is returned as a (similarity, object) pair.
88 done = 0
89 res = None
90 sim = 0
91 while not done:
92
93
# No exhaustion handling is visible here: whatever the iterator raises when
# it runs out propagates to the caller.
# NOTE(review): `.next()` is the Python 2 iterator method; Python 3 iterators
# expose `__next__`, so this call would need to become `next(self.dataIter)`
# there — confirm the target interpreter.
94 obj = self.dataIter.next()
95 fp = self.fingerprinter(obj)
96 sim = DataStructs.FingerprintSimilarity(fp,self.probe,self.metric)
97 if sim >= self.threshold:
98 res = obj
99 done = 1
100 return sim,res
101
103 """ used to reset our internal state so that iteration
104 starts again from the beginning
105 """
# Rewind the underlying source first, then take a fresh iterator over it.
# NOTE(review): this relies on dataSource providing a reset() method —
# verify that every data source used here has one.
106 self.dataSource.reset()
107 self.dataIter = iter(self.dataSource)
109 """ returns an iterator for this screener
110 """
# Calls Reset() before handing anything back, then returns this object
# itself to serve as the iterator.
111 self.Reset()
112 return self
114 """ required part of iterator interface """
# Thin wrapper: each step returns whatever _nextMatch() produces.
115 return self._nextMatch()
116
118 """ A screener that only returns the top N hits found
119
120 **Notes**
121
122 - supports forward iteration and getitem
123
124 """
130
# Hands out stored hits one at a time: self._pos is the cursor into
# self.topN, and StopIteration is raised once numToGet items have been
# returned.
# NOTE(review): the stop bound is numToGet, not the number of hits actually
# held in topN — confirm topN always has at least numToGet entries,
# otherwise the indexing below is what fails first.
139 if self._pos >= self.numToGet:
140 raise StopIteration
141 else:
142 res = self.topN[self._pos]
143 self._pos += 1
144 return res
145
152
# Calls _initTopN() when topN has not been set yet (presumably populating
# it), then reports a size.
# NOTE(review): the value returned is the requested count (numToGet), not a
# count of hits actually found — confirm that is intended.
154 if self.topN is None:
155 self._initTopN()
156 return self.numToGet
157
162