12#ifndef RD_FINGERPRINTGEN_H_2018_05
13#define RD_FINGERPRINTGEN_H_2018_05
29 std::map<std::uint64_t,
30 std::vector<std::pair<std::uint32_t, std::uint32_t>>>;
31 using bitPathsType = std::map<std::uint64_t, std::vector<std::vector<int>>>;
51 atomToBits = atomToBitsHolder.get();
55 bitInfoMap = bitInfoMapHolder.get();
59 bitPaths = bitPathsHolder.get();
63 atomCounts = atomCountsHolder.get();
67 std::unique_ptr<atomToBitsType> atomToBitsHolder;
68 std::unique_ptr<bitInfoMapType> bitInfoMapHolder;
69 std::unique_ptr<bitPathsType> bitPathsHolder;
70 std::unique_ptr<atomCountsType> atomCountsHolder;
82 const std::vector<std::uint32_t> countBounds,
84 std::uint32_t numBitsPerFeature = 1,
85 bool includeChirality =
false);
86 bool df_countSimulation =
false;
87 bool df_includeChirality =
false;
89 std::uint32_t d_fpSize = 2048;
90 std::uint32_t d_numBitsPerFeature = 1;
117template <
typename OutputType>
132 const std::vector<std::uint32_t> *atomInvariants,
133 const std::vector<std::uint32_t> *bondInvariants,
135 const bool hashResults =
false,
136 const std::uint64_t fpSize = 0)
const = 0;
138 size_t bitId)
const = 0;
147template <
typename OutputType>
149 :
private boost::noncopyable {
180 const std::vector<std::uint32_t> *fromAtoms =
nullptr,
181 const std::vector<std::uint32_t> *ignoreAtoms =
nullptr,
183 const std::vector<std::uint32_t> *atomInvariants =
nullptr,
184 const std::vector<std::uint32_t> *bondInvariants =
nullptr,
185 const bool hashResults =
false)
const = 0;
211 :
private boost::noncopyable {
222 const ROMol &mol)
const = 0;
241 :
private boost::noncopyable {
252 const ROMol &mol)
const = 0;
299template <
typename OutputType>
301 :
private boost::noncopyable {
306 const bool df_ownsAtomInvGenerator;
307 const bool df_ownsBondInvGenerator;
309 std::unique_ptr<SparseIntVect<OutputType>> getFingerprintHelper(
311 const std::uint64_t fpSize = 0)
const;
319 bool ownsAtomInvGenerator =
false,
bool ownsBondInvGenerator =
false);
325 return dp_fingerprintArguments;
341 const std::vector<const ROMol *> &mols,
int numThreads = 1)
const;
344 const std::vector<const ROMol *> &mols,
int numThreads = 1)
const;
346 std::vector<std::unique_ptr<SparseIntVect<std::uint32_t>>>
348 int numThreads = 1)
const;
350 std::vector<std::unique_ptr<SparseIntVect<OutputType>>>
352 int numThreads = 1)
const;
355 const ROMol &mol,
const std::vector<std::uint32_t> *fromAtoms =
nullptr,
356 const std::vector<std::uint32_t> *ignoreAtoms =
nullptr,
int confId = -1,
358 const std::vector<std::uint32_t> *customAtomInvariants =
nullptr,
359 const std::vector<std::uint32_t> *customBondInvariants =
nullptr)
const {
361 additionalOutput, customAtomInvariants,
362 customBondInvariants);
363 return getSparseCountFingerprint(mol, ffa).release();
367 const ROMol &mol,
const std::vector<std::uint32_t> *fromAtoms =
nullptr,
368 const std::vector<std::uint32_t> *ignoreAtoms =
nullptr,
int confId = -1,
370 const std::vector<std::uint32_t> *customAtomInvariants =
nullptr,
371 const std::vector<std::uint32_t> *customBondInvariants =
nullptr)
const {
373 additionalOutput, customAtomInvariants,
374 customBondInvariants);
375 return getSparseFingerprint(mol, ffa).release();
379 const ROMol &mol,
const std::vector<std::uint32_t> *fromAtoms =
nullptr,
380 const std::vector<std::uint32_t> *ignoreAtoms =
nullptr,
int confId = -1,
382 const std::vector<std::uint32_t> *customAtomInvariants =
nullptr,
383 const std::vector<std::uint32_t> *customBondInvariants =
nullptr)
const {
385 additionalOutput, customAtomInvariants,
386 customBondInvariants);
387 return getCountFingerprint(mol, ffa).release();
391 const ROMol &mol,
const std::vector<std::uint32_t> *fromAtoms =
nullptr,
392 const std::vector<std::uint32_t> *ignoreAtoms =
nullptr,
int confId = -1,
394 const std::vector<std::uint32_t> *customAtomInvariants =
nullptr,
395 const std::vector<std::uint32_t> *customBondInvariants =
nullptr)
const {
397 additionalOutput, customAtomInvariants,
398 customBondInvariants);
399 return getFingerprint(mol, ffa).release();
407 const ROMol &,
const std::vector<std::uint32_t> *,
409 const std::vector<std::uint32_t> *,
410 const std::vector<std::uint32_t> *)
const;
413 const ROMol &,
const std::vector<std::uint32_t> *,
415 const std::vector<std::uint32_t> *,
416 const std::vector<std::uint32_t> *)
const;
419 const ROMol &,
const std::vector<std::uint32_t> *,
421 const std::vector<std::uint32_t> *,
422 const std::vector<std::uint32_t> *)
const;
425 const ROMol &,
const std::vector<std::uint32_t> *,
427 const std::vector<std::uint32_t> *,
428 const std::vector<std::uint32_t> *)
const;
431 const ROMol &,
const std::vector<std::uint32_t> *,
433 const std::vector<std::uint32_t> *,
434 const std::vector<std::uint32_t> *)
const;
437 const ROMol &,
const std::vector<std::uint32_t> *,
439 const std::vector<std::uint32_t> *,
440 const std::vector<std::uint32_t> *)
const;
443 const ROMol &,
const std::vector<std::uint32_t> *,
445 const std::vector<std::uint32_t> *,
446 const std::vector<std::uint32_t> *)
const;
449 const ROMol &,
const std::vector<std::uint32_t> *,
451 const std::vector<std::uint32_t> *,
452 const std::vector<std::uint32_t> *)
const;
459 :
public std::exception {
a class for bit vectors that are densely occupied
abstract base class that generates atom-environments from a molecule
virtual ~AtomEnvironmentGenerator()
virtual std::vector< AtomEnvironment< OutputType > * > getEnvironments(const ROMol &mol, FingerprintArguments *arguments, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, const int confId=-1, const AdditionalOutput *additionalOutput=nullptr, const std::vector< std::uint32_t > *atomInvariants=nullptr, const std::vector< std::uint32_t > *bondInvariants=nullptr, const bool hashResults=false) const =0
generate and return all atom-environments from a molecule
const FingerprintArguments * dp_fingerprintArguments
virtual std::string infoString() const =0
method that returns information about this AtomEnvironmentGenerator and its arguments if any
virtual OutputType getResultSize() const =0
Returns the size of the fingerprint based on arguments.
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
virtual ~AtomEnvironment()
virtual void updateAdditionalOutput(AdditionalOutput *AdditionalOutput, size_t bitId) const =0
virtual OutputType getBitId(FingerprintArguments *arguments, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, AdditionalOutput *AdditionalOutput, const bool hashResults=false, const std::uint64_t fpSize=0) const =0
calculates and returns the bit id to be set for this atom-environment
abstract base class for atom invariants generators
virtual ~AtomInvariantsGenerator()
virtual std::string infoString() const =0
method that returns information about this AtomInvariantsGenerator and its arguments
virtual AtomInvariantsGenerator * clone() const =0
virtual std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const =0
get atom invariants from a molecule
abstract base class for bond invariants generators
virtual std::string infoString() const =0
method that returns information about this BondInvariantsGenerator and its arguments
virtual ~BondInvariantsGenerator()
virtual BondInvariantsGenerator * clone() const =0
virtual std::vector< std::uint32_t > * getBondInvariants(const ROMol &mol) const =0
get bond invariants from a molecule
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
virtual ~FingerprintArguments()
virtual std::string infoString() const =0
method that returns information string about the fingerprint specific argument set and the arguments ...
FingerprintArguments()=default
FingerprintArguments(bool countSimulation, const std::vector< std::uint32_t > countBounds, std::uint32_t fpSize, std::uint32_t numBitsPerFeature=1, bool includeChirality=false)
std::string commonArgumentsString() const
method that returns information string about common fingerprinting arguments' values
std::vector< std::uint32_t > d_countBounds
class that generates same fingerprint style for different output formats
std::unique_ptr< SparseBitVect > getSparseFingerprint(const ROMol &mol, FingerprintFuncArguments &args) const
FingerprintGenerator(AtomEnvironmentGenerator< OutputType > *atomEnvironmentGenerator, FingerprintArguments *fingerprintArguments, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, BondInvariantsGenerator *bondInvariantsGenerator=nullptr, bool ownsAtomInvGenerator=false, bool ownsBondInvGenerator=false)
std::unique_ptr< ExplicitBitVect > getFingerprint(const ROMol &mol, FingerprintFuncArguments &args) const
std::string infoString() const
std::unique_ptr< SparseIntVect< std::uint32_t > > getCountFingerprint(const ROMol &mol, FingerprintFuncArguments &args) const
std::vector< std::unique_ptr< ExplicitBitVect > > getFingerprints(const std::vector< const ROMol * > &mols, int numThreads=1) const
std::vector< std::unique_ptr< SparseIntVect< OutputType > > > getSparseCountFingerprints(const std::vector< const ROMol * > &mols, int numThreads=1) const
std::vector< std::unique_ptr< SparseIntVect< std::uint32_t > > > getCountFingerprints(const std::vector< const ROMol * > &mols, int numThreads=1) const
std::vector< std::unique_ptr< SparseBitVect > > getSparseFingerprints(const std::vector< const ROMol * > &mols, int numThreads=1) const
std::unique_ptr< SparseIntVect< OutputType > > getSparseCountFingerprint(const ROMol &mol, FingerprintFuncArguments &args) const
SparseIntVect< std::uint32_t > * getCountFingerprint(const ROMol &mol, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, int confId=-1, AdditionalOutput *additionalOutput=nullptr, const std::vector< std::uint32_t > *customAtomInvariants=nullptr, const std::vector< std::uint32_t > *customBondInvariants=nullptr) const
SparseBitVect * getSparseFingerprint(const ROMol &mol, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, int confId=-1, AdditionalOutput *additionalOutput=nullptr, const std::vector< std::uint32_t > *customAtomInvariants=nullptr, const std::vector< std::uint32_t > *customBondInvariants=nullptr) const
ExplicitBitVect * getFingerprint(const ROMol &mol, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, int confId=-1, AdditionalOutput *additionalOutput=nullptr, const std::vector< std::uint32_t > *customAtomInvariants=nullptr, const std::vector< std::uint32_t > *customBondInvariants=nullptr) const
FingerprintArguments * getOptions()
SparseIntVect< OutputType > * getSparseCountFingerprint(const ROMol &mol, const std::vector< std::uint32_t > *fromAtoms=nullptr, const std::vector< std::uint32_t > *ignoreAtoms=nullptr, int confId=-1, AdditionalOutput *additionalOutput=nullptr, const std::vector< std::uint32_t > *customAtomInvariants=nullptr, const std::vector< std::uint32_t > *customBondInvariants=nullptr) const
const FingerprintArguments * getOptions() const
a class for efficiently storing sparse vectors of ints
UnimplementedFPException(const char *msg)
construct with an error message
~UnimplementedFPException() noexcept override=default
UnimplementedFPException(std::string msg)
construct with an error message
const char * what() const noexcept override
get the error message
a class for bit vectors that are sparsely occupied.
#define RDKIT_FINGERPRINTS_EXPORT
RDKIT_FINGERPRINTS_EXPORT SparseBitVect * getSparseFP(const ROMol &mol, FPType fPType)
bool rdvalue_is(const RDValue_cast_t)
RDKIT_FINGERPRINTS_EXPORT std::vector< SparseBitVect * > * getSparseFPBulk(const std::vector< const ROMol * > molVector, FPType fPType)
RDKIT_FINGERPRINTS_EXPORT std::vector< ExplicitBitVect * > * getFPBulk(const std::vector< const ROMol * > molVector, FPType fPType)
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::uint64_t > * getSparseCountFP(const ROMol &mol, FPType fPType)
RDKIT_FINGERPRINTS_EXPORT std::vector< SparseIntVect< std::uint32_t > * > * getCountFPBulk(const std::vector< const ROMol * > molVector, FPType fPType)
RDKIT_FINGERPRINTS_EXPORT std::vector< SparseIntVect< std::uint64_t > * > * getSparseCountFPBulk(const std::vector< const ROMol * > molVector, FPType fPType)
RDKIT_FINGERPRINTS_EXPORT SparseIntVect< std::uint32_t > * getCountFP(const ROMol &mol, FPType fPType)
RDKIT_FINGERPRINTS_EXPORT ExplicitBitVect * getFP(const ROMol &mol, FPType fPType)
void allocateAtomCounts()
std::vector< std::vector< std::uint64_t > > atomToBitsType
std::vector< unsigned int > atomCountsType
void allocateBitInfoMap()
void allocateAtomToBits()
std::map< std::uint64_t, std::vector< std::vector< int > > > bitPathsType
std::map< std::uint64_t, std::vector< std::pair< std::uint32_t, std::uint32_t > > > bitInfoMapType
struct that makes calling the fingerprint generation functions easier
const std::vector< std::uint32_t > * customAtomInvariants
AdditionalOutput * additionalOutput
const std::vector< std::uint32_t > * customBondInvariants
const std::vector< std::uint32_t > * fromAtoms
const std::vector< std::uint32_t > * ignoreAtoms
FingerprintFuncArguments(const std::vector< std::uint32_t > *fromAtoms_arg, const std::vector< std::uint32_t > *ignoreAtoms_arg, int confId_arg, AdditionalOutput *additionalOutput_arg, const std::vector< std::uint32_t > *customAtomInvariants_arg, const std::vector< std::uint32_t > *customBondInvariants_arg)
FingerprintFuncArguments()=default