RDKit
Open-source cheminformatics and machine learning.
RDKitFPGenerator.h
Go to the documentation of this file.
1//
2// Copyright (C) 2018 Boran Adas, Google Summer of Code
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RD_RDFINGERPRINTGEN_H_2018_07
13#define RD_RDFINGERPRINTGEN_H_2018_07
14
16
17namespace RDKit {
18namespace RDKitFP {
19
20template <typename OutputType>
22 : public FingerprintArguments<OutputType> {
23 public:
24 const unsigned int d_minPath;
25 const unsigned int d_maxPath;
26 const bool df_useHs;
27 const bool df_branchedPaths;
28 const bool df_useBondOrder;
29
30 OutputType getResultSize() const override;
31
32 std::string infoString() const override;
33
34 /**
35 \brief Construct a new RDKitFPArguments object
36
37 \param minPath the minimum path length (in bonds) to be included
38 \param maxPath the maximum path length (in bonds) to be included
39 \param useHs toggles inclusion of Hs in paths (if the molecule has
40 explicit Hs)
41 \param branchedPaths toggles generation of branched subgraphs, not just
42 linear paths
43 \param useBondOrder toggles inclusion of bond orders in the path hashes
44 \param countSimulation if set, use count simulation while
45 generating the fingerprint
46 \param countBounds boundaries for count simulation, corresponding bit will
47 be set if the count is higher than the number provided for that spot
48 \param fpSize size of the generated fingerprint, does not affect the sparse
49 versions
50 \param numBitsPerFeature controls the number of bits that are set for each
51 path/subgraph found
52
53 */
54 RDKitFPArguments(unsigned int minPath, unsigned int maxPath, bool useHs,
55 bool branchedPaths, bool useBondOrder, bool countSimulation,
56 const std::vector<std::uint32_t> countBounds,
57 std::uint32_t fpSize, std::uint32_t numBitsPerFeature);
58};
59
62 public:
63 std::vector<std::uint32_t> *getAtomInvariants(
64 const ROMol &mol) const override;
65
66 std::string infoString() const override;
67 RDKitFPAtomInvGenerator *clone() const override;
68};
69
70template <typename OutputType>
72 : public AtomEnvironment<OutputType> {
73 const OutputType d_bitId;
74 const boost::dynamic_bitset<> d_atomsInPath;
75 const INT_VECT d_bondPath;
76
77 public:
79 const std::vector<std::uint32_t> *atomInvariants,
80 const std::vector<std::uint32_t> *bondInvariants,
81 const AdditionalOutput *additionalOutput,
82 bool hashResults = false,
83 const std::uint64_t fpSize = 0) const override;
84
85 /**
86 \brief Construct a new RDKitFPAtomEnv object
87
88 \param bitId bitId generated for this environment
89 \param atomsInPath holds atoms in this environment to set additional output
90 \param bondPath the bond path defining the environment
91
92 */
93 RDKitFPAtomEnv(const OutputType bitId, boost::dynamic_bitset<> atomsInPath,
94 INT_VECT bondPath)
95 : d_bitId(bitId),
96 d_atomsInPath(std::move(atomsInPath)),
97 d_bondPath(std::move(bondPath)) {}
98};
99
100template <typename OutputType>
102 : public AtomEnvironmentGenerator<OutputType> {
103 public:
104 std::vector<AtomEnvironment<OutputType> *> getEnvironments(
105 const ROMol &mol, FingerprintArguments<OutputType> *arguments,
106 const std::vector<std::uint32_t> *fromAtoms,
107 const std::vector<std::uint32_t> *ignoreAtoms, int confId,
108 const AdditionalOutput *additionalOutput,
109 const std::vector<std::uint32_t> *atomInvariants,
110 const std::vector<std::uint32_t> *bondInvariants,
111 bool hashResults = false) const override;
112
113 std::string infoString() const override;
114};
115
116/**
117 \brief Get a RDKit fingerprint generator with given parameters
118
119 \tparam OutputType determines the size of the bitIds and the result, can be 32
120 or 64 bit unsigned integer
121 \param minPath the minimum path length (in bonds) to be included
122 \param maxPath the maximum path length (in bonds) to be included
123 \param useHs toggles inclusion of Hs in paths (if the molecule has
124 explicit Hs)
125 \param branchedPaths toggles generation of branched subgraphs, not just
126 linear paths
127 \param useBondOrder toggles inclusion of bond orders in the path hashes
128 \param atomInvariantsGenerator custom atom invariants generator to use
129 \param countSimulation if set, use count simulation while
130 generating the fingerprint
131 \param countBounds boundaries for count simulation, corresponding bit will be
132 set if the count is higher than the number provided for that spot
133 \param fpSize size of the generated fingerprint, does not affect the sparse
134 versions
135 \param numBitsPerFeature controls the number of bits that are set for each
136 path/subgraph found
137 \param ownsAtomInvGen if set atom invariants generator is destroyed with the
138 fingerprint generator
139
140 \return FingerprintGenerator<OutputType>* that generates RDKit fingerprints
141
142 This generator supports the following \c AdditionalOutput types:
143 - \c atomToBits : which bits each atom is involved in
144 - \c atomCounts : how many bits each atom sets
145 - \c bitPaths : map from bitId to vectors of bond indices for the individual
146 subgraphs
147
148 */
149template <typename OutputType>
151 unsigned int minPath = 1, unsigned int maxPath = 7, bool useHs = true,
152 bool branchedPaths = true, bool useBondOrder = true,
153 AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
154 bool countSimulation = false,
155 const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
156 std::uint32_t fpSize = 2048, std::uint32_t numBitsPerFeature = 2,
157 bool ownsAtomInvGen = false);
158
159} // namespace RDKitFP
160} // namespace RDKit
161
162#endif
abstract base class that generates atom-environments from a molecule
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
abstract base class for atom invariants generators
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
class that generates same fingerprint style for different output formats
std::string infoString() const override
method that returns information string about the fingerprint specific argument set and the arguments ...
RDKitFPArguments(unsigned int minPath, unsigned int maxPath, bool useHs, bool branchedPaths, bool useBondOrder, bool countSimulation, const std::vector< std::uint32_t > countBounds, std::uint32_t fpSize, std::uint32_t numBitsPerFeature)
Construct a new RDKitFPArguments object.
OutputType getResultSize() const override
Returns the size of the fingerprint based on arguments.
RDKitFPAtomEnv(const OutputType bitId, boost::dynamic_bitset<> atomsInPath, INT_VECT bondPath)
Construct a new RDKitFPAtomEnv object.
OutputType getBitId(FingerprintArguments< OutputType > *arguments, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, const AdditionalOutput *additionalOutput, bool hashResults=false, const std::uint64_t fpSize=0) const override
calculates and returns the bit id to be set for this atom-environment
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
RDKitFPAtomInvGenerator * clone() const override
std::vector< AtomEnvironment< OutputType > * > getEnvironments(const ROMol &mol, FingerprintArguments< OutputType > *arguments, const std::vector< std::uint32_t > *fromAtoms, const std::vector< std::uint32_t > *ignoreAtoms, int confId, const AdditionalOutput *additionalOutput, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, bool hashResults=false) const override
generate and return all atom-environments from a molecule
std::string infoString() const override
method that returns information about this AtomEnvironmentGenerator and its arguments if any
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:177
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getRDKitFPGenerator(unsigned int minPath=1, unsigned int maxPath=7, bool useHs=true, bool branchedPaths=true, bool useBondOrder=true, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, bool countSimulation=false, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, std::uint32_t fpSize=2048, std::uint32_t numBitsPerFeature=2, bool ownsAtomInvGen=false)
Get a RDKit fingerprint generator with given parameters.
Std stuff.
Definition: Abbreviations.h:19
std::vector< int > INT_VECT
Definition: types.h:278