RDKit
Open-source cheminformatics and machine learning.
MorganGenerator.h
Go to the documentation of this file.
1//
2// Copyright (C) 2018 Boran Adas, Google Summer of Code
3//
4// @@ All Rights Reserved @@
5// This file is part of the RDKit.
6// The contents are covered by the terms of the BSD license
7// which is included in the file license.txt, found at the root
8// of the RDKit source tree.
9//
10
11#include <RDGeneral/export.h>
12#ifndef RD_MORGANGEN_H_2018_07
13#define RD_MORGANGEN_H_2018_07
14
16#include <cstdint>
17
18namespace RDKit {
19
20namespace MorganFingerprint {
21
22/**
23 \brief Default atom invariants generator for Morgan fingerprint, generates
24 ECFP-type invariants
25
26 */
29 const bool df_includeRingMembership;
30
31 public:
32 /**
33 \brief Construct a new MorganAtomInvGenerator object
34
35 \param includeRingMembership : if set, whether or not the atom is in a ring
36 will be used in the invariant list.
37 */
38 MorganAtomInvGenerator(const bool includeRingMembership = true);
39
40 std::vector<std::uint32_t> *getAtomInvariants(
41 const ROMol &mol) const override;
42
43 std::string infoString() const override;
44 MorganAtomInvGenerator *clone() const override;
45};
46
47/**
48 \brief Alternative atom invariants generator for Morgan fingerprint, generates
49 FCFP-type invariants
50
51 */
54 std::vector<const ROMol *> *dp_patterns;
55
56 public:
57 /**
58 \brief Construct a new MorganFeatureAtomInvGenerator object
59
60 \param patterns : if provided, should contain the queries used to assign
61 atom-types. If not provided, feature definitions adapted from reference:
62 Gobbi and Poppinger, Biotech. Bioeng. _61_ 47-54 (1998) will be used for
63 Donor, Acceptor, Aromatic, Halogen, Basic, Acidic.
64 */
65 MorganFeatureAtomInvGenerator(std::vector<const ROMol *> *patterns = nullptr);
66
67 std::vector<std::uint32_t> *getAtomInvariants(
68 const ROMol &mol) const override;
69
70 std::string infoString() const override;
72};
73
74/**
75 \brief Bond invariants generator for Morgan fingerprint
76
77 */
80 const bool df_useBondTypes;
81 const bool df_useChirality;
82
83 public:
84 /**
85 \brief Construct a new MorganBondInvGenerator object
86
87 \param useBondTypes : if set, bond types will be included as a part of the
88 bond invariants
89 \param useChirality : if set, chirality information will be included as a
90 part of the bond invariants
91 */
92 MorganBondInvGenerator(const bool useBondTypes = true,
93 const bool useChirality = false);
94
95 std::vector<std::uint32_t> *getBondInvariants(
96 const ROMol &mol) const override;
97
98 std::string infoString() const override;
99 MorganBondInvGenerator *clone() const override;
100 ~MorganBondInvGenerator() override = default;
101};
102
103/**
104 \brief Class for holding Morgan fingerprint specific arguments
105
106 */
107template <typename OutputType>
109 : public FingerprintArguments<OutputType> {
110 public:
113 const unsigned int d_radius;
114
115 OutputType getResultSize() const override;
116
117 std::string infoString() const override;
118
119 /**
120 \brief Construct a new MorganArguments object
121
122 \param radius the number of iterations to grow the fingerprint
123 \param countSimulation if set, use count simulation while generating the
124 fingerprint
125 \param includeChirality if set, chirality information will be added to the
126 generated bit id, independently from bond invariants
127 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
128 have a nonzero invariant
129 \param countBounds boundaries for count simulation, corresponding bit will
130 be set if the count is higher than the number provided for that spot
131 \param fpSize size of the generated fingerprint, does not affect the sparse
132 versions
133 */
134 MorganArguments(const unsigned int radius, const bool countSimulation = false,
135 const bool includeChirality = false,
136 const bool onlyNonzeroInvariants = false,
137 const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
138 const std::uint32_t fpSize = 2048);
139};
140
141/**
142 \brief Class for holding the bit-id created from Morgan fingerprint
143 environments and the additional data necessary for extra outputs
144
145 */
146template <typename OutputType>
148 : public AtomEnvironment<OutputType> {
149 const OutputType d_code;
150 const unsigned int d_atomId;
151 const unsigned int d_layer;
152
153 public:
155 const std::vector<std::uint32_t> *atomInvariants,
156 const std::vector<std::uint32_t> *bondInvariants,
157 const AdditionalOutput *additionalOutput,
158 const bool hashResults = false,
159 const std::uint64_t fpSize = 0) const override;
160
161 /**
162 \brief Construct a new MorganAtomEnv object
163
164 \param code bit id generated from this environment
165 \param atomId atom id of the atom at the center of this environment
166 \param layer radius of this environment
167 */
168 MorganAtomEnv(const std::uint32_t code, const unsigned int atomId,
169 const unsigned int layer);
170};
171
172/**
173 \brief Class that generates atom environments for Morgan fingerprint
174
175 */
176template <typename OutputType>
178 : public AtomEnvironmentGenerator<OutputType> {
179 public:
180 std::vector<AtomEnvironment<OutputType> *> getEnvironments(
181 const ROMol &mol, FingerprintArguments<OutputType> *arguments,
182 const std::vector<std::uint32_t> *fromAtoms,
183 const std::vector<std::uint32_t> *ignoreAtoms, const int confId,
184 const AdditionalOutput *additionalOutput,
185 const std::vector<std::uint32_t> *atomInvariants,
186 const std::vector<std::uint32_t> *bondInvariants,
187 const bool hashResults = false) const override;
188
189 std::string infoString() const override;
190};
191
192/**
193 \brief Get a fingerprint generator for Morgan fingerprint
194
195 \tparam OutputType determines the size of the bitIds and the result, can be 32
196 or 64 bit unsigned integer
197
198 \param radius the number of iterations to grow the fingerprint
199
200 \param countSimulation if set, use count simulation while generating the
201 fingerprint
202
203 \param includeChirality if set, chirality information will be added to the
204 generated bit id; sets the includeChirality flag for both MorganArguments
205 and the default bond generator MorganBondInvGenerator
206
207 \param onlyNonzeroInvariants if set, bits will only be set from atoms that
208 have a nonzero invariant
209
210 \param countBounds boundaries for count simulation, corresponding bit will be
211 set if the count is higher than the number provided for that spot
212
213 \param fpSize size of the generated fingerprint, does not affect the sparse
214 versions
215
216 \param useBondTypes if set, bond types will be included as a part of the
217 default bond invariants
218
219 \param atomInvariantsGenerator custom atom invariants generator to use
220
221 \param bondInvariantsGenerator custom bond invariants generator to use
222
223 \param ownsAtomInvGen if set, the atom invariants generator is destroyed with
224 the fingerprint generator
225
226 \param ownsBondInvGen if set, the bond invariants generator is destroyed with
227 the fingerprint generator
228
229 \return FingerprintGenerator<OutputType>* that generates Morgan fingerprints
230
231This generator supports the following \c AdditionalOutput types:
232 - \c atomToBits : which bits each atom is the central atom for
233 - \c atomCounts : how many bits each atom sets
234 - \c bitInfoMap : map from bitId to (atomId, radius) pairs
235
236 */
237template <typename OutputType>
239 const unsigned int radius, const bool countSimulation = false,
240 const bool includeChirality = false, const bool useBondTypes = true,
241 const bool onlyNonzeroInvariants = false,
242 AtomInvariantsGenerator *atomInvariantsGenerator = nullptr,
243 BondInvariantsGenerator *bondInvariantsGenerator = nullptr,
244 const std::uint32_t fpSize = 2048,
245 const std::vector<std::uint32_t> countBounds = {1, 2, 4, 8},
246 const bool ownsAtomInvGen = false, const bool ownsBondInvGen = false);
247
248} // namespace MorganFingerprint
249} // namespace RDKit
250
251#endif
abstract base class that generates atom-environments from a molecule
abstract base class that holds atom-environments that will be hashed to generate the fingerprint
abstract base class for atom invariants generators
abstract base class for bond invariants generators
Abstract base class that holds molecule independent arguments that are common amongst all fingerprint...
class that generates same fingerprint style for different output formats
Class for holding Morgan fingerprint specific arguments.
std::string infoString() const override
method that returns information string about the fingerprint specific argument set and the arguments ...
MorganArguments(const unsigned int radius, const bool countSimulation=false, const bool includeChirality=false, const bool onlyNonzeroInvariants=false, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, const std::uint32_t fpSize=2048)
Construct a new MorganArguments object.
OutputType getResultSize() const override
Returns the size of the fingerprint based on arguments.
Class for holding the bit-id created from Morgan fingerprint environments and the additional data nec...
MorganAtomEnv(const std::uint32_t code, const unsigned int atomId, const unsigned int layer)
Construct a new MorganAtomEnv object.
OutputType getBitId(FingerprintArguments< OutputType > *arguments, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, const AdditionalOutput *additionalOutput, const bool hashResults=false, const std::uint64_t fpSize=0) const override
calculates and returns the bit id to be set for this atom-environment
Default atom invariants generator for Morgan fingerprint, generates ECFP-type invariants.
MorganAtomInvGenerator(const bool includeRingMembership=true)
Construct a new MorganAtomInvGenerator object.
MorganAtomInvGenerator * clone() const override
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
Bond invariants generator for Morgan fingerprint.
std::string infoString() const override
method that returns information about this BondInvariantsGenerator and its arguments
MorganBondInvGenerator * clone() const override
MorganBondInvGenerator(const bool useBondTypes=true, const bool useChirality=false)
Construct a new MorganBondInvGenerator object.
std::vector< std::uint32_t > * getBondInvariants(const ROMol &mol) const override
get bond invariants from a molecule
Class that generates atom environments for Morgan fingerprint.
std::string infoString() const override
method that returns information about this AtomEnvironmentGenerator and its arguments if any
std::vector< AtomEnvironment< OutputType > * > getEnvironments(const ROMol &mol, FingerprintArguments< OutputType > *arguments, const std::vector< std::uint32_t > *fromAtoms, const std::vector< std::uint32_t > *ignoreAtoms, const int confId, const AdditionalOutput *additionalOutput, const std::vector< std::uint32_t > *atomInvariants, const std::vector< std::uint32_t > *bondInvariants, const bool hashResults=false) const override
generate and return all atom-environments from a molecule
Alternative atom invariants generator for Morgan fingerprint, generates FCFP-type invariants.
MorganFeatureAtomInvGenerator(std::vector< const ROMol * > *patterns=nullptr)
Construct a new MorganFeatureAtomInvGenerator object.
std::vector< std::uint32_t > * getAtomInvariants(const ROMol &mol) const override
get atom invariants from a molecule
MorganFeatureAtomInvGenerator * clone() const override
std::string infoString() const override
method that returns information about this AtomInvariantsGenerator and its arguments
#define RDKIT_FINGERPRINTS_EXPORT
Definition: export.h:177
RDKIT_FINGERPRINTS_EXPORT FingerprintGenerator< OutputType > * getMorganGenerator(const unsigned int radius, const bool countSimulation=false, const bool includeChirality=false, const bool useBondTypes=true, const bool onlyNonzeroInvariants=false, AtomInvariantsGenerator *atomInvariantsGenerator=nullptr, BondInvariantsGenerator *bondInvariantsGenerator=nullptr, const std::uint32_t fpSize=2048, const std::vector< std::uint32_t > countBounds={1, 2, 4, 8}, const bool ownsAtomInvGen=false, const bool ownsBondInvGen=false)
Get a fingerprint generator for Morgan fingerprint.
Std stuff.
Definition: Abbreviations.h:19