RDKit
Open-source cheminformatics and machine learning.
RGroupDecompData.h
Go to the documentation of this file.
1//
2// Copyright (C) 2017-2022 Novartis Institutes for BioMedical Research and
3// other RDKit contributors
4//
5// @@ All Rights Reserved @@
6// This file is part of the RDKit.
7// The contents are covered by the terms of the BSD license
8// which is included in the file license.txt, found at the root
9// of the RDKit source tree.
10//
11#ifndef RGROUP_DECOMP_DATA
12#define RGROUP_DECOMP_DATA
13
14#include "RGroupCore.h"
15#include "RGroupDecomp.h"
16#include "RGroupMatch.h"
17#include "RGroupScore.h"
19#include "RGroupGa.h"
20#include <vector>
21#include <map>
22
23// #define VERBOSE 1
24
25namespace RDKit {
26
27extern const std::string _rgroupInputDummy;
28
30 // matches[mol_idx] == vector of potential matches
31 std::map<int, RCore> cores;
32 std::map<std::string, int> newCores; // new "cores" found along the way
34 // this caches the running product of permutations
35 // across calls to process()
37 // this caches the size of the previous matches vector
38 // such that the size of the current chunk can be inferred
40 // the default for Greedy/GreedyChunks is keeping only the best
41 // permutation after each call to process()
42 bool prunePermutations = true;
44
45 std::vector<std::vector<RGroupMatch>> matches;
46 std::set<int> labels;
47 std::vector<size_t> permutation;
48 unsigned int pruneLength = 0U;
50 std::map<int, std::vector<int>> userLabels;
51
52 std::vector<int> processedRlabels;
53
54 std::map<int, int> finalRlabelMapping;
56
57 RGroupDecompData(const RWMol &inputCore,
59 : params(std::move(inputParams)) {
60 addCore(inputCore);
62 }
63
64 RGroupDecompData(const std::vector<ROMOL_SPTR> &inputCores,
66 : params(std::move(inputParams)) {
67 for (const auto &core : inputCores) {
68 addCore(*core);
69 }
71 }
72
73 void addCore(const ROMol &inputCore) {
75 RWMol core(inputCore);
77 cores[cores.size()] = RCore(core);
78 } else {
79 cores[cores.size()] = RCore(inputCore);
80 }
81 }
82
83 void prepareCores() {
84 for (auto &core : cores) {
85 RWMol *alignCore = core.first ? cores[0].core.get() : nullptr;
86 CHECK_INVARIANT(params.prepareCore(*core.second.core, alignCore),
87 "Could not prepare at least one core");
88 core.second.init();
89 core.second.labelledCore.reset(new RWMol(*core.second.core));
90 }
91 }
92
93 void setRlabel(Atom *atom, int rlabel) {
94 PRECONDITION(rlabel > 0, "RLabels must be >0");
96 atom->setAtomMapNum(rlabel);
97 }
98
100 std::string dLabel = "R" + std::to_string(rlabel);
102 setAtomRLabel(atom, rlabel);
103 }
104
106 atom->setIsotope(rlabel);
107 }
108 }
109
110 int getRlabel(Atom *atom) const {
112 return atom->getAtomMapNum();
113 }
115 return atom->getIsotope();
116 }
117
119 unsigned int label = 0;
121 return label;
122 }
123 }
124
125 CHECK_INVARIANT(0, "no valid r label found");
126 }
127
128 double scoreFromPrunedData(const std::vector<size_t> &permutation,
129 bool reset = true) {
132 "Scoring method is not fingerprint variance!");
133
135 "Illegal permutation prune length");
136 if (permutation.size() < pruneLength * 1.5) {
137 for (unsigned int pos = pruneLength; pos < permutation.size(); ++pos) {
139 pos, permutation[pos], matches, labels);
140 }
141 double score =
143 if (reset) {
144 for (unsigned int pos = pruneLength; pos < permutation.size(); ++pos) {
146 pos, permutation[pos], matches, labels);
147 }
148 } else {
149 pruneLength = permutation.size();
150 }
151 return score;
152 } else {
153 if (reset) {
155 } else {
157 pruneLength = permutation.size();
160 }
161 }
162 }
163
164 void prune() { // prune all but the current "best" permutation of matches
165 PRECONDITION(permutation.size() <= matches.size(),
166 "permutation.size() should be <= matches.size()");
167 size_t offset = matches.size() - permutation.size();
168 for (size_t mol_idx = 0; mol_idx < permutation.size(); ++mol_idx) {
169 std::vector<RGroupMatch> keepVector;
170 size_t mi = mol_idx + offset;
171 keepVector.push_back(matches[mi].at(permutation[mol_idx]));
172 matches[mi] = keepVector;
173 }
174
175 permutation = std::vector<size_t>(permutation.size(), 0);
179 }
180 }
181
182 // Return the RGroups with the current "best" permutation
183 // of matches.
184 std::vector<RGroupMatch> GetCurrentBestPermutation() const {
185 const bool removeAllHydrogenRGroups =
188
189 std::vector<RGroupMatch> results; // std::map<int, RGroup> > result;
190 bool isPruned = (permutation.size() < matches.size());
191 for (size_t i = 0; i < matches.size(); ++i) {
192 size_t pi = (isPruned ? 0 : permutation.at(i));
193 results.push_back(matches[i].at(pi));
194 }
195
196 // * if a dynamically-added RGroup (i.e., when onlyMatchAtRGroups=false)
197 // is all hydrogens, remove it
198 // * if a user-defined RGroup is all hydrogens and either
199 // params.removeAllHydrogenRGroups==true or
200 // params.removeAllHydrogenRGroupsAndLabels==true, remove it
201
202 // This logic is a bit tricky, find all labels that have common cores
203 // and analyze those sets independently.
204 // i.e. if core 1 doesn't have R1 then don't analyze it when looking
205 // at label 1
206 std::map<int, std::set<int>> labelCores; // map from label->cores
207 std::set<int> coresVisited;
208 for (auto &position : results) {
209 int core_idx = position.core_idx;
210 if (coresVisited.find(core_idx) == coresVisited.end()) {
211 coresVisited.insert(core_idx);
212 auto core = cores.find(core_idx);
213 if (core != cores.end()) {
214 for (auto rlabels : getRlabels(*core->second.core)) {
215 int rlabel = rlabels.first;
216 labelCores[rlabel].insert(core_idx);
217 }
218 }
219 }
220 }
221
222 for (int label : labels) {
223 if (label > 0 && !removeAllHydrogenRGroups) {
224 continue;
225 }
226 bool allH = true;
227 for (auto &position : results) {
228 R_DECOMP::const_iterator rgroup = position.rgroups.find(label);
229 bool labelHasCore = labelCores[label].find(position.core_idx) !=
230 labelCores[label].end();
231 if (labelHasCore && rgroup != position.rgroups.end() &&
232 !rgroup->second->is_hydrogen) {
233 allH = false;
234 break;
235 }
236 }
237
238 if (allH) {
239 for (auto &position : results) {
240 position.rgroups.erase(label);
241 }
242 }
243 }
244 return results;
245 }
246
248 public:
// Every label that has been reserved so far, either explicitly through add()
// or implicitly through next().
std::set<int> labels_used;

// Reserve `rlabel`.
// Returns true when the label was still free (it is recorded as used), and
// false when it had already been reserved (the set is left untouched).
bool add(int rlabel) {
  // std::set::insert reports through `.second` whether the element was new.
  return labels_used.insert(rlabel).second;
}

// Reserve and return the smallest integer >= 1 that has not been used yet.
int next() {
  int candidate = 1;
  for (; labels_used.count(candidate) != 0; ++candidate) {
    // keep advancing until a free label is found
  }
  labels_used.insert(candidate);
  return candidate;
}
266 };
267
268 void addCoreUserLabels(const RWMol &core, std::set<int> &userLabels) {
269 auto atoms = getRlabels(core);
270 for (const auto &p : atoms) {
271 if (p.first > 0) {
272 userLabels.insert(p.first);
273 }
274 }
275 }
276
277 void addAtoms(RWMol &mol,
278 const std::vector<std::pair<Atom *, Atom *>> &atomsToAdd) {
279 for (const auto &i : atomsToAdd) {
280 mol.addAtom(i.second, false, true);
281 mol.addBond(i.first, i.second, Bond::SINGLE);
282 if (mol.getNumConformers()) {
283 MolOps::setTerminalAtomCoords(mol, i.second->getIdx(),
284 i.first->getIdx());
285 }
286 }
287 }
288
289 void relabelCore(RWMol &core, std::map<int, int> &mappings,
290 UsedLabels &used_labels, const std::set<int> &indexLabels,
291 const std::map<int, std::vector<int>> &extraAtomRLabels) {
292 // Now remap to proper rlabel ids
293 // if labels are positive, they come from User labels
294 // if they are negative, they come from indices and should be
295 // numbered *after* the user labels.
296 //
297 // Some indices are attached to multiple bonds,
298 // these rlabels should be incrementally added last
299 std::map<int, Atom *> atoms = getRlabels(core);
300 // a core only has one labelled index
301 // a secondary structure extraAtomRLabels contains the number
302 // of bonds between this atom and the side chain
303
304 // a sidechain atom has a vector of the attachments back to the
305 // core that takes the place of numBondsToRlabel
306
307 std::map<int, std::vector<int>> bondsToCore;
308 std::vector<std::pair<Atom *, Atom *>> atomsToAdd; // adds -R if necessary
309
310 // Deal with user supplied labels
311 for (const auto &rlabels : atoms) {
312 int userLabel = rlabels.first;
313 if (userLabel < 0) {
314 continue; // not a user specified label
315 }
316 Atom *atom = rlabels.second;
317 mappings[userLabel] = userLabel;
318 used_labels.add(userLabel);
319
320 if (atom->getAtomicNum() == 0 &&
321 atom->getDegree() == 1) { // add to existing dummy/rlabel
322 setRlabel(atom, userLabel);
323 } else { // adds new rlabel
324 auto *newAt = new Atom(0);
325 setRlabel(newAt, userLabel);
326 atomsToAdd.emplace_back(atom, newAt);
327 }
328 }
329
330 // Deal with non-user supplied labels
331 for (auto newLabel : indexLabels) {
332 auto atm = atoms.find(newLabel);
333 if (atm == atoms.end()) {
334 continue;
335 }
336
337 Atom *atom = atm->second;
338
339 int rlabel;
340 auto mapping = mappings.find(newLabel);
341 if (mapping == mappings.end()) {
342 rlabel = used_labels.next();
343 mappings[newLabel] = rlabel;
344 } else {
345 rlabel = mapping->second;
346 }
347
348 if (atom->getAtomicNum() == 0 &&
350 *atom)) { // add to dummy
351 setRlabel(atom, rlabel);
352 } else {
353 auto *newAt = new Atom(0);
354 setRlabel(newAt, rlabel);
355 atomsToAdd.emplace_back(atom, newAt);
356 }
357 }
358
359 // Deal with multiple bonds to the same label
360 for (const auto &extraAtomRLabel : extraAtomRLabels) {
361 auto atm = atoms.find(extraAtomRLabel.first);
362 if (atm == atoms.end()) {
363 continue; // label not used in the rgroup
364 }
365 Atom *atom = atm->second;
366
367 for (size_t i = 0; i < extraAtomRLabel.second.size(); ++i) {
368 int rlabel = used_labels.next();
369 // Is this necessary?
371 atom->getAtomicNum() > 1,
372 "Multiple attachments to a dummy (or hydrogen) is weird.");
373 auto *newAt = new Atom(0);
374 setRlabel(newAt, rlabel);
375 atomsToAdd.emplace_back(atom, newAt);
376 }
377 }
378
379 addAtoms(core, atomsToAdd);
380 for (const auto &rlabels : atoms) {
381 auto atom = rlabels.second;
382 atom->clearProp(RLABEL);
383 atom->clearProp(RLABEL_TYPE);
384 }
385 core.updatePropertyCache(false); // this was github #1550
386 }
387
388 void relabelRGroup(RGroupData &rgroup, const std::map<int, int> &mappings) {
389 PRECONDITION(rgroup.combinedMol.get(), "Unprocessed rgroup");
390
391 RWMol &mol = *rgroup.combinedMol.get();
392
393 if (rgroup.combinedMol->hasProp(done)) {
394 rgroup.labelled = true;
395 return;
396 }
397
398 mol.setProp(done, true);
399 std::vector<std::pair<Atom *, Atom *>> atomsToAdd; // adds -R if necessary
400 std::map<int, int> rLabelCoreIndexToAtomicWt;
401
402 for (RWMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
403 ++atIt) {
404 Atom *atom = *atIt;
405 if (atom->hasProp(SIDECHAIN_RLABELS)) {
406 atom->setIsotope(0);
407 const std::vector<int> &rlabels =
408 atom->getProp<std::vector<int>>(SIDECHAIN_RLABELS);
409 // switch on atom mappings or rlabels....
410
411 for (int rlabel : rlabels) {
412 auto label = mappings.find(rlabel);
413 CHECK_INVARIANT(label != mappings.end(), "Unprocessed mapping");
414
415 if (atom->getAtomicNum() == 0) {
416 if (!atom->hasProp(_rgroupInputDummy)) {
417 setRlabel(atom, label->second);
418 }
419 } else if (atom->hasProp(RLABEL_CORE_INDEX)) {
420 atom->setAtomicNum(0);
421 setRlabel(atom, label->second);
422 } else {
423 auto *newAt = new Atom(0);
424 setRlabel(newAt, label->second);
425 atomsToAdd.emplace_back(atom, newAt);
426 }
427 }
428 }
429 if (atom->hasProp(RLABEL_CORE_INDEX)) {
430 // convert to dummy as we don't want to collapse hydrogens onto the core
431 // match
432 auto rLabelCoreIndex = atom->getProp<int>(RLABEL_CORE_INDEX);
433 rLabelCoreIndexToAtomicWt[rLabelCoreIndex] = atom->getAtomicNum();
434 atom->setAtomicNum(0);
435 }
436 }
437
438 addAtoms(mol, atomsToAdd);
439
441 RDLog::LogStateSetter blocker;
442 bool implicitOnly = false;
443 bool updateExplicitCount = false;
444 bool sanitize = false;
445 MolOps::removeHs(mol, implicitOnly, updateExplicitCount, sanitize);
446 }
447
448 mol.updatePropertyCache(false); // this was github #1550
449 rgroup.labelled = true;
450
451 // Restore any core matches that we have set to dummy
452 for (RWMol::AtomIterator atIt = mol.beginAtoms(); atIt != mol.endAtoms();
453 ++atIt) {
454 Atom *atom = *atIt;
455 if (atom->hasProp(RLABEL_CORE_INDEX)) {
456 // don't need to set IsAromatic on atom - that seems to have been saved
457 atom->setAtomicNum(
458 rLabelCoreIndexToAtomicWt[atom->getProp<int>(RLABEL_CORE_INDEX)]);
459 atom->setNoImplicit(true);
461 }
463 }
464
465#ifdef VERBOSE
466 std::cerr << "Relabel Rgroup smiles " << MolToSmiles(mol) << std::endl;
467#endif
468 }
469
470 // relabel the core and sidechains using the specified user labels
471 // if matches exist for non labelled atoms, these are added as well
472 void relabel() {
473 std::vector<RGroupMatch> best = GetCurrentBestPermutation();
474
475 // get the labels used
476 std::set<int> userLabels;
477 std::set<int> indexLabels;
478
479 // Go through all the RGroups and find out which labels were
480 // actually used.
481
482 // some atoms will have multiple attachment points, i.e. cycles
483 // split these up into new rlabels if necessary
484 // These are detected at match time
485 // This vector will hold the extra (new) labels required
486 std::map<int, std::vector<int>> extraAtomRLabels;
487
488 for (auto &it : best) {
489 for (auto &rgroup : it.rgroups) {
490 if (rgroup.first > 0) {
491 userLabels.insert(rgroup.first);
492 }
493 if (rgroup.first < 0 && !params.onlyMatchAtRGroups) {
494 indexLabels.insert(rgroup.first);
495 }
496
497 std::map<int, int> rlabelsUsedInRGroup =
498 rgroup.second->getNumBondsToRlabels();
499 for (auto &numBondsUsed : rlabelsUsedInRGroup) {
500 // Make space for the extra labels
501 if (numBondsUsed.second > 1) { // multiple rgroup bonds to same atom
502 extraAtomRLabels[numBondsUsed.first].resize(numBondsUsed.second -
503 1);
504 }
505 }
506 }
507 }
508
509 // find user labels that are not present in the decomposition
510 for (auto &core : cores) {
511 core.second.labelledCore.reset(new RWMol(*core.second.core));
512 addCoreUserLabels(*core.second.labelledCore, userLabels);
513 }
514
515 // Assign final RGroup labels to the cores and propagate these to
516 // the scaffold
517 finalRlabelMapping.clear();
518
519 UsedLabels used_labels;
520 // Add all the user labels now to prevent an index label being assigned to a
521 // user label when multiple cores are present (e.g. the user label is
522 // present in the second core, but not the first).
523 for (auto userLabel : userLabels) {
524 used_labels.add(userLabel);
525 }
526 for (auto &core : cores) {
527 relabelCore(*core.second.labelledCore, finalRlabelMapping, used_labels,
528 indexLabels, extraAtomRLabels);
529 }
530
531 for (auto &it : best) {
532 for (auto &rgroup : it.rgroups) {
533 relabelRGroup(*rgroup.second, finalRlabelMapping);
534 }
535 }
536
537 std::set<int> uniqueMappedValues;
538 std::transform(finalRlabelMapping.cbegin(), finalRlabelMapping.cend(),
539 std::inserter(uniqueMappedValues, uniqueMappedValues.end()),
540 [](const std::pair<int, int> &p) { return p.second; });
541 CHECK_INVARIANT(finalRlabelMapping.size() == uniqueMappedValues.size(),
542 "Error in uniqueness of final RLabel mapping");
544 uniqueMappedValues.size() == userLabels.size() + indexLabels.size(),
545 "Error in final RMapping size");
546 }
547
548 double score(const std::vector<size_t> &permutation,
549 FingerprintVarianceScoreData *fingerprintVarianceScoreData =
550 nullptr) const {
551 RGroupScore scoreMethod = static_cast<RGroupScore>(params.scoreMethod);
552 switch (scoreMethod) {
553 case Match:
555 break;
558 fingerprintVarianceScoreData);
559 break;
560 default:;
561 }
562 return NAN;
563 }
564
566 bool finalize = false) {
567 if (matches.empty()) {
568 return RGroupDecompositionProcessResult(false, -1);
569 }
570 auto t0 = std::chrono::steady_clock::now();
571 std::unique_ptr<CartesianProduct> iterator;
573
574 if (params.matchingStrategy == GA) {
575 RGroupGa ga(*this, params.timeout >= 0 ? &t0 : nullptr);
576 if (ga.numberPermutations() < 100 * ga.getPopsize()) {
578 } else {
579 if (params.gaNumberRuns > 1) {
580 auto results = ga.runBatch();
581 auto best = max_element(results.begin(), results.end(),
582 [](const GaResult &a, const GaResult &b) {
583 return a.rGroupScorer.getBestScore() <
584 b.rGroupScorer.getBestScore();
585 });
586 rGroupScorer = best->rGroupScorer;
587 } else {
588 auto result = ga.run();
589 rGroupScorer = result.rGroupScorer;
590 }
591 }
592 }
593 size_t offset = 0;
594 if (params.matchingStrategy != GA) {
595 // Exhaustive search, get the MxN matrix
596 // (M = matches.size(): number of molecules
597 // N = iterator.maxPermutations)
598 std::vector<size_t> permutations;
599
600 if (pruneMatches && params.scoreMethod != FingerprintVariance) {
601 offset = previousMatchSize;
602 }
603 previousMatchSize = matches.size();
604 std::transform(
605 matches.begin() + offset, matches.end(),
606 std::back_inserter(permutations),
607 [](const std::vector<RGroupMatch> &m) { return m.size(); });
608 permutation = std::vector<size_t>(permutations.size(), 0);
609
610 // run through all possible matches and score each
611 // set
612 size_t count = 0;
613#ifdef DEBUG
614 std::cerr << "Processing" << std::endl;
615#endif
616 std::unique_ptr<CartesianProduct> it(new CartesianProduct(permutations));
617 iterator = std::move(it);
618 // Iterates through the permutation idx, i.e.
619 // [m1_permutation_idx, m2_permutation_idx, m3_permutation_idx]
620
621 while (iterator->next()) {
622 if (count > iterator->maxPermutations) {
623 throw ValueErrorException("next() did not finish");
624 }
625#ifdef DEBUG
626 std::cerr << "**************************************************"
627 << std::endl;
628#endif
629 double newscore = params.scoreMethod == FingerprintVariance
630 ? scoreFromPrunedData(iterator->permutation)
631 : score(iterator->permutation);
632
633 if (fabs(newscore - rGroupScorer.getBestScore()) <
634 1e-6) { // heuristic to overcome floating point comparison issues
635 rGroupScorer.pushTieToStore(iterator->permutation);
636 } else if (newscore > rGroupScorer.getBestScore()) {
637#ifdef DEBUG
638 std::cerr << " ===> current best:" << newscore << ">"
639 << rGroupScorer.getBestScore() << std::endl;
640#endif
641 rGroupScorer.setBestPermutation(iterator->permutation, newscore);
643 rGroupScorer.pushTieToStore(iterator->permutation);
644 }
645 ++count;
646 }
647 }
648
649 if (rGroupScorer.tieStoreSize() > 1) {
652 } else {
654 }
656 if (pruneMatches || finalize) {
657 prune();
658 }
659
660 if (finalize) {
661 relabel();
662 }
663
665 }
666};
667} // namespace RDKit
668
669#endif
#define CHECK_INVARIANT(expr, mess)
Definition: Invariant.h:101
#define PRECONDITION(expr, mess)
Definition: Invariant.h:109
The class for representing atoms.
Definition: Atom.h:68
void setNoImplicit(bool what)
sets our noImplicit flag, indicating whether or not we are allowed to have implicit Hs
Definition: Atom.h:220
void setAtomicNum(int newNum)
sets our atomic number
Definition: Atom.h:128
void setIsotope(unsigned int what)
sets our isotope number
int getAtomicNum() const
returns our atomic number
Definition: Atom.h:126
int getAtomMapNum() const
Definition: Atom.h:388
void setAtomMapNum(int mapno, bool strict=true)
Set the atom map Number of the atom.
Definition: Atom.h:376
unsigned int getIsotope() const
returns our isotope number
Definition: Atom.h:240
unsigned int getDegree() const
@ SINGLE
Definition: Bond.h:58
bool getPropIfPresent(const std::string &key, T &res) const
Definition: RDProps.h:121
void clearProp(const std::string &key) const
clears the value of a property
Definition: RDProps.h:137
void getProp(const std::string &key, T &res) const
allows retrieval of a particular property value
Definition: RDProps.h:107
bool hasProp(const std::string &key) const
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition: RDProps.h:126
void setProp(const std::string &key, T val, bool computed=false) const
sets a property value
Definition: RDProps.h:77
vector< GaResult > runBatch()
GaResult run(int runNumber=1)
unsigned int numberPermutations() const
Definition: RGroupGa.h:127
void pushTieToStore(const std::vector< size_t > &permutation)
store the passed tied permutation for subsequent processing
void startProcessing()
called when process() starts to initialize State
void setBestPermutation(const std::vector< size_t > &permutation, double score)
set the passed permutation and score as the best one
void clearTieStore()
clear all stored tied permutations
const std::vector< size_t > & getBestPermutation() const
return the best permutation found so far
Definition: RGroupScore.h:83
void breakTies(const std::vector< std::vector< RGroupMatch > > &matches, const std::set< int > &labels, const std::unique_ptr< CartesianProduct > &iterator, const std::chrono::steady_clock::time_point &t0, double timeout)
find the best permutation across the tied ones that were stored
double matchScore(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch > > &matches, const std::set< int > &labels)
score the passed permutation of matches
size_t tieStoreSize() const
number of stored tied permutations
Definition: RGroupScore.h:99
double getBestScore() const
return the best score found so far
Definition: RGroupScore.h:101
unsigned int getNumConformers() const
Definition: ROMol.h:560
AtomIterator endAtoms()
get an AtomIterator pointing at the end of our Atoms
void updatePropertyCache(bool strict=true)
calculates any of our lazy properties
AtomIterator beginAtoms()
get an AtomIterator pointing at our first Atom
RWMol is a molecule class that is intended to be edited.
Definition: RWMol.h:32
unsigned int addAtom(bool updateLabel=true)
adds an empty Atom to our collection
unsigned int addBond(unsigned int beginAtomIdx, unsigned int endAtomIdx, Bond::BondType order=Bond::UNSPECIFIED)
adds a Bond between the indicated Atoms
Class to allow us to throw a ValueError from C++ and have it make it back to Python.
Definition: Exceptions.h:40
static std::string to_string(const Descriptor &desc)
Definition: Descriptor.h:54
RDKIT_GRAPHMOL_EXPORT void setTerminalAtomCoords(ROMol &mol, unsigned int idx, unsigned int otherIdx)
RDKIT_GRAPHMOL_EXPORT ROMol * removeHs(const ROMol &mol, bool implicitOnly=false, bool updateExplicitCount=false, bool sanitize=true)
returns a copy of a molecule with hydrogens removed
RDKIT_RDGENERAL_EXPORT const std::string dummyLabel
RDKIT_RDGENERAL_EXPORT const std::string _MolFileRLabel
Std stuff.
Definition: Abbreviations.h:19
@ FingerprintVariance
Definition: RGroupDecomp.h:63
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL_CORE_INDEX
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string done
@ Exhaustive
Definition: RGroupDecomp.h:43
const std::string _rgroupInputDummy
RDKIT_GRAPHMOL_EXPORT void setAtomRLabel(Atom *atm, int rlabel)
RDKIT_SMILESPARSE_EXPORT std::string MolToSmiles(const ROMol &mol, const SmilesWriteParams &params)
returns canonical SMILES for a molecule
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string SIDECHAIN_RLABELS
@ MDLRGroup
Definition: RGroupDecomp.h:51
@ AtomMap
Definition: RGroupDecomp.h:49
@ Isotope
Definition: RGroupDecomp.h:50
bool checkForTimeout(const std::chrono::steady_clock::time_point &t0, double timeout, bool throwOnTimeout=true)
Definition: RGroupDecomp.h:213
bool isAnyAtomWithMultipleNeighborsOrNotUserRLabel(const Atom &atom)
Definition: RGroupUtils.h:74
RDKIT_RGROUPDECOMPOSITION_EXPORT const std::string RLABEL_TYPE
RDKIT_RGROUPDECOMPOSITION_EXPORT double fingerprintVarianceScore(const std::vector< size_t > &permutation, const std::vector< std::vector< RGroupMatch > > &matches, const std::set< int > &labels, FingerprintVarianceScoreData *fingerprintVarianceScoreData=nullptr)
const unsigned int EMPTY_CORE_LABEL
Definition: RGroupUtils.h:29
std::map< int, Atom * > getRlabels(const RWMol &mol)
Get the RLabel -> atom mapping for the current molecule.
iterate through all possible permutations of the rgroups
Definition: RGroupScore.h:20
void addVarianceData(int matchNumber, int permutationNumber, const std::vector< std::vector< RGroupMatch > > &matches, const std::set< int > &labels)
void removeVarianceData(int matchNumber, int permutationNumber, const std::vector< std::vector< RGroupMatch > > &matches, const std::set< int > &labels)
RCore is the core common to a series of molecules.
Definition: RGroupCore.h:24
A single rgroup attached to a given core.
Definition: RGroupData.h:27
boost::shared_ptr< RWMol > combinedMol
Definition: RGroupData.h:28
std::vector< std::vector< RGroupMatch > > matches
FingerprintVarianceScoreData prunedFingerprintVarianceScoreData
RGroupDecompData(const RWMol &inputCore, RGroupDecompositionParameters inputParams)
double score(const std::vector< size_t > &permutation, FingerprintVarianceScoreData *fingerprintVarianceScoreData=nullptr) const
double scoreFromPrunedData(const std::vector< size_t > &permutation, bool reset=true)
void relabelRGroup(RGroupData &rgroup, const std::map< int, int > &mappings)
std::vector< size_t > permutation
std::map< int, std::vector< int > > userLabels
RGroupDecompositionParameters params
std::map< std::string, int > newCores
void addAtoms(RWMol &mol, const std::vector< std::pair< Atom *, Atom * > > &atomsToAdd)
RGroupDecompositionProcessResult process(bool pruneMatches, bool finalize=false)
std::map< int, RCore > cores
void setRlabel(Atom *atom, int rlabel)
void addCore(const ROMol &inputCore)
std::vector< int > processedRlabels
int getRlabel(Atom *atom) const
std::map< int, int > finalRlabelMapping
void addCoreUserLabels(const RWMol &core, std::set< int > &userLabels)
RGroupDecompData(const std::vector< ROMOL_SPTR > &inputCores, RGroupDecompositionParameters inputParams)
void relabelCore(RWMol &core, std::map< int, int > &mappings, UsedLabels &used_labels, const std::set< int > &indexLabels, const std::map< int, std::vector< int > > &extraAtomRLabels)
std::vector< RGroupMatch > GetCurrentBestPermutation() const
void addDummyAtomsToUnlabelledCoreAtoms(RWMol &core)
bool onlyMatchAtRGroups
only allow rgroup decomposition at the specified rgroups
Definition: RGroupDecomp.h:84
bool removeAllHydrogenRGroups
remove all user-defined rgroups that only have hydrogens
Definition: RGroupDecomp.h:86
double timeout
timeout in seconds. <=0 indicates no timeout
Definition: RGroupDecomp.h:97
bool removeHydrogensPostMatch
remove all hydrogens from the output molecules
Definition: RGroupDecomp.h:91
bool prepareCore(RWMol &, const RWMol *alignCore)