10#ifndef GENERAL_FILE_READER_H
11#define GENERAL_FILE_READER_H
13#include <RDStreams/streams.h>
15#include <boost/algorithm/string.hpp>
26namespace GeneralMolSupplier {
46 "sdf",
"mae",
"maegz",
"sdfgz",
"smi",
"csv",
"txt",
"tsv",
"tdt"};
55 std::string& compressionFormat) {
60 if (boost::algorithm::iends_with(path,
".maegz")) {
62 compressionFormat =
"gz";
64 }
else if (boost::algorithm::iends_with(path,
".sdfgz")) {
66 compressionFormat =
"gz";
68 }
else if (boost::algorithm::iends_with(path,
".gz")) {
69 compressionFormat =
"gz";
70 basename = path.substr(0, path.size() - 3);
71 }
else if (boost::algorithm::iends_with(path,
".zst") ||
72 boost::algorithm::iends_with(path,
".bz2") ||
73 boost::algorithm::iends_with(path,
".7z")) {
75 "Unsupported compression extension (.zst, .bz2, .7z) given path: " +
79 compressionFormat =
"";
82 if (boost::algorithm::iends_with(basename,
"." + suffix)) {
88 "Unsupported structure or compression extension given path: " + path);
98std::unique_ptr<MolSupplier>
getSupplier(
const std::string& path,
100 std::string fileFormat =
"";
101 std::string compressionFormat =
"";
106 if (compressionFormat.empty()) {
107 strm =
new std::ifstream(path.c_str(), std::ios::in | std::ios::binary);
109#if RDK_USE_BOOST_IOSTREAMS
110 strm =
new gzstream(path);
117 if (fileFormat ==
"sdf") {
118#ifdef RDK_BUILD_THREADSAFE_SSS
120 MultithreadedSDMolSupplier* sdsup =
new MultithreadedSDMolSupplier(
123 std::unique_ptr<MolSupplier> p(sdsup);
129 std::unique_ptr<MolSupplier> p(sdsup);
133 else if (fileFormat ==
"smi" || fileFormat ==
"csv" || fileFormat ==
"txt" ||
134 fileFormat ==
"tsv") {
135#ifdef RDK_BUILD_THREADSAFE_SSS
137 MultithreadedSmilesMolSupplier* smsup =
138 new MultithreadedSmilesMolSupplier(
141 std::unique_ptr<MolSupplier> p(smsup);
148 std::unique_ptr<MolSupplier> p(smsup);
151#ifdef RDK_BUILD_MAEPARSER_SUPPORT
152 else if (fileFormat ==
"mae") {
153 MaeMolSupplier* maesup =
155 std::unique_ptr<MolSupplier> p(maesup);
159 else if (fileFormat ==
"tdt") {
162 std::unique_ptr<MolSupplier> p(tdtsup);
used by various file parsing classes to indicate a bad file
lazy file parser for SMILES tables
lazy file parser for TDT files
const std::vector< std::string > supportedCompressionFormats
currently supported compression formats
const std::vector< std::string > supportedFileFormats
currently supported file formats
void determineFormat(const std::string path, std::string &fileFormat, std::string &compressionFormat)
std::unique_ptr< MolSupplier > getSupplier(const std::string &path, const struct SupplierOptions &opt)
unsigned int numWriterThreads