30FileSystem *FileSystem::GetInstance(
const URI &path) {
31 if (path.protocol ==
"file://" || path.protocol.length() == 0) {
34 if (path.protocol ==
"hdfs://" || path.protocol ==
"viewfs://") {
36 if (path.host.length() == 0) {
38 }
else if (path.protocol ==
"viewfs://") {
39 char* defaultFS =
nullptr;
40 hdfsConfGetStr(
"fs.defaultFS", &defaultFS);
41 if (path.host.length() != 0) {
42 CHECK(
"viewfs://" + path.host == defaultFS)
43 <<
"viewfs is only supported as a fs.defaultFS.";
50 LOG(FATAL) <<
"Please compile with DMLC_USE_HDFS=1 to use hdfs";
53 if (path.protocol ==
"s3://" || path.protocol ==
"http://" || path.protocol ==
"https://") {
57 LOG(FATAL) <<
"Please compile with DMLC_USE_S3=1 to use S3";
61 if (path.protocol ==
"azure://") {
65 LOG(FATAL) <<
"Please compile with DMLC_USE_AZURE=1 to use Azure";
69 LOG(FATAL) <<
"unknown filesystem protocol " + path.protocol;
78 return Create(uri_,
nullptr, part, nsplit, type);
82 const char *index_uri_,
88 const size_t batch_size,
89 const bool recurse_directories) {
91 using namespace dmlc::io;
94 if (!strcmp(spec.
uri.c_str(),
"stdin")) {
97 CHECK(part < nsplit) <<
"invalid input parameter for InputSplit::Create";
98 URI path(spec.
uri.c_str());
100 if (!strcmp(type,
"text")) {
102 spec.
uri.c_str(), part, nsplit);
103 }
else if (!strcmp(type,
"indexed_recordio")) {
104 if (index_uri_ !=
nullptr) {
107 spec.
uri.c_str(), index_spec.
uri.c_str(), part, nsplit,
108 batch_size, shuffle, seed);
110 LOG(FATAL) <<
"need to pass index file to use IndexedRecordIO";
112 }
else if (!strcmp(type,
"recordio")) {
114 spec.
uri.c_str(), part, nsplit,
115 recurse_directories);
117 LOG(FATAL) <<
"unknown input split type " << type;
119#if DMLC_ENABLE_STD_THREAD
121 return new ThreadedInputSplit(split, batch_size);
123 return new CachedInputSplit(split, spec.
cache_file.c_str());
127 <<
"to enable cached file, compile with c++11";
133 const char *
const flag,
136 return io::FileSystem::
137 GetInstance(path)->Open(path, flag, try_create);
142 return io::FileSystem::
143 GetInstance(path)->OpenForRead(path, try_create);
interface of i/o stream that supports seek
Definition io.h:109
static SeekStream * CreateForRead(const char *uri, bool allow_null=false)
generic factory function that creates a SeekStream for read only; the stream will close the underlying fil...
Definition io.cc:140
interface of stream I/O for serialization
Definition io.h:30
static Stream * Create(const char *uri, const char *const flag, bool allow_null=false)
generic factory function that creates a stream; the stream will close the underlying files upon deletion
Definition io.cc:132
static AzureFileSystem * GetInstance(void)
get a singleton of AzureFileSystem when needed
Definition azure_filesys.h:38
static HDFSFileSystem * GetInstance(const std::string &namenode="default")
get a singleton of HDFSFileSystem when needed
Definition hdfs_filesys.h:59
class that splits the recordIO file by record
Definition indexed_recordio_split.h:23
class that splits the files by line
Definition line_split.h:20
static LocalFileSystem * GetInstance(void)
get a singleton of LocalFileSystem when needed
Definition local_filesys.h:54
class that splits the files by line
Definition recordio_split.h:21
static S3FileSystem * GetInstance(void)
get a singleton of S3FileSystem when needed
Definition s3_filesys.h:64
line split implementation from a single FILE; simply returns lines of files, used for stdin
Definition single_file_split.h:32
a superset of URI that allows sugars to be passed around. Example:
Definition uri_spec.h:28
std::string cache_file
the path to cache file
Definition uri_spec.h:35
std::string uri
the real URI
Definition uri_spec.h:31
defines configuration macros
defines serializable interface of dmlc
defines logging macros of dmlc; allows use of GLOG, falls back to internal implementation when disabled
input split that splits indexed recordio files
base class implementation of input splitter
namespace for dmlc
Definition array_view.h:12
input split that splits recordio files
base implementation of line-splitter
common specification of sugars in URI string passed to dmlc Create functions such as local file cache