Medial Code Documentation
Loading...
Searching...
No Matches
c_api_utils.h
1
4#ifndef XGBOOST_C_API_C_API_UTILS_H_
5#define XGBOOST_C_API_C_API_UTILS_H_
6
#include <algorithm>
#include <cmath>       // std::nanf
#include <cstddef>
#include <cstdint>     // std::int32_t, std::uint32_t
#include <functional>
#include <memory>      // std::shared_ptr
#include <numeric>     // std::accumulate
#include <string>
#include <vector>

#include "xgboost/c_api.h"
#include "xgboost/data.h"         // DMatrix
#include "xgboost/feature_map.h"  // for FeatureMap
#include "xgboost/json.h"
#include "xgboost/learner.h"
#include "xgboost/linalg.h"       // ArrayInterfaceHandler
#include "xgboost/logging.h"
#include "xgboost/string_view.h"  // StringView
22
23namespace xgboost {
24/* \brief Determine the output shape of prediction.
25 *
26 * \param strict_shape Whether should we reshape the output with consideration of groups
27 * and forest.
28 * \param type Prediction type
29 * \param rows Input samples
30 * \param cols Input features
31 * \param chunksize Total elements of output / rows
32 * \param groups Number of output groups from Learner
33 * \param rounds end_iteration - beg_iteration
34 * \param out_shape Output shape
35 * \param out_dim Output dimension
36 */
37inline void CalcPredictShape(bool strict_shape, PredictionType type, size_t rows, size_t cols,
38 size_t chunksize, size_t groups, size_t rounds,
39 std::vector<bst_ulong> *out_shape,
40 xgboost::bst_ulong *out_dim) {
41 auto &shape = *out_shape;
42 if (type == PredictionType::kMargin && rows != 0) {
43 // When kValue is used, softmax can change the chunksize.
44 CHECK_EQ(chunksize, groups);
45 }
46
47 switch (type) {
48 case PredictionType::kValue:
49 case PredictionType::kMargin: {
50 if (chunksize == 1 && !strict_shape) {
51 *out_dim = 1;
52 shape.resize(*out_dim);
53 shape.front() = rows;
54 } else {
55 *out_dim = 2;
56 shape.resize(*out_dim);
57 shape.front() = rows;
58 // chunksize can be 1 if it's softmax
59 shape.back() = std::min(groups, chunksize);
60 }
61 break;
62 }
63 case PredictionType::kApproxContribution:
64 case PredictionType::kContribution: {
65 if (groups == 1 && !strict_shape) {
66 *out_dim = 2;
67 shape.resize(*out_dim);
68 shape.front() = rows;
69 shape.back() = cols + 1;
70 } else {
71 *out_dim = 3;
72 shape.resize(*out_dim);
73 shape[0] = rows;
74 shape[1] = groups;
75 shape[2] = cols + 1;
76 }
77 break;
78 }
79 case PredictionType::kApproxInteraction:
80 case PredictionType::kInteraction: {
81 if (groups == 1 && !strict_shape) {
82 *out_dim = 3;
83 shape.resize(*out_dim);
84 shape[0] = rows;
85 shape[1] = cols + 1;
86 shape[2] = cols + 1;
87 } else {
88 *out_dim = 4;
89 shape.resize(*out_dim);
90 shape[0] = rows;
91 shape[1] = groups;
92 shape[2] = cols + 1;
93 shape[3] = cols + 1;
94 }
95 break;
96 }
97 case PredictionType::kLeaf: {
98 if (strict_shape) {
99 shape.resize(4);
100 shape[0] = rows;
101 shape[1] = rounds;
102 shape[2] = groups;
103 auto forest = chunksize / (shape[1] * shape[2]);
104 forest = std::max(static_cast<decltype(forest)>(1), forest);
105 shape[3] = forest;
106 *out_dim = shape.size();
107 } else if (chunksize == 1) {
108 *out_dim = 1;
109 shape.resize(*out_dim);
110 shape.front() = rows;
111 } else {
112 *out_dim = 2;
113 shape.resize(*out_dim);
114 shape.front() = rows;
115 shape.back() = chunksize;
116 }
117 break;
118 }
119 default: {
120 LOG(FATAL) << "Unknown prediction type:" << static_cast<int>(type);
121 }
122 }
123 CHECK_EQ(
124 std::accumulate(shape.cbegin(), shape.cend(), static_cast<bst_ulong>(1), std::multiplies<>{}),
125 chunksize * rows);
126}
127
128// Reverse the ntree_limit in old prediction API.
129inline uint32_t GetIterationFromTreeLimit(uint32_t ntree_limit, Learner *learner) {
130 // On Python and R, `best_ntree_limit` is set to `best_iteration * num_parallel_tree`.
131 // To reverse it we just divide it by `num_parallel_tree`.
132 if (ntree_limit != 0) {
133 learner->Configure();
134 uint32_t num_parallel_tree = 0;
135
136 Json config{Object()};
137 learner->SaveConfig(&config);
138 auto const &booster = get<String const>(config["learner"]["gradient_booster"]["name"]);
139 if (booster == "gblinear") {
140 num_parallel_tree = 0;
141 } else if (booster == "dart") {
142 num_parallel_tree =
143 std::stoi(get<String const>(config["learner"]["gradient_booster"]["gbtree"]
144 ["gbtree_model_param"]["num_parallel_tree"]));
145 } else if (booster == "gbtree") {
146 num_parallel_tree = std::stoi(get<String const>(
147 (config["learner"]["gradient_booster"]["gbtree_model_param"]["num_parallel_tree"])));
148 } else {
149 LOG(FATAL) << "Unknown booster:" << booster;
150 }
151 ntree_limit /= std::max(num_parallel_tree, 1u);
152 }
153 return ntree_limit;
154}
155
156inline float GetMissing(Json const &config) {
157 float missing;
158 auto const &obj = get<Object const>(config);
159 auto it = obj.find("missing");
160 if (it == obj.cend()) {
161 LOG(FATAL) << "Argument `missing` is required.";
162 }
163
164 auto const &j_missing = it->second;
165 if (IsA<Number const>(j_missing)) {
166 missing = get<Number const>(j_missing);
167 } else if (IsA<Integer const>(j_missing)) {
168 missing = get<Integer const>(j_missing);
169 } else {
170 missing = nan("");
171 TypeCheck<Number, Integer>(j_missing, "missing");
172 }
173 return missing;
174}
175
// Safe guard some global variables from being changed by XGBoost.
//
// RAII guard: the constructor snapshots process-global state (currently the
// active CUDA device when built with CUDA) and the destructor restores it, so
// a C API call cannot leak changes to the caller's environment.
// NOTE(review): the class declaration and ctor/dtor signatures were garbled in
// the extracted source and are reconstructed here — confirm against upstream.
class XGBoostAPIGuard {
#if defined(XGBOOST_USE_CUDA)
  // Device active when the guard was constructed; restored on destruction.
  int32_t device_id_ {0};

  void SetGPUAttribute();
  void RestoreGPUAttribute();
#else
  // No-ops when CUDA support is compiled out.
  void SetGPUAttribute() {}
  void RestoreGPUAttribute() {}
#endif

 public:
  XGBoostAPIGuard() {
    SetGPUAttribute();
  }
  ~XGBoostAPIGuard() {
    RestoreGPUAttribute();
  }
};
196
197inline FeatureMap LoadFeatureMap(std::string const& uri) {
198 FeatureMap feat;
199 if (uri.size() != 0) {
200 std::unique_ptr<dmlc::Stream> fs(dmlc::Stream::Create(uri.c_str(), "r"));
201 dmlc::istream is(fs.get());
202 feat.LoadText(is);
203 }
204 return feat;
205}
206
207inline void GenerateFeatureMap(Learner const *learner,
208 std::vector<Json> const &custom_feature_names,
209 size_t n_features, FeatureMap *out_feature_map) {
210 auto &feature_map = *out_feature_map;
211 auto maybe = [&](std::vector<std::string> const &values, size_t i,
212 std::string const &dft) {
213 return values.empty() ? dft : values[i];
214 };
215 if (feature_map.Size() == 0) {
216 // Use the feature names and types from booster.
217 std::vector<std::string> feature_names;
218 // priority:
219 // 1. feature map.
220 // 2. customized feature name.
221 // 3. from booster
222 // 4. default feature name.
223 if (!custom_feature_names.empty()) {
224 CHECK_EQ(custom_feature_names.size(), n_features)
225 << "Incorrect number of feature names.";
226 feature_names.resize(custom_feature_names.size());
227 std::transform(custom_feature_names.begin(), custom_feature_names.end(),
228 feature_names.begin(),
229 [](Json const &name) { return get<String const>(name); });
230 } else {
231 learner->GetFeatureNames(&feature_names);
232 }
233 if (!feature_names.empty()) {
234 CHECK_EQ(feature_names.size(), n_features) << "Incorrect number of feature names.";
235 }
236
237 std::vector<std::string> feature_types;
238 learner->GetFeatureTypes(&feature_types);
239 if (!feature_types.empty()) {
240 CHECK_EQ(feature_types.size(), n_features) << "Incorrect number of feature types.";
241 }
242
243 for (size_t i = 0; i < n_features; ++i) {
244 feature_map.PushBack(
245 i,
246 maybe(feature_names, i, "f" + std::to_string(i)).data(),
247 maybe(feature_types, i, "q").data());
248 }
249 }
250 CHECK_EQ(feature_map.Size(), n_features);
251}
252
253void XGBBuildInfoDevice(Json* p_info);
254
255template <typename JT>
256auto const &RequiredArg(Json const &in, StringView key, StringView func) {
257 auto const &obj = get<Object const>(in);
258 auto it = obj.find(key);
259 if (it == obj.cend() || IsA<Null>(it->second)) {
260 LOG(FATAL) << "Argument `" << key << "` is required for `" << func << "`.";
261 }
262 TypeCheck<JT>(it->second, StringView{key});
263 return get<std::remove_const_t<JT> const>(it->second);
264}
265
266template <typename JT, typename T>
267auto const &OptionalArg(Json const &in, StringView key, T const &dft) {
268 auto const &obj = get<Object const>(in);
269 auto it = obj.find(key);
270 if (it != obj.cend() && !IsA<Null>(it->second)) {
271 TypeCheck<JT>(it->second, key);
272 return get<std::remove_const_t<JT> const>(it->second);
273 }
274 return dft;
275}
276
280inline std::shared_ptr<DMatrix> CastDMatrixHandle(DMatrixHandle const handle) {
281 auto pp_m = static_cast<std::shared_ptr<DMatrix> *>(handle);
282 StringView msg{"Invalid DMatrix handle"};
283 CHECK(pp_m) << msg;
284 auto p_m = *pp_m;
285 CHECK(p_m) << msg;
286 return p_m;
287}
288
289namespace detail {
290template <typename PtrT, typename I, typename T>
291void MakeSparseFromPtr(PtrT const *p_indptr, I const *p_indices, T const *p_data,
292 std::size_t nindptr, std::string *indptr_str, std::string *indices_str,
293 std::string *data_str) {
294 auto ndata = static_cast<Integer::Int>(p_indptr[nindptr - 1]);
295 // Construct array interfaces
296 Json jindptr{Object{}};
297 Json jindices{Object{}};
298 Json jdata{Object{}};
299 CHECK(p_indptr);
300 jindptr["data"] =
301 Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_indptr)}, Json{true}}};
302 jindptr["shape"] = std::vector<Json>{Json{nindptr}};
303 jindptr["version"] = Integer{3};
304
305 CHECK(p_indices);
306 jindices["data"] =
307 Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_indices)}, Json{true}}};
308 jindices["shape"] = std::vector<Json>{Json{ndata}};
309 jindices["version"] = Integer{3};
310
311 CHECK(p_data);
312 jdata["data"] =
313 Array{std::vector<Json>{Json{reinterpret_cast<Integer::Int>(p_data)}, Json{true}}};
314 jdata["shape"] = std::vector<Json>{Json{ndata}};
315 jdata["version"] = Integer{3};
316
317 std::string pindptr_typestr =
318 linalg::detail::ArrayInterfaceHandler::TypeChar<PtrT>() + std::to_string(sizeof(PtrT));
319 std::string ind_typestr =
320 linalg::detail::ArrayInterfaceHandler::TypeChar<I>() + std::to_string(sizeof(I));
321 std::string data_typestr =
322 linalg::detail::ArrayInterfaceHandler::TypeChar<T>() + std::to_string(sizeof(T));
323 if (DMLC_LITTLE_ENDIAN) {
324 jindptr["typestr"] = String{"<" + pindptr_typestr};
325 jindices["typestr"] = String{"<" + ind_typestr};
326 jdata["typestr"] = String{"<" + data_typestr};
327 } else {
328 jindptr["typestr"] = String{">" + pindptr_typestr};
329 jindices["typestr"] = String{">" + ind_typestr};
330 jdata["typestr"] = String{">" + data_typestr};
331 }
332
333 Json::Dump(jindptr, indptr_str);
334 Json::Dump(jindices, indices_str);
335 Json::Dump(jdata, data_str);
336}
337} // namespace detail
338} // namespace xgboost
339#endif // XGBOOST_C_API_C_API_UTILS_H_
static Stream * Create(const char *uri, const char *const flag, bool allow_null=false)
generic factory function create an stream, the stream will close the underlying files upon deletion
Definition io.cc:132
a std::istream class that can wrap Stream objects; use it as an istream whose output goes to the underlying stream
Definition io.h:385
Feature map data structure to help text model dump. TODO(tqchen) consider making it even more lightweight.
Definition feature_map.h:22
void LoadText(std::istream &is)
load feature map from input stream
Definition feature_map.h:36
static void Dump(Json json, std::string *out, std::ios::openmode mode=std::ios::out)
Encode the JSON object.
Definition json.cc:669
Definition c_api_utils.h:177
Feature map data structure to help visualization and model dump.
Copyright 2015-2023 by XGBoost Contributors.
defines console logging options for xgboost. Use to enforce unified print behavior.
Copyright 2015-2023 by XGBoost Contributors.
Copyright 2021-2023 by XGBoost Contributors.
detail namespace with internal helper functions
Definition json.hpp:249
namespace of xgboost
Definition base.h:90
uint64_t bst_ulong
unsigned long integers
Definition base.h:95
std::shared_ptr< DMatrix > CastDMatrixHandle(DMatrixHandle const handle)
Get shared ptr from DMatrix C handle with additional checks.
Definition c_api_utils.h:280
Definition string_view.h:15
Copyright 2015~2023 by XGBoost Contributors.