#ifndef XGBOOST_COMMON_RANKING_UTILS_H_
#define XGBOOST_COMMON_RANKING_UTILS_H_

#include "xgboost/span.h"
#include "xgboost/string_view.h"
static_assert(MaxRel() == 31);

// Exponential NDCG gain for an integer relevance label: 2^label - 1.
  return static_cast<double>((1u << label) - 1);

// Logarithmic position discount: 1 / log2(position + 2).
  return 1.0 / std::log2(static_cast<double>(idx) + 2.0);

// Inverse IDCG used to normalize DCG; defined as 0 when the ideal DCG is 0.
  auto inv_idcg = (idcg == 0.0 ? 0.0 : (1.0 / idcg));
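// Worked example (illustrative, not from the header): for a group labelled {3, 2} and ranked in
// that order with the exponential gain, DCG = (2^3 - 1)/log2(2) + (2^2 - 1)/log2(3)
// ≈ 7.0 + 1.893 ≈ 8.893. That ordering is already ideal, so IDCG = DCG, the inverse IDCG is
// 1/8.893, and NDCG = DCG * (1/IDCG) = 1.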
enum class PairMethod : std::int32_t {
  kTopK,
  kMean,
};

// LambdaRankParam: parameters shared by the LambdaRank objective and the ranking metrics.
  static constexpr position_t DefaultK() { return 32; }
  static constexpr position_t DefaultSamplePairs() { return 1; }
  PairMethod lambdarank_pair_method{PairMethod::kTopK};
  std::size_t lambdarank_num_pair_per_sample{NotSet()};

  static constexpr position_t NotSet() { return std::numeric_limits<position_t>::max(); }

  bool lambdarank_unbiased{false};
  double lambdarank_bias_norm{1.0};

  bool ndcg_exp_gain{true};
  bool operator==(LambdaRankParam const& that) const {
    return lambdarank_pair_method == that.lambdarank_pair_method &&
           lambdarank_num_pair_per_sample == that.lambdarank_num_pair_per_sample &&
           lambdarank_unbiased == that.lambdarank_unbiased &&
           lambdarank_bias_norm == that.lambdarank_bias_norm && ndcg_exp_gain == that.ndcg_exp_gain;
  }
  bool operator!=(LambdaRankParam const& that) const { return !(*this == that); }

  [[nodiscard]] double Regularizer() const { return 1.0 / (1.0 + this->lambdarank_bias_norm); }
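  // Example: with the default lambdarank_bias_norm = 1.0, Regularizer() evaluates to
  // 1.0 / (1.0 + 1.0) = 0.5.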
  // Get the number of pairs for each sample.
  [[nodiscard]] position_t NumPair() const {
    if (lambdarank_num_pair_per_sample == NotSet()) {
      switch (lambdarank_pair_method) {
        case PairMethod::kMean:
          return DefaultSamplePairs();
        case PairMethod::kTopK:
          return DefaultK();
      }
    } else {
      return lambdarank_num_pair_per_sample;
    }
    LOG(FATAL) << "Unreachable.";
    return 0;
  }
  // Whether pair construction truncates the list at top-k.
  [[nodiscard]] bool HasTruncation() const { return lambdarank_pair_method == PairMethod::kTopK; }

  [[nodiscard]] auto TopK() const {
    if (HasTruncation()) {
      return NumPair();
    }
    return NotSet();
  }
  DMLC_DECLARE_PARAMETER(LambdaRankParam) {
    DMLC_DECLARE_FIELD(lambdarank_pair_method)
        .set_default(PairMethod::kTopK)
        .add_enum("mean", PairMethod::kMean)
        .add_enum("topk", PairMethod::kTopK)
        .describe("Method for constructing pairs.");
    DMLC_DECLARE_FIELD(lambdarank_num_pair_per_sample)
        .set_default(NotSet())
        .describe("Number of pairs for each sample in the list.");
    DMLC_DECLARE_FIELD(lambdarank_unbiased)
        .describe("Unbiased lambda mart. Use extended IPW to debias click position");
    DMLC_DECLARE_FIELD(lambdarank_bias_norm)
        .set_lower_bound(0.0)
        .describe("Lp regularization for unbiased lambdarank.");
    DMLC_DECLARE_FIELD(ndcg_exp_gain)
        .describe("When set to true, the label gain is 2^rel - 1, otherwise it's rel.");
  }
};  // struct LambdaRankParam
// RankingCache: common cached items for ranking tasks.
  std::size_t max_group_size_{0};
  double weight_norm_{1.0};
  std::size_t n_cuda_threads_{0};

  [[nodiscard]] std::size_t MaxGroupSize() const { return max_group_size_; }
  // Initialization: validate the query group structure, then dispatch to the matching device.
    CHECK(param_.GetInitialised());
        << error::GroupSize() << "the size of label.";
    if (ctx->IsCPU()) {
      this->InitOnCPU(ctx, info);
    } else {
      this->InitOnCUDA(ctx, info);
    }
    CHECK_EQ(Groups(), info.weights_.Size()) << error::GroupWeight();
  [[nodiscard]] std::size_t MaxPositionSize() const {
    if (param_.HasTruncation()) {
      return param_.NumPair();
    }
    // Without truncation, track at most 32 positions per group.
    return std::min(max_group_size_, static_cast<std::size_t>(32));
  }

  // Constant view of the query group pointer (offsets delimiting each list).
    group_ptr_.SetDevice(ctx->gpu_id);
    return ctx->IsCPU() ? group_ptr_.ConstHostSpan() : group_ptr_.ConstDeviceSpan();
  [[nodiscard]] auto const& Param() const { return param_; }
  [[nodiscard]] std::size_t Groups() const { return group_ptr_.Size() - 1; }
  [[nodiscard]] double WeightNorm() const { return weight_norm_; }
  // Lazily allocate the cached sort index for predictions, then rank on the matching device.
    if (sorted_idx_cache_.Empty()) {
      sorted_idx_cache_.SetDevice(ctx->gpu_id);
      sorted_idx_cache_.Resize(predt.size());
    }
    if (ctx->IsCPU()) {
      return this->MakeRankOnCPU(ctx, predt);
    } else {
      return this->MakeRankOnCUDA(ctx, predt);
    }
  // CUDA-only: cached sort index of the labels.
    CHECK(ctx->IsCUDA()) << error::InvalidCUDAOrdinal();
    if (y_sorted_idx_cache_.Empty()) {
      y_sorted_idx_cache_.SetDevice(ctx->gpu_id);
      y_sorted_idx_cache_.Resize(n_samples);
    }
    return y_sorted_idx_cache_.DeviceSpan();
  // CUDA-only: cached labels gathered into the order ranked by the model.
    CHECK(ctx->IsCUDA()) << error::InvalidCUDAOrdinal();
    if (y_ranked_by_model_.Empty()) {
      y_ranked_by_model_.SetDevice(ctx->gpu_id);
      y_ranked_by_model_.Resize(n_samples);
    }
    return y_ranked_by_model_.DeviceSpan();
    CHECK(!threads_group_ptr_.Empty());
    return threads_group_ptr_.ConstDeviceSpan();

  [[nodiscard]] std::size_t CUDAThreads() const { return n_cuda_threads_; }
    if (roundings_.Size() == 0) {
      roundings_.SetDevice(ctx->gpu_id);
      roundings_.Reshape(Groups());
    }
    return roundings_.View(ctx->gpu_id);

    if (cost_rounding_.Size() == 0) {
      cost_rounding_.SetDevice(ctx->gpu_id);
      cost_rounding_.Resize(1);
    }
    return cost_rounding_.DeviceSpan();
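  // Note (assumption, not stated in this excerpt): roundings_ and cost_rounding_ appear to hold
  // pre-computed rounding factors of the kind XGBoost uses elsewhere to make floating-point
  // accumulation on the GPU deterministic.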
  // Scratch buffer sized in bytes for n elements of Type.
  template <typename Type>
    max_lambdas_.SetDevice(ctx->gpu_id);
    std::size_t bytes = n * sizeof(Type);
    if (bytes != max_lambdas_.Size()) {
      max_lambdas_.Resize(bytes);
    }
// NDCGCache: cached inverse IDCG, position discounts, and DCG scratch.
    if (ctx->IsCPU()) {
      this->InitOnCPU(ctx, info);
    } else {
      this->InitOnCUDA(ctx, info);
    }

    return inv_idcg_.View(ctx->gpu_id);

    return ctx->IsCPU() ? discounts_.ConstHostSpan() : discounts_.ConstDeviceSpan();

    if (dcg_.Size() == 0) {
      dcg_.SetDevice(ctx->gpu_id);
      dcg_.Reshape(this->Groups());
    }
    return dcg_.View(ctx->gpu_id);
// Validate labels for NDCG.
template <typename NoneOf>
void CheckNDCGLabels(ltr::LambdaRankParam const& p, linalg::VectorView<float const> labels,
                     NoneOf none_of) {
  auto d_labels = labels.Values();
  if (p.ndcg_exp_gain) {
    auto label_is_integer =
        none_of(d_labels.data(), d_labels.data() + d_labels.size(), [] XGBOOST_DEVICE(float v) {
          auto l = std::floor(v);
          return std::fabs(l - v) > kRtEps || v < 0.0f;
        });
    CHECK(label_is_integer)
        << "When using relevance degree as target, label must be either 0 or positive integer.";
  }

  if (p.ndcg_exp_gain) {
    auto label_is_valid = none_of(d_labels.data(), d_labels.data() + d_labels.size(),
                                  [] XGBOOST_DEVICE(rel_degree_t v) { return v > MaxRel(); });
    CHECK(label_is_valid) << "Relevance degress must be lesser than or equal to " << MaxRel()
                          << " when the exponential NDCG gain function is used. "
                          << "Set `ndcg_exp_gain` to false to use custom DCG gain.";
  }
}
// Whether the labels are binary (0/1) relevance.
template <typename AllOf>
bool IsBinaryRel(linalg::VectorView<float const> label, AllOf all_of) {
  auto s_label = label.Values();
  return all_of(s_label.data(), s_label.data() + s_label.size(), [] XGBOOST_DEVICE(float y) {
    return std::abs(y - 1.0f) < kRtEps || std::abs(y - 0.0f) < kRtEps;
  });
}

// Validate labels for precision-based metrics.
template <typename AllOf>
void CheckPreLabels(StringView name, linalg::VectorView<float const> label, AllOf all_of) {
  auto s_label = label.Values();
  auto is_binary = IsBinaryRel(label, all_of);
  CHECK(is_binary) << name << " can only be used with binary labels.";
}
// PreCache: cached buffer for the precision metric.
    if (ctx->IsCPU()) {
      this->InitOnCPU(ctx, info);
    } else {
      this->InitOnCUDA(ctx, info);
    }

    if (pre_.Empty()) {
      pre_.SetDevice(ctx->gpu_id);
      pre_.Resize(this->Groups());
    }
    return ctx->IsCPU() ? pre_.HostSpan() : pre_.DeviceSpan();
// MAPCache: cached buffers for the mean average precision metric.
  std::size_t n_samples_{0};

    if (ctx->IsCPU()) {
      this->InitOnCPU(ctx, info);
    } else {
      this->InitOnCUDA(ctx, info);
    }

    if (n_rel_.Empty()) {
      n_rel_.SetDevice(ctx->gpu_id);
      n_rel_.Resize(n_samples_);
    }
    return ctx->IsCPU() ? n_rel_.HostSpan() : n_rel_.DeviceSpan();

    if (acc_.Empty()) {
      acc_.SetDevice(ctx->gpu_id);
      acc_.Resize(n_samples_);
    }
    return ctx->IsCPU() ? acc_.HostSpan() : acc_.DeviceSpan();

    if (map_.Empty()) {
      map_.SetDevice(ctx->gpu_id);
      map_.Resize(this->Groups());
    }
    return ctx->IsCPU() ? map_.HostSpan() : map_.DeviceSpan();
// Namespace-level declarations referenced above:

// Relevance degree for NDCG.
using rel_degree_t = std::uint32_t;
// Top-k position.
using position_t = std::uint32_t;

// Maximum relevance degree for NDCG.
constexpr std::size_t MaxRel() { return sizeof(rel_degree_t) * 8 - 1; }

// Parse the name of a ranking metric along with its top-n and minus parameters.
std::string ParseMetricName(StringView name, StringView param, position_t* topn, bool* minus);

// Compose a ranking metric name from the top-n and minus parameters.
std::string MakeMetricName(StringView name, position_t topn, bool minus);
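// Illustrative expectation (assuming the usual "<name>@<topn>[-]" ranking-metric naming
// convention, e.g. "ndcg@4-"): parsing such a name would yield topn == 4 and minus == true,
// and MakeMetricName("ndcg", 4, true) would reproduce the string "ndcg@4-".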