Medial Code Documentation
Loading...
Searching...
No Matches
lambdarank_obj.h
1
14#ifndef XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_H_
15#define XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_H_
16#include <algorithm> // for min, max
17#include <cassert> // for assert
18#include <cmath> // for log, abs
19#include <cstddef> // for size_t
20#include <functional> // for greater
21#include <memory> // for shared_ptr
22#include <random> // for minstd_rand, uniform_int_distribution
23#include <vector> // for vector
24
25#include "../common/algorithm.h" // for ArgSort
26#include "../common/math.h" // for Sigmoid
27#include "../common/ranking_utils.h" // for CalcDCGGain
28#include "../common/transform_iterator.h" // for MakeIndexTransformIter
29#include "xgboost/base.h" // for GradientPair, XGBOOST_DEVICE, kRtEps
30#include "xgboost/context.h" // for Context
31#include "xgboost/data.h" // for MetaInfo
32#include "xgboost/host_device_vector.h" // for HostDeviceVector
33#include "xgboost/linalg.h" // for VectorView, Vector
34#include "xgboost/logging.h" // for CHECK_EQ
35#include "xgboost/span.h" // for Span
36
37namespace xgboost::obj {
// Tiny positive constant guarding against division by zero and vanishing
// probabilities in the gradient/bias computations below.
constexpr double Eps64() { return 1e-16; }
39
40template <bool exp>
41XGBOOST_DEVICE double DeltaNDCG(float y_high, float y_low, std::size_t rank_high,
42 std::size_t rank_low, double inv_IDCG,
43 common::Span<double const> discount) {
44 // Use rank_high instead of idx_high as we are calculating discount based on ranks
45 // provided by the model.
46 double gain_high = exp ? ltr::CalcDCGGain(y_high) : y_high;
47 double discount_high = discount[rank_high];
48
49 double gain_low = exp ? ltr::CalcDCGGain(y_low) : y_low;
50 double discount_low = discount[rank_low];
51
52 double original = gain_high * discount_high + gain_low * discount_low;
53 double changed = gain_low * discount_high + gain_high * discount_low;
54
55 double delta_NDCG = (original - changed) * inv_IDCG;
56 assert(delta_NDCG >= -1.0);
57 assert(delta_NDCG <= 1.0);
58 return delta_NDCG;
59}
60
/**
 * \brief Change in MAP (mean average precision) if the documents at `rank_high` and
 *        `rank_low` on the model's rank list were to swap places.
 *
 * \param y_high    Label of the document at the higher rank.
 * \param y_low     Label of the document at the lower rank.
 * \param rank_high 0-based position of the higher-ranked document.
 * \param rank_low  0-based position of the lower-ranked document.  Assumed to be
 *                  strictly greater than `rank_high` -- both branches read
 *                  `acc[rank_low - 1]`.
 * \param n_rel     Per-rank prefix counts of relevant documents; the last entry is the
 *                  group total.
 * \param acc       Per-rank accumulated precision terms -- presumably filled by
 *                  MAPStat; TODO(review) confirm against the implementation.
 */
XGBOOST_DEVICE inline double DeltaMAP(float y_high, float y_low, std::size_t rank_high,
                                      std::size_t rank_low, common::Span<double const> n_rel,
                                      common::Span<double const> acc) {
  // 1-based ranks for the precision denominators.
  double r_h = static_cast<double>(rank_high) + 1.0;
  double r_l = static_cast<double>(rank_low) + 1.0;
  double delta{0.0};
  // Total number of relevant documents in the group (last prefix entry).
  double n_total_relevances = n_rel.back();
  assert(n_total_relevances > 0.0);
  auto m = n_rel[rank_low];
  double n = n_rel[rank_high];

  if (y_high < y_low) {
    // Swap moves the more relevant document up the list.
    auto a = m / r_l - (n + 1.0) / r_h;
    auto b = acc[rank_low - 1] - acc[rank_high];
    delta = (a - b) / n_total_relevances;
  } else {
    // Swap moves the more relevant document down the list.
    auto a = n / r_h - m / r_l;
    auto b = acc[rank_low - 1] - acc[rank_high];
    delta = (a + b) / n_total_relevances;
  }
  return delta;
}
83
84template <bool unbiased, typename Delta>
86LambdaGrad(linalg::VectorView<float const> labels, common::Span<float const> predts,
87 common::Span<size_t const> sorted_idx,
88 std::size_t rank_high, // higher index on the model rank list
89 std::size_t rank_low, // lower index on the model rank list
90 Delta delta, // function to calculate delta score
91 linalg::VectorView<double const> t_plus, // input bias ratio
92 linalg::VectorView<double const> t_minus, // input bias ratio
93 double* p_cost) {
94 assert(sorted_idx.size() > 0 && "Empty sorted idx for a group.");
95 std::size_t idx_high = sorted_idx[rank_high];
96 std::size_t idx_low = sorted_idx[rank_low];
97
98 if (labels(idx_high) == labels(idx_low)) {
99 *p_cost = 0;
100 return {0.0f, 0.0f};
101 }
102
103 auto best_score = predts[sorted_idx.front()];
104 auto worst_score = predts[sorted_idx.back()];
105
106 auto y_high = labels(idx_high);
107 float s_high = predts[idx_high];
108 auto y_low = labels(idx_low);
109 float s_low = predts[idx_low];
110
111 // Use double whenever possible as we are working on the exp space.
112 double delta_score = std::abs(s_high - s_low);
113 double const sigmoid = common::Sigmoid(s_high - s_low);
114 // Change in metric score like \delta NDCG or \delta MAP
115 double delta_metric = std::abs(delta(y_high, y_low, rank_high, rank_low));
116
117 if (best_score != worst_score) {
118 delta_metric /= (delta_score + 0.01);
119 }
120
121 if (unbiased) {
122 *p_cost = std::log(1.0 / (1.0 - sigmoid)) * delta_metric;
123 }
124
125 auto lambda_ij = (sigmoid - 1.0) * delta_metric;
126 auto hessian_ij = std::max(sigmoid * (1.0 - sigmoid), Eps64()) * delta_metric * 2.0;
127
128 auto k = t_plus.Size();
129 assert(t_minus.Size() == k && "Invalid size of position bias");
130
131 // We need to skip samples that exceed the maximum number of tracked positions, and
132 // samples that have low probability and might bring us floating point issues.
133 if (unbiased && idx_high < k && idx_low < k && t_minus(idx_low) >= Eps64() &&
134 t_plus(idx_high) >= Eps64()) {
135 // The index should be ranks[idx_low], since we assume label is sorted, this reduces
136 // to `idx_low`, which represents the position on the input list, as explained in the
137 // file header.
138 lambda_ij /= (t_plus(idx_high) * t_minus(idx_low));
139 hessian_ij /= (t_plus(idx_high) * t_minus(idx_low));
140 }
141 auto pg = GradientPair{static_cast<float>(lambda_ij), static_cast<float>(hessian_ij)};
142 return pg;
143}
144
145XGBOOST_DEVICE inline GradientPair Repulse(GradientPair pg) {
146 auto ng = GradientPair{-pg.GetGrad(), pg.GetHess()};
147 return ng;
148}
149
namespace cuda_impl {
/**
 * \brief GPU implementation of the NDCG lambda gradient for one boosting round.
 *
 * NOTE(review): `li`/`lj` look like per-position accumulators later consumed by
 * LambdaRankUpdatePositionBias -- confirm against the .cu implementation.
 */
void LambdaRankGetGradientNDCG(Context const* ctx, std::int32_t iter,
                               HostDeviceVector<float> const& preds, MetaInfo const& info,
                               std::shared_ptr<ltr::NDCGCache> p_cache,
                               linalg::VectorView<double const> t_plus,   // input bias ratio
                               linalg::VectorView<double const> t_minus,  // input bias ratio
                               linalg::VectorView<double> li, linalg::VectorView<double> lj,
                               HostDeviceVector<GradientPair>* out_gpair);

/**
 * \brief Populate the per-rank MAP statistics in `p_cache` on the device (the
 *        `n_rel`/`acc` prefix arrays consumed by DeltaMAP).
 */
void MAPStat(Context const* ctx, MetaInfo const& info, common::Span<std::size_t const> d_rank_idx,
             std::shared_ptr<ltr::MAPCache> p_cache);

/**
 * \brief GPU implementation of the MAP lambda gradient for one boosting round.
 */
void LambdaRankGetGradientMAP(Context const* ctx, std::int32_t iter,
                              HostDeviceVector<float> const& predt, MetaInfo const& info,
                              std::shared_ptr<ltr::MAPCache> p_cache,
                              linalg::VectorView<double const> t_plus,   // input bias ratio
                              linalg::VectorView<double const> t_minus,  // input bias ratio
                              linalg::VectorView<double> li, linalg::VectorView<double> lj,
                              HostDeviceVector<GradientPair>* out_gpair);

/**
 * \brief GPU implementation of the pairwise (RankNet-style) lambda gradient.
 */
void LambdaRankGetGradientPairwise(Context const* ctx, std::int32_t iter,
                                   HostDeviceVector<float> const& predt, const MetaInfo& info,
                                   std::shared_ptr<ltr::RankingCache> p_cache,
                                   linalg::VectorView<double const> ti_plus,  // input bias ratio
                                   linalg::VectorView<double const> tj_minus, // input bias ratio
                                   linalg::VectorView<double> li, linalg::VectorView<double> lj,
                                   HostDeviceVector<GradientPair>* out_gpair);

/**
 * \brief Update the position-bias estimates (`ti_plus`/`tj_minus`) from the accumulated
 *        `li`/`lj` statistics on the device.
 */
void LambdaRankUpdatePositionBias(Context const* ctx, linalg::VectorView<double const> li_full,
                                  linalg::VectorView<double const> lj_full,
                                  linalg::Vector<double>* p_ti_plus,
                                  linalg::Vector<double>* p_tj_minus, linalg::Vector<double>* p_li,
                                  linalg::Vector<double>* p_lj,
                                  std::shared_ptr<ltr::RankingCache> p_cache);
}  // namespace cuda_impl
188
namespace cpu_impl {
/**
 * \brief Populate the per-rank MAP statistics in `p_cache` on the CPU (the
 *        `n_rel`/`acc` prefix arrays consumed by DeltaMAP).
 *
 * \param label    Ground-truth relevance degrees.
 * \param rank_idx Indices sorting the predictions into the model rank list.
 * \param p_cache  MAP cache receiving the computed statistics.
 */
void MAPStat(Context const* ctx, linalg::VectorView<float const> label,
             common::Span<std::size_t const> rank_idx, std::shared_ptr<ltr::MAPCache> p_cache);
}  // namespace cpu_impl
200
215template <typename Op>
216void MakePairs(Context const* ctx, std::int32_t iter,
217 std::shared_ptr<ltr::RankingCache> const cache, bst_group_t g,
219 Op op) {
220 auto group_ptr = cache->DataGroupPtr(ctx);
221 ltr::position_t cnt = group_ptr[g + 1] - group_ptr[g];
222
223 if (cache->Param().HasTruncation()) {
224 for (std::size_t i = 0; i < std::min(cnt, cache->Param().NumPair()); ++i) {
225 for (std::size_t j = i + 1; j < cnt; ++j) {
226 op(i, j);
227 }
228 }
229 } else {
230 CHECK_EQ(g_rank.size(), g_label.Size());
231 std::minstd_rand rnd(iter);
232 rnd.discard(g); // fixme(jiamingy): honor the global seed
233 // sort label according to the rank list
234 auto it = common::MakeIndexTransformIter(
235 [&g_rank, &g_label](std::size_t idx) { return g_label(g_rank[idx]); });
236 std::vector<std::size_t> y_sorted_idx =
237 common::ArgSort<std::size_t>(ctx, it, it + cnt, std::greater<>{});
238 // permutation iterator to get the original label
239 auto rev_it = common::MakeIndexTransformIter(
240 [&](std::size_t idx) { return g_label(g_rank[y_sorted_idx[idx]]); });
241
242 for (std::size_t i = 0; i < cnt;) {
243 std::size_t j = i + 1;
244 // find the bucket boundary
245 while (j < cnt && rev_it[i] == rev_it[j]) {
246 ++j;
247 }
248 // Bucket [i,j), construct n_samples pairs for each sample inside the bucket with
249 // another sample outside the bucket.
250 //
251 // n elements left to the bucket, and n elements right to the bucket
252 std::size_t n_lefts = i, n_rights = static_cast<std::size_t>(cnt - j);
253 if (n_lefts + n_rights == 0) {
254 i = j;
255 continue;
256 }
257
258 auto n_samples = cache->Param().NumPair();
259 // for each pair specifed by the user
260 while (n_samples--) {
261 // for each sample in the bucket
262 for (std::size_t pair_idx = i; pair_idx < j; ++pair_idx) {
263 std::size_t ridx = std::uniform_int_distribution<std::size_t>(
264 static_cast<std::size_t>(0), n_lefts + n_rights - 1)(rnd);
265 if (ridx >= n_lefts) {
266 ridx = ridx - i + j; // shift to the right of the bucket
267 }
268 // index that points to the rank list.
269 auto idx0 = y_sorted_idx[pair_idx];
270 auto idx1 = y_sorted_idx[ridx];
271 op(idx0, idx1);
272 }
273 }
274 i = j;
275 }
276 }
277}
278} // namespace xgboost::obj
279#endif // XGBOOST_OBJECTIVE_LAMBDARANK_OBJ_H_
span class implementation, based on the ISO C++20 std::span<T>. The interface should be the same.
Definition span.h:424
A tensor view with static type and dimension.
Definition linalg.h:293
LINALG_HD std::size_t Size() const
Number of items in the tensor.
Definition linalg.h:533
Copyright 2014-2023, XGBoost Contributors.
A device-and-host vector abstraction layer.
Copyright 2015-2023 by XGBoost Contributors.
#define XGBOOST_DEVICE
Tag function as usable by device.
Definition base.h:64
Copyright 2015-2023 by XGBoost Contributors.
defines console logging options for xgboost. Use to enforce unified print behavior.
Copyright 2021-2023 by XGBoost Contributors.
XGBOOST_DEVICE float Sigmoid(float x)
calculate the sigmoid of the input.
Definition math.h:28
std::uint32_t position_t
top-k position
Definition ranking_utils.h:34
Copyright 2022-2023 by XGBoost contributors.
Definition custom_obj.cc:13
void MakePairs(Context const *ctx, std::int32_t iter, std::shared_ptr< ltr::RankingCache > const cache, bst_group_t g, linalg::VectorView< float const > g_label, common::Span< std::size_t const > g_rank, Op op)
Definition lambdarank_obj.h:216
std::uint32_t bst_group_t
Type for ranking group index.
Definition base.h:114
detail::GradientPairInternal< float > GradientPair
gradient statistics pair usually needed in gradient boosting
Definition base.h:256
Runtime context for XGBoost.
Definition context.h:84