xentropy_objective.hpp
#ifndef LIGHTGBM_OBJECTIVE_XENTROPY_OBJECTIVE_HPP_
#define LIGHTGBM_OBJECTIVE_XENTROPY_OBJECTIVE_HPP_

#include <LightGBM/objective_function.h>
#include <LightGBM/meta.h>

#include <LightGBM/utils/common.h>

#include <algorithm>
#include <cmath>
#include <cstring>
#include <sstream>
#include <string>
#include <vector>

/*
 * Implements gradients and hessians for the following pointwise losses.
 * The target y may be any value in the closed interval [0, 1].
 *
 * (1) CrossEntropy; "xentropy";
 *
 *     loss(y, p, w) = { -(1-y)*log(1-p) - y*log(p) } * w,
 *     with probability p = 1/(1+exp(-f)), where f is the score being boosted
 *
 *     ConvertToOutput: f -> p
 *
 * (2) CrossEntropyLambda; "xentlambda"
 *
 *     loss(y, p, w) = -(1-y)*log(1-p) - y*log(p),
 *     with p = 1-exp(-lambda*w), lambda = log(1+exp(f)), f the score being boosted, and w > 0
 *
 *     ConvertToOutput: f -> lambda
 *
 * (1) and (2) define the same loss when w = 1, since then
 * 1 - exp(-lambda) = 1 - 1/(1+exp(f)) = 1/(1+exp(-f)) = p;
 * their ConvertToOutput still differ, however: (1) returns p, (2) returns lambda.
 */

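/*
 * Derivation sketch for loss (1), matching CrossEntropy::GetGradients below:
 * with z = 1/(1+exp(-f)) we have dz/df = z*(1-z), and the chain rule gives
 *
 *     dL/df   = w * (z - y)
 *     d2L/df2 = w * z * (1 - z)
 *
 * per point, which is exactly what the code computes.
 */
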
namespace LightGBM {

/*!
 * \brief Objective function for cross-entropy (with optional linear weights)
 */
class CrossEntropy : public ObjectiveFunction {
 public:
  explicit CrossEntropy(const Config&) {
  }

  explicit CrossEntropy(const std::vector<std::string>&) {
  }

  ~CrossEntropy() {}

  void Init(const Metadata& metadata, data_size_t num_data) override {
    num_data_ = num_data;
    label_ = metadata.label();
    weights_ = metadata.weights();

    CHECK_NOTNULL(label_);
    Common::CheckElementsIntervalClosed<label_t>(label_, 0.0f, 1.0f, num_data_, GetName());
    Log::Info("[%s:%s]: (objective) labels passed interval [0, 1] check", GetName(), __func__);

    if (weights_ != nullptr) {
      label_t minw;
      double sumw;
      Common::ObtainMinMaxSum(weights_, num_data_, &minw, static_cast<label_t*>(nullptr), &sumw);
      if (minw < 0.0f) {
        Log::Fatal("[%s]: at least one weight is negative", GetName());
      }
      if (sumw == 0.0f) {
        Log::Fatal("[%s]: sum of weights is zero", GetName());
      }
    }
  }

  void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override {
    if (weights_ == nullptr) {
      // compute pointwise gradients and hessians with implied unit weights
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        const double z = 1.0f / (1.0f + std::exp(-score[i]));
        gradients[i] = static_cast<score_t>(z - label_[i]);
        hessians[i] = static_cast<score_t>(z * (1.0f - z));
      }
    } else {
      // compute pointwise gradients and hessians with given weights
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        const double z = 1.0f / (1.0f + std::exp(-score[i]));
        gradients[i] = static_cast<score_t>((z - label_[i]) * weights_[i]);
        hessians[i] = static_cast<score_t>(z * (1.0f - z) * weights_[i]);
      }
    }
  }

  const char* GetName() const override {
    return "xentropy";
  }

  // convert a score to a probability
  void ConvertOutput(const double* input, double* output) const override {
    output[0] = 1.0f / (1.0f + std::exp(-input[0]));
  }

  std::string ToString() const override {
    std::stringstream str_buf;
    str_buf << GetName();
    return str_buf.str();
  }

  // implement custom average to boost from (if enabled among options)
  double BoostFromScore(int) const override {
    double suml = 0.0f;
    double sumw = 0.0f;
    if (weights_ != nullptr) {
      #pragma omp parallel for schedule(static) reduction(+:suml, sumw)
      for (data_size_t i = 0; i < num_data_; ++i) {
        suml += label_[i] * weights_[i];
        sumw += weights_[i];
      }
    } else {
      sumw = static_cast<double>(num_data_);
      #pragma omp parallel for schedule(static) reduction(+:suml)
      for (data_size_t i = 0; i < num_data_; ++i) {
        suml += label_[i];
      }
    }
    double pavg = suml / sumw;
    pavg = std::min(pavg, 1.0 - kEpsilon);
    pavg = std::max<double>(pavg, kEpsilon);
    double initscore = std::log(pavg / (1.0f - pavg));
    Log::Info("[%s:%s]: pavg = %f -> initscore = %f", GetName(), __func__, pavg, initscore);
    return initscore;
  }
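
  // Rationale: for a constant score f, the total gradient
  // sum_i w_i * (sigmoid(f) - y_i) vanishes exactly when sigmoid(f) equals
  // the weighted label mean pavg, so the best constant is logit(pavg).
  // For example, pavg = 0.25 gives initscore = log(0.25/0.75) ~= -1.0986.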

 private:
  /*! \brief Number of data points */
  data_size_t num_data_;
  /*! \brief Pointer to the labels */
  const label_t* label_;
  /*! \brief Pointer to the weights (nullptr when no weights are given) */
  const label_t* weights_;
};

/*!
 * \brief Objective function for an alternative parameterization of cross-entropy (see top of file for explanation)
 */
class CrossEntropyLambda : public ObjectiveFunction {
 public:
  explicit CrossEntropyLambda(const Config&) {
    min_weight_ = max_weight_ = 0.0f;
  }

  explicit CrossEntropyLambda(const std::vector<std::string>&) {
  }

  ~CrossEntropyLambda() {}

  void Init(const Metadata& metadata, data_size_t num_data) override {
    num_data_ = num_data;
    label_ = metadata.label();
    weights_ = metadata.weights();

    CHECK_NOTNULL(label_);
    Common::CheckElementsIntervalClosed<label_t>(label_, 0.0f, 1.0f, num_data_, GetName());
    Log::Info("[%s:%s]: (objective) labels passed interval [0, 1] check", GetName(), __func__);

    if (weights_ != nullptr) {
      Common::ObtainMinMaxSum(weights_, num_data_, &min_weight_, &max_weight_, static_cast<label_t*>(nullptr));
      if (min_weight_ <= 0.0f) {
        Log::Fatal("[%s]: at least one weight is non-positive", GetName());
      }

      // issue an info statement about the weight ratio
      double weight_ratio = max_weight_ / min_weight_;
      Log::Info("[%s:%s]: min, max weights = %f, %f; ratio = %f",
                GetName(), __func__,
                min_weight_, max_weight_,
                weight_ratio);
    } else {
      // all weights are implied to be unity; nothing to do
    }
  }

  void GetGradients(const double* score, score_t* gradients, score_t* hessians) const override {
    if (weights_ == nullptr) {
      // compute pointwise gradients and hessians with implied unit weights;
      // exactly equivalent to CrossEntropy with unit weights
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        const double z = 1.0f / (1.0f + std::exp(-score[i]));
        gradients[i] = static_cast<score_t>(z - label_[i]);
        hessians[i] = static_cast<score_t>(z * (1.0f - z));
      }
    } else {
      // compute pointwise gradients and hessians with given weights
      #pragma omp parallel for schedule(static)
      for (data_size_t i = 0; i < num_data_; ++i) {
        const double w = weights_[i];
        const double y = label_[i];
        const double epf = std::exp(score[i]);
        const double hhat = std::log(1.0f + epf);     // lambda = log(1 + exp(f))
        const double z = 1.0f - std::exp(-w * hhat);  // probability p = 1 - exp(-w*lambda)
        const double enf = 1.0f / epf;                // = std::exp(-score[i])
        // gradient = w * sigmoid(f) * (1 - y/z); note 1/(1+enf) = sigmoid(f)
        gradients[i] = static_cast<score_t>((1.0f - y / z) * w / (1.0f + enf));
        const double c = 1.0f / (1.0f - z);
        double d = 1.0f + epf;
        const double a = w * epf / (d * d);           // = w * sigmoid'(f)
        d = c - 1.0f;
        const double b = (c / (d * d)) * (1.0f + w * epf - c);
        hessians[i] = static_cast<score_t>(a * (1.0f + y * b));
      }
    }
  }
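
  // Derivation sketch for the weighted branch above: with s = 1/(1+exp(-f)),
  // h = log(1+exp(f)) and z = 1 - exp(-w*h), we have dh/df = s and
  // dz/df = w*(1-z)*s, so
  //
  //     dL/df = (1-y)*w*s - y*(dz/df)/z = w*s*(1 - y/z),
  //
  // which is gradients[i] since 1/(1+enf) = s. Differentiating once more gives
  // d2L/df2 = a*(1 + y*b), with a = w*s*(1-s) and
  // b = ((1-z)/z^2) * (1 + w*exp(f) - 1/(1-z)), which is hessians[i].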

  const char* GetName() const override {
    return "xentlambda";
  }

  //
  // ATTENTION: the output of this function is the "normalized exponential parameter" lambda > 0, not the probability.
  //
  // If this code instead read: output[0] = 1.0f / (1.0f + std::exp(-input[0]));
  // the output would still not be the probability unless the weights were unity.
  //
  // Let z = 1/(1 + exp(-f)); then prob(z) = 1 - (1-z)^w, where w is the weight of the specific point.
  //

  void ConvertOutput(const double* input, double* output) const override {
    output[0] = std::log(1.0f + std::exp(input[0]));
  }
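
  // A hypothetical caller-side helper (not part of this class), sketching how
  // the per-point probability could be recovered from the converted output
  // lambda together with that point's weight w:
  //
  //     double prob = 1.0 - std::exp(-w * lambda);  // == 1 - (1-z)^w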

  std::string ToString() const override {
    std::stringstream str_buf;
    str_buf << GetName();
    return str_buf.str();
  }

  double BoostFromScore(int) const override {
    double suml = 0.0f;
    double sumw = 0.0f;
    if (weights_ != nullptr) {
      #pragma omp parallel for schedule(static) reduction(+:suml, sumw)
      for (data_size_t i = 0; i < num_data_; ++i) {
        suml += label_[i] * weights_[i];
        sumw += weights_[i];
      }
    } else {
      sumw = static_cast<double>(num_data_);
      #pragma omp parallel for schedule(static) reduction(+:suml)
      for (data_size_t i = 0; i < num_data_; ++i) {
        suml += label_[i];
      }
    }
    double havg = suml / sumw;
    double initscore = std::log(std::exp(havg) - 1.0f);
    Log::Info("[%s:%s]: havg = %f -> initscore = %f", GetName(), __func__, havg, initscore);
    return initscore;
  }
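
  // Note: this inverts the softplus link used by ConvertOutput: solving
  // log(1 + exp(f)) = havg for f gives f = log(exp(havg) - 1). For example,
  // havg = log(2) ~= 0.6931 yields initscore = log(2 - 1) = 0.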

 private:
  /*! \brief Number of data points */
  data_size_t num_data_;
  /*! \brief Pointer to the labels */
  const label_t* label_;
  /*! \brief Pointer to the weights (nullptr when no weights are given) */
  const label_t* weights_;
  /*! \brief Minimum weight, computed in Init */
  label_t min_weight_;
  /*! \brief Maximum weight, computed in Init */
  label_t max_weight_;
};

}  // end namespace LightGBM

#endif  // end #ifndef LIGHTGBM_OBJECTIVE_XENTROPY_OBJECTIVE_HPP_
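
A minimal standalone sketch (assuming nothing beyond the C++ standard library, and independent of the LightGBM headers above) that cross-checks the analytic gradient and hessian of the unweighted "xentropy" loss against central finite differences:

#include <cmath>
#include <cstdio>

// loss(y, f) = -(1-y)*log(1-p) - y*log(p), with p = 1/(1+exp(-f))
static double Loss(double y, double f) {
  const double p = 1.0 / (1.0 + std::exp(-f));
  return -(1.0 - y) * std::log(1.0 - p) - y * std::log(p);
}

int main() {
  const double y = 0.3, f = 0.7, eps = 1e-5;
  const double p = 1.0 / (1.0 + std::exp(-f));
  // analytic forms, as in CrossEntropy::GetGradients (unit weights)
  const double grad = p - y;
  const double hess = p * (1.0 - p);
  // central finite differences of the loss in f
  const double grad_fd = (Loss(y, f + eps) - Loss(y, f - eps)) / (2.0 * eps);
  const double hess_fd =
      (Loss(y, f + eps) - 2.0 * Loss(y, f) + Loss(y, f - eps)) / (eps * eps);
  std::printf("grad: analytic = %.8f, finite diff = %.8f\n", grad, grad_fd);
  std::printf("hess: analytic = %.8f, finite diff = %.8f\n", hess, hess_fd);
  return 0;
}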