Expectile Regression
Imports
In [2]:
from lightgbmlss.model import *
from lightgbmlss.distributions.Expectile import *
from lightgbmlss.datasets.data_loader import load_simulated_gaussian_data
import plotnine
from plotnine import *
plotnine.options.figure_size = (20, 10)
Data
In [3]:
# The data is simulated from a Gaussian distribution, where x is the only informative feature and all others are noise variables:
# loc = 10
# scale = 1 + 4*((0.3 < x) & (x < 0.5)) + 2*(x > 0.7)
train, test = load_simulated_gaussian_data()
X_train, y_train = train.filter(regex="x"), train["y"].values
X_test, y_test = test.filter(regex="x"), test["y"].values
dtrain = lgb.Dataset(X_train, label=y_train)
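For intuition, the data-generating process in the comments above can be re-simulated directly. The snippet below is a purely illustrative sketch: the sample size and seed are assumptions, and `load_simulated_gaussian_data` ships the actual train/test split used in this example.

```python
import numpy as np

# Hypothetical re-simulation of the DGP described above (n and seed are assumptions).
rng = np.random.default_rng(123)
n = 7000
x_true = rng.uniform(0, 1, n)
scale = 1 + 4 * ((0.3 < x_true) & (x_true < 0.5)) + 2 * (x_true > 0.7)
y = rng.normal(loc=10, scale=scale)
```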
Expectile Specification
In [4]:
lgblss = LightGBMLSS(
    Expectile(stabilization="None",       # Options are "None", "MAD", "L2".
              expectiles=[0.05, 0.95],    # List of expectiles to be estimated, in increasing order.
              penalize_crossing=True      # Whether to include a penalty term to discourage crossing of expectiles.
              )
)
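As a brief refresher, the $\tau$-expectile generalizes the mean in the same way the $\tau$-quantile generalizes the median: it minimizes an asymmetrically weighted squared-error loss (Newey & Powell, 1987),

$$
e_\tau(Y) = \arg\min_{e}\; \mathbb{E}\big[\,\lvert \tau - \mathbf{1}\{Y \le e\} \rvert\,(Y - e)^2\,\big],
$$

so that $\tau = 0.5$ recovers the mean, while the pair $[0.05, 0.95]$ specified above brackets the conditional distribution from below and above. `penalize_crossing=True` adds a penalty so the fitted $0.05$-expectile stays below the fitted $0.95$-expectile.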
Hyper-Parameter Optimization
Any LightGBM hyperparameter can be tuned, where the parameter dictionary needs to be structured as follows (a sketch combining all three sample types follows the list):
- Float/Int sample_type
- {"param_name": ["sample_type", {"low": low, "high": high, "log": log}]}
- sample_type: str, Type of sampling, e.g., "float" or "int"
- low: int/float, Lower endpoint of the range of suggested values
- high: int/float, Upper endpoint of the range of suggested values
- log: bool, Flag to sample the value on the log scale or not
- Example: {"eta": ["float", {"low": 1e-5, "high": 1, "log": True}]}
- Categorical sample_type
- {"param_name": ["sample_type", ["choice1", "choice2", "choice3", "..."]]}
- sample_type: str, Type of sampling, here always "categorical"
- choice1, choice2, choice3, ...: str, Possible choices for the parameter
- Example: {"boosting": ["categorical", ["gbdt", "dart"]]}
- For parameters without a tunable choice (needed, e.g., when training on GPU and a parameter such as gpu_id must be fixed to a specific value)
- {"param_name": ["none", [value]]}
- param_name: str, Name of the parameter
- value: int, Value of the parameter
- Example: {"gpu_id": ["none", [0]]}
In [5]:
param_dict = {
    "eta":                     ["float", {"low": 1e-5, "high": 1, "log": True}],
    "max_depth":               ["int",   {"low": 1, "high": 10, "log": False}],
    "num_leaves":              ["int",   {"low": 255, "high": 255, "log": False}],  # set to constant for this example
    "min_data_in_leaf":        ["int",   {"low": 20, "high": 20, "log": False}],    # set to constant for this example
    "min_gain_to_split":       ["float", {"low": 1e-8, "high": 40, "log": False}],
    "min_sum_hessian_in_leaf": ["float", {"low": 1e-8, "high": 500, "log": True}],
    "subsample":               ["float", {"low": 0.2, "high": 1.0, "log": False}],
    "feature_fraction":        ["float", {"low": 0.2, "high": 1.0, "log": False}],
    "boosting":                ["categorical", ["gbdt"]],
}

np.random.seed(123)
opt_param = lgblss.hyper_opt(param_dict,
                             dtrain,
                             num_boost_round=100,       # Number of boosting iterations.
                             nfold=5,                   # Number of cv-folds.
                             early_stopping_rounds=20,  # Number of early-stopping rounds.
                             max_minutes=10,            # Time budget in minutes, i.e., stop study after the given number of minutes.
                             n_trials=30,               # Number of trials. If set to None, there is no limit on the number of trials.
                             silence=False,             # Controls the verbosity of the trials, i.e., user can silence the outputs of the trials.
                             seed=123,                  # Seed used to generate cv-folds.
                             hp_seed=None               # Seed for the random number generator used in the Bayesian hyperparameter search.
                             )
[I 2023-08-11 12:21:09,469] A new study created in memory with name: LightGBMLSS Hyper-Parameter Optimization
[I 2023-08-11 12:21:12,718] Trial 0 finished with value: 2455.671630859375 and parameters: {'eta': 4.999979903379203e-05, 'max_depth': 6, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 21.93993866573528, 'min_sum_hessian_in_leaf': 0.0003530133520827798, 'subsample': 0.4971819264686692, 'feature_fraction': 0.3707311537482785, 'boosting': 'gbdt'}. Best is trial 0 with value: 2455.671630859375.
[I 2023-08-11 12:21:14,662] Trial 1 finished with value: 1905.1077880859375 and parameters: {'eta': 0.031600943671035775, 'max_depth': 3, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 25.283240746368264, 'min_sum_hessian_in_leaf': 49.082392515255734, 'subsample': 0.6788944834474666, 'feature_fraction': 0.9258044091945574, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:18,260] Trial 2 finished with value: 2163.520751953125 and parameters: {'eta': 0.005894981780547752, 'max_depth': 9, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 10.409790461895193, 'min_sum_hessian_in_leaf': 0.0008141832901711874, 'subsample': 0.4070793729617024, 'feature_fraction': 0.6846602442537073, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:20,956] Trial 3 finished with value: 2340.4150390625 and parameters: {'eta': 0.001961322558956042, 'max_depth': 4, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 12.389445813281812, 'min_sum_hessian_in_leaf': 0.0001665823267805825, 'subsample': 0.8122180498006835, 'feature_fraction': 0.5881597651097203, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:23,663] Trial 4 finished with value: 2455.928955078125 and parameters: {'eta': 4.876502677739385e-05, 'max_depth': 2, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 4.557086477842732, 'min_sum_hessian_in_leaf': 8.971172534030456e-08, 'subsample': 0.43772569367787945, 'feature_fraction': 0.33757411361894973, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:26,413] Trial 5 finished with value: 2020.169677734375 and parameters: {'eta': 0.015583832782804402, 'max_depth': 6, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 17.990075586462332, 'min_sum_hessian_in_leaf': 9.047858492815616e-06, 'subsample': 0.2661853234410493, 'feature_fraction': 0.43711054797968024, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:29,603] Trial 6 finished with value: 2453.094970703125 and parameters: {'eta': 3.192833281012269e-05, 'max_depth': 5, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 4.179852781197093, 'min_sum_hessian_in_leaf': 0.0013367062405656063, 'subsample': 0.760623390889942, 'feature_fraction': 0.9918521803651483, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:33,061] Trial 7 finished with value: 2390.234375 and parameters: {'eta': 0.001099408166117131, 'max_depth': 7, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 5.928259874226358, 'min_sum_hessian_in_leaf': 22.801819887756512, 'subsample': 0.8539904207951285, 'feature_fraction': 0.5227409182953131, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:36,013] Trial 8 finished with value: 1914.283935546875 and parameters: {'eta': 0.2493102080752807, 'max_depth': 1, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 9.13540771750102, 'min_sum_hessian_in_leaf': 0.34110612176978133, 'subsample': 0.5308020126325235, 'feature_fraction': 0.8641969342663409, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:36,677] Trial 9 finished with value: 1937.4613037109375 and parameters: {'eta': 0.9636709157054849, 'max_depth': 5, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 32.67317336133683, 'min_sum_hessian_in_leaf': 1.2744267371826801e-08, 'subsample': 0.9935887353691604, 'feature_fraction': 0.943953338852964, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:38,579] Trial 10 finished with value: 1946.8072509765625 and parameters: {'eta': 0.04879973495349672, 'max_depth': 3, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 39.06107022051272, 'min_sum_hessian_in_leaf': 106.46715749293509, 'subsample': 0.6595408126514654, 'feature_fraction': 0.7942683208717877, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:40,307] Trial 11 finished with value: 1942.6409912109375 and parameters: {'eta': 0.3475471835643695, 'max_depth': 1, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 23.567503144962224, 'min_sum_hessian_in_leaf': 0.4888567766798536, 'subsample': 0.598911344176229, 'feature_fraction': 0.8378628459298021, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:43,076] Trial 12 finished with value: 1970.949951171875 and parameters: {'eta': 0.1355810372876077, 'max_depth': 1, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 27.308295812909954, 'min_sum_hessian_in_leaf': 0.45165961145216943, 'subsample': 0.5607270559851195, 'feature_fraction': 0.8192697272856723, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:43,791] Trial 13 pruned. Trial was pruned at iteration 20.
[I 2023-08-11 12:21:45,120] Trial 14 finished with value: 1917.481201171875 and parameters: {'eta': 0.1918179723427333, 'max_depth': 2, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 0.5575780726770159, 'min_sum_hessian_in_leaf': 0.38144740122258586, 'subsample': 0.5276921773301158, 'feature_fraction': 0.8851651910106035, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:46,648] Trial 15 finished with value: 1908.2398681640625 and parameters: {'eta': 0.06814525677756796, 'max_depth': 3, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 13.416527851881755, 'min_sum_hessian_in_leaf': 1.5533574937270136, 'subsample': 0.6891248913559522, 'feature_fraction': 0.9718069324964819, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:47,454] Trial 16 pruned. Trial was pruned at iteration 20.
[I 2023-08-11 12:21:48,239] Trial 17 pruned. Trial was pruned at iteration 20.
[I 2023-08-11 12:21:49,033] Trial 18 pruned. Trial was pruned at iteration 20.
[I 2023-08-11 12:21:49,857] Trial 19 pruned. Trial was pruned at iteration 20.
[I 2023-08-11 12:21:51,393] Trial 20 finished with value: 1911.4632568359375 and parameters: {'eta': 0.06986238192969108, 'max_depth': 4, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 29.160727721810346, 'min_sum_hessian_in_leaf': 0.016442500462406705, 'subsample': 0.6023777335914249, 'feature_fraction': 0.9155248389222844, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:52,873] Trial 21 finished with value: 1911.5198974609375 and parameters: {'eta': 0.07161895441713878, 'max_depth': 4, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 29.27696506397222, 'min_sum_hessian_in_leaf': 0.020946061323593588, 'subsample': 0.6163027333377462, 'feature_fraction': 0.9137504739618006, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:53,660] Trial 22 pruned. Trial was pruned at iteration 20.
[I 2023-08-11 12:21:55,121] Trial 23 finished with value: 1908.989501953125 and parameters: {'eta': 0.08734639812497531, 'max_depth': 4, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 30.42886083118784, 'min_sum_hessian_in_leaf': 0.03828710084297306, 'subsample': 0.7131459665462002, 'feature_fraction': 0.7904449014182882, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:55,893] Trial 24 pruned. Trial was pruned at iteration 20.
[I 2023-08-11 12:21:57,012] Trial 25 finished with value: 1917.7242431640625 and parameters: {'eta': 0.1615054287282905, 'max_depth': 5, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 20.364960331001463, 'min_sum_hessian_in_leaf': 3.329890678595058, 'subsample': 0.7972962597807455, 'feature_fraction': 0.8464533630717106, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:57,915] Trial 26 finished with value: 1909.692626953125 and parameters: {'eta': 0.5158300039154515, 'max_depth': 2, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 25.112985606000265, 'min_sum_hessian_in_leaf': 0.05243689522362543, 'subsample': 0.6973560330154309, 'feature_fraction': 0.7889207482360956, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:21:58,767] Trial 27 pruned. Trial was pruned at iteration 20.
[I 2023-08-11 12:22:00,393] Trial 28 finished with value: 1921.2425537109375 and parameters: {'eta': 0.09255361005893879, 'max_depth': 4, 'num_leaves': 255, 'min_data_in_leaf': 20, 'min_gain_to_split': 14.802111899924473, 'min_sum_hessian_in_leaf': 1.7943497012679166, 'subsample': 0.692797127315583, 'feature_fraction': 0.6813967545026932, 'boosting': 'gbdt'}. Best is trial 1 with value: 1905.1077880859375.
[I 2023-08-11 12:22:01,231] Trial 29 pruned. Trial was pruned at iteration 20.

Hyper-Parameter Optimization successfully finished.
  Number of finished trials: 30
  Best trial:
    Value: 1905.1077880859375
    Params:
      eta: 0.031600943671035775
      max_depth: 3
      num_leaves: 255
      min_data_in_leaf: 20
      min_gain_to_split: 25.283240746368264
      min_sum_hessian_in_leaf: 49.082392515255734
      subsample: 0.6788944834474666
      feature_fraction: 0.9258044091945574
      boosting: gbdt
    opt_rounds: 55
Model Training
In [6]:
np.random.seed(123)

opt_params = opt_param.copy()
n_rounds = opt_params["opt_rounds"]
del opt_params["opt_rounds"]

# Train Model with optimized hyperparameters
lgblss.train(opt_params,
             dtrain,
             num_boost_round=n_rounds
             )
Prediction
In [7]:
# Predicted expectiles
pred_expectile = lgblss.predict(X_test, pred_type="expectiles")
In [8]:
pred_expectile.head()
Out[8]:
|   | expectile_0.05 | expectile_0.95 |
|---|---|---|
| 0 | 6.695340 | 13.277894 |
| 1 | 6.615792 | 13.277894 |
| 2 | 8.519470 | 11.511595 |
| 3 | 4.557220 | 14.967069 |
| 4 | 6.615792 | 13.367647 |
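Since the model was configured with `penalize_crossing=True`, a quick sanity check is that the predicted lower expectile never exceeds the upper one. The snippet below is a minimal sketch, assuming the column names shown above:

```python
# Check that no predicted 0.05-expectile crosses the corresponding 0.95-expectile.
n_crossings = (pred_expectile["expectile_0.05"] > pred_expectile["expectile_0.95"]).sum()
print(f"Number of crossing predictions: {n_crossings}")  # ideally 0
```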
SHAP Interpretability
In [9]:
# Partial Dependence Plot of how x acts on selected expectile
lgblss.expectile_plot(X_test,
                      expectile="expectile_0.95",
                      feature="x_true",
                      plot_type="Partial_Dependence")
In [10]:
# Partial Dependence Plot of how x acts on selected expectile
lgblss.expectile_plot(X_test,
                      expectile="expectile_0.05",
                      feature="x_true",
                      plot_type="Partial_Dependence")
In [11]:
# Global Feature Importance of selected expectile
lgblss.expectile_plot(X_test,
                      expectile="expectile_0.95",
                      plot_type="Feature_Importance")
Plot of Actual vs. Predicted Expectiles
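The cell below compares the fitted expectiles with their theoretical counterparts. For a random variable $Y$, the $\tau$-expectile $e_\tau$ is characterized by the first-order condition

$$
\tau\,\mathbb{E}\big[(Y - e_\tau)_+\big] = (1 - \tau)\,\mathbb{E}\big[(e_\tau - Y)_+\big], \qquad (x)_+ = \max(x, 0),
$$

which `expectile_norm` evaluates for $Y \sim \mathcal{N}(m, \mathrm{sd}^2)$, here with $m = 10$ and the piecewise scale used to simulate the data.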
In [12]:
np.random.seed(123)

###
# Actual Expectiles
###
y_loc = np.array([10])
y_scale = np.array([1 + 4*((0.3 < test["x_true"].values) & (test["x_true"].values < 0.5)) + 2*(test["x_true"].values > 0.7)])
tau_lower = np.array([lgblss.dist.tau[0]])
tau_upper = np.array([lgblss.dist.tau[1]])

# Calculate exact expectiles assuming a Normal distribution
expectile_lb = expectile_norm(tau=tau_lower, m=y_loc, sd=y_scale).reshape(-1,)
expectile_ub = expectile_norm(tau=tau_upper, m=y_loc, sd=y_scale).reshape(-1,)

# Flag observations that fall outside the actual expectile band
test["expect"] = np.where(test["y"].values < expectile_lb, 0, np.where(test["y"].values < expectile_ub, 1, 2))
test["alpha"] = np.where(test["y"].values <= expectile_lb, 1, np.where(test["y"].values >= expectile_ub, 1, 0))
df_expectiles = test[test["alpha"] == 1]

# Lower Bound: step function of the actual lower expectile
yl = list(set(expectile_lb))
yl.sort()
yl = [yl[2], yl[0], yl[2], yl[1], yl[1]]
sfunl = pd.DataFrame({"x_true": [0, 0.3, 0.5, 0.7, 1], "y": yl})

# Upper Bound: step function of the actual upper expectile
yu = list(set(expectile_ub))
yu.sort()
yu = [yu[0], yu[2], yu[0], yu[1], yu[1]]
sfunu = pd.DataFrame({"x_true": [0, 0.3, 0.5, 0.7, 1], "y": yu})

###
# Forecasted Expectiles
###
test["lb"] = pred_expectile.iloc[:, 0]
test["ub"] = pred_expectile.iloc[:, 1]

###
# Plot
###
(ggplot(test, aes("x_true", "y")) +
 geom_point(alpha=0.2, color="black", size=2) +
 theme_bw(base_size=15) +
 theme(legend_position="bottom", plot_title=element_text(hjust=0.5)) +
 labs(title="LightGBMLSS Expectile Regression - Simulated Data Example") +
 geom_line(aes("x_true", "ub"), size=1.5, color="blue", alpha=0.7) +
 geom_line(aes("x_true", "lb"), size=1.5, color="blue", alpha=0.7) +
 geom_point(df_expectiles, aes("x_true", "y"), color="red", alpha=0.7, size=2) +
 geom_step(sfunl, aes("x_true", "y"), size=1, linetype="dashed") +
 geom_step(sfunu, aes("x_true", "y"), size=1, linetype="dashed")
 )
Out[12]:
<Figure Size: (2000 x 1000)>