Gamma Regression (California Housing Data)¶
Imports¶
In [12]:
from xgboostlss.model import *
from xgboostlss.distributions.Gamma import *
from sklearn import datasets
from sklearn.model_selection import train_test_split
import multiprocessing
Data¶
In [2]:
n_cpu = multiprocessing.cpu_count()
housing_data = datasets.fetch_california_housing()
X, y = housing_data["data"], housing_data["target"]
feature_names = housing_data["feature_names"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
dtrain = xgb.DMatrix(X_train, label=y_train, nthread=n_cpu)
dtest = xgb.DMatrix(X_test, nthread=n_cpu)
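Since the Gamma distribution is only defined for a strictly positive response, it is worth verifying that the target contains no zeros or negative values before fitting. The following check is not part of the original notebook and is purely optional:

# Optional sanity check: the Gamma distribution requires a strictly positive response.
assert (y > 0).all(), "Gamma requires a strictly positive response"
print(f"y ranges from {y.min():.2f} to {y.max():.2f}")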
Distribution Selection¶
In [3]:
# Specifies a Gamma distribution with exp response function and the option to stabilize the Gradient/Hessian. Type ?Gamma for an overview.
xgblss = XGBoostLSS(
    Gamma(stabilization="L2",   # Options are "None", "MAD", "L2".
          response_fn="exp",    # Function to transform the concentration and rate parameters, e.g., "exp" or "softplus".
          loss_fn="nll"         # Loss function. Options are "nll" (negative log-likelihood) or "crps" (continuous ranked probability score).
          )
)
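The response function determines how the raw, unconstrained boosting outputs are mapped to the strictly positive concentration and rate parameters of the Gamma distribution. As a rough illustration of the two options (not part of the original notebook; it assumes torch is available in the session, as the prediction cell below also uses torch.manual_seed without an explicit import):

# Illustration only: both response functions map any real number to a positive value.
raw_pred = torch.tensor([-2.0, 0.0, 3.0])
print(torch.exp(raw_pred))                     # exp: strictly positive, grows exponentially
print(torch.nn.functional.softplus(raw_pred))  # softplus: strictly positive, roughly linear for large inputs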
Hyper-Parameter Optimization¶
Any XGBoost hyperparameter can be tuned. The parameter dictionary needs to have the following structure:
- Float/Int sample_type
    - {"param_name": ["sample_type", {"low": low, "high": high, "log": log}]}
    - sample_type: str, Type of sampling, e.g., "float" or "int"
    - low: int or float, Lower endpoint of the range of suggested values
    - high: int or float, Upper endpoint of the range of suggested values
    - log: bool, Whether to sample the value on a log scale
    - Example: {"eta": ["float", {"low": 1e-5, "high": 1, "log": True}]}
- Categorical sample_type
    - {"param_name": ["sample_type", ["choice1", "choice2", "choice3", "..."]]}
    - sample_type: str, Must be "categorical"
    - choice1, choice2, choice3, ...: str, Possible choices for the parameter
    - Example: {"booster": ["categorical", ["gbtree", "dart"]]}
- For parameters without a tunable choice (needed, e.g., if tree_method = "gpu_hist" and gpu_id needs to be specified)
    - {"param_name": ["none", [value]]}
    - param_name: str, Name of the parameter
    - value: int, Value of the parameter
    - Example: {"gpu_id": ["none", [0]]}

Depending on which parameters are optimized, some of them might not be used. For example, when {"booster": ["categorical", ["gbtree", "gblinear"]]} and {"max_depth": ["int", {"low": 1, "high": 10, "log": False}]} are specified, max_depth is ignored in trials where gblinear is sampled, since gblinear has no such argument; see the small sketch below.
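As an illustration of this note (not used in the remainder of this example), such a dictionary could look as follows:

# Illustration only: max_depth is simply ignored in trials where "gblinear" is sampled.
param_dict_example = {
    "eta":       ["float", {"low": 1e-5, "high": 1, "log": True}],
    "max_depth": ["int", {"low": 1, "high": 10, "log": False}],
    "booster":   ["categorical", ["gbtree", "gblinear"]],
}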
In [4]:
param_dict = {
    "eta":              ["float", {"low": 1e-5, "high": 1, "log": True}],
    "max_depth":        ["int", {"low": 1, "high": 10, "log": False}],
    "gamma":            ["float", {"low": 1e-8, "high": 40, "log": True}],
    "subsample":        ["float", {"low": 0.2, "high": 1.0, "log": False}],
    "colsample_bytree": ["float", {"low": 0.2, "high": 1.0, "log": False}],
    "min_child_weight": ["float", {"low": 1e-8, "high": 500, "log": True}],
    "booster":          ["categorical", ["gbtree"]],
    # "tree_method":    ["categorical", ["auto", "approx", "hist", "gpu_hist"]],
    # "gpu_id":         ["none", [0]]
}

np.random.seed(123)
opt_param = xgblss.hyper_opt(param_dict,
                             dtrain,
                             num_boost_round=100,        # Number of boosting iterations.
                             nfold=5,                    # Number of cv-folds.
                             early_stopping_rounds=20,   # Number of early-stopping rounds.
                             max_minutes=5,              # Time budget in minutes, i.e., stop the study after the given number of minutes.
                             n_trials=None,              # Number of trials. If set to None, there is no limit on the number of trials.
                             silence=False,              # Controls the verbosity of the trials, i.e., the user can silence the trial outputs.
                             seed=123,                   # Seed used to generate the cv-folds.
                             hp_seed=None                # Seed for the random number generator used in the Bayesian hyperparameter search.
                             )
[I 2023-05-18 08:29:58,928] A new study created in memory with name: XGBoostLSS Hyper-Parameter Optimization
Progress bar is experimental (supported from v1.2.0). The interface can change in the future.
[I 2023-05-18 08:30:11,999] Trial 0 finished with value: 4221.315039 and parameters: {'eta': 0.0007956486113255149, 'max_depth': 4, 'gamma': 9.651293834004364e-06, 'subsample': 0.9299029412512503, 'colsample_bytree': 0.533212122378312, 'min_child_weight': 1.0645031471691002e-06, 'booster': 'gbtree'}. Best is trial 0 with value: 4221.315039.
[I 2023-05-18 08:30:31,329] Trial 1 finished with value: 4704.1411134 and parameters: {'eta': 1.5405304565767472e-05, 'max_depth': 8, 'gamma': 1.2567045481680053e-06, 'subsample': 0.8546851597908047, 'colsample_bytree': 0.7705937486648105, 'min_child_weight': 0.06607432187087299, 'booster': 'gbtree'}. Best is trial 0 with value: 4221.315039.
[I 2023-05-18 08:30:43,968] Trial 2 finished with value: 4700.0834962 and parameters: {'eta': 4.0285972682401886e-05, 'max_depth': 2, 'gamma': 2.9603310576396133e-08, 'subsample': 0.32250688445301023, 'colsample_bytree': 0.6536953349316124, 'min_child_weight': 1.4342277391569358e-06, 'booster': 'gbtree'}. Best is trial 0 with value: 4221.315039.
[I 2023-05-18 08:30:56,918] Trial 3 finished with value: 4719.9332028 and parameters: {'eta': 1.096369459232895e-05, 'max_depth': 2, 'gamma': 1.36363527254592, 'subsample': 0.2610232160164289, 'colsample_bytree': 0.7367492294157403, 'min_child_weight': 90.52248573370663, 'booster': 'gbtree'}. Best is trial 0 with value: 4221.315039.
[I 2023-05-18 08:31:13,594] Trial 4 finished with value: 3789.2708494 and parameters: {'eta': 0.0015973526799597118, 'max_depth': 4, 'gamma': 0.0028902185513508716, 'subsample': 0.7301170911858303, 'colsample_bytree': 0.8794236864892113, 'min_child_weight': 2.3385499629068365, 'booster': 'gbtree'}. Best is trial 4 with value: 3789.2708494.
[I 2023-05-18 08:31:17,490] Trial 5 finished with value: 17276.408789 and parameters: {'eta': 0.7754190340462471, 'max_depth': 8, 'gamma': 0.04779979308560174, 'subsample': 0.26819583759117727, 'colsample_bytree': 0.4716497365826704, 'min_child_weight': 1.5165151855944943e-05, 'booster': 'gbtree'}. Best is trial 4 with value: 3789.2708494.
[I 2023-05-18 08:31:32,696] Trial 6 finished with value: 1848.9831299999998 and parameters: {'eta': 0.15100884665534847, 'max_depth': 3, 'gamma': 0.06213886551552638, 'subsample': 0.996066629023483, 'colsample_bytree': 0.5166347172920032, 'min_child_weight': 1.4431814327658545, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:31:50,650] Trial 7 finished with value: 3203.0339846 and parameters: {'eta': 0.005276040226297436, 'max_depth': 9, 'gamma': 19.985885054603994, 'subsample': 0.20671620521186185, 'colsample_bytree': 0.9922070467612387, 'min_child_weight': 2.725232979902565, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:32:11,875] Trial 8 finished with value: 3689.2131346 and parameters: {'eta': 0.0013108750480960915, 'max_depth': 10, 'gamma': 0.0015842103186243096, 'subsample': 0.42178420458507704, 'colsample_bytree': 0.7291727860468711, 'min_child_weight': 0.044455321804948635, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:32:29,694] Trial 9 finished with value: 2934.2718262000003 and parameters: {'eta': 0.010754205262854151, 'max_depth': 7, 'gamma': 0.099098099327934, 'subsample': 0.5723889394701623, 'colsample_bytree': 0.41654520716493304, 'min_child_weight': 0.2447012703927633, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:32:32,618] Trial 10 finished with value: 4755.6319336 and parameters: {'eta': 0.8134628838612719, 'max_depth': 1, 'gamma': 25.87490764106699, 'subsample': 0.9969166801978794, 'colsample_bytree': 0.24272571572151835, 'min_child_weight': 0.0004636770815965044, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:32:49,288] Trial 11 finished with value: 2287.1541015999996 and parameters: {'eta': 0.049787864917925403, 'max_depth': 6, 'gamma': 0.08313887137568525, 'subsample': 0.5875299866894881, 'colsample_bytree': 0.4512383454552321, 'min_child_weight': 201.60772059390564, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:33:04,557] Trial 12 finished with value: 2209.2589844 and parameters: {'eta': 0.07676963722759728, 'max_depth': 5, 'gamma': 0.07566160689603878, 'subsample': 0.6558562323266741, 'colsample_bytree': 0.3720148355281364, 'min_child_weight': 264.9052227263956, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:33:19,171] Trial 13 finished with value: 2071.3895018 and parameters: {'eta': 0.09659622817124115, 'max_depth': 4, 'gamma': 0.0008264653817919475, 'subsample': 0.7849646456353283, 'colsample_bytree': 0.3371901953937099, 'min_child_weight': 239.81745359254714, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:33:33,561] Trial 14 finished with value: 2078.2393312 and parameters: {'eta': 0.1316313662302804, 'max_depth': 3, 'gamma': 0.00013080976774594984, 'subsample': 0.8509652593269317, 'colsample_bytree': 0.2909226669647212, 'min_child_weight': 2.7126575878655095e-08, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:33:44,579] Trial 15 finished with value: 2655.322412 and parameters: {'eta': 0.22715422077386666, 'max_depth': 5, 'gamma': 0.0004395803399042971, 'subsample': 0.7975353462852024, 'colsample_bytree': 0.20960126485306685, 'min_child_weight': 8.870211414758396, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:33:59,369] Trial 16 finished with value: 2938.7385254 and parameters: {'eta': 0.020470837113062974, 'max_depth': 3, 'gamma': 7.419257642014852e-05, 'subsample': 0.9788164018931671, 'colsample_bytree': 0.5939373966343774, 'min_child_weight': 428.9533965700333, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:34:02,188] Trial 17 pruned. Trial was pruned at iteration 20.
[I 2023-05-18 08:34:08,147] Trial 18 finished with value: 3116.5992675999996 and parameters: {'eta': 0.24327928864822645, 'max_depth': 6, 'gamma': 1.1466581811220105, 'subsample': 0.7530862843170326, 'colsample_bytree': 0.3139055022235435, 'min_child_weight': 0.23603851815873542, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:34:24,527] Trial 19 finished with value: 2032.5863526 and parameters: {'eta': 0.06206309017505763, 'max_depth': 4, 'gamma': 0.006897334980015432, 'subsample': 0.8871394934596608, 'colsample_bytree': 0.5189155635955937, 'min_child_weight': 0.003234679897161147, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:34:27,811] Trial 20 pruned. Trial was pruned at iteration 20.
[I 2023-05-18 08:34:43,797] Trial 21 finished with value: 1980.9598632000002 and parameters: {'eta': 0.07369811168101653, 'max_depth': 4, 'gamma': 0.0007668260649319911, 'subsample': 0.9014082781083868, 'colsample_bytree': 0.4000670918676357, 'min_child_weight': 24.846489079459644, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:34:49,292] Trial 22 finished with value: 3066.3612304 and parameters: {'eta': 0.29099150477021357, 'max_depth': 4, 'gamma': 0.006327947188386288, 'subsample': 0.90036262160355, 'colsample_bytree': 0.48774414892187773, 'min_child_weight': 0.0027154241734887396, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:34:52,496] Trial 23 pruned. Trial was pruned at iteration 20.
[I 2023-05-18 08:35:09,317] Trial 24 finished with value: 2220.9262696 and parameters: {'eta': 0.039172881167147955, 'max_depth': 5, 'gamma': 4.57005482262753e-05, 'subsample': 0.8367880963189092, 'colsample_bytree': 0.5781307973504972, 'min_child_weight': 13.30281948737475, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.

Hyper-Parameter Optimization successfully finished.
  Number of finished trials: 25
  Best trial:
    Value: 1848.9831299999998
    Params:
      eta: 0.15100884665534847
      max_depth: 3
      gamma: 0.06213886551552638
      subsample: 0.996066629023483
      colsample_bytree: 0.5166347172920032
      min_child_weight: 1.4431814327658545
      booster: gbtree
    opt_rounds: 100
Model Training¶
In [5]:
np.random.seed(123)

opt_params = opt_param.copy()
n_rounds = opt_params["opt_rounds"]
del opt_params["opt_rounds"]

# Train Model with optimized hyperparameters
xgblss.train(opt_params,
             dtrain,
             num_boost_round=n_rounds
             )
Out[5]:
<xgboost.core.Booster at 0x29567c063d0>
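The fitted model can also be persisted for later use. The following is a rough sketch, not part of the original notebook, and assumes the fitted XGBoostLSS object is picklable with the installed version:

# Hypothetical example: persist the fitted XGBoostLSS object with pickle and restore it later.
import pickle

with open("xgblss_gamma.pkl", "wb") as f:
    pickle.dump(xgblss, f)

with open("xgblss_gamma.pkl", "rb") as f:
    xgblss_loaded = pickle.load(f)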
Prediction¶
In [6]:
# Set seed for reproducibility
torch.manual_seed(123)

# Number of samples to draw from predicted distribution
n_samples = 1000
quant_sel = [0.05, 0.95]   # Quantiles to calculate from predicted distribution

# Sample from predicted distribution
pred_samples = xgblss.predict(dtest,
                              pred_type="samples",
                              n_samples=n_samples,
                              seed=123)

# Calculate quantiles from predicted distribution
pred_quantiles = xgblss.predict(dtest,
                                pred_type="quantiles",
                                n_samples=n_samples,
                                quantiles=quant_sel)

# Returns predicted distributional parameters
pred_params = xgblss.predict(dtest,
                             pred_type="parameters")
In [7]:
pred_samples.head()
Out[7]:
 | y_sample0 | y_sample1 | y_sample2 | y_sample3 | y_sample4 | y_sample5 | y_sample6 | y_sample7 | y_sample8 | y_sample9 | ... | y_sample990 | y_sample991 | y_sample992 | y_sample993 | y_sample994 | y_sample995 | y_sample996 | y_sample997 | y_sample998 | y_sample999
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
0 | 1.876505 | 1.673667 | 2.938326 | 1.454809 | 2.399422 | 1.759965 | 1.460128 | 1.688740 | 1.784902 | 2.417931 | ... | 1.839925 | 1.887811 | 1.534427 | 1.326183 | 2.398682 | 0.998292 | 2.434605 | 0.991851 | 1.505856 | 1.574307 |
1 | 0.883587 | 1.291906 | 1.248149 | 0.884646 | 1.047550 | 0.854057 | 1.182763 | 0.746962 | 1.074761 | 0.996688 | ... | 0.876435 | 0.843717 | 0.897013 | 1.137278 | 0.555706 | 0.678625 | 1.023129 | 0.808541 | 0.920332 | 1.241407 |
2 | 1.030200 | 1.531222 | 1.095772 | 1.081247 | 1.793127 | 1.450662 | 1.419437 | 1.132455 | 1.276576 | 0.867061 | ... | 1.843388 | 1.766897 | 1.958323 | 1.276894 | 1.933600 | 1.407317 | 1.600355 | 1.875028 | 1.435953 | 1.653420 |
3 | 1.880502 | 0.746944 | 0.865230 | 1.270051 | 1.455336 | 1.308650 | 1.336325 | 1.357684 | 1.764968 | 1.272650 | ... | 1.868246 | 1.713741 | 1.608263 | 1.418744 | 1.705131 | 1.168589 | 2.756223 | 1.554872 | 1.339272 | 2.297941 |
4 | 5.162236 | 7.503895 | 6.175812 | 4.588247 | 4.577571 | 3.859889 | 4.225744 | 4.521331 | 5.302205 | 5.370818 | ... | 6.098175 | 4.018948 | 6.255476 | 4.507837 | 5.130795 | 3.461627 | 3.558900 | 4.876582 | 4.760717 | 5.481852 |
5 rows × 1000 columns
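The samples can be summarised into any statistic of interest. For example (a small sketch, not part of the original notebook), a per-observation mean and standard deviation of the predicted distribution:

# Aggregate the 1000 samples per test observation into summary statistics.
sample_mean = pred_samples.mean(axis=1)
sample_std = pred_samples.std(axis=1)
print(sample_mean.head())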
In [8]:
pred_quantiles.head()
Out[8]:
 | quant_0.05 | quant_0.95
---|---|---
0 | 1.227181 | 2.881343 |
1 | 0.515992 | 1.314277 |
2 | 0.907038 | 2.034687 |
3 | 1.101743 | 2.639123 |
4 | 3.463038 | 6.704498 |
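A quick way to gauge calibration is to check how often the observed test targets fall inside the predicted 90% interval. This is a small sketch (not part of the original notebook) that reuses y_test from the train/test split above:

# Empirical coverage of the central 90% prediction interval on the test set.
lower = pred_quantiles["quant_0.05"].values
upper = pred_quantiles["quant_0.95"].values
coverage = ((y_test >= lower) & (y_test <= upper)).mean()
print(f"Empirical coverage: {coverage:.3f} (nominal: 0.90)")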
In [9]:
pred_params.head()
Out[9]:
 | concentration | rate
---|---|---
0 | 15.969825 | 8.100084 |
1 | 13.882849 | 15.841588 |
2 | 16.768461 | 11.691557 |
3 | 14.617913 | 8.024559 |
4 | 23.413013 | 4.779128 |
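Since a Gamma distribution with concentration α and rate β has mean α/β, the predicted parameters can be turned into an implied point prediction (a small sketch, not part of the original notebook):

# The mean of Gamma(concentration, rate) is concentration / rate.
implied_mean = pred_params["concentration"] / pred_params["rate"]
print(implied_mean.head())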
SHAP Interpretability¶
In [10]:
# Partial Dependence Plot
pdp_df = pd.DataFrame(X_train, columns=feature_names)

xgblss.plot(pdp_df,
            parameter="concentration",
            feature=feature_names[0],
            plot_type="Partial_Dependence")
In [11]:
# Feature Importance
xgblss.plot(pdp_df,
            parameter="concentration",
            plot_type="Feature_Importance")
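The same plots can be produced for the second distributional parameter. For example (not part of the original notebook), the feature importance of the rate parameter reported above:

# Feature Importance for the rate parameter
xgblss.plot(pdp_df,
            parameter="rate",
            plot_type="Feature_Importance")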