Gamma Regression (California Housing Data)¶
Imports¶
In [12]:
from xgboostlss.model import *
from xgboostlss.distributions.Gamma import *
from sklearn import datasets
from sklearn.model_selection import train_test_split
import multiprocessing
Data¶
In [2]:
n_cpu = multiprocessing.cpu_count()
housing_data = datasets.fetch_california_housing()
X, y = housing_data["data"], housing_data["target"]
feature_names = housing_data["feature_names"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
dtrain = xgb.DMatrix(X_train, label=y_train, nthread=n_cpu)
dtest = xgb.DMatrix(X_test, nthread=n_cpu)
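Since the Gamma distribution is only defined for a strictly positive response, it is worth verifying that the target contains no zeros or negative values before fitting. The following check is not part of the original notebook and is purely optional:

# Optional sanity check: the Gamma distribution requires a strictly positive response.
assert (y > 0).all(), "Gamma requires a strictly positive response"
print(f"y ranges from {y.min():.2f} to {y.max():.2f}")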
Distribution Selection¶
In [3]:
# Specifies a Gamma distribution with exp response function and the option to stabilize the Gradient/Hessian. Type ?Gamma for an overview.
xgblss = XGBoostLSS(
    Gamma(stabilization="L2",   # Options are "None", "MAD", "L2".
          response_fn="exp",    # Function to transform the concentration and rate parameters, e.g., "exp" or "softplus".
          loss_fn="nll"         # Loss function. Options are "nll" (negative log-likelihood) or "crps" (continuous ranked probability score).
          )
)
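The response function determines how the raw, unconstrained boosting outputs are mapped to the strictly positive concentration and rate parameters of the Gamma distribution. As a rough illustration of the two options (not part of the original notebook; it assumes torch is available in the session, as the prediction cell below also uses torch.manual_seed without an explicit import):

# Illustration only: both response functions map any real number to a positive value.
raw_pred = torch.tensor([-2.0, 0.0, 3.0])
print(torch.exp(raw_pred))                     # exp: strictly positive, grows exponentially
print(torch.nn.functional.softplus(raw_pred))  # softplus: strictly positive, roughly linear for large inputs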
Hyper-Parameter Optimization¶
Any XGBoost hyperparameter can be tuned. The parameter dictionary needs to have the following structure:
- Float/Int sample_type
    - {"param_name": ["sample_type", {"low": low, "high": high, "log": log}]}
    - sample_type: str, Type of sampling, e.g., "float" or "int"
    - low: int or float, Lower endpoint of the range of suggested values
    - high: int or float, Upper endpoint of the range of suggested values
    - log: bool, Whether to sample the value on a log scale
    - Example: {"eta": ["float", {"low": 1e-5, "high": 1, "log": True}]}
- Categorical sample_type
    - {"param_name": ["sample_type", ["choice1", "choice2", "choice3", "..."]]}
    - sample_type: str, Must be "categorical"
    - choice1, choice2, choice3, ...: str, Possible choices for the parameter
    - Example: {"booster": ["categorical", ["gbtree", "dart"]]}
- For parameters without a tunable choice (needed, e.g., if tree_method = "gpu_hist" and gpu_id needs to be specified)
    - {"param_name": ["none", [value]]}
    - param_name: str, Name of the parameter
    - value: int, Value of the parameter
    - Example: {"gpu_id": ["none", [0]]}

Depending on which parameters are optimized, some of them might not be used. For example, when {"booster": ["categorical", ["gbtree", "gblinear"]]} and {"max_depth": ["int", {"low": 1, "high": 10, "log": False}]} are specified, max_depth is ignored in trials where gblinear is sampled, since gblinear has no such argument; see the small sketch below.
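As an illustration of this note (not used in the remainder of this example), such a dictionary could look as follows:

# Illustration only: max_depth is simply ignored in trials where "gblinear" is sampled.
param_dict_example = {
    "eta":       ["float", {"low": 1e-5, "high": 1, "log": True}],
    "max_depth": ["int", {"low": 1, "high": 10, "log": False}],
    "booster":   ["categorical", ["gbtree", "gblinear"]],
}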
In [4]:
param_dict = {
    "eta":              ["float", {"low": 1e-5, "high": 1, "log": True}],
    "max_depth":        ["int", {"low": 1, "high": 10, "log": False}],
    "gamma":            ["float", {"low": 1e-8, "high": 40, "log": True}],
    "subsample":        ["float", {"low": 0.2, "high": 1.0, "log": False}],
    "colsample_bytree": ["float", {"low": 0.2, "high": 1.0, "log": False}],
    "min_child_weight": ["float", {"low": 1e-8, "high": 500, "log": True}],
    "booster":          ["categorical", ["gbtree"]],
    # "tree_method":    ["categorical", ["auto", "approx", "hist", "gpu_hist"]],
    # "gpu_id":         ["none", [0]]
}

np.random.seed(123)
opt_param = xgblss.hyper_opt(param_dict,
                             dtrain,
                             num_boost_round=100,        # Number of boosting iterations.
                             nfold=5,                    # Number of cv-folds.
                             early_stopping_rounds=20,   # Number of early-stopping rounds.
                             max_minutes=5,              # Time budget in minutes, i.e., stop the study after the given number of minutes.
                             n_trials=None,              # Number of trials. If set to None, there is no limit on the number of trials.
                             silence=False,              # Controls the verbosity of the trials, i.e., the user can silence the trial outputs.
                             seed=123,                   # Seed used to generate the cv-folds.
                             hp_seed=None                # Seed for the random number generator used in the Bayesian hyperparameter search.
                             )
[I 2023-05-18 08:29:58,928] A new study created in memory with name: XGBoostLSS Hyper-Parameter Optimization
Progress bar is experimental (supported from v1.2.0). The interface can change in the future.
[I 2023-05-18 08:30:11,999] Trial 0 finished with value: 4221.315039 and parameters: {'eta': 0.0007956486113255149, 'max_depth': 4, 'gamma': 9.651293834004364e-06, 'subsample': 0.9299029412512503, 'colsample_bytree': 0.533212122378312, 'min_child_weight': 1.0645031471691002e-06, 'booster': 'gbtree'}. Best is trial 0 with value: 4221.315039.
[I 2023-05-18 08:30:31,329] Trial 1 finished with value: 4704.1411134 and parameters: {'eta': 1.5405304565767472e-05, 'max_depth': 8, 'gamma': 1.2567045481680053e-06, 'subsample': 0.8546851597908047, 'colsample_bytree': 0.7705937486648105, 'min_child_weight': 0.06607432187087299, 'booster': 'gbtree'}. Best is trial 0 with value: 4221.315039.
[I 2023-05-18 08:30:43,968] Trial 2 finished with value: 4700.0834962 and parameters: {'eta': 4.0285972682401886e-05, 'max_depth': 2, 'gamma': 2.9603310576396133e-08, 'subsample': 0.32250688445301023, 'colsample_bytree': 0.6536953349316124, 'min_child_weight': 1.4342277391569358e-06, 'booster': 'gbtree'}. Best is trial 0 with value: 4221.315039.
[I 2023-05-18 08:30:56,918] Trial 3 finished with value: 4719.9332028 and parameters: {'eta': 1.096369459232895e-05, 'max_depth': 2, 'gamma': 1.36363527254592, 'subsample': 0.2610232160164289, 'colsample_bytree': 0.7367492294157403, 'min_child_weight': 90.52248573370663, 'booster': 'gbtree'}. Best is trial 0 with value: 4221.315039.
[I 2023-05-18 08:31:13,594] Trial 4 finished with value: 3789.2708494 and parameters: {'eta': 0.0015973526799597118, 'max_depth': 4, 'gamma': 0.0028902185513508716, 'subsample': 0.7301170911858303, 'colsample_bytree': 0.8794236864892113, 'min_child_weight': 2.3385499629068365, 'booster': 'gbtree'}. Best is trial 4 with value: 3789.2708494.
[I 2023-05-18 08:31:17,490] Trial 5 finished with value: 17276.408789 and parameters: {'eta': 0.7754190340462471, 'max_depth': 8, 'gamma': 0.04779979308560174, 'subsample': 0.26819583759117727, 'colsample_bytree': 0.4716497365826704, 'min_child_weight': 1.5165151855944943e-05, 'booster': 'gbtree'}. Best is trial 4 with value: 3789.2708494.
[I 2023-05-18 08:31:32,696] Trial 6 finished with value: 1848.9831299999998 and parameters: {'eta': 0.15100884665534847, 'max_depth': 3, 'gamma': 0.06213886551552638, 'subsample': 0.996066629023483, 'colsample_bytree': 0.5166347172920032, 'min_child_weight': 1.4431814327658545, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:31:50,650] Trial 7 finished with value: 3203.0339846 and parameters: {'eta': 0.005276040226297436, 'max_depth': 9, 'gamma': 19.985885054603994, 'subsample': 0.20671620521186185, 'colsample_bytree': 0.9922070467612387, 'min_child_weight': 2.725232979902565, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:32:11,875] Trial 8 finished with value: 3689.2131346 and parameters: {'eta': 0.0013108750480960915, 'max_depth': 10, 'gamma': 0.0015842103186243096, 'subsample': 0.42178420458507704, 'colsample_bytree': 0.7291727860468711, 'min_child_weight': 0.044455321804948635, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:32:29,694] Trial 9 finished with value: 2934.2718262000003 and parameters: {'eta': 0.010754205262854151, 'max_depth': 7, 'gamma': 0.099098099327934, 'subsample': 0.5723889394701623, 'colsample_bytree': 0.41654520716493304, 'min_child_weight': 0.2447012703927633, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:32:32,618] Trial 10 finished with value: 4755.6319336 and parameters: {'eta': 0.8134628838612719, 'max_depth': 1, 'gamma': 25.87490764106699, 'subsample': 0.9969166801978794, 'colsample_bytree': 0.24272571572151835, 'min_child_weight': 0.0004636770815965044, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:32:49,288] Trial 11 finished with value: 2287.1541015999996 and parameters: {'eta': 0.049787864917925403, 'max_depth': 6, 'gamma': 0.08313887137568525, 'subsample': 0.5875299866894881, 'colsample_bytree': 0.4512383454552321, 'min_child_weight': 201.60772059390564, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:33:04,557] Trial 12 finished with value: 2209.2589844 and parameters: {'eta': 0.07676963722759728, 'max_depth': 5, 'gamma': 0.07566160689603878, 'subsample': 0.6558562323266741, 'colsample_bytree': 0.3720148355281364, 'min_child_weight': 264.9052227263956, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:33:19,171] Trial 13 finished with value: 2071.3895018 and parameters: {'eta': 0.09659622817124115, 'max_depth': 4, 'gamma': 0.0008264653817919475, 'subsample': 0.7849646456353283, 'colsample_bytree': 0.3371901953937099, 'min_child_weight': 239.81745359254714, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:33:33,561] Trial 14 finished with value: 2078.2393312 and parameters: {'eta': 0.1316313662302804, 'max_depth': 3, 'gamma': 0.00013080976774594984, 'subsample': 0.8509652593269317, 'colsample_bytree': 0.2909226669647212, 'min_child_weight': 2.7126575878655095e-08, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:33:44,579] Trial 15 finished with value: 2655.322412 and parameters: {'eta': 0.22715422077386666, 'max_depth': 5, 'gamma': 0.0004395803399042971, 'subsample': 0.7975353462852024, 'colsample_bytree': 0.20960126485306685, 'min_child_weight': 8.870211414758396, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:33:59,369] Trial 16 finished with value: 2938.7385254 and parameters: {'eta': 0.020470837113062974, 'max_depth': 3, 'gamma': 7.419257642014852e-05, 'subsample': 0.9788164018931671, 'colsample_bytree': 0.5939373966343774, 'min_child_weight': 428.9533965700333, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:34:02,188] Trial 17 pruned. Trial was pruned at iteration 20.
[I 2023-05-18 08:34:08,147] Trial 18 finished with value: 3116.5992675999996 and parameters: {'eta': 0.24327928864822645, 'max_depth': 6, 'gamma': 1.1466581811220105, 'subsample': 0.7530862843170326, 'colsample_bytree': 0.3139055022235435, 'min_child_weight': 0.23603851815873542, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:34:24,527] Trial 19 finished with value: 2032.5863526 and parameters: {'eta': 0.06206309017505763, 'max_depth': 4, 'gamma': 0.006897334980015432, 'subsample': 0.8871394934596608, 'colsample_bytree': 0.5189155635955937, 'min_child_weight': 0.003234679897161147, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:34:27,811] Trial 20 pruned. Trial was pruned at iteration 20.
[I 2023-05-18 08:34:43,797] Trial 21 finished with value: 1980.9598632000002 and parameters: {'eta': 0.07369811168101653, 'max_depth': 4, 'gamma': 0.0007668260649319911, 'subsample': 0.9014082781083868, 'colsample_bytree': 0.4000670918676357, 'min_child_weight': 24.846489079459644, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:34:49,292] Trial 22 finished with value: 3066.3612304 and parameters: {'eta': 0.29099150477021357, 'max_depth': 4, 'gamma': 0.006327947188386288, 'subsample': 0.90036262160355, 'colsample_bytree': 0.48774414892187773, 'min_child_weight': 0.0027154241734887396, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.
[I 2023-05-18 08:34:52,496] Trial 23 pruned. Trial was pruned at iteration 20.
[I 2023-05-18 08:35:09,317] Trial 24 finished with value: 2220.9262696 and parameters: {'eta': 0.039172881167147955, 'max_depth': 5, 'gamma': 4.57005482262753e-05, 'subsample': 0.8367880963189092, 'colsample_bytree': 0.5781307973504972, 'min_child_weight': 13.30281948737475, 'booster': 'gbtree'}. Best is trial 6 with value: 1848.9831299999998.

Hyper-Parameter Optimization successfully finished.
  Number of finished trials: 25
  Best trial:
    Value: 1848.9831299999998
    Params:
      eta: 0.15100884665534847
      max_depth: 3
      gamma: 0.06213886551552638
      subsample: 0.996066629023483
      colsample_bytree: 0.5166347172920032
      min_child_weight: 1.4431814327658545
      booster: gbtree
    opt_rounds: 100
Model Training¶
In [5]:
np.random.seed(123)

opt_params = opt_param.copy()
n_rounds = opt_params["opt_rounds"]
del opt_params["opt_rounds"]

# Train Model with optimized hyperparameters
xgblss.train(opt_params,
             dtrain,
             num_boost_round=n_rounds
             )
Out[5]:
<xgboost.core.Booster at 0x29567c063d0>
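The fitted model can also be persisted for later use. The following is a rough sketch, not part of the original notebook, and assumes the fitted XGBoostLSS object is picklable with the installed version:

# Hypothetical example: persist the fitted XGBoostLSS object with pickle and restore it later.
import pickle

with open("xgblss_gamma.pkl", "wb") as f:
    pickle.dump(xgblss, f)

with open("xgblss_gamma.pkl", "rb") as f:
    xgblss_loaded = pickle.load(f)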
Prediction¶
In [6]:
# Set seed for reproducibility
torch.manual_seed(123)

# Number of samples to draw from predicted distribution
n_samples = 1000
quant_sel = [0.05, 0.95]   # Quantiles to calculate from predicted distribution

# Sample from predicted distribution
pred_samples = xgblss.predict(dtest,
                              pred_type="samples",
                              n_samples=n_samples,
                              seed=123)

# Calculate quantiles from predicted distribution
pred_quantiles = xgblss.predict(dtest,
                                pred_type="quantiles",
                                n_samples=n_samples,
                                quantiles=quant_sel)

# Returns predicted distributional parameters
pred_params = xgblss.predict(dtest,
                             pred_type="parameters")
In [7]:
pred_samples.head()
Out[7]:
 | y_sample0 | y_sample1 | y_sample2 | y_sample3 | y_sample4 | y_sample5 | y_sample6 | y_sample7 | y_sample8 | y_sample9 | ... | y_sample990 | y_sample991 | y_sample992 | y_sample993 | y_sample994 | y_sample995 | y_sample996 | y_sample997 | y_sample998 | y_sample999
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
0 | 1.876505 | 1.673667 | 2.938326 | 1.454809 | 2.399422 | 1.759965 | 1.460128 | 1.688740 | 1.784902 | 2.417931 | ... | 1.839925 | 1.887811 | 1.534427 | 1.326183 | 2.398682 | 0.998292 | 2.434605 | 0.991851 | 1.505856 | 1.574307 |
1 | 0.883587 | 1.291906 | 1.248149 | 0.884646 | 1.047550 | 0.854057 | 1.182763 | 0.746962 | 1.074761 | 0.996688 | ... | 0.876435 | 0.843717 | 0.897013 | 1.137278 | 0.555706 | 0.678625 | 1.023129 | 0.808541 | 0.920332 | 1.241407 |
2 | 1.030200 | 1.531222 | 1.095772 | 1.081247 | 1.793127 | 1.450662 | 1.419437 | 1.132455 | 1.276576 | 0.867061 | ... | 1.843388 | 1.766897 | 1.958323 | 1.276894 | 1.933600 | 1.407317 | 1.600355 | 1.875028 | 1.435953 | 1.653420 |
3 | 1.880502 | 0.746944 | 0.865230 | 1.270051 | 1.455336 | 1.308650 | 1.336325 | 1.357684 | 1.764968 | 1.272650 | ... | 1.868246 | 1.713741 | 1.608263 | 1.418744 | 1.705131 | 1.168589 | 2.756223 | 1.554872 | 1.339272 | 2.297941 |
4 | 5.162236 | 7.503895 | 6.175812 | 4.588247 | 4.577571 | 3.859889 | 4.225744 | 4.521331 | 5.302205 | 5.370818 | ... | 6.098175 | 4.018948 | 6.255476 | 4.507837 | 5.130795 | 3.461627 | 3.558900 | 4.876582 | 4.760717 | 5.481852 |
5 rows × 1000 columns
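The samples can be summarised into any statistic of interest. For example (a small sketch, not part of the original notebook), a per-observation mean and standard deviation of the predicted distribution:

# Aggregate the 1000 samples per test observation into summary statistics.
sample_mean = pred_samples.mean(axis=1)
sample_std = pred_samples.std(axis=1)
print(sample_mean.head())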
In [8]:
pred_quantiles.head()
Out[8]:
 | quant_0.05 | quant_0.95
---|---|---
0 | 1.227181 | 2.881343 |
1 | 0.515992 | 1.314277 |
2 | 0.907038 | 2.034687 |
3 | 1.101743 | 2.639123 |
4 | 3.463038 | 6.704498 |
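A quick way to gauge calibration is to check how often the observed test targets fall inside the predicted 90% interval. This is a small sketch (not part of the original notebook) that reuses y_test from the train/test split above:

# Empirical coverage of the central 90% prediction interval on the test set.
lower = pred_quantiles["quant_0.05"].values
upper = pred_quantiles["quant_0.95"].values
coverage = ((y_test >= lower) & (y_test <= upper)).mean()
print(f"Empirical coverage: {coverage:.3f} (nominal: 0.90)")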
In [9]:
pred_params.head()
Out[9]:
 | concentration | rate
---|---|---
0 | 15.969825 | 8.100084 |
1 | 13.882849 | 15.841588 |
2 | 16.768461 | 11.691557 |
3 | 14.617913 | 8.024559 |
4 | 23.413013 | 4.779128 |
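Since a Gamma distribution with concentration α and rate β has mean α/β, the predicted parameters can be turned into an implied point prediction (a small sketch, not part of the original notebook):

# The mean of Gamma(concentration, rate) is concentration / rate.
implied_mean = pred_params["concentration"] / pred_params["rate"]
print(implied_mean.head())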
SHAP Interpretability¶
In [10]:
# Partial Dependence Plot
pdp_df = pd.DataFrame(X_train, columns=feature_names)

xgblss.plot(pdp_df,
            parameter="concentration",
            feature=feature_names[0],
            plot_type="Partial_Dependence")
In [11]:
# Feature Importance
xgblss.plot(pdp_df,
            parameter="concentration",
            plot_type="Feature_Importance")
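The same plots can be produced for the second distributional parameter. For example (not part of the original notebook), the feature importance of the rate parameter reported above:

# Feature Importance for the rate parameter
xgblss.plot(pdp_df,
            parameter="rate",
            plot_type="Feature_Importance")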