Gamma Regression (California Housing Data)¶
Imports¶
In [2]:
from lightgbmlss.model import *
from lightgbmlss.distributions.Gamma import *

from sklearn import datasets
from sklearn.model_selection import train_test_split

# Explicit imports for names used in the cells below.
import lightgbm as lgb
import numpy as np
import pandas as pd
import torch
Data¶
In [3]:
housing_data = datasets.fetch_california_housing()
X, y = housing_data["data"], housing_data["target"]
feature_names = housing_data["feature_names"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
dtrain = lgb.Dataset(X_train, label=y_train)
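The Gamma distribution has support on (0, ∞), so it requires a strictly positive target; the median house values in this dataset satisfy this. A quick check (an illustrative addition, not part of the original notebook):

# Sanity check: Gamma regression requires a strictly positive target.
assert (y_train > 0).all() and (y_test > 0).all()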
Distribution Selection¶
In [4]:
# Specifies Gamma distribution with softplus response function and the option to stabilize Gradient/Hessian. Type ?Gamma for an overview.
lgblss = LightGBMLSS(
    Gamma(stabilization="L2",      # Options are "None", "MAD", "L2".
          response_fn="softplus",  # Function to transform the concentration and rate parameters, e.g., "exp" or "softplus".
          loss_fn="nll"            # Loss function. Options are "nll" (negative log-likelihood) or "crps" (continuous ranked probability score).
          )
)
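The response function maps the unconstrained booster outputs to the admissible parameter space; both the concentration and rate of a Gamma distribution must be strictly positive. A minimal sketch of what the "softplus" choice does, using only PyTorch (illustrative, not part of the original notebook):

import torch
import torch.nn.functional as F

# softplus(x) = log(1 + exp(x)) maps any real-valued score to a positive number,
# as required for the Gamma concentration and rate parameters.
raw_scores = torch.tensor([-5.0, 0.0, 5.0])
print(F.softplus(raw_scores))  # tensor([0.0067, 0.6931, 5.0067])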
Hyper-Parameter Optimization¶
Any LightGBM hyperparameter can be tuned, where the structure of the parameter dictionary needs to be as follows:

- Float/Int sample_type
    - {"param_name": ["sample_type", {"low": low, "high": high, "log": log}]}
    - sample_type: str, Type of sampling, e.g., "float" or "int"
    - low: int or float, Lower endpoint of the range of suggested values
    - high: int or float, Upper endpoint of the range of suggested values
    - log: bool, Flag to sample the value from the log domain or not
    - Example: {"eta": ["float", {"low": 1e-5, "high": 1, "log": True}]}
- Categorical sample_type
    - {"param_name": ["sample_type", ["choice1", "choice2", "choice3", "..."]]}
    - sample_type: str, must be "categorical"
    - choice1, choice2, choice3, ...: str, Possible choices for the parameter
    - Example: {"boosting": ["categorical", ["gbdt", "dart"]]}
- For parameters without a tunable choice (needed, e.g., if device_type = "gpu" and the GPU device id has to be fixed rather than tuned)
    - {"param_name": ["none", [value]]}
    - param_name: str, Name of the parameter
    - value: Fixed value of the parameter
    - Example: {"gpu_device_id": ["none", [0]]}
In [5]:
param_dict = {
    "eta":              ["float", {"low": 1e-5, "high": 1,   "log": True}],
    "max_depth":        ["int",   {"low": 1,    "high": 10,  "log": False}],
    "subsample":        ["float", {"low": 0.2,  "high": 1.0, "log": False}],
    "feature_fraction": ["float", {"low": 0.2,  "high": 1.0, "log": False}],
    "boosting":         ["categorical", ["gbdt"]],
}

np.random.seed(123)
opt_param = lgblss.hyper_opt(param_dict,
                             dtrain,
                             num_boost_round=100,       # Number of boosting iterations.
                             nfold=5,                   # Number of cv-folds.
                             early_stopping_rounds=20,  # Number of early-stopping rounds.
                             max_minutes=20,            # Time budget in minutes, i.e., stop study after the given number of minutes.
                             n_trials=30,               # The number of trials. If this argument is set to None, there is no limitation on the number of trials.
                             silence=False,             # Controls the verbosity of the trial, i.e., user can silence the outputs of the trial.
                             seed=123,                  # Seed used to generate cv-folds.
                             hp_seed=None               # Seed for random number generator used in the Bayesian hyperparameter search.
                             )
[I 2023-08-11 12:29:16,191] A new study created in memory with name: LightGBMLSS Hyper-Parameter Optimization
[I 2023-08-11 12:29:30,946] Trial 0 finished with value: 3655.994319219647 and parameters: {'eta': 0.0055601230544869195, 'max_depth': 7, 'subsample': 0.9477091024240025, 'feature_fraction': 0.9480677544137415, 'boosting': 'gbdt'}. Best is trial 0 with value: 3655.994319219647.
[I 2023-08-11 12:29:43,746] Trial 1 finished with value: 4227.673235529888 and parameters: {'eta': 0.0023020600298896826, 'max_depth': 4, 'subsample': 0.8651585394401682, 'feature_fraction': 0.8334419259540924, 'boosting': 'gbdt'}. Best is trial 0 with value: 3655.994319219647.
[I 2023-08-11 12:29:56,252] Trial 2 finished with value: 4339.068493424474 and parameters: {'eta': 0.0027858552626389625, 'max_depth': 4, 'subsample': 0.7481047090926836, 'feature_fraction': 0.28093366785144963, 'boosting': 'gbdt'}. Best is trial 0 with value: 3655.994319219647.
[I 2023-08-11 12:30:09,051] Trial 3 finished with value: 4654.563387175151 and parameters: {'eta': 0.000444156259151128, 'max_depth': 3, 'subsample': 0.6107418507428386, 'feature_fraction': 0.3863125393663567, 'boosting': 'gbdt'}. Best is trial 0 with value: 3655.994319219647.
[I 2023-08-11 12:30:22,971] Trial 4 finished with value: 4561.714498700547 and parameters: {'eta': 0.0006413188632848346, 'max_depth': 7, 'subsample': 0.8415936831413335, 'feature_fraction': 0.5304303058022481, 'boosting': 'gbdt'}. Best is trial 0 with value: 3655.994319219647.
[I 2023-08-11 12:30:36,725] Trial 5 finished with value: 3940.645249758013 and parameters: {'eta': 0.004514061436631957, 'max_depth': 8, 'subsample': 0.6813546073094912, 'feature_fraction': 0.37103056892915803, 'boosting': 'gbdt'}. Best is trial 0 with value: 3655.994319219647.
[I 2023-08-11 12:30:49,208] Trial 6 finished with value: 2313.9436995982674 and parameters: {'eta': 0.34104448990548714, 'max_depth': 2, 'subsample': 0.49381730590587314, 'feature_fraction': 0.48051971736040777, 'boosting': 'gbdt'}. Best is trial 6 with value: 2313.9436995982674.
[I 2023-08-11 12:31:01,412] Trial 7 finished with value: 4716.048776510342 and parameters: {'eta': 4.121739938983374e-05, 'max_depth': 3, 'subsample': 0.8639944116265685, 'feature_fraction': 0.8978770904112527, 'boosting': 'gbdt'}. Best is trial 6 with value: 2313.9436995982674.
[I 2023-08-11 12:31:15,571] Trial 8 finished with value: 4633.214125114749 and parameters: {'eta': 0.00030771118259821184, 'max_depth': 9, 'subsample': 0.8618195165072102, 'feature_fraction': 0.9999114906902047, 'boosting': 'gbdt'}. Best is trial 6 with value: 2313.9436995982674.
[I 2023-08-11 12:31:29,560] Trial 9 finished with value: 3478.353837243644 and parameters: {'eta': 0.01610742689419875, 'max_depth': 9, 'subsample': 0.33497542766410426, 'feature_fraction': 0.29392943880535444, 'boosting': 'gbdt'}. Best is trial 6 with value: 2313.9436995982674.
[I 2023-08-11 12:31:41,953] Trial 10 finished with value: 2325.067843452219 and parameters: {'eta': 0.8686888033261265, 'max_depth': 1, 'subsample': 0.4392043546139532, 'feature_fraction': 0.6492156437954112, 'boosting': 'gbdt'}. Best is trial 6 with value: 2313.9436995982674.
[I 2023-08-11 12:31:54,009] Trial 11 finished with value: 2380.659260562121 and parameters: {'eta': 0.7699218284463614, 'max_depth': 1, 'subsample': 0.4581350563769593, 'feature_fraction': 0.6781897708396918, 'boosting': 'gbdt'}. Best is trial 6 with value: 2313.9436995982674.
[I 2023-08-11 12:32:06,259] Trial 12 finished with value: 2300.620681894269 and parameters: {'eta': 0.9611466035281578, 'max_depth': 1, 'subsample': 0.4748729196012275, 'feature_fraction': 0.5804945378046049, 'boosting': 'gbdt'}. Best is trial 12 with value: 2300.620681894269.
[I 2023-08-11 12:32:18,560] Trial 13 finished with value: 3140.364558904482 and parameters: {'eta': 0.13151506526519777, 'max_depth': 1, 'subsample': 0.20986359097182283, 'feature_fraction': 0.49641299908154024, 'boosting': 'gbdt'}. Best is trial 12 with value: 2300.620681894269.
[I 2023-08-11 12:32:31,398] Trial 14 finished with value: 2607.970802667295 and parameters: {'eta': 0.13257582555637926, 'max_depth': 3, 'subsample': 0.5234273923433372, 'feature_fraction': 0.560112443439662, 'boosting': 'gbdt'}. Best is trial 12 with value: 2300.620681894269.
[I 2023-08-11 12:32:44,762] Trial 15 finished with value: 2385.2393013330156 and parameters: {'eta': 0.11688247797482056, 'max_depth': 5, 'subsample': 0.5485181191665621, 'feature_fraction': 0.7236071396551794, 'boosting': 'gbdt'}. Best is trial 12 with value: 2300.620681894269.
[I 2023-08-11 12:32:56,986] Trial 16 finished with value: 1972.3296999647914 and parameters: {'eta': 0.9322627775031824, 'max_depth': 2, 'subsample': 0.3884067129956542, 'feature_fraction': 0.46443454447840615, 'boosting': 'gbdt'}. Best is trial 16 with value: 1972.3296999647914.
[I 2023-08-11 12:33:09,216] Trial 17 finished with value: 3338.4070116064154 and parameters: {'eta': 0.050131523125958345, 'max_depth': 2, 'subsample': 0.3614410986311337, 'feature_fraction': 0.20670293691731673, 'boosting': 'gbdt'}. Best is trial 16 with value: 1972.3296999647914.
[I 2023-08-11 12:33:13,236] Trial 18 finished with value: 3389.8614075906107 and parameters: {'eta': 0.986925756263617, 'max_depth': 5, 'subsample': 0.6167322950411971, 'feature_fraction': 0.5972303136885762, 'boosting': 'gbdt'}. Best is trial 16 with value: 1972.3296999647914.
[I 2023-08-11 12:33:16,990] Trial 19 pruned. Trial was pruned at iteration 30.
[I 2023-08-11 12:33:30,329] Trial 20 finished with value: 1808.1962308645848 and parameters: {'eta': 0.3847448879651696, 'max_depth': 6, 'subsample': 0.2765534220110189, 'feature_fraction': 0.4489981481795888, 'boosting': 'gbdt'}. Best is trial 20 with value: 1808.1962308645848.
[I 2023-08-11 12:33:45,265] Trial 21 finished with value: 1899.1444205576186 and parameters: {'eta': 0.3036015753737849, 'max_depth': 6, 'subsample': 0.23678743004304076, 'feature_fraction': 0.4675647812287519, 'boosting': 'gbdt'}. Best is trial 20 with value: 1808.1962308645848.
[I 2023-08-11 12:33:58,702] Trial 22 finished with value: 1908.5410750844935 and parameters: {'eta': 0.29460827340472634, 'max_depth': 6, 'subsample': 0.237820065177013, 'feature_fraction': 0.44683152950742716, 'boosting': 'gbdt'}. Best is trial 20 with value: 1808.1962308645848.
[I 2023-08-11 12:34:12,440] Trial 23 finished with value: 1929.1341625507328 and parameters: {'eta': 0.3229800617391119, 'max_depth': 6, 'subsample': 0.2055577261442562, 'feature_fraction': 0.43369472632611744, 'boosting': 'gbdt'}. Best is trial 20 with value: 1808.1962308645848.
[I 2023-08-11 12:34:25,588] Trial 24 finished with value: 2699.3212142027087 and parameters: {'eta': 0.07071121976729182, 'max_depth': 6, 'subsample': 0.2792800081576346, 'feature_fraction': 0.4327598617720121, 'boosting': 'gbdt'}. Best is trial 20 with value: 1808.1962308645848.
[I 2023-08-11 12:34:39,033] Trial 25 finished with value: 1917.4633105999606 and parameters: {'eta': 0.27555064079028785, 'max_depth': 7, 'subsample': 0.28227436090885094, 'feature_fraction': 0.5387936166895363, 'boosting': 'gbdt'}. Best is trial 20 with value: 1808.1962308645848.
[I 2023-08-11 12:34:41,956] Trial 26 pruned. Trial was pruned at iteration 20.
[I 2023-08-11 12:34:55,636] Trial 27 finished with value: 2011.3551667558138 and parameters: {'eta': 0.23623752924982647, 'max_depth': 6, 'subsample': 0.21095407561350688, 'feature_fraction': 0.5018294529971035, 'boosting': 'gbdt'}. Best is trial 20 with value: 1808.1962308645848.
[I 2023-08-11 12:34:59,898] Trial 28 pruned. Trial was pruned at iteration 31.
[I 2023-08-11 12:35:02,824] Trial 29 pruned. Trial was pruned at iteration 20.

Hyper-Parameter Optimization successfully finished.
  Number of finished trials: 30
  Best trial:
    Value: 1808.1962308645848
    Params:
      eta: 0.3847448879651696
      max_depth: 6
      subsample: 0.2765534220110189
      feature_fraction: 0.4489981481795888
      boosting: gbdt
    opt_rounds: 100
Model Training¶
In [6]:
np.random.seed(123)

opt_params = opt_param.copy()
n_rounds = opt_params["opt_rounds"]
del opt_params["opt_rounds"]

# Train Model with optimized hyperparameters
lgblss.train(opt_params,
             dtrain,
             num_boost_round=n_rounds
             )
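Optionally, the fitted model can be persisted. A minimal sketch, under the assumption that the trained lgb.Booster is exposed as lgblss.booster (as in the LightGBMLSS source; verify for your version):

# Assumption: the underlying lgb.Booster is available as `lgblss.booster`.
# If so, the standard LightGBM save mechanism applies.
lgblss.booster.save_model("lgblss_gamma.txt")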
Prediction¶
In [7]:
# Set seed for reproducibility
torch.manual_seed(123)

# Number of samples to draw from predicted distribution
n_samples = 1000
quant_sel = [0.05, 0.95]  # Quantiles to calculate from predicted distribution

# Sample from predicted distribution
pred_samples = lgblss.predict(X_test,
                              pred_type="samples",
                              n_samples=n_samples,
                              seed=123)

# Calculate quantiles from predicted distribution
pred_quantiles = lgblss.predict(X_test,
                                pred_type="quantiles",
                                n_samples=n_samples,
                                quantiles=quant_sel)

# Returns predicted distributional parameters
pred_params = lgblss.predict(X_test,
                             pred_type="parameters")
In [8]:
pred_samples.head()
Out[8]:
| | y_sample0 | y_sample1 | y_sample2 | y_sample3 | y_sample4 | y_sample5 | y_sample6 | y_sample7 | y_sample8 | y_sample9 | ... | y_sample990 | y_sample991 | y_sample992 | y_sample993 | y_sample994 | y_sample995 | y_sample996 | y_sample997 | y_sample998 | y_sample999 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.803845 | 2.055054 | 3.721187 | 2.103017 | 2.785087 | 1.427447 | 2.607677 | 1.835255 | 2.056286 | 1.429045 | ... | 1.519037 | 1.412902 | 1.625722 | 1.990941 | 1.041016 | 1.458764 | 2.493577 | 2.022398 | 2.520749 | 1.871148 |
1 | 0.955117 | 0.304340 | 0.505187 | 0.712550 | 1.216954 | 0.933732 | 0.380238 | 1.482657 | 0.736652 | 0.427856 | ... | 1.171600 | 1.010972 | 0.735717 | 1.319169 | 1.202447 | 0.967412 | 0.966288 | 1.435245 | 0.821253 | 0.793779 |
2 | 1.144286 | 3.009341 | 1.845451 | 3.251571 | 2.020669 | 1.400807 | 2.169500 | 1.314221 | 1.898889 | 1.066060 | ... | 1.997797 | 1.747813 | 2.150669 | 2.500485 | 1.338773 | 1.857834 | 1.787327 | 1.801178 | 2.843899 | 1.567025 |
3 | 1.808418 | 3.125320 | 1.761757 | 1.979179 | 2.679116 | 1.515085 | 1.274946 | 1.122176 | 2.836676 | 2.317276 | ... | 1.435263 | 1.846332 | 1.362321 | 1.321298 | 1.959441 | 1.562435 | 1.587221 | 1.903269 | 1.783693 | 1.171151 |
4 | 4.249149 | 3.424617 | 3.813255 | 3.438753 | 2.999937 | 3.809508 | 3.346083 | 2.839856 | 3.459303 | 3.179483 | ... | 3.772136 | 3.544357 | 3.080696 | 6.813697 | 4.206345 | 2.519868 | 3.878748 | 5.277604 | 4.116862 | 3.673533 |
5 rows × 1000 columns
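The drawn samples can also be aggregated directly; for instance, the quantiles displayed in the next cell can be approximated row-wise from the samples (an illustrative addition; expect small Monte Carlo differences to pred_quantiles):

# Empirical 5%/95% quantiles over the 1000 drawn samples per observation.
pred_samples.quantile(q=[0.05, 0.95], axis=1).T.head()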
In [9]:
pred_quantiles.head()
Out[9]:
| | quant_0.05 | quant_0.95 |
|---|---|---|
0 | 1.104986 | 2.876080 |
1 | 0.493789 | 1.469833 |
2 | 0.998464 | 2.626445 |
3 | 1.021038 | 2.662356 |
4 | 2.412530 | 5.914484 |
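Since these are the 5% and 95% quantiles, roughly 90% of the held-out targets should fall inside the predicted intervals. A quick coverage check (illustrative, not part of the original notebook):

# Fraction of test observations inside the predicted 90% interval.
covered = (y_test >= pred_quantiles["quant_0.05"].values) & (y_test <= pred_quantiles["quant_0.95"].values)
print(f"Empirical coverage: {covered.mean():.3f}")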
In [10]:
pred_params.head()
Out[10]:
| | concentration | rate |
|---|---|---|
0 | 12.671469 | 6.625142 |
1 | 9.840599 | 10.347679 |
2 | 12.215254 | 7.179342 |
3 | 11.811074 | 6.747012 |
4 | 13.350488 | 3.346810 |
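For a Gamma distribution the mean equals concentration / rate, so the predicted parameters imply a point prediction that should closely match the Monte Carlo average of the drawn samples (an illustrative consistency check):

# Analytic mean implied by the predicted parameters vs. the sample average;
# the two should be nearly identical up to sampling noise.
analytic_mean = pred_params["concentration"] / pred_params["rate"]
empirical_mean = pred_samples.mean(axis=1)
print(analytic_mean.head())
print(empirical_mean.head())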
SHAP Interpretability¶
In [11]:
# Partial Dependence Plot
shap_df = pd.DataFrame(X_train, columns=feature_names)

lgblss.plot(shap_df,
            parameter="concentration",
            feature=feature_names[0],
            plot_type="Partial_Dependence")
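The same call can be issued for the second distributional parameter; only the parameter argument changes (an illustrative variation of the cell above):

# Partial Dependence Plot for the "rate" parameter.
lgblss.plot(shap_df,
            parameter="rate",
            feature=feature_names[0],
            plot_type="Partial_Dependence")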
In [12]:
# Feature Importance
lgblss.plot(shap_df,
            parameter="concentration",
            plot_type="Feature_Importance")