5.1 Introduction
5.2 Background: Hyperopt for Optimization
- A search domain,
- An objective function,
- An optimization algorithm.
from hyperopt import hp

# Conditional search space: each branch carries its own hyperparameters,
# and a branch's hyperparameters are only sampled when that branch is chosen.
space = hp.choice('my_conditional',
                  [
                      ('case 1', 1 + hp.lognormal('c1', 0, 1)),
                      ('case 2', hp.uniform('c2', -10, 10)),
                      ('case 3', hp.choice('c3', ['a', 'b', 'c'])),
                  ])
The `fmin` function carries out the optimization, and stores results of the search to a database (e.g. either a simple Python list or a MongoDB instance). The `fmin` call carries out the simple analysis of finding the best-performing configuration, and returns that to the caller. The `fmin` call can use multiple workers when using the MongoDB backend, to implement parallel model selection on a compute cluster.

5.3 Scikit-Learn Model Selection as a Search Problem
5.4 Example Usage
from hpsklearn import HyperoptEstimator

# Load data
train_data, train_label, test_data, test_label = load_my_data()
# Create the estimator object
estim = HyperoptEstimator()
# Search the space of classifiers and preprocessing steps and their
# respective hyperparameters in scikit-learn to fit a model to the data
estim.fit(train_data, train_label)
# Make a prediction using the optimized model
prediction = estim.predict(test_data)
# Report the accuracy of the classifier on a given set of data
score = estim.score(test_data, test_label)
# Return instances of the classifier and preprocessing steps
model = estim.best_model()
from hpsklearn import HyperoptEstimator
from hyperopt import tpe

# Use the Tree of Parzen Estimators (TPE) search algorithm, cap the search
# at 150 evaluations, and abort any single trial that runs past 60 seconds.
estim = HyperoptEstimator(algo=tpe.suggest,
                          max_evals=150,
                          trial_timeout=60)
from functools import partial  # bind p_suggest into mix.suggest

from hpsklearn import HyperoptEstimator
from hyperopt import anneal, rand, tpe, mix

# define an algorithm that searches randomly 5% of the time,
# uses TPE 75% of the time, and uses annealing 20% of the time
mix_algo = partial(mix.suggest, p_suggest=[
    (0.05, rand.suggest),
    (0.75, tpe.suggest),
    (0.20, anneal.suggest)])
estim = HyperoptEstimator(algo=mix_algo,
                          max_evals=150,
                          trial_timeout=60)
from hpsklearn import HyperoptEstimator, svc

# limit the search to only SVC models
estim = HyperoptEstimator(classifier=svc('my_svc'))
from hpsklearn import HyperoptEstimator, random_forest, svc, knn
from hyperopt import hp

# restrict the space to contain only random forest,
# k-nearest neighbors, and SVC models.
clf = hp.choice('my_name',
                [random_forest('my_name.random_forest'),
                 svc('my_name.svc'),
                 knn('my_name.knn')])
estim = HyperoptEstimator(classifier=clf)
from hpsklearn import HyperoptEstimator, svc_rbf

# Restrict the search to SVC models using only the RBF kernel.
estim = HyperoptEstimator(classifier=svc_rbf('my_svc'))
Alternatively, a subset of kernels to search over can be passed directly to the more general `svc` component.
from hpsklearn import HyperoptEstimator, svc

# Search SVC models, but only over the linear and sigmoid kernels.
estim = HyperoptEstimator(
    classifier=svc('my_svc',
                   kernels=['linear',
                            'sigmoid']))
from hpsklearn import HyperoptEstimator, pca

# Use PCA as the only preprocessing step.
estim = HyperoptEstimator(preprocessing=[pca('my_pca')])
from hpsklearn import HyperoptEstimator, pca, normalizer, standard_scaler
from hyperopt import hp

# Choose among four preprocessing pipelines: PCA alone, PCA followed by
# normalization, standard scaling alone, or no preprocessing at all.
preproc = hp.choice('my_name',
                    [[pca('my_name.pca')],
                     [pca('my_name.pca'), normalizer('my_name.norm')],
                     [standard_scaler('my_name.std_scaler')],
                     []])
estim = HyperoptEstimator(preprocessing=preproc)
from hpsklearn import HyperoptEstimator, \
                      any_sparse_classifier, \
                      any_text_preprocessing
from hyperopt import tpe

# Text-classification search: any sparse-input classifier combined with
# any text preprocessing step, driven by the TPE algorithm.
estim = HyperoptEstimator(
    algo=tpe.suggest,
    classifier=any_sparse_classifier('my_clf'),
    preprocessing=any_text_preprocessing('my_pp'),
    max_evals=200,
    trial_timeout=60)
from hpsklearn import HyperoptEstimator, pca, svc_poly

# Fix the pipeline: whitened PCA followed by a degree-3 polynomial SVC.
estim = HyperoptEstimator(
    preprocessing=[pca('my_pca', whiten=True)],
    classifier=svc_poly('my_poly', degree=3))
from hpsklearn import HyperoptEstimator, pca, sgd
from hyperopt import hp
import numpy as np

# Custom distributions for the SGD classifier's hyperparameters:
# a weighted choice over loss functions, a uniform choice over penalties,
# and a log-uniform distribution over the regularization strength alpha.
sgd_loss = hp.pchoice('loss',
                      [(0.50, 'hinge'),
                       (0.25, 'log'),
                       (0.25, 'huber')])
sgd_penalty = hp.choice('penalty',
                        ['l2', 'elasticnet'])
sgd_alpha = hp.loguniform('alpha',
                          low=np.log(1e-5),
                          high=np.log(1))
estim = HyperoptEstimator(
    classifier=sgd('my_sgd',
                   loss=sgd_loss,
                   penalty=sgd_penalty,
                   alpha=sgd_alpha))
All of the components available to the user can be found in the `components.py` file. A complete working example of using hyperopt-sklearn to find a model for the 20 newsgroups data set is shown below.
from hpsklearn import HyperoptEstimator, tfidf, any_sparse_classifier
from sklearn.datasets import fetch_20newsgroups
from hyperopt import tpe
import numpy as np

# Download data and split training and test sets
train = fetch_20newsgroups(subset='train')
test = fetch_20newsgroups(subset='test')
X_train = train.data
y_train = train.target
X_test = test.data
y_test = test.target
# TF-IDF features feeding any sparse-capable classifier, searched with TPE;
# each trial is aborted after 180 seconds.
estim = HyperoptEstimator(
    classifier=any_sparse_classifier('clf'),
    preprocessing=[tfidf('tfidf')],
    algo=tpe.suggest,
    trial_timeout=180)
estim.fit(X_train, y_train)
print(estim.score(X_test, y_test))
print(estim.best_model())
5.5 Experiments
5.6 Discussion and Future Work
| MNIST                 |          | 20 Newsgroups    |         | Convex Shapes    |          |
|-----------------------|----------|------------------|---------|------------------|----------|
| Approach              | Accuracy | Approach         | F-Score | Approach         | Accuracy |
| Committee of convnets | 99.8%    | CFC              | 0.928   | hyperopt-sklearn | 88.7%    |
| hyperopt-sklearn      | 98.7%    | hyperopt-sklearn | 0.856   | hp-dbnet         | 84.6%    |
| libSVM grid search    | 98.6%    | SVMTorch         | 0.848   | dbn-3            | 81.4%    |
| Boosted trees         | 98.5%    | LibSVM           | 0.843   |                  |          |
(such as `algorithm` and `leaf_size` in the KNN model). Care should be taken to identify these parameters in each model, and they may need to be treated differently during exploration.