# Source code for hyperion.transforms.skl_tsne

"""
 Copyright 2018 Johns Hopkins University  (Author: Jesus Villalba)
 Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
"""
from jsonargparse import ArgumentParser, ActionParser
import numpy as np

from sklearn.manifold import TSNE

from ..hyp_model import HypModel


class SklTSNE(HypModel):
    """Wrapper class for sklearn TSNE manifold learner.

    Attributes:
      tsne_dim: dimension of the embedded space.
      perplexity: the perplexity is related to the number of nearest
        neighbors that is used in other manifold learning algorithms.
        Larger datasets usually require a larger perplexity. Consider
        selecting a value between 5 and 50.
      early_exaggeration: controls how tight natural clusters in the
        original space are in the embedded space and how much space will
        be between them.
      lr: the learning rate for t-SNE is usually in the range
        [10.0, 1000.0].
      num_iter: maximum number of iterations for the optimization.
      num_iter_without_progress: maximum number of iterations without
        progress before we abort the optimization.
      min_grad_norm: if the gradient norm is below this threshold, the
        optimization will be stopped.
      metric: the metric to use when calculating distance between
        instances in ['cosine', 'euclidean', 'l1', 'l2', 'precomputed']
        or callable function.
      init: initialization method in ['random', 'pca'] or embedding matrix
        of shape (num_samples, num_comp).
      verbose: verbosity level.
      rng: RandomState instance.
      rng_seed: seed for random number generator.
      method: gradient calculation method in ['barnes_hut', 'exact'].
      angle: angle theta in Barnes-Hut TSNE.
      num_jobs: number of parallel jobs to run for neighbors search.
    """

    def __init__(
        self,
        tsne_dim=2,
        perplexity=30.0,
        early_exaggeration=12.0,
        lr=200.0,
        num_iter=1000,
        num_iter_without_progress=300,
        min_grad_norm=1e-07,
        metric="euclidean",
        init="random",
        verbose=0,
        rng=None,
        rng_seed=1234,
        method="barnes_hut",
        angle=0.5,
        num_jobs=None,
        **kwargs
    ):
        super().__init__(**kwargs)
        # The seed is stored so get_config() can report it; it only drives
        # the generator when no explicit `rng` is supplied.
        self.rng_seed = rng_seed
        if rng is None:
            rng = np.random.RandomState(seed=rng_seed)

        # NOTE(review): recent scikit-learn releases appear to rename TSNE's
        # `n_iter` to `max_iter` -- confirm against the pinned sklearn version.
        self._tsne = TSNE(
            n_components=tsne_dim,
            perplexity=perplexity,
            early_exaggeration=early_exaggeration,
            learning_rate=lr,
            n_iter=num_iter,
            n_iter_without_progress=num_iter_without_progress,
            min_grad_norm=min_grad_norm,
            metric=metric,
            init=init,
            verbose=verbose,
            random_state=rng,
            method=method,
            angle=angle,
            n_jobs=num_jobs,
        )

    # Read-only views over the wrapped TSNE object's hyper-parameters.

    @property
    def tsne_dim(self):
        return self._tsne.n_components

    @property
    def perplexity(self):
        # Must read from the wrapped estimator; returning self.perplexity
        # here would recurse forever.
        return self._tsne.perplexity

    @property
    def early_exaggeration(self):
        return self._tsne.early_exaggeration

    @property
    def lr(self):
        return self._tsne.learning_rate

    @property
    def num_iter(self):
        return self._tsne.n_iter

    @property
    def num_iter_without_progress(self):
        return self._tsne.n_iter_without_progress

    @property
    def min_grad_norm(self):
        return self._tsne.min_grad_norm

    @property
    def metric(self):
        return self._tsne.metric

    @property
    def init(self):
        return self._tsne.init

    @property
    def method(self):
        return self._tsne.method

    @property
    def angle(self):
        return self._tsne.angle

    @property
    def num_jobs(self):
        return self._tsne.n_jobs

    def predict(self, x):
        """Embeds the data by running TSNE.fit_transform on it.

        Args:
          x: input data passed unchanged to TSNE.fit_transform.

        Returns:
          Whatever TSNE.fit_transform returns for x.
        """
        # Same call as fit(): the embedding is re-estimated on every call.
        return self._tsne.fit_transform(x)

    def fit(self, x):
        """Fits the TSNE model to the data and returns the embedding.

        Args:
          x: input data passed unchanged to TSNE.fit_transform.

        Returns:
          Whatever TSNE.fit_transform returns for x.
        """
        return self._tsne.fit_transform(x)

    def save_params(self, f):
        """Saves the model parameters; this wrapper writes nothing to f.

        Args:
          f: output file object (unused).
        """
        pass

    @classmethod
    def load_params(cls, f, config):
        """Builds the object from its configuration dictionary.

        Args:
          f: input file object (unused, nothing is read from it).
          config: dictionary with the constructor keyword arguments.

        Returns:
          New instance created as cls(**config).
        """
        return cls(**config)

    def get_config(self):
        """Returns the configuration dictionary of the object.

        Returns:
          Dictionary with the base class config updated with the TSNE
          hyper-parameters; keys defined here override base class keys.
        """
        config = {
            "tsne_dim": self.tsne_dim,
            "perplexity": self.perplexity,
            "early_exaggeration": self.early_exaggeration,
            "lr": self.lr,
            "num_iter": self.num_iter,
            "num_iter_without_progress": self.num_iter_without_progress,
            "min_grad_norm": self.min_grad_norm,
            "metric": self.metric,
            "init": self.init,
            "rng_seed": self.rng_seed,
            "method": self.method,
            "angle": self.angle,
            "num_jobs": self.num_jobs,
        }
        base_config = super().get_config()
        return {**base_config, **config}

    @staticmethod
    def filter_args(**kwargs):
        """Keeps only the keyword arguments accepted by the constructor.

        Args:
          kwargs: dictionary with arbitrary options, e.g., parsed from the
            command line.

        Returns:
          Dictionary with the subset of options valid for SklTSNE.
        """
        valid_args = (
            "tsne_dim",
            "perplexity",
            "early_exaggeration",
            "lr",
            "num_iter",
            "num_iter_without_progress",
            "min_grad_norm",
            "metric",
            "init",
            "rng_seed",
            "method",
            "angle",
            "num_jobs",
        )
        args = {k: kwargs[k] for k in valid_args if k in kwargs}
        # add_class_args registers the seed as --rnd-seed (key "rnd_seed"),
        # while the constructor expects "rng_seed"; without this mapping the
        # command line seed would be silently dropped.
        if "rng_seed" not in args and "rnd_seed" in kwargs:
            args["rng_seed"] = kwargs["rnd_seed"]
        return args

    @staticmethod
    def add_class_args(parser, prefix=None):
        """Adds the TSNE options to an argument parser.

        Args:
          parser: parser object where the options are added.
          prefix: if not None, the options are added to a new sub-parser
            that is attached to `parser` under the option "--<prefix>".
        """
        if prefix is not None:
            outer_parser = parser
            parser = ArgumentParser(prog="")

        parser.add_argument("--tsne-dim", default=2, type=int, help=("tsne dimension"))
        parser.add_argument(
            "--perplexity", default=30.0, type=float, help=("tsne perplexity")
        )
        parser.add_argument(
            "--early-exaggeration",
            default=12.0,
            type=float,
            help=(
                "controls how tight natural clusters in the original space "
                "are in the embedded space and how much space will be "
                "between them."
            ),
        )
        parser.add_argument(
            "--lr", default=200.0, type=float, help=("learning rate for t-sne")
        )
        parser.add_argument(
            "--num-iter", default=1000, type=int, help=("max. number of iterations")
        )
        parser.add_argument(
            "--num-iter-without-progress",
            default=300,
            type=int,
            help=("max. number of iterations without improvement"),
        )
        parser.add_argument(
            "--min-grad-norm",
            default=1e-07,
            type=float,
            help=("minimum gradient norm to stop optim."),
        )
        parser.add_argument(
            "--metric",
            default="euclidean",
            choices=["cosine", "euclidean", "l1", "l2", "precomputed"],
            help=("distance metric"),
        )
        parser.add_argument(
            "--init",
            default="random",
            choices=["random", "pca"],
            help=("initialization method"),
        )
        parser.add_argument(
            "--method",
            default="barnes_hut",
            choices=["barnes_hut", "exact"],
            help=("gradient calculation method"),
        )
        parser.add_argument(
            "--angle", default=0.5, type=float, help=("angle thetha in Barnes-Hut TSNE")
        )
        parser.add_argument(
            "--num-jobs", default=1, type=int, help=("num parallel jobs for NN search")
        )
        parser.add_argument("--rnd-seed", default=1234, type=int, help=("random seed"))

        if prefix is not None:
            outer_parser.add_argument("--" + prefix, action=ActionParser(parser=parser))

    # Legacy alias kept for callers that still use the old method name.
    add_argparse_args = add_class_args