Your browser doesn't support the features required by impress.js, so you are presented with a simplified version of this presentation.
For the best experience, please use the latest Chrome, Safari, or Firefox browser.
X = np.c_[X0, X1] features = make_union(SelectKBest(k=1), PCA(n_components=2)) forest = RandomForestRegressor() prd = make_pipeline( features, forest) prd.fit(X, y)
X = np.c_[X0, X1] features = make_union(SelectKBest(k=1), PCA(n_components=2)) forest = RandomForestRegressor() prd = make_pipeline( features, forest) prd.fit(X, y)
X = np.c_[X0, X1] features = make_union(SelectKBest(k=1), PCA(n_components=2)) forest = RandomForestRegressor() prd = make_pipeline( features, forest) prd.fit(X, y)
X = np.c_[X0, X1] features = make_union(SelectKBest(k=1), PCA(n_components=2)) forest = RandomForestRegressor() prd = make_pipeline( features, forest) prd.fit(X, y)
X = np.c_[X0, X1] features = make_union(SelectKBest(k=1), PCA(n_components=2)) forest = RandomForestRegressor() prd = make_pipeline( features, forest) prd.fit(X, y)
>>> X0 array([[23.2, 21.8], ..., ..., ..., ])
>>> X1 array([[9, 8, 78], ..., ..., ..., ])
>>> X array([[21.8, 0.3, 9.8], ..., ..., ..., ])
>>> forest.feature_importances_ array([0.1, 0.3, 0.7])
>>> forest.feature_importances_ array([0.1, 0.3, 0.7])
User | Movie | Rating |
---|---|---|
Roni | Frozen | 5 |
Roni | Mulan | 4 |
Yarden | Mulan | 1 |
Anat | Mulan | 1 |
Anat | Shrek | 2 |
Yarden | Shrek | 3 |
Yarden | Frozen | 5 |
Anat | Frozen | 5 |
Roni | Shrek | 5 |
User | Movie | Rating |
---|---|---|
Roni | Frozen | 5 |
Roni | Mulan | 4 |
Yarden | Mulan | 1 |
Anat | Mulan | 1 |
Anat | Shrek | 2 |
Yarden | Shrek | 3 |
Yarden | Frozen | 5 |
Anat | Frozen | 5 |
Roni | Shrek | 5 |
Frozen | Shrek | Mulan | |
---|---|---|---|
Roni | 5 | 5 | 4 |
Yarden | 5 | 3 | 1 |
Anat | 5 | 2 | 1 |
User | user_0 | user_1 |
---|---|---|
Roni | 0.1 | 1.3 |
Yarden | 1.1 | 1.4 |
Anat | 0.2 | 9.8 |
Movie | movie_0 | movie_1 |
---|---|---|
Frozen | 10 | 20 |
Shrek | 20.1 | 30.4 |
Mulan | 12 | 13 |
User | Movie | user_0 | user_1 | movie_0 | movie_1 |
---|---|---|---|---|---|
Roni | Frozen | 0.1 | 1.3 | 10 | 20 |
Roni | Mulan | 0.1 | 1.3 | 12 | 13 |
Yarden | Mulan | 1.1 | 1.4 | 12 | 13 |
Anat | Mulan | 0.2 | 9.8 | 12 | 13 |
Anat | Shrek | 0.2 | 9.8 | 20.1 | 30.4 |
Yarden | Shrek | 1.1 | 1.4 | 20.1 | 30.4 |
Yarden | Frozen | 1.1 | 1.4 | 10 | 20 |
Anat | Frozen | 0.2 | 9.8 | 10 | 20 |
Roni | Shrek | 0.1 | 1.3 | 20.1 | 30.4 |
User | Movie | Rating |
---|---|---|
Roni | Frozen | 5 |
Roni | Mulan | 4 |
Yarden | Mulan | 1 |
Anat | Mulan | 1 |
Anat | Shrek | 2 |
Yarden | Shrek | 3 |
Yarden | Frozen | 5 |
Anat | Frozen | 5 |
Roni | Shrek | 5 |
features = make_union(SelectKBest(k=1), PCA(n_components=2)) forest = RandomForestRegressor() prd = make_pipeline( features, forest)
Numpy
5.1 | 3.5 | 1.4 | 0.2 |
4.9 | 3.0 | 1.4 | 0.2 |
4.7 | 3.2 | 1.3 | 0.2 |
4.6 | 3.1 | 1.5 | 0.2 |
5.0 | 3.6 | 1.4 | 0.2 |
Pandas
sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | |
---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 |
1 | 4.9 | 3.0 | 1.4 | 0.2 |
2 | 4.7 | 3.2 | 1.3 | 0.2 |
3 | 4.6 | 3.1 | 1.5 | 0.2 |
4 | 5.0 | 3.6 | 1.4 | 0.2 |
We intuitively know what this means
$ ls | wc
from ibex import frame PdLinearRegression = frame(sklearn.linear_model.LinearRegression)
import pandas as pd X = pd.DataFrame({'a': [0, 2, 3], 'b': [1, 4, 6]}) y = pd.Series([10, 20 , 30])
a | b | |
---|---|---|
0 | 0 | 1 |
1 | 2 | 4 |
2 | 3 | 6 |
0 | 10 |
---|---|
1 | 20 |
2 | 30 |
>>> PdLinearRegression().fit(X, y).predict(X)
0 | 10 |
---|---|
1 | 20 |
2 | 30 |
X = pd.DataFrame({'a': [0, 2, 3], 'b': [1, 4, 6]}) y = pd.Series([10, 20 , 30])
a | b | |
---|---|---|
0 | 0 | 1 |
1 | 2 | 4 |
2 | 3 | 6 |
0 | 10 |
---|---|
1 | 20 |
2 | 30 |
b | a | |
---|---|---|
0 | 1 | 0 |
1 | 4 | 2 |
2 | 6 | 3 |
>>> PdLinearRegression().fit(X, y).predict(X[['b', 'a']])
0 | 10 |
---|---|
1 | 20 |
2 | 30 |
X = pd.DataFrame({'a': [0, 2, 3], 'b': [1, 4, 6]}, index=['i', 'j', 'k']) y = pd.Series([10, 20 , 30], index=[21, 22, 23])
a | b | |
---|---|---|
i | 0 | 1 |
j | 2 | 4 |
k | 3 | 6 |
21 | 10 |
---|---|
22 | 20 |
23 | 30 |
>>> PdLinearRegression().fit(X, y).predict(X) --------------------------------------------------------------------------- ValueError Traceback (most recent call last) ----> 1 LinearRegression().fit(X, y) ... --> 113 **kwargs) ... ValueError: Indexes do not match
X = pd.DataFrame({'a': [0, 2, 3], 'b': [1, 4, 6]}) y = pd.Series([10, 20 , 30]) X1 = pd.DataFrame({'m': [0, 2, 3], 'n': [1, 4, 6]})
a | b | |
---|---|---|
0 | 0 | 1 |
1 | 2 | 4 |
2 | 3 | 6 |
0 | 10 |
---|---|
1 | 20 |
2 | 30 |
m | n | |
---|---|---|
0 | 0 | 1 |
1 | 2 | 4 |
2 | 3 | 6 |
>>> PdLinearRegression().fit(X, y).predict(X1) --------------------------------------------------------------------------- KeyError Traceback (most recent call last) ----> 1 LinearRegression().fit(X, y).predict(X1) ... -> 1231 raise KeyError('%s not in index' % objarr[mask]) ... KeyError: "Index(['a', 'b'], dtype='object') not in index"
>>> iris.head()
sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | class | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | 0.0 |
1 | 4.9 | 3.0 | 1.4 | 0.2 | 0.0 |
2 | 4.7 | 3.2 | 1.3 | 0.2 | 0.0 |
3 | 4.6 | 3.1 | 1.5 | 0.2 | 0.0 |
4 | 5.0 | 3.6 | 1.4 | 0.2 | 0.0 |
clf = PdSVC(kernel="linear", probability=True) >>> clf.fit(iris[features], iris['class']).coef_
sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | |
---|---|---|---|---|
setosa | -0.046259 | 0.521183 | -1.003045 | -0.46130 |
versicolor | -0.007223 | 0.178941 | -0.538365 | -0.292393 |
virginica | 0.595498 | 0.973900 | -2.031000 | -2.006303 |
make_pipeline
:
sklearn.pipeline.make_pipeline( frame(PCA)(n_components=2), frame(RandomForestRegressor)()) frame(PCA)(n_components=2) | frame(RandomForestRegressor)()
make_union
:
sklearn.pipeline.make_union( frame(SelectKBest)(k=2), frame(PCA)(n_components=2)) frame(SelectKBest)(k=2) + frame(PCA)(n_components=2)
PdSelectKBest(k=1) + PdPCA(n_components=2) | PdRandomForestRegressor()
>>> iris.head()
sepal length (cm) | sepal width (cm) | petal length (cm) | petal width (cm) | class | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | 0.0 |
1 | 4.9 | 3.0 | 1.4 | 0.2 | 0.0 |
2 | 4.7 | 3.2 | 1.3 | 0.2 | 0.0 |
3 | 4.6 | 3.1 | 1.5 | 0.2 | 0.0 |
4 | 5.0 | 3.6 | 1.4 | 0.2 | 0.0 |
trn = PdPCA(n_components=2) + PdSelectKBest(k=1) trn.fit(iris[features], iris['class']).transform(iris[features])
pca | selectkbest | ||
---|---|---|---|
comp_0 | comp_1 | petal length (cm) | |
0 | -2.684207 | 0.326607 | 1.4 |
1 | -2.715391 | -0.169557 | 1.4 |
2 | -2.889820 | -0.137346 | 1.3 |
3 | -2.746437 | -0.311124 | 1.5 |
4 | -2.728593 | 0.333925 | 1.4 |
forest = PdRandomForestClassifier() prd = PdSelectKBest(k=1) + PdPCA(n_components=2) | forest >>> prd.fit(iris[features], iris['class']).feature_importances_ sepal length (cm) 0.090375 sepal width (cm) 0.050915 petal length (cm) 0.424982 petal width (cm) 0.433729 dtype: float64
User | Movie | Rating |
---|---|---|
Roni | Frozen | 5 |
Roni | Mulan | 4 |
Yarden | Mulan | 1 |
Anat | Mulan | 1 |
Anat | Shrek | 2 |
Yarden | Shrek | 3 |
Yarden | Frozen | 5 |
Anat | Frozen | 5 |
Roni | Shrek | 5 |
Frozen | Shrek | Mulan | |
---|---|---|---|
Roni | 5 | 5 | 4 |
Yarden | 5 | 3 | 1 |
Anat | 5 | 2 | 1 |
User | Movie | user_0 | user_1 | movie_0 | movie_1 |
---|---|---|---|---|---|
Roni | Frozen | 0.1 | 1.3 | 10 | 20 |
Roni | Mulan | 0.1 | 1.3 | 12 | 13 |
Yarden | Mulan | 1.1 | 1.4 | 12 | 13 |
Anat | Mulan | 0.2 | 9.8 | 12 | 13 |
Anat | Shrek | 0.2 | 9.8 | 20.1 | 30.4 |
Yarden | Shrek | 1.1 | 1.4 | 20.1 | 30.4 |
Yarden | Frozen | 1.1 | 1.4 | 10 | 20 |
Anat | Frozen | 0.2 | 9.8 | 10 | 20 |
Roni | Shrek | 0.1 | 1.3 | 20.1 | 30.4 |
class UserMovieTransformer( base.BaseEstimator, base.EstimatorMixin, ibex.FrameMixin): def fit(self, X, y): # X is a DataFrame pd.pivot( values=X.target, index=X.user_id, columns=X.item_id) ...
PdSelectKBest = frame(SelectKBest) PdLinearRegression = frame(LinearRegression) PdRandomForestRegressor = frame(RandomForestRegressor) PdPCA = frame(PCA) PdStandardScaler = frame(StandardScaler) ...
ibex.sklearn.linear_model.LinearRegression = \ frame(sklearn.linear_model.LinearRegression) ... ibex.xgboost.XGBRegressor = frame(xgboost.XGBRegressor) ...
class Wrapper(BaseEstimator, RegressorMixin, FrameMixin): def __init__(self, *args, **kwargs): self.wrapped = LinearRegression(*args, **kwargs) ...
sklearn
Too Large/Dynamic For Manual Wrapping
sys.meta_path
class ModuleLoader(object): def find_module(self, full_name, _=None): ... sys.meta_path.append(ModuleLoader())
dir
>>> dir(linear_model) ['__all__', 'ARDRegression', 'BayesianRidge', 'ElasticNet', 'ElasticNetCV', 'Hinge', ...]
inspect
import inspect if inspect.isclass(est) and issubclass(est, base.BaseEstimator): ...
__init__
Requirements:
class FooTransformer(BaseEstimator, TransformerMixin): def __init__(self, *args, **kwargs): ...
linear_model.LinearRegression().fit(X, y).coef_ linear_model.LinearRegression().coef_
class LogisticRegression(BaseEstimator, ClassifierMixin): def __init__(self, penalty='l2', dual=False, tol=0.0001, C...): ... self.n_iter = ... def frame(est): class _Adaptor(est, FrameMixin): ... return _Adaptor
class LogisticRegression(BaseEstimator, ClassifierMixin): def __init__(self, penalty='l2', dual=False, tol=0.0001, C...): ... self.n_iter = ... def frame(est): class _Adaptor(est, FrameMixin): ... return _Adaptor
class LogisticRegression(BaseEstimator, ClassifierMixin): def __init__(self, penalty='l2', dual=False, tol=0.0001, C...): ... self.n_iter = ... def frame(est): class _Adaptor(est, FrameMixin): ... return _Adaptor
class LogisticRegression(BaseEstimator, ClassifierMixin): def __init__(self, penalty='l2', dual=False, tol=0.0001, C...): ... self.n_iter = ... def frame(est): class _Adaptor(est, FrameMixin): ... return _Adaptor
import pickle def foo(): class Foo(object): pass return Foo() >>> pickle.dumps(foo()) --------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input> in <module>() 6 return Foo() 7 ----> 8 pickle.dumps(foo()) AttributeError: Can't pickle local object 'foo.<locals>.Foo'
model_selection.cross_val_score(prd, ..., n_jobs=-1)
def frame(est): class _Adaptor(est, FrameMixin): ... def __reduce__(self): ... return _Adaptor
sys.meta_path
dir
inspect