diff --git a/benchmarks.html b/benchmarks.html deleted file mode 100644 index 02c881ccfb..0000000000 --- a/benchmarks.html +++ /dev/null @@ -1,17 +0,0 @@ - - - - -

Binary classification

-
- - - diff --git a/benchmarks/README.md b/benchmarks/README.md index e1fce097a3..6b1786cea6 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -1,11 +1,21 @@ # Benchmarks -Navigate to this directory and create a `conda` virtual environment, as so: +Navigate to the root of this repo and create a `conda` virtual environment, like so: ```sh -$ conda create -n river-benchmarks -y python==3.8.5 -$ conda activate river-benchmarks -$ pip install -r requirements.txt +conda create -n river-benchmarks -y python==3.8.5 +conda activate river-benchmarks +pip install -e ".[benchmarks]" ``` -Note that this will install the development version of `river` from GitHub. You can change this behaviour by modifying `requirements.txt` before creating the virtual environment. +Then run the benchmarks: + +```sh +python run.py +``` + +This creates a `results.json` file. To generate the page that gets displayed in the docs, do this: + +```sh +python render.py +``` diff --git a/benchmarks/details.json b/benchmarks/details.json new file mode 100644 index 0000000000..93b976a28d --- /dev/null +++ b/benchmarks/details.json @@ -0,0 +1,75 @@ +{ + "Binary classification": { + "Dataset": { + "Bananas": "Bananas dataset.\n\nAn artificial dataset where instances belongs to several clusters with a banana shape.\nThere are two attributes that correspond to the x and y axis, respectively.\n\n Name Bananas \n Task Binary classification \n Samples 5,300 \nFeatures 2 \n Sparse False \n Path /Users/mastelini/Documents/river/river/datasets/banana.zip", + "Phishing": "Phishing websites.\n\nThis dataset contains features from web pages that are classified as phishing or not.\n\n Name Phishing \n Task Binary classification \n Samples 1,250 \nFeatures 9 \n Sparse False \n Path /Users/mastelini/Documents/river/river/datasets/phishing.csv.gz" + }, + "Model": { + "ADWIN Bagging": "[HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n 
tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n 
split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n)]", + "ALMA": "Pipeline (\n 
StandardScaler (\n with_std=True\n ),\n ALMAClassifier (\n p=2\n alpha=0.9\n B=1.111111\n C=1.414214\n )\n)", + "AdaBoost": "[HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n 
n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n 
splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n)]", + "Adaptive Random Forest": "[]", + "Bagging": "[HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n 
tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n 
split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n)]", + "Extremely Fast Decision Tree": "ExtremelyFastDecisionTreeClassifier (\n grace_period=200\n max_depth=inf\n min_samples_reevaluate=20\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n)", + "Hoeffding Adaptive Tree": "HoeffdingAdaptiveTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n bootstrap_sampling=True\n drift_window_threshold=300\n adwin_confidence=0.002\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n seed=42\n)", + "Hoeffding Tree": "HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n)", + "Leveraging Bagging": "[HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n 
nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n 
leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n)]", + "Logistic regression": "Pipeline (\n StandardScaler (\n with_std=True\n ),\n LogisticRegression (\n 
optimizer=SGD (\n lr=Constant (\n learning_rate=0.01\n )\n )\n loss=Log (\n weight_pos=1.\n weight_neg=1.\n )\n l2=0.\n l1=0.\n intercept_init=0.\n intercept_lr=Constant (\n learning_rate=0.01\n )\n clip_gradient=1e+12\n initializer=Zeros ()\n )\n)", + "Naive Bayes": "GaussianNB ()", + "Stacking": "[Pipeline (\n StandardScaler (\n with_std=True\n ),\n SoftmaxRegression (\n optimizer=SGD (\n lr=Constant (\n learning_rate=0.01\n )\n )\n loss=CrossEntropy (\n class_weight={}\n )\n l2=0\n )\n), GaussianNB (), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), Pipeline (\n StandardScaler (\n with_std=True\n ),\n KNNClassifier (\n n_neighbors=5\n window_size=100\n min_distance_keep=0.\n weighted=True\n cleanup_every=0\n distance_func=None\n softmax=False\n )\n)]", + "Stochastic Gradient Tree": "SGTClassifier (\n delta=1e-07\n grace_period=200\n init_pred=0.\n max_depth=inf\n lambda_value=0.1\n gamma=1.\n nominal_attributes=[]\n feature_quantizer=StaticQuantizer (\n n_bins=64\n warm_start=100\n buckets=None\n )\n)", + "Streaming Random Patches": "SRPClassifier (\n model=HoeffdingTreeClassifier (\n grace_period=50\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=0.01\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n )\n n_models=10\n subspace_size=0.6\n training_method=\"patches\"\n lam=6\n drift_detector=ADWIN (\n delta=1e-05\n )\n warning_detector=ADWIN (\n delta=0.0001\n 
)\n disable_detector=\"off\"\n disable_weighted_vote=False\n seed=None\n metric=Accuracy (\n cm=ConfusionMatrix (\n classes=[]\n )\n )\n)", + "Voting": "VotingClassifier (\n models=[Pipeline (\n StandardScaler (\n with_std=True\n ),\n SoftmaxRegression (\n optimizer=SGD (\n lr=Constant (\n learning_rate=0.01\n )\n )\n loss=CrossEntropy (\n class_weight={}\n )\n l2=0\n )\n), GaussianNB (), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), Pipeline (\n StandardScaler (\n with_std=True\n ),\n KNNClassifier (\n n_neighbors=5\n window_size=100\n min_distance_keep=0.\n weighted=True\n cleanup_every=0\n distance_func=None\n softmax=False\n )\n)]\n use_probabilities=True\n)", + "[baseline] Last Class": "NoChangeClassifier ()", + "k-Nearest Neighbors": "Pipeline (\n StandardScaler (\n with_std=True\n ),\n KNNClassifier (\n n_neighbors=5\n window_size=100\n min_distance_keep=0.\n weighted=True\n cleanup_every=0\n distance_func=None\n softmax=False\n )\n)" + } + }, + "Multiclass classification": { + "Dataset": { + "ImageSegments": "Image segments classification.\n\nThis dataset contains features that describe image segments into 7 classes: brickface, sky,\nfoliage, cement, window, path, and grass.\n\n Name ImageSegments \n Task Multi-class classification \n Samples 2,310 \nFeatures 18 \n Sparse False \n Path /Users/mastelini/Documents/river/river/datasets/segment.csv.zip", + "Insects": "Insects dataset.\n\nThis dataset has different variants, which are:\n\n- abrupt_balanced\n- abrupt_imbalanced\n- gradual_balanced\n- gradual_imbalanced\n- incremental-abrupt_balanced\n- incremental-abrupt_imbalanced\n- 
incremental-reoccurring_balanced\n- incremental-reoccurring_imbalanced\n- incremental_balanced\n- incremental_imbalanced\n- out-of-control\n\nThe number of samples and the difficulty change from one variant to another. The number of\nclasses is always the same (6), except for the last variant (24).\n\n Name Insects \n Task Multi-class classification \n Samples 52,848 \n Features 33 \n Classes 6 \n Sparse False \n Path /Users/mastelini/river_data/Insects/INSECTS-abrupt_balanced_norm.arff \n URL http://sites.labic.icmc.usp.br/vsouza/repository/creme/INSECTS-abrupt_balanced_norm.arff\n Size 15.66 MB \nDownloaded True \n Variant abrupt_balanced \n\nParameters\n----------\n variant\n Indicates which variant of the dataset to load.", + "Keystroke": "CMU keystroke dataset.\n\nUsers are tasked to type in a password. The task is to determine which user is typing in the\npassword.\n\nThe only difference with the original dataset is that the \"sessionIndex\" and \"rep\" attributes\nhave been dropped.\n\n Name Keystroke \n Task Multi-class classification \n Samples 20,400 \n Features 31 \n Sparse False \n Path /Users/mastelini/river_data/Keystroke/DSL-StrongPasswordData.csv\n URL http://www.cs.cmu.edu/~keystroke/DSL-StrongPasswordData.csv \n Size 4.45 MB \nDownloaded True " + }, + "Model": { + "ADWIN Bagging": "[HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n 
binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n 
splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n)]", + "AdaBoost": "[HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n 
leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n 
split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n)]", + "Adaptive Random Forest": "[]", + "Bagging": "[HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), 
HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n 
remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n)]", + "Extremely Fast Decision Tree": "ExtremelyFastDecisionTreeClassifier (\n grace_period=200\n max_depth=inf\n min_samples_reevaluate=20\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n 
n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n)", + "Hoeffding Adaptive Tree": "HoeffdingAdaptiveTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n bootstrap_sampling=True\n drift_window_threshold=300\n adwin_confidence=0.002\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n seed=42\n)", + "Hoeffding Tree": "HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n)", + "Leveraging Bagging": "[HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n 
grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n 
merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n)]", + "Naive Bayes": "GaussianNB ()", + "Stacking": "[Pipeline (\n StandardScaler (\n with_std=True\n ),\n SoftmaxRegression (\n optimizer=SGD (\n lr=Constant (\n learning_rate=0.01\n )\n )\n loss=CrossEntropy (\n class_weight={}\n )\n l2=0\n )\n), GaussianNB (), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), Pipeline (\n StandardScaler (\n with_std=True\n ),\n KNNClassifier (\n n_neighbors=5\n 
window_size=100\n min_distance_keep=0.\n weighted=True\n cleanup_every=0\n distance_func=None\n softmax=False\n )\n)]", + "Streaming Random Patches": "SRPClassifier (\n model=HoeffdingTreeClassifier (\n grace_period=50\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=0.01\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n )\n n_models=10\n subspace_size=0.6\n training_method=\"patches\"\n lam=6\n drift_detector=ADWIN (\n delta=1e-05\n )\n warning_detector=ADWIN (\n delta=0.0001\n )\n disable_detector=\"off\"\n disable_weighted_vote=False\n seed=None\n metric=Accuracy (\n cm=ConfusionMatrix (\n classes=[]\n )\n )\n)", + "Voting": "VotingClassifier (\n models=[Pipeline (\n StandardScaler (\n with_std=True\n ),\n SoftmaxRegression (\n optimizer=SGD (\n lr=Constant (\n learning_rate=0.01\n )\n )\n loss=CrossEntropy (\n class_weight={}\n )\n l2=0\n )\n), GaussianNB (), HoeffdingTreeClassifier (\n grace_period=200\n max_depth=inf\n split_criterion=\"info_gain\"\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"nba\"\n nb_threshold=0\n nominal_attributes=None\n splitter=GaussianSplitter (\n n_splits=10\n )\n binary_split=False\n max_size=100.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n), Pipeline (\n StandardScaler (\n with_std=True\n ),\n KNNClassifier (\n n_neighbors=5\n window_size=100\n min_distance_keep=0.\n weighted=True\n cleanup_every=0\n distance_func=None\n softmax=False\n )\n)]\n use_probabilities=True\n)", + "[baseline] Last Class": "NoChangeClassifier ()", + "k-Nearest Neighbors": "Pipeline (\n StandardScaler (\n with_std=True\n ),\n KNNClassifier (\n n_neighbors=5\n window_size=100\n min_distance_keep=0.\n weighted=True\n 
cleanup_every=0\n distance_func=None\n softmax=False\n )\n)" + } + }, + "Regression": { + "Dataset": { + "Friedman7k": "Sample from the stationary version of the Friedman dataset.\n\nThis sample contains 7k instances sampled from the Friedman generator.\n\n Name Friedman7k\n Task Regression\n Samples 7,000 \nFeatures 10 \n Sparse False ", + "FriedmanGSG10k": "Sample from the FriedmanGSG generator.\n\nThis sample contains 10k instances sampled from the Friedman generator and presents\nglobal and slow gradual concept drifts that affect the data and happen after\n3.5k and 7k instances. The transition window between different concepts has a length of\n1k instances.\n\n Name FriedmanGSG10k\n Task Regression \n Samples 10,000 \nFeatures 10 \n Sparse False ", + "FriedmanLEA10k": "Sample from the FriedmanLEA generator.\n\nThis sample contains 10k instances sampled from the Friedman generator and presents\nlocal-expanding abrupt concept drifts that locally affect the data and happen after\n2k, 5k, and 8k instances.\n\n Name FriedmanLEA10k\n Task Regression \n Samples 10,000 \nFeatures 10 \n Sparse False ", + "TrumpApproval": "Donald Trump approval ratings.\n\nThis dataset was obtained by reshaping the data used by FiveThirtyEight for analyzing Donald\nTrump's approval ratings. It contains 5 features, which are approval ratings collected by\n5 polling agencies. The target is the approval rating from FiveThirtyEight's model. 
The goal of\nthis task is to see if we can reproduce FiveThirtyEight's model.\n\n Name TrumpApproval \n Task Regression \n Samples 1,001 \nFeatures 6 \n Sparse False \n Path /Users/mastelini/Documents/river/river/datasets/trump_approval.csv.gz" + }, + "Model": { + "Adaptive Model Rules": "Pipeline (\n StandardScaler (\n with_std=True\n ),\n AMRules (\n n_min=200\n delta=1e-07\n tau=0.05\n pred_type=\"adaptive\"\n pred_model=LinearRegression (\n optimizer=SGD (\n lr=Constant (\n learning_rate=0.01\n )\n )\n loss=Squared ()\n l2=0.\n l1=0.\n intercept_init=0.\n intercept_lr=Constant (\n learning_rate=0.01\n )\n clip_gradient=1e+12\n initializer=Zeros ()\n )\n splitter=EBSTSplitter ()\n drift_detector=ADWIN (\n delta=0.002\n )\n alpha=0.99\n anomaly_threshold=-0.75\n m_min=30\n ordered_rule_set=True\n min_samples_split=5\n )\n)", + "Adaptive Random Forest": "Pipeline (\n StandardScaler (\n with_std=True\n ),\n []\n)", + "Exponentially Weighted Average": "Pipeline (\n StandardScaler (\n with_std=True\n ),\n [LinearRegression (\n optimizer=SGD (\n lr=Constant (\n learning_rate=0.01\n )\n )\n loss=Squared ()\n l2=0.\n l1=0.\n intercept_init=0.\n intercept_lr=Constant (\n learning_rate=0.01\n )\n clip_gradient=1e+12\n initializer=Zeros ()\n ), HoeffdingAdaptiveTreeRegressor (\n grace_period=200\n max_depth=inf\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"model\"\n leaf_model=LinearRegression (\n optimizer=SGD (\n lr=Constant (\n learning_rate=0.01\n )\n )\n loss=Squared ()\n l2=0.\n l1=0.\n intercept_init=0.\n intercept_lr=Constant (\n learning_rate=0.01\n )\n clip_gradient=1e+12\n initializer=Zeros ()\n )\n model_selector_decay=0.95\n nominal_attributes=None\n splitter=EBSTSplitter ()\n min_samples_split=5\n bootstrap_sampling=True\n drift_window_threshold=300\n adwin_confidence=0.002\n binary_split=False\n max_size=500.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n seed=None\n ), 
KNNRegressor (\n n_neighbors=5\n window_size=100\n aggregation_method=\"mean\"\n min_distance_keep=0.\n distance_func=None\n ), AMRules (\n n_min=200\n delta=1e-07\n tau=0.05\n pred_type=\"adaptive\"\n pred_model=LinearRegression (\n optimizer=SGD (\n lr=Constant (\n learning_rate=0.01\n )\n )\n loss=Squared ()\n l2=0.\n l1=0.\n intercept_init=0.\n intercept_lr=Constant (\n learning_rate=0.01\n )\n clip_gradient=1e+12\n initializer=Zeros ()\n )\n splitter=EBSTSplitter ()\n drift_detector=PageHinkley (\n min_instances=30\n delta=0.005\n threshold=50\n alpha=0.9999\n )\n alpha=0.99\n anomaly_threshold=-0.75\n m_min=30\n ordered_rule_set=True\n min_samples_split=5\n )]\n)", + "Hoeffding Adaptive Tree": "Pipeline (\n StandardScaler (\n with_std=True\n ),\n HoeffdingAdaptiveTreeRegressor (\n grace_period=200\n max_depth=inf\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"model\"\n leaf_model=LinearRegression (\n optimizer=SGD (\n lr=Constant (\n learning_rate=0.01\n )\n )\n loss=Squared ()\n l2=0.\n l1=0.\n intercept_init=0.\n intercept_lr=Constant (\n learning_rate=0.01\n )\n clip_gradient=1e+12\n initializer=Zeros ()\n )\n model_selector_decay=0.95\n nominal_attributes=None\n splitter=EBSTSplitter ()\n min_samples_split=5\n bootstrap_sampling=True\n drift_window_threshold=300\n adwin_confidence=0.002\n binary_split=False\n max_size=500.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n seed=42\n )\n)", + "Hoeffding Tree": "Pipeline (\n StandardScaler (\n with_std=True\n ),\n HoeffdingAdaptiveTreeRegressor (\n grace_period=200\n max_depth=inf\n split_confidence=1e-07\n tie_threshold=0.05\n leaf_prediction=\"model\"\n leaf_model=LinearRegression (\n optimizer=SGD (\n lr=Constant (\n learning_rate=0.01\n )\n )\n loss=Squared ()\n l2=0.\n l1=0.\n intercept_init=0.\n intercept_lr=Constant (\n learning_rate=0.01\n )\n clip_gradient=1e+12\n initializer=Zeros ()\n )\n model_selector_decay=0.95\n 
nominal_attributes=None\n splitter=EBSTSplitter ()\n min_samples_split=5\n bootstrap_sampling=True\n drift_window_threshold=300\n adwin_confidence=0.002\n binary_split=False\n max_size=500.\n memory_estimate_period=1000000\n stop_mem_management=False\n remove_poor_attrs=False\n merit_preprune=True\n seed=None\n )\n)", + "Linear Regression": "Pipeline (\n StandardScaler (\n with_std=True\n ),\n LinearRegression (\n optimizer=SGD (\n lr=Constant (\n learning_rate=0.01\n )\n )\n loss=Squared ()\n l2=0.\n l1=0.\n intercept_init=0.\n intercept_lr=Constant (\n learning_rate=0.01\n )\n clip_gradient=1e+12\n initializer=Zeros ()\n )\n)", + "Linear Regression with l1 regularization": "Pipeline (\n StandardScaler (\n with_std=True\n ),\n LinearRegression (\n optimizer=SGD (\n lr=Constant (\n learning_rate=0.01\n )\n )\n loss=Squared ()\n l2=0.\n l1=1.\n intercept_init=0.\n intercept_lr=Constant (\n learning_rate=0.01\n )\n clip_gradient=1e+12\n initializer=Zeros ()\n )\n)", + "Linear Regression with l2 regularization": "Pipeline (\n StandardScaler (\n with_std=True\n ),\n LinearRegression (\n optimizer=SGD (\n lr=Constant (\n learning_rate=0.01\n )\n )\n loss=Squared ()\n l2=1.\n l1=0.\n intercept_init=0.\n intercept_lr=Constant (\n learning_rate=0.01\n )\n clip_gradient=1e+12\n initializer=Zeros ()\n )\n)", + "Multi-layer Perceptron": "Pipeline (\n StandardScaler (\n with_std=True\n ),\n MLPRegressor (\n hidden_dims=(5,)\n activations=(, , )\n loss=Squared ()\n optimizer=SGD (\n lr=Constant (\n learning_rate=0.001\n )\n )\n seed=42\n )\n)", + "Passive-Aggressive Regressor, mode 1": "Pipeline (\n StandardScaler (\n with_std=True\n ),\n PARegressor (\n C=1.\n mode=1\n eps=0.1\n learn_intercept=True\n )\n)", + "Passive-Aggressive Regressor, mode 2": "Pipeline (\n StandardScaler (\n with_std=True\n ),\n PARegressor (\n C=1.\n mode=2\n eps=0.1\n learn_intercept=True\n )\n)", + "Stochastic Gradient Tree": "SGTRegressor (\n delta=1e-07\n grace_period=200\n init_pred=0.\n 
"""Render the benchmark results into the documentation page.

Reads ``results.json`` (one list of result rows per track) and
``details.json`` (dataset/model descriptions per track) from the directory
this script lives in, and writes ``../docs/benchmarks/index.md`` relative to
that directory. Each track gets a Tabulator table plus collapsible dataset
and model descriptions.
"""
import json
from pathlib import Path

import dominate
from dominate.tags import *
from river import datasets
from slugify import slugify
from watermark import watermark

# Resolve every path against this file so the script can be launched from
# any working directory, not only from ``benchmarks/``.
HERE = Path(__file__).resolve().parent
OUTPUT_PATH = HERE.parent / "docs" / "benchmarks" / "index.md"

with open(HERE / "results.json", encoding="utf-8") as f:
    benchmarks = json.load(f)

with open(HERE / "details.json", encoding="utf-8") as f:
    models = json.load(f)

with open(OUTPUT_PATH, "w", encoding="utf-8") as f:

    def print_(x):
        """Write ``x`` to the output page, followed by a blank line."""
        print(x, file=f, end="\n\n")

    def print_details(title, entries):
        """Emit a ``### title`` section with one collapsible block per entry.

        ``entries`` maps a display name to its plain-text description. Nothing
        is written when it is empty or ``None``, so a track without recorded
        details does not produce an empty heading.
        """
        if not entries:
            return
        print_(f"### {title}")
        for name, desc in entries.items():
            _details = details()
            _details.add(summary(name))
            _details.add(pre(desc))
            print_(_details)

    print_("""---
hide:
- navigation
---
""")
    print_("# Benchmarks")

    print_("## Environment")
    print_(pre(watermark(python=True, packages="river,numpy,scikit-learn,pandas,scipy", machine=True)))

    imports = div()
    imports.add(link(href="https://unpkg.com/tabulator-tables@5.2.6/dist/css/tabulator.min.css", rel="stylesheet"))
    imports.add(script(type="text/javascript", src="https://unpkg.com/tabulator-tables@5.2.6/dist/js/tabulator.min.js"))
    print_(imports)

    # Declared once for the whole page: the per-track scripts below only
    # assign to these names, and ``let`` cannot be redeclared.
    print_(script(dominate.util.raw("""
    let baseColumns
    let metrics
    let columns
    """)))

    for track_name, results in benchmarks.items():
        track_slug = slugify(track_name)
        # A track can be present in results.json without an entry in
        # details.json; indexing directly would raise KeyError and abort the
        # whole render.
        track_details = models.get(track_name, {})

        print_(f"## {track_name}")

        print_("### Results")
        print_(div(id=f"{track_slug}-results"))

        print_details("Datasets", track_details.get("Dataset"))
        print_details("Models", track_details.get("Model"))

        # json.dumps (not the Python repr) is what makes the embedded value a
        # valid JavaScript literal: None -> null, True -> true, and strings
        # are properly quoted and escaped.
        results_js = json.dumps(results)

        print_(script(dominate.util.raw(f"""
    var results = {results_js}

    baseColumns = [
        "Dataset",
        "Model",
        "Memory",
        "Time"
    ]
    metrics = Object.keys(results[0]).filter(x => !baseColumns.includes(x)).sort();
    columns = [...baseColumns, ...metrics].map(x => ({{title: x, field: x}}))

    function formatBytes(bytes, decimals = 2) {{
        if (bytes === 0) return '0 Bytes'

        const k = 1024;
        const dm = decimals < 0 ? 0 : decimals;
        const sizes = ['Bytes', 'KB', 'MB', 'GB', 'TB', 'PB', 'EB', 'ZB', 'YB'];

        const i = Math.floor(Math.log(bytes) / Math.log(k));

        return parseFloat((bytes / Math.pow(k, i)).toFixed(dm)) + ' ' + sizes[i];
    }}

    function msToTime(s) {{
        function pad(n, z) {{
            z = z || 2;
            return ('00' + n).slice(-z);
        }}

        // Timings are stored with a fractional part; round to whole
        // milliseconds first, otherwise pad() slices the decimal digits.
        s = Math.round(s);
        var ms = s % 1000;
        s = (s - ms) / 1000;
        var secs = s % 60;
        s = (s - secs) / 60;
        var mins = s % 60;
        var hrs = (s - mins) / 60;

        return pad(hrs) + ':' + pad(mins) + ':' + pad(secs) + '.' + pad(ms, 3);
    }}

    columns.map((x, i) => {{
        if (x.title === 'Dataset') {{
            columns[i]["headerFilter"] = true
        }}
        if (x.title === 'Model') {{
            columns[i]["headerFilter"] = true
        }}
        if (x.title === 'Memory') {{
            columns[i]["formatter"] = function(cell, formatterParams, onRendered){{
                return formatBytes(cell.getValue())
            }}
        }}
        if (x.title === 'Time') {{
            columns[i]["formatter"] = function(cell, formatterParams, onRendered) {{
                return msToTime(cell.getValue())
            }}
        }}
        if (['Accuracy', 'F1', 'MacroF1', 'MicroF1'].includes(x.title)) {{
            columns[i]["formatter"] = function(cell, formatterParams, onRendered) {{
                return (100 * cell.getValue()).toFixed(2) + "%"
            }}
        }}
        if (['MAE', 'RMSE', 'R2'].includes(x.title)) {{
            columns[i]["formatter"] = function(cell, formatterParams, onRendered) {{
                return cell.getValue().toFixed(3)
            }}
        }}
    }})

    new Tabulator('#{track_slug}-results', {{
        data: results,
        layout: 'fitColumns',
        columns: columns
    }})
    """)))
b/benchmarks/results.db differ diff --git a/benchmarks/results.json b/benchmarks/results.json new file mode 100644 index 0000000000..837e5d9d71 --- /dev/null +++ b/benchmarks/results.json @@ -0,0 +1,1198 @@ +{ + "Binary classification": [ + { + "Accuracy": 0.6257784487639177, + "Dataset": "Bananas", + "F1": 0.4477861319966584, + "Memory": 165670, + "Model": "ADWIN Bagging", + "Time": 2618.05 + }, + { + "Accuracy": 0.5064150943396226, + "Dataset": "Bananas", + "F1": 0.4825949367088608, + "Memory": 3063, + "Model": "ALMA", + "Time": 163.789 + }, + { + "Accuracy": 0.6778637478769579, + "Dataset": "Bananas", + "F1": 0.64504054897068, + "Memory": 171980, + "Model": "AdaBoost", + "Time": 2489.237 + }, + { + "Accuracy": 0.8856387997735422, + "Dataset": "Bananas", + "F1": 0.8696213425129088, + "Memory": 8567570, + "Model": "Adaptive Random Forest", + "Time": 5560.815 + }, + { + "Accuracy": 0.6335157576901302, + "Dataset": "Bananas", + "F1": 0.45875139353400224, + "Memory": 208556, + "Model": "Bagging", + "Time": 1883.013 + }, + { + "Accuracy": 0.6252123042083412, + "Dataset": "Bananas", + "F1": 0.4513812154696133, + "Memory": 29602, + "Model": "Extremely Fast Decision Tree", + "Time": 303.313 + }, + { + "Accuracy": 0.6165314210228345, + "Dataset": "Bananas", + "F1": 0.4408365437534397, + "Memory": 43404, + "Model": "Hoeffding Adaptive Tree", + "Time": 484.379 + }, + { + "Accuracy": 0.6421966408756369, + "Dataset": "Bananas", + "F1": 0.5034049240440022, + "Memory": 24843, + "Model": "Hoeffding Tree", + "Time": 251.493 + }, + { + "Accuracy": 0.8284581996603133, + "Dataset": "Bananas", + "F1": 0.8028627195836044, + "Memory": 1244447, + "Model": "Leveraging Bagging", + "Time": 9075.224 + }, + { + "Accuracy": 0.5373584905660377, + "Dataset": "Bananas", + "F1": 0.22109275730622616, + "Memory": 4423, + "Model": "Logistic regression", + "Time": 170.88 + }, + { + "Accuracy": 0.6152104170598226, + "Dataset": "Bananas", + "F1": 0.4139120436907157, + "Memory": 3901, + "Model": "Naive 
Bayes", + "Time": 240.513 + }, + { + "Accuracy": 0.8514814115870919, + "Dataset": "Bananas", + "F1": 0.8321603753465558, + "Memory": 9988024, + "Model": "Stacking", + "Time": 8676.398 + }, + { + "Accuracy": 0.6575471698113208, + "Dataset": "Bananas", + "F1": 0.560639070442992, + "Memory": 2186974, + "Model": "Stochastic Gradient Tree", + "Time": 586.001 + }, + { + "Accuracy": 0.8694093225136819, + "Dataset": "Bananas", + "F1": 0.8508620689655172, + "Memory": 4420472, + "Model": "Streaming Random Patches", + "Time": 11370.613 + }, + { + "Accuracy": 0.5095301000188714, + "Dataset": "Bananas", + "F1": 0.4529572721532309, + "Memory": 535, + "Model": "[baseline] Last Class", + "Time": 75.535 + }, + { + "Accuracy": 0.8484619739573505, + "Dataset": "Bananas", + "F1": 0.8274231678486997, + "Memory": 43826, + "Model": "k-Nearest Neighbors", + "Time": 990.766 + }, + { + "Accuracy": 0.8935148118494796, + "Dataset": "Phishing", + "F1": 0.8792007266121706, + "Memory": 416562, + "Model": "ADWIN Bagging", + "Time": 1383.466 + }, + { + "Accuracy": 0.8264, + "Dataset": "Phishing", + "F1": 0.8117953165654813, + "Memory": 4803, + "Model": "ALMA", + "Time": 64.185 + }, + { + "Accuracy": 0.8783026421136909, + "Dataset": "Phishing", + "F1": 0.8635547576301617, + "Memory": 293828, + "Model": "AdaBoost", + "Time": 1524.644 + }, + { + "Accuracy": 0.9087269815852682, + "Dataset": "Phishing", + "F1": 0.8969258589511755, + "Memory": 1460226, + "Model": "Adaptive Random Forest", + "Time": 1276.841 + }, + { + "Accuracy": 0.8935148118494796, + "Dataset": "Phishing", + "F1": 0.8792007266121706, + "Memory": 399544, + "Model": "Bagging", + "Time": 1045.51 + }, + { + "Accuracy": 0.8879103282626101, + "Dataset": "Phishing", + "F1": 0.8734177215189873, + "Memory": 132210, + "Model": "Extremely Fast Decision Tree", + "Time": 1240.158 + }, + { + "Accuracy": 0.8670936749399519, + "Dataset": "Phishing", + "F1": 0.8445692883895132, + "Memory": 57312, + "Model": "Hoeffding Adaptive Tree", + "Time": 188.008 
+ }, + { + "Accuracy": 0.8799039231385108, + "Dataset": "Phishing", + "F1": 0.8605947955390334, + "Memory": 43223, + "Model": "Hoeffding Tree", + "Time": 133.039 + }, + { + "Accuracy": 0.8951160928742994, + "Dataset": "Phishing", + "F1": 0.8783658310120707, + "Memory": 1236851, + "Model": "Leveraging Bagging", + "Time": 4427.237 + }, + { + "Accuracy": 0.892, + "Dataset": "Phishing", + "F1": 0.8789237668161435, + "Memory": 5811, + "Model": "Logistic regression", + "Time": 68.847 + }, + { + "Accuracy": 0.8847077662129704, + "Dataset": "Phishing", + "F1": 0.8714285714285714, + "Memory": 12021, + "Model": "Naive Bayes", + "Time": 99.403 + }, + { + "Accuracy": 0.899119295436349, + "Dataset": "Phishing", + "F1": 0.8866906474820143, + "Memory": 1692234, + "Model": "Stacking", + "Time": 2583.277 + }, + { + "Accuracy": 0.8232, + "Dataset": "Phishing", + "F1": 0.8141295206055509, + "Memory": 3911518, + "Model": "Stochastic Gradient Tree", + "Time": 433.726 + }, + { + "Accuracy": 0.9095276220976781, + "Dataset": "Phishing", + "F1": 0.8962350780532599, + "Memory": 2561969, + "Model": "Streaming Random Patches", + "Time": 3259.081 + }, + { + "Accuracy": 0.5156124899919936, + "Dataset": "Phishing", + "F1": 0.4474885844748858, + "Memory": 535, + "Model": "[baseline] Last Class", + "Time": 28.997 + }, + { + "Accuracy": 0.8670936749399519, + "Dataset": "Phishing", + "F1": 0.847985347985348, + "Memory": 74814, + "Model": "k-Nearest Neighbors", + "Time": 465.452 + }, + { + "Accuracy": 0.8903122497998399, + "Dataset": "Phishing", + "F1": 0.8769092542677449, + "Memory": 134873, + "Model": "Voting", + "Time": 747.333 + }, + { + "Accuracy": 0.8301566333270428, + "Dataset": "Bananas", + "F1": 0.7949886104783599, + "Memory": 76293, + "Model": "Voting", + "Time": 1649.353 + } + ], + "Multiclass classification": [ + { + "Accuracy": 0.7721957557384149, + "Dataset": "ImageSegments", + "MacroF1": 0.7587729537473662, + "Memory": 968200, + "MicroF1": 0.772195755738415, + "Model": "ADWIN Bagging", 
+ "Time": 14317.314 + }, + { + "Accuracy": 0.8046773495019489, + "Dataset": "ImageSegments", + "MacroF1": 0.7977695866822913, + "Memory": 950510, + "MicroF1": 0.8046773495019489, + "Model": "AdaBoost", + "Time": 14164.505 + }, + { + "Accuracy": 0.8185361628410567, + "Dataset": "ImageSegments", + "MacroF1": 0.8141343880882678, + "Memory": 1477809, + "MicroF1": 0.8185361628410566, + "Model": "Adaptive Random Forest", + "Time": 5368.26 + }, + { + "Accuracy": 0.7769597228237333, + "Dataset": "ImageSegments", + "MacroF1": 0.7645642360301897, + "Memory": 945619, + "MicroF1": 0.7769597228237333, + "Model": "Bagging", + "Time": 9962.266 + }, + { + "Accuracy": 0.6253789519272412, + "Dataset": "ImageSegments", + "MacroF1": 0.6326079461514587, + "Memory": 887128, + "MicroF1": 0.6253789519272412, + "Model": "Extremely Fast Decision Tree", + "Time": 10523.965 + }, + { + "Accuracy": 0.7743611953226505, + "Dataset": "ImageSegments", + "MacroF1": 0.7631658299307776, + "Memory": 107980, + "MicroF1": 0.7743611953226506, + "Model": "Hoeffding Adaptive Tree", + "Time": 1591.331 + }, + { + "Accuracy": 0.776093546990039, + "Dataset": "ImageSegments", + "MacroF1": 0.7631372452021825, + "Memory": 102435, + "MicroF1": 0.776093546990039, + "Model": "Hoeffding Tree", + "Time": 1079.278 + }, + { + "Accuracy": 0.7782589865742746, + "Dataset": "ImageSegments", + "MacroF1": 0.7660163657276378, + "Memory": 952155, + "MicroF1": 0.7782589865742745, + "Model": "Leveraging Bagging", + "Time": 42925.328 + }, + { + "Accuracy": 0.7319185794716327, + "Dataset": "ImageSegments", + "MacroF1": 0.7304188192194185, + "Memory": 74112, + "MicroF1": 0.7319185794716329, + "Model": "Naive Bayes", + "Time": 564.832 + }, + { + "Accuracy": 0.8527501082719792, + "Dataset": "ImageSegments", + "MacroF1": 0.8518698684396576, + "Memory": 1399545, + "MicroF1": 0.8527501082719792, + "Model": "Stacking", + "Time": 7025.083 + }, + { + "Accuracy": 0.7522737115634474, + "Dataset": "ImageSegments", + "MacroF1": 
0.7487742352030357, + "Memory": 2733193, + "MicroF1": 0.7522737115634474, + "Model": "Streaming Random Patches", + "Time": 26468.948 + }, + { + "Accuracy": 0.14811606756171503, + "Dataset": "ImageSegments", + "MacroF1": 0.1481156678425267, + "Memory": 1436, + "MicroF1": 0.14811606756171503, + "Model": "[baseline] Last Class", + "Time": 56.817 + }, + { + "Accuracy": 0.8198354265915981, + "Dataset": "ImageSegments", + "MacroF1": 0.8160519969700987, + "Memory": 132305, + "MicroF1": 0.8198354265915981, + "Model": "k-Nearest Neighbors", + "Time": 1458.77 + }, + { + "Accuracy": 0.5756239710863436, + "Dataset": "Insects", + "MacroF1": 0.5660846204171648, + "Memory": 3500308, + "MicroF1": 0.5756239710863436, + "Model": "ADWIN Bagging", + "Time": 491606.18 + }, + { + "Accuracy": 0.5635324616345299, + "Dataset": "Insects", + "MacroF1": 0.5546220283668154, + "Memory": 6686768, + "MicroF1": 0.5635324616345299, + "Model": "AdaBoost", + "Time": 482156.812 + }, + { + "Accuracy": 0.7466081329119912, + "Dataset": "Insects", + "MacroF1": 0.7443289389681618, + "Memory": 217444, + "MicroF1": 0.7466081329119912, + "Model": "Adaptive Random Forest", + "Time": 151447.505 + }, + { + "Accuracy": 0.5730694268359604, + "Dataset": "Insects", + "MacroF1": 0.5637706604925724, + "Memory": 5952191, + "MicroF1": 0.5730694268359604, + "Model": "Bagging", + "Time": 338569.006 + }, + { + "Accuracy": 0.6525819819478873, + "Dataset": "Insects", + "MacroF1": 0.6508885643449825, + "Memory": 4000179, + "MicroF1": 0.6525819819478873, + "Model": "Extremely Fast Decision Tree", + "Time": 550728.594 + }, + { + "Accuracy": 0.6133176906919977, + "Dataset": "Insects", + "MacroF1": 0.6061092531305883, + "Memory": 61353, + "MicroF1": 0.6133176906919977, + "Model": "Hoeffding Adaptive Tree", + "Time": 61958.852 + }, + { + "Accuracy": 0.5373058073305959, + "Dataset": "Insects", + "MacroF1": 0.5273644947479657, + "Memory": 625371, + "MicroF1": 0.5373058073305959, + "Model": "Hoeffding Tree", + "Time": 35141.94 + }, + 
{ + "Accuracy": 0.6850341552027551, + "Dataset": "Insects", + "MacroF1": 0.6793184268681459, + "Memory": 1775086, + "MicroF1": 0.6850341552027551, + "Model": "Leveraging Bagging", + "Time": 1219782.581 + }, + { + "Accuracy": 0.5068972694760346, + "Dataset": "Insects", + "MacroF1": 0.4930190627831494, + "Memory": 115338, + "MicroF1": 0.5068972694760346, + "Model": "Naive Bayes", + "Time": 18087.676 + }, + { + "Accuracy": 0.751792911612769, + "Dataset": "Insects", + "MacroF1": 0.7498238877852431, + "Memory": 2983236, + "MicroF1": 0.7517929116127688, + "Model": "Stacking", + "Time": 205589.696 + }, + { + "Accuracy": 0.7378091471606714, + "Dataset": "Insects", + "MacroF1": 0.7359988196057962, + "Memory": 2531311, + "MicroF1": 0.7378091471606714, + "Model": "Streaming Random Patches", + "Time": 1067820.552 + }, + { + "Accuracy": 0.2897610081934642, + "Dataset": "Insects", + "MacroF1": 0.2897627257031321, + "Memory": 1454, + "MicroF1": 0.2897610081934642, + "Model": "[baseline] Last Class", + "Time": 1598.511 + }, + { + "Accuracy": 0.6868317974530248, + "Dataset": "Insects", + "MacroF1": 0.6839236226719291, + "Memory": 227091, + "MicroF1": 0.6868317974530248, + "Model": "k-Nearest Neighbors", + "Time": 56699.578 + }, + { + "Accuracy": 0.7196921417716555, + "Dataset": "Keystroke", + "MacroF1": 0.721416487495366, + "Memory": 9083268, + "MicroF1": 0.7196921417716555, + "Model": "ADWIN Bagging", + "Time": 595706.964 + }, + { + "Accuracy": 0.8415608608265112, + "Dataset": "Keystroke", + "MacroF1": 0.8430678719218747, + "Memory": 38613262, + "MicroF1": 0.841560860826511, + "Model": "AdaBoost", + "Time": 672862.45 + }, + { + "Accuracy": 0.9691651551546644, + "Dataset": "Keystroke", + "MacroF1": 0.9691813964225685, + "Memory": 976550, + "MicroF1": 0.9691651551546644, + "Model": "Adaptive Random Forest", + "Time": 33811.16 + }, + { + "Accuracy": 0.6679739202902103, + "Dataset": "Keystroke", + "MacroF1": 0.6688529665037398, + "Memory": 10833907, + "MicroF1": 0.6679739202902103, + 
"Model": "Bagging", + "Time": 514038.001 + }, + { + "Accuracy": 0.856267464091377, + "Dataset": "Keystroke", + "MacroF1": 0.8560901018523239, + "Memory": 10480902, + "MicroF1": 0.856267464091377, + "Model": "Extremely Fast Decision Tree", + "Time": 537981.641 + }, + { + "Accuracy": 0.729398499926467, + "Dataset": "Keystroke", + "MacroF1": 0.7281138823431088, + "Memory": 163688, + "MicroF1": 0.7293984999264669, + "Model": "Hoeffding Adaptive Tree", + "Time": 53354.762 + }, + { + "Accuracy": 0.6482180499044071, + "Dataset": "Keystroke", + "MacroF1": 0.6472493759146579, + "Memory": 1142454, + "MicroF1": 0.6482180499044071, + "Model": "Hoeffding Tree", + "Time": 51814.5 + }, + { + "Accuracy": 0.9525957154762489, + "Dataset": "Keystroke", + "MacroF1": 0.9526888505135084, + "Memory": 1136395, + "MicroF1": 0.9525957154762489, + "Model": "Leveraging Bagging", + "Time": 163893.914 + }, + { + "Accuracy": 0.6525319868621011, + "Dataset": "Keystroke", + "MacroF1": 0.6515767870317882, + "Memory": 906211, + "MicroF1": 0.6525319868621011, + "Model": "Naive Bayes", + "Time": 26760.67 + }, + { + "Accuracy": 0.9763713907544488, + "Dataset": "Keystroke", + "MacroF1": 0.976366524785322, + "Memory": 3653482, + "MicroF1": 0.9763713907544488, + "Model": "Stacking", + "Time": 128219.644 + }, + { + "Accuracy": 0.9494092847688612, + "Dataset": "Keystroke", + "MacroF1": 0.9494668179502542, + "Memory": 13490486, + "MicroF1": 0.9494092847688612, + "Model": "Streaming Random Patches", + "Time": 228579.135 + }, + { + "Accuracy": 0.9975488994558557, + "Dataset": "Keystroke", + "MacroF1": 0.9975489582566449, + "Memory": 5287, + "MicroF1": 0.9975488994558557, + "Model": "[baseline] Last Class", + "Time": 652.881 + }, + { + "Accuracy": 0.9845090445610079, + "Dataset": "Keystroke", + "MacroF1": 0.984507607652182, + "Memory": 224560, + "MicroF1": 0.9845090445610079, + "Model": "k-Nearest Neighbors", + "Time": 20648.639 + }, + { + "Accuracy": 0.8033780857514076, + "Dataset": "ImageSegments", + 
"MacroF1": 0.7949621132813502, + "Memory": 322233, + "MicroF1": 0.8033780857514076, + "Model": "Voting", + "Time": 3296.878 + }, + { + "Accuracy": 0.6482297954472345, + "Dataset": "Insects", + "MacroF1": 0.6362223941753196, + "Memory": 985469, + "MicroF1": 0.6482297954472345, + "Model": "Voting", + "Time": 118584.905 + }, + { + "Accuracy": 0.793274180106868, + "Dataset": "Keystroke", + "MacroF1": 0.7984237858213096, + "Memory": 2391436, + "MicroF1": 0.793274180106868, + "Model": "Voting", + "Time": 106985.492 + } + ], + "Regression": [ + { + "Dataset": "Friedman7k", + "MAE": 2.266797239461916, + "Memory": 29614147, + "Model": "Adaptive Model Rules", + "R2": 0.6512347492119022, + "RMSE": 2.925569796880849, + "Time": 9688.85 + }, + { + "Dataset": "Friedman7k", + "MAE": 2.0715443914050855, + "Memory": 39110662, + "Model": "Adaptive Random Forest", + "R2": 0.7027053826578435, + "RMSE": 2.701079601225446, + "Time": 10662.744 + }, + { + "Dataset": "Friedman7k", + "MAE": 2.322574690437379, + "Memory": 13987893, + "Model": "Exponentially Weighted Average", + "R2": 0.6347937556123011, + "RMSE": 2.9937322603692325, + "Time": 15044.119 + }, + { + "Dataset": "Friedman7k", + "MAE": 1.9327832857630483, + "Memory": 8273250, + "Model": "Hoeffding Adaptive Tree", + "R2": 0.7386068659475168, + "RMSE": 2.5327419749635567, + "Time": 2502.669 + }, + { + "Dataset": "Friedman7k", + "MAE": 2.020332317083193, + "Memory": 10966826, + "Model": "Hoeffding Tree", + "R2": 0.7192800400505797, + "RMSE": 2.6247050487456343, + "Time": 2345.215 + }, + { + "Dataset": "Friedman7k", + "MAE": 2.23757758349508, + "Memory": 5447, + "Model": "Linear Regression", + "R2": 0.6549674406764808, + "RMSE": 2.9098720954400448, + "Time": 223.047 + }, + { + "Dataset": "Friedman7k", + "MAE": 2.3552263510654856, + "Memory": 5689, + "Model": "Linear Regression with l1 regularization", + "R2": 0.6357810009521413, + "RMSE": 2.989683112289033, + "Time": 254.21 + }, + { + "Dataset": "Friedman7k", + "MAE": 
2.5425853022401457, + "Memory": 5471, + "Model": "Linear Regression with l2 regularization", + "R2": 0.5818589220275194, + "RMSE": 3.203356525801635, + "Time": 232.907 + }, + { + "Dataset": "Friedman7k", + "MAE": 2.146869030920678, + "Memory": 12091, + "Model": "Multi-layer Perceptron", + "R2": 0.6318109228679125, + "RMSE": 3.0059330965111926, + "Time": 3184.292 + }, + { + "Dataset": "Friedman7k", + "MAE": 6.016255319658246, + "Memory": 4983, + "Model": "Passive-Aggressive Regressor, mode 1", + "R2": -1.347670503657537, + "RMSE": 7.590361003168411, + "Time": 255.695 + }, + { + "Dataset": "Friedman7k", + "MAE": 10.12033002328945, + "Memory": 4983, + "Model": "Passive-Aggressive Regressor, mode 2", + "R2": -5.562891455099383, + "RMSE": 12.690872201368217, + "Time": 255.824 + }, + { + "Dataset": "Friedman7k", + "MAE": 3.209324662171133, + "Memory": 20969242, + "Model": "Stochastic Gradient Tree", + "R2": 0.21521551831389918, + "RMSE": 4.388529917508489, + "Time": 2329.924 + }, + { + "Dataset": "Friedman7k", + "MAE": 1.5644434179256224, + "Memory": 68660013, + "Model": "Streaming Random Patches", + "R2": 0.829542788638389, + "RMSE": 2.0452742452942285, + "Time": 27158.253 + }, + { + "Dataset": "Friedman7k", + "MAE": 4.02148215121205, + "Memory": 514, + "Model": "[baseline] Mean predictor", + "R2": -0.0014847455535940135, + "RMSE": 4.957537743224416, + "Time": 98.273 + }, + { + "Dataset": "Friedman7k", + "MAE": 2.8164610154113827, + "Memory": 78397, + "Model": "k-Nearest Neighbors", + "R2": 0.4947712815895522, + "RMSE": 3.5211771463760955, + "Time": 1810.138 + }, + { + "Dataset": "FriedmanGSG10k", + "MAE": 2.3742139385673555, + "Memory": 5969323, + "Model": "Adaptive Model Rules", + "R2": 0.6239955593702382, + "RMSE": 3.057752290417493, + "Time": 12008.389 + }, + { + "Dataset": "FriedmanGSG10k", + "MAE": 2.4022303284252065, + "Memory": 56428418, + "Model": "Adaptive Random Forest", + "R2": 0.604885075330712, + "RMSE": 3.1344946322058447, + "Time": 16220.623 + }, + { + 
"Dataset": "FriedmanGSG10k", + "MAE": 2.3899049048143155, + "Memory": 14295945, + "Model": "Exponentially Weighted Average", + "R2": 0.6208260554075504, + "RMSE": 3.070612803488889, + "Time": 22886.661 + }, + { + "Dataset": "FriedmanGSG10k", + "MAE": 2.10760176283711, + "Memory": 13810638, + "Model": "Hoeffding Adaptive Tree", + "R2": 0.6874032423195169, + "RMSE": 2.7880338847278057, + "Time": 3703.04 + }, + { + "Dataset": "FriedmanGSG10k", + "MAE": 2.1330045182145754, + "Memory": 16366574, + "Model": "Hoeffding Tree", + "R2": 0.6831420762147579, + "RMSE": 2.8069721211121186, + "Time": 3397.315 + }, + { + "Dataset": "FriedmanGSG10k", + "MAE": 2.2897103473066593, + "Memory": 5447, + "Model": "Linear Regression", + "R2": 0.6487787967935958, + "RMSE": 2.9552632714772367, + "Time": 322.822 + }, + { + "Dataset": "FriedmanGSG10k", + "MAE": 2.5037061545634054, + "Memory": 5689, + "Model": "Linear Regression with l1 regularization", + "R2": 0.6003176271746284, + "RMSE": 3.1525596354441774, + "Time": 367.055 + }, + { + "Dataset": "FriedmanGSG10k", + "MAE": 2.5905961448519443, + "Memory": 5471, + "Model": "Linear Regression with l2 regularization", + "R2": 0.5766652320365953, + "RMSE": 3.244500027385861, + "Time": 347.911 + }, + { + "Dataset": "FriedmanGSG10k", + "MAE": 2.337595204250624, + "Memory": 12091, + "Model": "Multi-layer Perceptron", + "R2": 0.6050096867242896, + "RMSE": 3.134000314576819, + "Time": 4545.582 + }, + { + "Dataset": "FriedmanGSG10k", + "MAE": 6.044287566711288, + "Memory": 4983, + "Model": "Passive-Aggressive Regressor, mode 1", + "R2": -1.3389219567464345, + "RMSE": 7.6262963690899035, + "Time": 369.261 + }, + { + "Dataset": "FriedmanGSG10k", + "MAE": 10.115920115372026, + "Memory": 4983, + "Model": "Passive-Aggressive Regressor, mode 2", + "R2": -5.511166055168164, + "RMSE": 12.724338057614846, + "Time": 366.968 + }, + { + "Dataset": "FriedmanGSG10k", + "MAE": 3.720464233481201, + "Memory": 27632354, + "Model": "Stochastic Gradient Tree", + "R2": 
0.042238177662453746, + "RMSE": 4.880165764242603, + "Time": 3333.67 + }, + { + "Dataset": "FriedmanGSG10k", + "MAE": 2.0720985521286743, + "Memory": 98468277, + "Model": "Streaming Random Patches", + "R2": 0.7039148417442476, + "RMSE": 2.7134019468515786, + "Time": 40688.375 + }, + { + "Dataset": "FriedmanGSG10k", + "MAE": 4.056565397244311, + "Memory": 514, + "Model": "[baseline] Mean predictor", + "R2": -0.001062765801945309, + "RMSE": 4.989263800502261, + "Time": 142.935 + }, + { + "Dataset": "FriedmanGSG10k", + "MAE": 2.875975531851087, + "Memory": 78397, + "Model": "k-Nearest Neighbors", + "R2": 0.4857931546887805, + "RMSE": 3.5758125153022364, + "Time": 2586.037 + }, + { + "Dataset": "FriedmanLEA10k", + "MAE": 2.4616387076740853, + "Memory": 8993671, + "Model": "Adaptive Model Rules", + "R2": 0.6220300776226204, + "RMSE": 3.2841672209717414, + "Time": 9648.793 + }, + { + "Dataset": "FriedmanLEA10k", + "MAE": 2.2549031139652906, + "Memory": 56216102, + "Model": "Adaptive Random Forest", + "R2": 0.6706281169960582, + "RMSE": 3.065772347776576, + "Time": 16268.761 + }, + { + "Dataset": "FriedmanLEA10k", + "MAE": 2.5118819600036697, + "Memory": 18419141, + "Model": "Exponentially Weighted Average", + "R2": 0.6071824293628496, + "RMSE": 3.3480512254053356, + "Time": 22065.324 + }, + { + "Dataset": "FriedmanLEA10k", + "MAE": 2.1377401292260303, + "Memory": 12463062, + "Model": "Hoeffding Adaptive Tree", + "R2": 0.7116121602774277, + "RMSE": 2.8686998670541857, + "Time": 3609.104 + }, + { + "Dataset": "FriedmanLEA10k", + "MAE": 2.162783803675369, + "Memory": 12437074, + "Model": "Hoeffding Tree", + "R2": 0.701638258823956, + "RMSE": 2.917885244710447, + "Time": 3416.623 + }, + { + "Dataset": "FriedmanLEA10k", + "MAE": 2.521459611350154, + "Memory": 5447, + "Model": "Linear Regression", + "R2": 0.6037965552608275, + "RMSE": 3.3624494572844275, + "Time": 324.519 + }, + { + "Dataset": "FriedmanLEA10k", + "MAE": 2.640726178462324, + "Memory": 5689, + "Model": "Linear 
Regression with l1 regularization", + "R2": 0.5768193415984122, + "RMSE": 3.4750379066060555, + "Time": 369.549 + }, + { + "Dataset": "FriedmanLEA10k", + "MAE": 2.804691270416271, + "Memory": 5471, + "Model": "Linear Regression with l2 regularization", + "R2": 0.539341677314148, + "RMSE": 3.6256518778679254, + "Time": 339.582 + }, + { + "Dataset": "FriedmanLEA10k", + "MAE": 2.4022330927696007, + "Memory": 12091, + "Model": "Multi-layer Perceptron", + "R2": 0.6097219798337612, + "RMSE": 3.3372111657174504, + "Time": 4561.495 + }, + { + "Dataset": "FriedmanLEA10k", + "MAE": 6.210896502344477, + "Memory": 4983, + "Model": "Passive-Aggressive Regressor, mode 1", + "R2": -1.151193121824162, + "RMSE": 7.834952027785555, + "Time": 372.566 + }, + { + "Dataset": "FriedmanLEA10k", + "MAE": 10.42075346727717, + "Memory": 4983, + "Model": "Passive-Aggressive Regressor, mode 2", + "R2": -5.005052424437335, + "RMSE": 13.090464069276372, + "Time": 376.903 + }, + { + "Dataset": "FriedmanLEA10k", + "MAE": 3.246544597341542, + "Memory": 27020122, + "Model": "Stochastic Gradient Tree", + "R2": 0.29830631914660743, + "RMSE": 4.474766974153515, + "Time": 3255.706 + }, + { + "Dataset": "FriedmanLEA10k", + "MAE": 1.75766823002308, + "Memory": 97572397, + "Model": "Streaming Random Patches", + "R2": 0.7904680325003055, + "RMSE": 2.4452416632013763, + "Time": 40265.714 + }, + { + "Dataset": "FriedmanLEA10k", + "MAE": 4.2928283270418905, + "Memory": 514, + "Model": "[baseline] Mean predictor", + "R2": -0.0009442770949354973, + "RMSE": 5.344432444452069, + "Time": 146.237 + }, + { + "Dataset": "FriedmanLEA10k", + "MAE": 3.1012647780473994, + "Memory": 78397, + "Model": "k-Nearest Neighbors", + "R2": 0.44465419180979704, + "RMSE": 3.9808736209297644, + "Time": 2586.896 + }, + { + "Dataset": "TrumpApproval", + "MAE": 1.0233245255093981, + "Memory": 1932923, + "Model": "Adaptive Model Rules", + "R2": -0.7336119671283983, + "RMSE": 2.252200758055532, + "Time": 350.808 + }, + { + "Dataset": 
"TrumpApproval", + "MAE": 1.1349189151719374, + "Memory": 2410422, + "Model": "Adaptive Random Forest", + "R2": -3.132109663766241, + "RMSE": 3.4770991244853735, + "Time": 1525.138 + }, + { + "Dataset": "TrumpApproval", + "MAE": 40.75458054545452, + "Memory": 2792633, + "Model": "Exponentially Weighted Average", + "R2": -567.6629514867817, + "RMSE": 40.7904615623717, + "Time": 1032.473 + }, + { + "Dataset": "TrumpApproval", + "MAE": 1.140435026253614, + "Memory": 547010, + "Model": "Hoeffding Adaptive Tree", + "R2": -3.7114560390466327, + "RMSE": 3.712861283297428, + "Time": 390.924 + }, + { + "Dataset": "TrumpApproval", + "MAE": 1.0688915883104473, + "Memory": 542990, + "Model": "Hoeffding Tree", + "R2": -3.483359889458873, + "RMSE": 3.621870793254918, + "Time": 374.42 + }, + { + "Dataset": "TrumpApproval", + "MAE": 1.3474338935927912, + "Memory": 5215, + "Model": "Linear Regression", + "R2": -4.81891868547912, + "RMSE": 4.126219207359161, + "Time": 27.848 + }, + { + "Dataset": "TrumpApproval", + "MAE": 1.2151577407875496, + "Memory": 5457, + "Model": "Linear Regression with l1 regularization", + "R2": -4.650904180700232, + "RMSE": 4.0662129936129725, + "Time": 30.516 + }, + { + "Dataset": "TrumpApproval", + "MAE": 1.9978419034436667, + "Memory": 5239, + "Model": "Linear Regression with l2 regularization", + "R2": -5.640263007309195, + "RMSE": 4.407819407941372, + "Time": 27.93 + }, + { + "Dataset": "TrumpApproval", + "MAE": 1.5898274221188347, + "Memory": 11583, + "Model": "Multi-layer Perceptron", + "R2": -8.045077068340989, + "RMSE": 5.144430305753038, + "Time": 389.321 + }, + { + "Dataset": "TrumpApproval", + "MAE": 4.903983530526025, + "Memory": 4651, + "Model": "Passive-Aggressive Regressor, mode 1", + "R2": -14.171985226958702, + "RMSE": 6.662732200837991, + "Time": 31.427 + }, + { + "Dataset": "TrumpApproval", + "MAE": 31.12616606921402, + "Memory": 4651, + "Model": "Passive-Aggressive Regressor, mode 2", + "R2": -403.916378910996, + "RMSE": 
34.42023446743753, + "Time": 30.932 + }, + { + "Dataset": "TrumpApproval", + "MAE": 9.429746533156267, + "Memory": 2116974, + "Model": "Stochastic Gradient Tree", + "R2": -108.97151968967047, + "RMSE": 17.937886241411594, + "Time": 158.125 + }, + { + "Dataset": "TrumpApproval", + "MAE": 1.1714913650238603, + "Memory": 1800701, + "Model": "Streaming Random Patches", + "R2": -1.6041644181648174, + "RMSE": 2.7603576294334173, + "Time": 4328.816 + }, + { + "Dataset": "TrumpApproval", + "MAE": 1.567554989468773, + "Memory": 514, + "Model": "[baseline] Mean predictor", + "R2": -0.6584830635688459, + "RMSE": 2.202858861923226, + "Time": 12.714 + }, + { + "Dataset": "TrumpApproval", + "MAE": 0.49369847918747883, + "Memory": 69121, + "Model": "k-Nearest Neighbors", + "R2": 0.22347386899695654, + "RMSE": 1.5073329387274894, + "Time": 186.799 + } + ] +} \ No newline at end of file diff --git a/benchmarks/run.py b/benchmarks/run.py new file mode 100644 index 0000000000..7783aa4760 --- /dev/null +++ b/benchmarks/run.py @@ -0,0 +1,183 @@ +import datetime +import json +import shelve +import sys + +from river import ( + drift, + dummy, + ensemble, + evaluate, + linear_model, + metrics, + naive_bayes, + neighbors, +) +from river import neural_net as nn +from river import optim, preprocessing, rules, stats, tree + + +def run_track(models, track, benchmarks): + print(track.name) + if track.name in benchmarks: + completed = set((cr["Dataset"], cr["Model"]) for cr in benchmarks[track.name]) + else: + completed = set() + + for model_name, model in models.items(): + print(f"\t{model_name}") + for dataset in track: + data_name = dataset.__class__.__name__ + if (data_name, model_name) in completed: + print(f"\t\t[skipped] {data_name}") + continue + # Get cached data from the shelf + results = benchmarks[track.name] + res = next(track.run(model, dataset, n_checkpoints=1)) + res["Dataset"] = data_name + res["Model"] = model_name + for k, v in res.items(): + if isinstance(v, 
metrics.base.Metric): + res[k] = v.get() + res["Time"] = res["Time"] / datetime.timedelta(milliseconds=1) + res.pop("Step") + results.append(res) + + # Writes updated version to the shelf + benchmarks[track.name] = results + print(f"\t\t[done] {data_name}") + + +tracks = [ + evaluate.BinaryClassificationTrack(), + evaluate.MultiClassClassificationTrack(), + evaluate.RegressionTrack(), +] + +models = { + "Binary classification": { + "Logistic regression": preprocessing.StandardScaler() | linear_model.LogisticRegression(), + "ALMA": preprocessing.StandardScaler() | linear_model.ALMAClassifier(), + "Stochastic Gradient Tree": tree.SGTClassifier(), + }, + "Multiclass classification": { + "Naive Bayes": naive_bayes.GaussianNB(), + "Hoeffding Tree": tree.HoeffdingTreeClassifier(), + "Hoeffding Adaptive Tree": tree.HoeffdingAdaptiveTreeClassifier(seed=42), + "Extremely Fast Decision Tree": tree.ExtremelyFastDecisionTreeClassifier(), + "Adaptive Random Forest": ensemble.AdaptiveRandomForestClassifier(seed=42), + "Streaming Random Patches": ensemble.SRPClassifier(), + "k-Nearest Neighbors": preprocessing.StandardScaler() + | neighbors.KNNClassifier(window_size=100), + "ADWIN Bagging": ensemble.ADWINBaggingClassifier( + tree.HoeffdingTreeClassifier(), seed=42 + ), + "AdaBoost": ensemble.AdaBoostClassifier( + tree.HoeffdingTreeClassifier(), seed=42 + ), + "Bagging": ensemble.BaggingClassifier(tree.HoeffdingTreeClassifier(), seed=42), + "Leveraging Bagging": ensemble.LeveragingBaggingClassifier( + tree.HoeffdingTreeClassifier(), seed=42 + ), + "Stacking": ensemble.StackingClassifier( + [ + preprocessing.StandardScaler() | linear_model.SoftmaxRegression(), + naive_bayes.GaussianNB(), + tree.HoeffdingTreeClassifier(), + preprocessing.StandardScaler() + | neighbors.KNNClassifier(window_size=100), + ], + meta_classifier=ensemble.AdaptiveRandomForestClassifier(seed=42), + ), + "Voting": ensemble.VotingClassifier( + [ + preprocessing.StandardScaler() | 
linear_model.SoftmaxRegression(), + naive_bayes.GaussianNB(), + tree.HoeffdingTreeClassifier(), + preprocessing.StandardScaler() + | neighbors.KNNClassifier(window_size=100), + ] + ), + # Baseline + "[baseline] Last Class": dummy.NoChangeClassifier(), + }, + "Regression": { + "Linear Regression": preprocessing.StandardScaler() + | linear_model.LinearRegression(), + "Linear Regression with l1 regularization": preprocessing.StandardScaler() + | linear_model.LinearRegression(l1=1.0), + "Linear Regression with l2 regularization": preprocessing.StandardScaler() + | linear_model.LinearRegression(l2=1.0), + "Passive-Aggressive Regressor, mode 1": preprocessing.StandardScaler() + | linear_model.PARegressor(mode=1), + "Passive-Aggressive Regressor, mode 2": preprocessing.StandardScaler() + | linear_model.PARegressor(mode=2), + "k-Nearest Neighbors": preprocessing.StandardScaler() + | neighbors.KNNRegressor(window_size=100), + "Hoeffding Tree": preprocessing.StandardScaler() + | tree.HoeffdingAdaptiveTreeRegressor(), + "Hoeffding Adaptive Tree": preprocessing.StandardScaler() + | tree.HoeffdingAdaptiveTreeRegressor(seed=42), + "Stochastic Gradient Tree": tree.SGTRegressor(), + "Adaptive Random Forest": preprocessing.StandardScaler() + | ensemble.AdaptiveRandomForestRegressor(seed=42), + "Adaptive Model Rules": preprocessing.StandardScaler() + | rules.AMRules(drift_detector=drift.ADWIN()), + "Streaming Random Patches": preprocessing.StandardScaler() + | ensemble.SRPRegressor(seed=42), + "Exponentially Weighted Average": preprocessing.StandardScaler() + | ensemble.EWARegressor( + models=[ + linear_model.LinearRegression(), + tree.HoeffdingAdaptiveTreeRegressor(), + neighbors.KNNRegressor(window_size=100), + rules.AMRules(), + ], + ), + "Multi-layer Perceptron": preprocessing.StandardScaler() + | nn.MLPRegressor( + hidden_dims=(5,), + activations=( + nn.activations.ReLU, + nn.activations.ReLU, + nn.activations.Identity, + ), + optimizer=optim.SGD(1e-3), + seed=42, + ), + # 
Baseline + "[baseline] Mean predictor": dummy.StatisticRegressor(stats.Mean()), + } +} + + +if __name__ == "__main__": + if len(sys.argv) > 1 and sys.argv[1] == "force": + benchmarks = shelve.open("results", flag="n") + else: + benchmarks = shelve.open("results", flag="c") + + models["Binary classification"].update(models["Multiclass classification"]) + details = {} + + for track_name, track in tracks.items(): + run_track( + models=models[track_name], track=track, benchmarks=benchmarks + ) + details[track_name] = { + "Dataset": {}, + "Model": {} + } + for dataset in bin_class_track: + details[track_name]["Dataset"][dataset.__class__.__name__] = repr(dataset) + for model_n, model in bin_class_models.items(): + details[track_name]["Model"][model_n] = repr(model) + + # Close the shelf + benchmarks.close() + + with open('results.json', 'w') as f: + json.dump(benchmarks, f, sort_keys=True, indent=4) + + with open('details.json', 'w') as f: + json.dump(details, f, sort_keys=True, indent=4) diff --git a/docs/.pages b/docs/.pages index 16f4ba50fc..8e2b17193e 100644 --- a/docs/.pages +++ b/docs/.pages @@ -5,3 +5,4 @@ nav: - api - faq - releases + - benchmarks diff --git a/docs/benchmarks.html b/docs/benchmarks.html new file mode 100644 index 0000000000..83f765868a --- /dev/null +++ b/docs/benchmarks.html @@ -0,0 +1,188 @@ + + + + +

Online machine learning benchmarks

+
Python implementation: CPython
+Python version       : 3.9.12
+IPython version      : 7.30.1
+
+river       : 0.10.1
+numpy       : 1.22.3
+scikit-learn: 1.1.0
+pandas      : 1.4.1
+scipy       : 1.8.0
+
+Compiler    : Clang 12.0.1 
+OS          : Darwin
+Release     : 21.3.0
+Machine     : x86_64
+Processor   : i386
+CPU cores   : 8
+Architecture: 64bit
+
+ +

Binary classification

+

Datasets

+

Results

+
+ +

Single-target Regression

+

Datasets

+

Results

+
+ + + diff --git a/docs/benchmarks/.pages b/docs/benchmarks/.pages new file mode 100644 index 0000000000..f9c4795e96 --- /dev/null +++ b/docs/benchmarks/.pages @@ -0,0 +1 @@ +title: Benchmarks 📊 diff --git a/docs/benchmarks/index.md b/docs/benchmarks/index.md new file mode 100644 index 0000000000..00aa087d17 --- /dev/null +++ b/docs/benchmarks/index.md @@ -0,0 +1,2982 @@ +--- +hide: +- navigation +--- + + +# Benchmarks + +## Environment + +
Python implementation: CPython
+Python version       : 3.9.12
+IPython version      : 7.30.1
+
+river       : 0.10.1
+numpy       : 1.22.3
+scikit-learn: 1.1.0
+pandas      : 1.4.1
+scipy       : 1.8.0
+
+Compiler    : Clang 12.0.1 
+OS          : Darwin
+Release     : 21.3.0
+Machine     : x86_64
+Processor   : i386
+CPU cores   : 8
+Architecture: 64bit
+
+ +
+ + +
+ + + +## Binary classification + +### Results + +
+ +### Datasets + +
+ Bananas +
Bananas dataset.
+
+An artificial dataset where instances belong to several clusters with a banana shape.
+There are two attributes that correspond to the x and y axis, respectively.
+
+    Name  Bananas                                                   
+    Task  Binary classification                                     
+ Samples  5,300                                                     
+Features  2                                                         
+  Sparse  False                                                     
+    Path  /Users/mastelini/Documents/river/river/datasets/banana.zip
+
+ +
+ Phishing +
Phishing websites.
+
+This dataset contains features from web pages that are classified as phishing or not.
+
+    Name  Phishing                                                       
+    Task  Binary classification                                          
+ Samples  1,250                                                          
+Features  9                                                              
+  Sparse  False                                                          
+    Path  /Users/mastelini/Documents/river/river/datasets/phishing.csv.gz
+
+ +### Models + +
+ ADWIN Bagging +
[HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+)]
+
+ +
+ ALMA +
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  ALMAClassifier (
+    p=2
+    alpha=0.9
+    B=1.111111
+    C=1.414214
+  )
+)
+
+ +
+ AdaBoost +
[HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+)]
+
+ +
+ Adaptive Random Forest +
[]
+
+ +
+ Bagging +
[HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+)]
+
+ +
+ Extremely Fast Decision Tree +
ExtremelyFastDecisionTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  min_samples_reevaluate=20
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+)
+
+ +
+ Hoeffding Adaptive Tree +
HoeffdingAdaptiveTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  bootstrap_sampling=True
+  drift_window_threshold=300
+  adwin_confidence=0.002
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+  seed=42
+)
+
+ +
+ Hoeffding Tree +
HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+)
+
+ +
+ Leveraging Bagging +
[HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+)]
+
+ +
+ Logistic regression +
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  LogisticRegression (
+    optimizer=SGD (
+      lr=Constant (
+        learning_rate=0.01
+      )
+    )
+    loss=Log (
+      weight_pos=1.
+      weight_neg=1.
+    )
+    l2=0.
+    l1=0.
+    intercept_init=0.
+    intercept_lr=Constant (
+      learning_rate=0.01
+    )
+    clip_gradient=1e+12
+    initializer=Zeros ()
+  )
+)
+
+ +
+ Naive Bayes +
GaussianNB ()
+
+ +
+ Stacking +
[Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  SoftmaxRegression (
+    optimizer=SGD (
+      lr=Constant (
+        learning_rate=0.01
+      )
+    )
+    loss=CrossEntropy (
+      class_weight={}
+    )
+    l2=0
+  )
+), GaussianNB (), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  KNNClassifier (
+    n_neighbors=5
+    window_size=100
+    min_distance_keep=0.
+    weighted=True
+    cleanup_every=0
+    distance_func=None
+    softmax=False
+  )
+)]
+
+ +
+ Stochastic Gradient Tree +
SGTClassifier (
+  delta=1e-07
+  grace_period=200
+  init_pred=0.
+  max_depth=inf
+  lambda_value=0.1
+  gamma=1.
+  nominal_attributes=[]
+  feature_quantizer=StaticQuantizer (
+    n_bins=64
+    warm_start=100
+    buckets=None
+  )
+)
+
+ +
+ Streaming Random Patches +
SRPClassifier (
+  model=HoeffdingTreeClassifier (
+    grace_period=50
+    max_depth=inf
+    split_criterion="info_gain"
+    split_confidence=0.01
+    tie_threshold=0.05
+    leaf_prediction="nba"
+    nb_threshold=0
+    nominal_attributes=None
+    splitter=GaussianSplitter (
+      n_splits=10
+    )
+    binary_split=False
+    max_size=100.
+    memory_estimate_period=1000000
+    stop_mem_management=False
+    remove_poor_attrs=False
+    merit_preprune=True
+  )
+  n_models=10
+  subspace_size=0.6
+  training_method="patches"
+  lam=6
+  drift_detector=ADWIN (
+    delta=1e-05
+  )
+  warning_detector=ADWIN (
+    delta=0.0001
+  )
+  disable_detector="off"
+  disable_weighted_vote=False
+  seed=None
+  metric=Accuracy (
+    cm=ConfusionMatrix (
+      classes=[]
+    )
+  )
+)
+
+ +
+ Voting +
VotingClassifier (
+  models=[Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  SoftmaxRegression (
+    optimizer=SGD (
+      lr=Constant (
+        learning_rate=0.01
+      )
+    )
+    loss=CrossEntropy (
+      class_weight={}
+    )
+    l2=0
+  )
+), GaussianNB (), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  KNNClassifier (
+    n_neighbors=5
+    window_size=100
+    min_distance_keep=0.
+    weighted=True
+    cleanup_every=0
+    distance_func=None
+    softmax=False
+  )
+)]
+  use_probabilities=True
+)
+
+ +
+ [baseline] Last Class +
NoChangeClassifier ()
+
+ +
+ k-Nearest Neighbors +
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  KNNClassifier (
+    n_neighbors=5
+    window_size=100
+    min_distance_keep=0.
+    weighted=True
+    cleanup_every=0
+    distance_func=None
+    softmax=False
+  )
+)
+
+ + + +## Multiclass classification + +### Results + +
+ +### Datasets + +
+ ImageSegments +
Image segments classification.
+
+This dataset contains features describing image segments, each belonging to one of 7 classes:
+brickface, sky, foliage, cement, window, path, and grass.
+
+    Name  ImageSegments                                                  
+    Task  Multi-class classification                                     
+ Samples  2,310                                                          
+Features  18                                                             
+  Sparse  False                                                          
+    Path  /Users/mastelini/Documents/river/river/datasets/segment.csv.zip
+
+ +
+ Insects +
Insects dataset.
+
+This dataset has different variants, which are:
+
+- abrupt_balanced
+- abrupt_imbalanced
+- gradual_balanced
+- gradual_imbalanced
+- incremental-abrupt_balanced
+- incremental-abrupt_imbalanced
+- incremental-reoccurring_balanced
+- incremental-reoccurring_imbalanced
+- incremental_balanced
+- incremental_imbalanced
+- out-of-control
+
+The number of samples and the difficulty change from one variant to another. The number of
+classes is always the same (6), except for the last variant (24).
+
+      Name  Insects                                                                                 
+      Task  Multi-class classification                                                              
+   Samples  52,848                                                                                  
+  Features  33                                                                                      
+   Classes  6                                                                                       
+    Sparse  False                                                                                   
+      Path  /Users/mastelini/river_data/Insects/INSECTS-abrupt_balanced_norm.arff                   
+       URL  http://sites.labic.icmc.usp.br/vsouza/repository/creme/INSECTS-abrupt_balanced_norm.arff
+      Size  15.66 MB                                                                                
+Downloaded  True                                                                                    
+   Variant  abrupt_balanced                                                                         
+
+Parameters
+----------
+    variant
+        Indicates which variant of the dataset to load.
+
+ +
+ Keystroke +
CMU keystroke dataset.
+
+Users are tasked with typing in a password. The task is to determine which user is typing in the
+password.
+
+The only difference from the original dataset is that the "sessionIndex" and "rep" attributes
+have been dropped.
+
+      Name  Keystroke                                                       
+      Task  Multi-class classification                                      
+   Samples  20,400                                                          
+  Features  31                                                              
+    Sparse  False                                                           
+      Path  /Users/mastelini/river_data/Keystroke/DSL-StrongPasswordData.csv
+       URL  http://www.cs.cmu.edu/~keystroke/DSL-StrongPasswordData.csv     
+      Size  4.45 MB                                                         
+Downloaded  True                                                            
+
+ +### Models + +
+ ADWIN Bagging +
[HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+)]
+
+ +
+ AdaBoost +
[HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+)]
+
+ +
+ Adaptive Random Forest +
[]
+
+ +
+ Bagging +
[HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+)]
+
+ +
+ Extremely Fast Decision Tree +
ExtremelyFastDecisionTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  min_samples_reevaluate=20
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+)
+
+ +
+ Hoeffding Adaptive Tree +
HoeffdingAdaptiveTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  bootstrap_sampling=True
+  drift_window_threshold=300
+  adwin_confidence=0.002
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+  seed=42
+)
+
+ +
+ Hoeffding Tree +
HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+)
+
+ +
+ Leveraging Bagging +
[HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+)]
+
+ +
+ Naive Bayes +
GaussianNB ()
+
+ +
+ Stacking +
[Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  SoftmaxRegression (
+    optimizer=SGD (
+      lr=Constant (
+        learning_rate=0.01
+      )
+    )
+    loss=CrossEntropy (
+      class_weight={}
+    )
+    l2=0
+  )
+), GaussianNB (), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  KNNClassifier (
+    n_neighbors=5
+    window_size=100
+    min_distance_keep=0.
+    weighted=True
+    cleanup_every=0
+    distance_func=None
+    softmax=False
+  )
+)]
+
+ +
+ Streaming Random Patches +
SRPClassifier (
+  model=HoeffdingTreeClassifier (
+    grace_period=50
+    max_depth=inf
+    split_criterion="info_gain"
+    split_confidence=0.01
+    tie_threshold=0.05
+    leaf_prediction="nba"
+    nb_threshold=0
+    nominal_attributes=None
+    splitter=GaussianSplitter (
+      n_splits=10
+    )
+    binary_split=False
+    max_size=100.
+    memory_estimate_period=1000000
+    stop_mem_management=False
+    remove_poor_attrs=False
+    merit_preprune=True
+  )
+  n_models=10
+  subspace_size=0.6
+  training_method="patches"
+  lam=6
+  drift_detector=ADWIN (
+    delta=1e-05
+  )
+  warning_detector=ADWIN (
+    delta=0.0001
+  )
+  disable_detector="off"
+  disable_weighted_vote=False
+  seed=None
+  metric=Accuracy (
+    cm=ConfusionMatrix (
+      classes=[]
+    )
+  )
+)
+
+ +
+ Voting +
VotingClassifier (
+  models=[Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  SoftmaxRegression (
+    optimizer=SGD (
+      lr=Constant (
+        learning_rate=0.01
+      )
+    )
+    loss=CrossEntropy (
+      class_weight={}
+    )
+    l2=0
+  )
+), GaussianNB (), HoeffdingTreeClassifier (
+  grace_period=200
+  max_depth=inf
+  split_criterion="info_gain"
+  split_confidence=1e-07
+  tie_threshold=0.05
+  leaf_prediction="nba"
+  nb_threshold=0
+  nominal_attributes=None
+  splitter=GaussianSplitter (
+    n_splits=10
+  )
+  binary_split=False
+  max_size=100.
+  memory_estimate_period=1000000
+  stop_mem_management=False
+  remove_poor_attrs=False
+  merit_preprune=True
+), Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  KNNClassifier (
+    n_neighbors=5
+    window_size=100
+    min_distance_keep=0.
+    weighted=True
+    cleanup_every=0
+    distance_func=None
+    softmax=False
+  )
+)]
+  use_probabilities=True
+)
+
+ +
+ [baseline] Last Class +
NoChangeClassifier ()
+
+ +
+ k-Nearest Neighbors +
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  KNNClassifier (
+    n_neighbors=5
+    window_size=100
+    min_distance_keep=0.
+    weighted=True
+    cleanup_every=0
+    distance_func=None
+    softmax=False
+  )
+)
+
+ + + +## Regression + +### Results + +
+ +### Datasets + +
+ Friedman7k +
Sample from the stationary version of the Friedman dataset.
+
+This sample contains 7k instances sampled from the Friedman generator.
+
+    Name  Friedman7k
+    Task  Regression
+ Samples  7,000     
+Features  10        
+  Sparse  False     
+
+ +
+ FriedmanGSG10k +
Sample from the FriedmanGSG generator.
+
+This sample contains 10k instances sampled from the Friedman generator and presents
+global and slow gradual concept drifts that affect the data and happen after
+3.5k and 7k instances. The transition window between different concepts has a length of
+1k instances.
+
+    Name  FriedmanGSG10k
+    Task  Regression    
+ Samples  10,000        
+Features  10            
+  Sparse  False         
+
+ +
+ FriedmanLEA10k +
Sample from the FriedmanLEA generator.
+
+This sample contains 10k instances sampled from the Friedman generator and presents
+local-expanding abrupt concept drifts that locally affect the data and happen after
+2k, 5k, and 8k instances.
+
+    Name  FriedmanLEA10k
+    Task  Regression    
+ Samples  10,000        
+Features  10            
+  Sparse  False         
+
+ +
+ TrumpApproval +
Donald Trump approval ratings.
+
+This dataset was obtained by reshaping the data used by FiveThirtyEight for analyzing Donald
+Trump's approval ratings. It contains 5 features, which are approval ratings collected by
+5 polling agencies. The target is the approval rating from FiveThirtyEight's model. The goal of
+this task is to see if we can reproduce FiveThirtyEight's model.
+
+    Name  TrumpApproval                                                        
+    Task  Regression                                                           
+ Samples  1,001                                                                
+Features  6                                                                    
+  Sparse  False                                                                
+    Path  /Users/mastelini/Documents/river/river/datasets/trump_approval.csv.gz
+
+ +### Models + +
+ Adaptive Model Rules +
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  AMRules (
+    n_min=200
+    delta=1e-07
+    tau=0.05
+    pred_type="adaptive"
+    pred_model=LinearRegression (
+      optimizer=SGD (
+        lr=Constant (
+          learning_rate=0.01
+        )
+      )
+      loss=Squared ()
+      l2=0.
+      l1=0.
+      intercept_init=0.
+      intercept_lr=Constant (
+        learning_rate=0.01
+      )
+      clip_gradient=1e+12
+      initializer=Zeros ()
+    )
+    splitter=EBSTSplitter ()
+    drift_detector=ADWIN (
+      delta=0.002
+    )
+    alpha=0.99
+    anomaly_threshold=-0.75
+    m_min=30
+    ordered_rule_set=True
+    min_samples_split=5
+  )
+)
+
+ +
+ Adaptive Random Forest +
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  []
+)
+
+ +
+ Exponentially Weighted Average +
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  [LinearRegression (
+    optimizer=SGD (
+      lr=Constant (
+        learning_rate=0.01
+      )
+    )
+    loss=Squared ()
+    l2=0.
+    l1=0.
+    intercept_init=0.
+    intercept_lr=Constant (
+      learning_rate=0.01
+    )
+    clip_gradient=1e+12
+    initializer=Zeros ()
+  ), HoeffdingAdaptiveTreeRegressor (
+    grace_period=200
+    max_depth=inf
+    split_confidence=1e-07
+    tie_threshold=0.05
+    leaf_prediction="model"
+    leaf_model=LinearRegression (
+      optimizer=SGD (
+        lr=Constant (
+          learning_rate=0.01
+        )
+      )
+      loss=Squared ()
+      l2=0.
+      l1=0.
+      intercept_init=0.
+      intercept_lr=Constant (
+        learning_rate=0.01
+      )
+      clip_gradient=1e+12
+      initializer=Zeros ()
+    )
+    model_selector_decay=0.95
+    nominal_attributes=None
+    splitter=EBSTSplitter ()
+    min_samples_split=5
+    bootstrap_sampling=True
+    drift_window_threshold=300
+    adwin_confidence=0.002
+    binary_split=False
+    max_size=500.
+    memory_estimate_period=1000000
+    stop_mem_management=False
+    remove_poor_attrs=False
+    merit_preprune=True
+    seed=None
+  ), KNNRegressor (
+    n_neighbors=5
+    window_size=100
+    aggregation_method="mean"
+    min_distance_keep=0.
+    distance_func=None
+  ), AMRules (
+    n_min=200
+    delta=1e-07
+    tau=0.05
+    pred_type="adaptive"
+    pred_model=LinearRegression (
+      optimizer=SGD (
+        lr=Constant (
+          learning_rate=0.01
+        )
+      )
+      loss=Squared ()
+      l2=0.
+      l1=0.
+      intercept_init=0.
+      intercept_lr=Constant (
+        learning_rate=0.01
+      )
+      clip_gradient=1e+12
+      initializer=Zeros ()
+    )
+    splitter=EBSTSplitter ()
+    drift_detector=PageHinkley (
+      min_instances=30
+      delta=0.005
+      threshold=50
+      alpha=0.9999
+    )
+    alpha=0.99
+    anomaly_threshold=-0.75
+    m_min=30
+    ordered_rule_set=True
+    min_samples_split=5
+  )]
+)
+
+ +
+ Hoeffding Adaptive Tree +
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  HoeffdingAdaptiveTreeRegressor (
+    grace_period=200
+    max_depth=inf
+    split_confidence=1e-07
+    tie_threshold=0.05
+    leaf_prediction="model"
+    leaf_model=LinearRegression (
+      optimizer=SGD (
+        lr=Constant (
+          learning_rate=0.01
+        )
+      )
+      loss=Squared ()
+      l2=0.
+      l1=0.
+      intercept_init=0.
+      intercept_lr=Constant (
+        learning_rate=0.01
+      )
+      clip_gradient=1e+12
+      initializer=Zeros ()
+    )
+    model_selector_decay=0.95
+    nominal_attributes=None
+    splitter=EBSTSplitter ()
+    min_samples_split=5
+    bootstrap_sampling=True
+    drift_window_threshold=300
+    adwin_confidence=0.002
+    binary_split=False
+    max_size=500.
+    memory_estimate_period=1000000
+    stop_mem_management=False
+    remove_poor_attrs=False
+    merit_preprune=True
+    seed=42
+  )
+)
+
+ +
+ Hoeffding Tree +
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  HoeffdingAdaptiveTreeRegressor (
+    grace_period=200
+    max_depth=inf
+    split_confidence=1e-07
+    tie_threshold=0.05
+    leaf_prediction="model"
+    leaf_model=LinearRegression (
+      optimizer=SGD (
+        lr=Constant (
+          learning_rate=0.01
+        )
+      )
+      loss=Squared ()
+      l2=0.
+      l1=0.
+      intercept_init=0.
+      intercept_lr=Constant (
+        learning_rate=0.01
+      )
+      clip_gradient=1e+12
+      initializer=Zeros ()
+    )
+    model_selector_decay=0.95
+    nominal_attributes=None
+    splitter=EBSTSplitter ()
+    min_samples_split=5
+    bootstrap_sampling=True
+    drift_window_threshold=300
+    adwin_confidence=0.002
+    binary_split=False
+    max_size=500.
+    memory_estimate_period=1000000
+    stop_mem_management=False
+    remove_poor_attrs=False
+    merit_preprune=True
+    seed=None
+  )
+)
+
+ +
+ Linear Regression +
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  LinearRegression (
+    optimizer=SGD (
+      lr=Constant (
+        learning_rate=0.01
+      )
+    )
+    loss=Squared ()
+    l2=0.
+    l1=0.
+    intercept_init=0.
+    intercept_lr=Constant (
+      learning_rate=0.01
+    )
+    clip_gradient=1e+12
+    initializer=Zeros ()
+  )
+)
+
+ +
+ Linear Regression with l1 regularization +
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  LinearRegression (
+    optimizer=SGD (
+      lr=Constant (
+        learning_rate=0.01
+      )
+    )
+    loss=Squared ()
+    l2=0.
+    l1=1.
+    intercept_init=0.
+    intercept_lr=Constant (
+      learning_rate=0.01
+    )
+    clip_gradient=1e+12
+    initializer=Zeros ()
+  )
+)
+
+ +
+ Linear Regression with l2 regularization +
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  LinearRegression (
+    optimizer=SGD (
+      lr=Constant (
+        learning_rate=0.01
+      )
+    )
+    loss=Squared ()
+    l2=1.
+    l1=0.
+    intercept_init=0.
+    intercept_lr=Constant (
+      learning_rate=0.01
+    )
+    clip_gradient=1e+12
+    initializer=Zeros ()
+  )
+)
+
+ +
+ Multi-layer Perceptron +
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  MLPRegressor (
+    hidden_dims=(5,)
+    activations=(<class 'river.neural_net.activations.ReLU'>, <class 'river.neural_net.activations.ReLU'>, <class 'river.neural_net.activations.Identity'>)
+    loss=Squared ()
+    optimizer=SGD (
+      lr=Constant (
+        learning_rate=0.001
+      )
+    )
+    seed=42
+  )
+)
+
+ +
+ Passive-Aggressive Regressor, mode 1 +
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  PARegressor (
+    C=1.
+    mode=1
+    eps=0.1
+    learn_intercept=True
+  )
+)
+
+ +
+ Passive-Aggressive Regressor, mode 2 +
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  PARegressor (
+    C=1.
+    mode=2
+    eps=0.1
+    learn_intercept=True
+  )
+)
+
+ +
+ Stochastic Gradient Tree +
SGTRegressor (
+  delta=1e-07
+  grace_period=200
+  init_pred=0.
+  max_depth=inf
+  lambda_value=0.1
+  gamma=1.
+  nominal_attributes=[]
+  feature_quantizer=StaticQuantizer (
+    n_bins=64
+    warm_start=100
+    buckets=None
+  )
+)
+
+ +
+ Streaming Random Patches +
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  SRPRegressor (
+    model=HoeffdingTreeRegressor (
+      grace_period=50
+      max_depth=inf
+      split_confidence=0.01
+      tie_threshold=0.05
+      leaf_prediction="model"
+      leaf_model=LinearRegression (
+        optimizer=SGD (
+          lr=Constant (
+            learning_rate=0.01
+          )
+        )
+        loss=Squared ()
+        l2=0.
+        l1=0.
+        intercept_init=0.
+        intercept_lr=Constant (
+          learning_rate=0.01
+        )
+        clip_gradient=1e+12
+        initializer=Zeros ()
+      )
+      model_selector_decay=0.95
+      nominal_attributes=None
+      splitter=EBSTSplitter ()
+      min_samples_split=5
+      binary_split=False
+      max_size=500.
+      memory_estimate_period=1000000
+      stop_mem_management=False
+      remove_poor_attrs=False
+      merit_preprune=True
+    )
+    n_models=10
+    subspace_size=0.6
+    training_method="patches"
+    lam=6
+    drift_detector=ADWIN (
+      delta=1e-05
+    )
+    warning_detector=ADWIN (
+      delta=0.0001
+    )
+    disable_detector="off"
+    disable_weighted_vote=True
+    drift_detection_criteria="error"
+    aggregation_method="mean"
+    seed=42
+    metric=MAE ()
+  )
+)
+
+ +
+ [baseline] Mean predictor +
StatisticRegressor (
+  statistic=Mean ()
+)
+
+ +
+ k-Nearest Neighbors +
Pipeline (
+  StandardScaler (
+    with_std=True
+  ),
+  KNNRegressor (
+    n_neighbors=5
+    window_size=100
+    aggregation_method="mean"
+    min_distance_keep=0.
+    distance_func=None
+  )
+)
+
+ + + diff --git a/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_12_0.svg b/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_12_0.svg index 3d561b17a6..2761002497 100644 --- a/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_12_0.svg +++ b/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_12_0.svg @@ -1,7 +1,7 @@ - 0 - + empty_server_form_handler @@ -26,22 +26,22 @@ 0->1 - - + + ≤ 0.5455 2 - + popup_window 0->2 - - -> 0.5455 + + +> 0.5455 @@ -55,9 +55,9 @@ 2->3 - - -≤ 0.0909 + + +≤ 0.0909 @@ -71,9 +71,9 @@ 2->4 - - -> 0.0909 + + +> 0.0909 diff --git a/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_23_0.png b/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_23_0.png index 98ef1e0b11..981054b9ac 100644 Binary files a/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_23_0.png and b/docs/recipes/on-hoeffding-trees_files/on-hoeffding-trees_23_0.png differ diff --git a/mkdocs.yml b/mkdocs.yml index 3cfe3dca28..58eacc9ff3 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -28,7 +28,6 @@ theme: features: - navigation.tabs - navigation.instant - - navigation.expand - navigation.indexes # Extras diff --git a/render.py b/render.py deleted file mode 100644 index b9ab2eb5e5..0000000000 --- a/render.py +++ /dev/null @@ -1,27 +0,0 @@ -import json -from dominate.tags import * - -with open('results.json') as f: - benchmarks = json.load(f) - -_html = html() -_html.add(link(href="https://unpkg.com/tabulator-tables@5.2.6/dist/css/tabulator.min.css", rel="stylesheet")) -_html.add(script(type="text/javascript", src="https://unpkg.com/tabulator-tables@5.2.6/dist/js/tabulator.min.js")) - -_body = _html.add(body()) - -for track_name, results in benchmarks.items(): - _body.add(h2(track_name)) - _body.add(div(id=f"results")) - _body.add(script(f""" - var results = {results} - - var table = new Tabulator('#results', {{ - data: results, - layout: 'fitColumns', - columns: Object.keys(results[0]).map(x => ({{title: x, field: x}})) - }}) - """)) 
- -with open('benchmarks.html', 'w') as f: - print(_html, file=f) diff --git a/results.json b/results.json deleted file mode 100644 index 4fe17bce7c..0000000000 --- a/results.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "Binary classification": [ - { - "Accuracy": 0.892, - "Dataset": "Phishing", - "F1": 0.8789237668161435, - "Memory": 5811, - "Model": "Logisitic regression", - "Step": 1250, - "Time": 0 - }, - { - "Accuracy": 0.5373584905660377, - "Dataset": "Bananas", - "F1": 0.22109275730622616, - "Memory": 4423, - "Model": "Logisitic regression", - "Step": 5300, - "Time": 0 - } - ] -} \ No newline at end of file diff --git a/river/ensemble/streaming_random_patches.py b/river/ensemble/streaming_random_patches.py index 7d7898d523..a1f1c5233d 100644 --- a/river/ensemble/streaming_random_patches.py +++ b/river/ensemble/streaming_random_patches.py @@ -115,7 +115,7 @@ def learn_one(self, x: dict, y: base.typing.Target, **kwargs): def _generate_subspaces(self, features: list): n_features = len(features) - self._subspaces = [None] * n_features + self._subspaces = [None] * self.n_models if self.training_method != self._TRAIN_RESAMPLING: # Set subspaces - This only applies to subspaces and random patches options @@ -146,7 +146,7 @@ def _generate_subspaces(self, features: list): # k is negative, calculate M - k k = n_features + k - # Generate subspaces. The subspaces is a 2D array of shape + # 2. Generate subspaces. The subspaces is a 2D array of shape # (n_estimators, k) where each row contains the k-feature indices # to be used by each estimator. 
if k != 0 and k < n_features: diff --git a/river/ensemble/voting.py b/river/ensemble/voting.py index 2ec6c1e682..44de49d73e 100644 --- a/river/ensemble/voting.py +++ b/river/ensemble/voting.py @@ -71,7 +71,7 @@ def predict_one(self, x): agg = collections.Counter() for vote in votes: agg.update(vote) - return agg.most_common(1)[0][0] + return agg.most_common(1)[0][0] if agg else None @classmethod def _unit_test_params(cls): diff --git a/river/evaluate/__init__.py b/river/evaluate/__init__.py index 8317b3478a..7576b74fab 100644 --- a/river/evaluate/__init__.py +++ b/river/evaluate/__init__.py @@ -13,12 +13,19 @@ """ from .progressive_validation import iter_progressive_val_score, progressive_val_score -from .tracks import BinaryClassificationTrack, Track +from .tracks import ( + BinaryClassificationTrack, + MultiClassClassificationTrack, + RegressionTrack, + Track, +) __all__ = [ "load_binary_clf_tracks", "iter_progressive_val_score", "progressive_val_score", "BinaryClassificationTrack", + "MultiClassClassificationTrack", + "RegressionTrack", "Track", ] diff --git a/river/evaluate/gen.py b/river/evaluate/gen.py new file mode 100644 index 0000000000..3b547848c6 --- /dev/null +++ b/river/evaluate/gen.py @@ -0,0 +1,75 @@ +from river import datasets + + +class WrappedGenerator(datasets.base.Dataset): + def __init__(self, n_samples, n_features, task, gen): + super().__init__( + n_samples=n_samples, + n_features=n_features, + task=task, + ) + + self._gen = gen + + def __iter__(self): + for i, (x, y) in enumerate(self._gen): + if i == self.n_samples: + break + yield x, y + + +class Friedman7k(WrappedGenerator): + """Sample from the stationary version of the Friedman dataset. + + This sample contains 7k instances sampled from the Friedman generator. 
+ + """ + + def __init__(self): + super().__init__( + n_samples=7000, + n_features=10, + task=datasets.base.REG, + gen=datasets.synth.Friedman(seed=42), + ) + + +class FriedmanLEA10k(WrappedGenerator): + """Sample from the FriedmanLEA generator. + + This sample contains 10k instances sampled from the Friedman generator and presents + local-expanding abrupt concept drifts that locally affect the data and happen after + 2k, 5k, and 8k instances. + + """ + + def __init__(self): + super().__init__( + n_samples=10000, + n_features=10, + task=datasets.base.REG, + gen=datasets.synth.FriedmanDrift( + drift_type="lea", position=(2000, 5000, 8000), seed=42 + ), + ) + + +class FriedmanGSG10k(WrappedGenerator): + """Sample from the FriedmanGSG generator. + + This sample contains 10k instances sampled from the Friedman generator and presents + global and slow gradual concept drifts that affect the data and happen after + 3.5k and 7k instances. The transition window between different concepts has a length of + 1k instances. + + """ + + def __init__(self): + super().__init__( + n_samples=10000, + n_features=10, + task=datasets.base.REG, + gen=datasets.synth.FriedmanDrift( + drift_type="gsg", position=(3500, 7000), transition_window=1000, seed=42 + ), + ) diff --git a/river/evaluate/tracks.py b/river/evaluate/tracks.py index 989089abc5..d9bee4a8b1 100644 --- a/river/evaluate/tracks.py +++ b/river/evaluate/tracks.py @@ -1,5 +1,7 @@ from river import datasets, evaluate, metrics +from .gen import Friedman7k, FriedmanGSG10k, FriedmanLEA10k + class Track: """A track evaluate a model's performance. @@ -16,9 +18,8 @@ class Track: ---------- name The name of the track. - dataset - The dataset from which samples will be retrieved. A slice must be used if the dataset - is a data generator. + datasets + The datasets that compose the track. metric The metric(s) used to track performance. 
@@ -50,3 +51,30 @@ def __init__(self): datasets=[datasets.Phishing(), datasets.Bananas()], metric=metrics.Accuracy() + metrics.F1(), ) + + +class MultiClassClassificationTrack(Track): + def __init__(self): + super().__init__( + name="Multiclass classification", + datasets=[ + datasets.ImageSegments(), + datasets.Insects(), + datasets.Keystroke(), + ], + metric=metrics.Accuracy() + metrics.MicroF1() + metrics.MacroF1(), + ) + + +class RegressionTrack(Track): + def __init__(self): + super().__init__( + "Regression", + datasets=[ + datasets.TrumpApproval(), + Friedman7k(), + FriedmanLEA10k(), + FriedmanGSG10k(), + ], + metric=metrics.MAE() + metrics.RMSE() + metrics.R2(), + ) diff --git a/run.py b/run.py deleted file mode 100644 index 83ca77d1cd..0000000000 --- a/run.py +++ /dev/null @@ -1,33 +0,0 @@ -import itertools -import json -from tqdm import tqdm -from river import evaluate -from river import linear_model -from river import metrics -from river import preprocessing -from dominate.tags import * - - -benchmarks = {} - -models = { - "Logisitic regression": preprocessing.StandardScaler() | linear_model.LogisticRegression() -} -track = evaluate.BinaryClassificationTrack() -results = [] - -for model_name, model in models.items(): - for dataset in track: - res = next(track.run(model, dataset, n_checkpoints=1)) - res["Dataset"] = dataset.__class__.__name__ - res["Model"] = model_name - for k, v in res.items(): - if isinstance(v, metrics.base.Metric): - res[k] = v.get() - res["Time"] = res["Time"].seconds - results.append(res) - -benchmarks[track.name] = results - -with open('results.json', 'w') as f: - json.dump(benchmarks, f, sort_keys=True, indent=4) diff --git a/setup.py b/setup.py index 96e8c618aa..5281fdd946 100644 --- a/setup.py +++ b/setup.py @@ -71,7 +71,9 @@ "benckmarks": base_packages + [ "dominate", "scikit-learn", - "torch" + "torch", + "vowpalwabbit", + "slugify" ], "compat": base_packages + [ "scikit-learn",