Commit 2eac6b5

Merge pull request #16 from dataiku/fix-make

Fix Makefile commands, fix the webapp display, and fix an issue with pandas > 2.

2 parents: 887f6e3 + 476e569

12 files changed: +200 -35 lines

.gitignore (+164)

@@ -1 +1,165 @@
 *.zip
+
+state.json
+*.pyc
+.idea/
+.coverage
+.sonarlint
+htmlcov
+.DS_Store
+
+# DSS specific stuff
+dist/
+.wlock
+venv/
+.ts
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+unit.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Integration tests reports
+tests/allure_report/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+__pycache__
+.python-version
+.pyc
+
+setup.cfg
+.vscode
+
+*.ipynb
+forecast-env/

CHANGELOG.md (+5)

@@ -1,5 +1,10 @@
 # Changelog
 
+## Version 1.1.2 (2025-05-14)
+* Fix issue with webapp not displaying correctly
+* Fix bug in pandas > 2
+* Fix plugin make command
+
 ## Version 1.1.1 (2024-05-07)
 * Scoring recipe: Specify array content type for decision_rules in output schema
 
Makefile (+1, -18)

@@ -20,34 +20,17 @@ plugin:
 
 unit-tests:
 	@echo "Running unit tests..."
-	@( \
-		PYTHON_VERSION=`python3 -V 2>&1 | sed 's/[^0-9]*//g' | cut -c 1,2`; \
-		PYTHON_VERSION_IS_CORRECT=`cat code-env/python/desc.json | python3 -c "import sys, json; print(str($$PYTHON_VERSION) in [x[-2:] for x in json.load(sys.stdin)['acceptedPythonInterpreters']]);"`; \
-		if [ $$PYTHON_VERSION_IS_CORRECT == "False" ]; then echo "Python version $$PYTHON_VERSION is not in acceptedPythonInterpreters"; exit 1; else echo "Python version $$PYTHON_VERSION is in acceptedPythonInterpreters"; fi; \
-	)
 	@( \
 		rm -rf ./env/; \
 		python3 -m venv env/; \
 		source env/bin/activate; \
 		pip install --upgrade pip;\
 		pip install --no-cache-dir -r tests/python/unit/requirements.txt; \
-		pip install --no-cache-dir -r code-env/python/spec/requirements.txt; \
 		export PYTHONPATH="$(PYTHONPATH):$(PWD)/python-lib"; \
 		pytest tests/python/unit --alluredir=tests/allure_report || ret=$$?; exit $$ret \
 	)
 
-integration-tests:
-	@echo "Running integration tests..."
-	@( \
-		rm -rf ./env/; \
-		python3 -m venv env/; \
-		source env/bin/activate; \
-		pip3 install --upgrade pip;\
-		pip install --no-cache-dir -r tests/python/integration/requirements.txt; \
-		pytest tests/python/integration --alluredir=tests/allure_report || ret=$$?; exit $$ret \
-	)
-
-tests: unit-tests integration-tests
+tests: unit-tests
 
 dist-clean:
 	rm -rf dist

plugin.json (+1, -1)

@@ -1,6 +1,6 @@
 {
     "id" : "decision-tree-builder",
-    "version" : "1.1.1",
+    "version" : "1.1.2",
     "meta" : {
         "label" : "Interactive Decision Tree Builder",
         "description" : "Build and explore decision trees, and use them to score and evaluate data",

python-lib/dku_idtb_decision_tree/autosplit.py (+2, -1)

@@ -16,7 +16,8 @@ def cross_entropy(population_distrib, sample):
 def convert_categorical_columns(feature_col, target_col):
     target_distrib = target_col.value_counts(normalize=True)
     entropies = target_col.groupby(feature_col).apply(apply_cross_entropy(target_distrib))
-    return entropies.sort_index()
+    return entropies.sort_index().reset_index(drop=True)
+
 
 def autosplit(df, feature, target, numerical, max_splits):
     if len(df[target].unique()) < 2:
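
Note on the line changed above: Series.groupby(...).apply(...) returns a Series indexed by the group labels, so the added reset_index(drop=True) hands downstream code a plain positional index. A minimal sketch of that behaviour with made-up data (not the plugin's):

    # Toy data, purely illustrative; only the groupby/apply pattern matches the diff above.
    import pandas as pd

    feature_col = pd.Series(["a", "a", "b", "b"])
    target_col = pd.Series([0, 1, 1, 1])

    # One scalar per group: the result is a Series indexed by the group labels.
    per_group = target_col.groupby(feature_col).apply(lambda s: s.mean())
    print(per_group.index.tolist())   # ['a', 'b']

    # With the fix, callers see a clean 0..n-1 index instead of the labels.
    fixed = per_group.sort_index().reset_index(drop=True)
    print(fixed.index.tolist())       # [0, 1]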

python-lib/dku_idtb_decision_tree/tree.py (+1, -1)

@@ -168,7 +168,7 @@ def get_stats_numerical_node(self, column, target_column, mean):
 
         stats = {"bins": [], "mean": column.mean(), "max": column.max(), "min": column.min()}
         bins = pd.cut(column.fillna(mean), bins = min(10, column.nunique()), include_lowest = True, right = False)
-        target_grouped = target_column.groupby(bins)
+        target_grouped = target_column.groupby(bins, observed=False) # explicitly set observed=False to avoid deprecation warning
         target_distrib = target_grouped.apply(lambda x: x.value_counts())
         col_distrib = target_grouped.count()
         for interval, count in col_distrib.items():
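
For context on the observed=False change (toy values below, not the plugin's data): pd.cut returns a categorical grouper, and recent pandas versions warn when grouping by a categorical without an explicit observed= argument because the default is slated to change; observed=False keeps empty bins as zero-count rows, which the per-bin statistics above iterate over.

    # Toy series, purely illustrative; only the cut/groupby pattern mirrors the diff above.
    import pandas as pd

    column = pd.Series([1.0, 2.0, 9.0, 10.0])
    target_column = pd.Series(["A", "B", "A", "B"])

    # Three bins over the value range; the middle bin ends up empty.
    bins = pd.cut(column, bins=3, include_lowest=True, right=False)

    # observed=False keeps the empty bin as a zero-count row and avoids the
    # deprecation warning about the changing default for categorical groupers.
    counts = target_column.groupby(bins, observed=False).count()
    print(counts)   # 2, 0, 2 across the three intervals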
File renamed without changes.

tests/python/unit/requirements.txt (+8)

@@ -0,0 +1,8 @@
+pytest<8
+allure-pytest==2.8.29
+scikit-learn
+
+scipy==1.2.1; python_version < '3.8'
+scipy==1.10.1; python_version >= '3.8'
+statsmodels==0.12.2; python_version < '3.8'
+statsmodels==0.13.5; python_version >= '3.8'
File renamed without changes.

python-tests/test_score.py renamed to tests/python/unit/test_score.py (+10, -8)

@@ -1,8 +1,10 @@
 import pandas as pd
+import numpy as np
 from dku_idtb_scoring.score import add_scoring_columns, get_scored_df_schema
 from dku_idtb_decision_tree.tree import ScoringTree
 from pytest import raises
 
+
 nodes = {
     "0": {
         "id": 0,
@@ -71,42 +73,42 @@
 
 def get_input_df():
     return pd.DataFrame([[.2, "u", "A"],
-                        [7, pd.np.nan, "B"],
+                        [7, np.nan, "B"],
                         [4, "u", "A"],
                         [3, "v", "A"],
-                        [pd.np.nan, "u", "C"]], columns=("num", "cat", "target"))
+                        [np.nan, "u", "C"]], columns=("num", "cat", "target"))
 
 def test_score():
     df = get_input_df()
     add_scoring_columns(tree, df, True)
     expected_df = pd.DataFrame([
         [.2, "u", "A", .8, .2, "A", str(["num < 4"]), 1.0, "hello there"],
-        [7, pd.np.nan, "B", pd.np.nan, pd.np.nan, pd.np.nan, str(["4 ≤ num", "cat not in {}".format(["u", "v"])]), 4.0, "general Kenobi"],
+        [7, np.nan, "B", np.nan, np.nan, np.nan, str(["4 ≤ num", "cat not in {}".format(["u", "v"])]), 4.0, "general Kenobi"],
         [4, "u", "A", .25, .75, "B", str(["4 ≤ num", "cat in {}".format(["u", "v"])]), 3.0, None],
         [3, "v", "A", .8, .2, "A", str(["num < 4"]), 1.0, "hello there"],
-        [pd.np.nan, "u", "C", .8, .2, "A", str(["num < 4"]), 1.0, "hello there"]
+        [np.nan, "u", "C", .8, .2, "A", str(["num < 4"]), 1.0, "hello there"]
     ], columns=("num", "cat", "target", "proba_A", "proba_B", "prediction", "decision_rule", "leaf_id", "label"))
     pd.testing.assert_frame_equal(df, expected_df)
 
     df = get_input_df()
     add_scoring_columns(tree, df, False, True, False)
     expected_df = pd.DataFrame([
         [.2, "u", "A", "A", str(["num < 4"]), 1.0, "hello there"],
-        [7, pd.np.nan, "B", pd.np.nan, str(["4 ≤ num", "cat not in {}".format(["u", "v"])]), 4.0, "general Kenobi"],
+        [7, np.nan, "B", np.nan, str(["4 ≤ num", "cat not in {}".format(["u", "v"])]), 4.0, "general Kenobi"],
         [4, "u", "A", "B", str(["4 ≤ num", "cat in {}".format(["u", "v"])]), 3.0, None],
         [3, "v", "A", "A", str(["num < 4"]), 1.0, "hello there"],
-        [pd.np.nan, "u", "C", pd.np.nan, str(["num < 4"]), 1.0, "hello there"]
+        [np.nan, "u", "C", np.nan, str(["num < 4"]), 1.0, "hello there"]
     ], columns=("num", "cat", "target", "prediction", "decision_rule", "leaf_id", "label"))
     pd.testing.assert_frame_equal(df, expected_df)
 
     df = get_input_df()
     add_scoring_columns(tree, df, False, True, True)
     expected_df = pd.DataFrame([
         [.2, "u", "A", "A", True, str(["num < 4"]), 1.0, "hello there"],
-        [7, pd.np.nan, "B", pd.np.nan, pd.np.nan, str(["4 ≤ num", "cat not in {}".format(["u", "v"])]), 4.0, "general Kenobi"],
+        [7, np.nan, "B", np.nan, np.nan, str(["4 ≤ num", "cat not in {}".format(["u", "v"])]), 4.0, "general Kenobi"],
         [4, "u", "A", "B", False, str(["4 ≤ num", "cat in {}".format(["u", "v"])]), 3.0, None],
         [3, "v", "A", "A", True, str(["num < 4"]), 1.0, "hello there"],
-        [pd.np.nan, "u", "C", pd.np.nan, pd.np.nan, str(["num < 4"]), 1.0, "hello there"]
+        [np.nan, "u", "C", np.nan, np.nan, str(["num < 4"]), 1.0, "hello there"]
     ], columns=("num", "cat", "target", "prediction", "prediction_correct", "decision_rule", "leaf_id", "label"))
     pd.testing.assert_frame_equal(df, expected_df)
 
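
The pd.np → np replacements in this file (and in test_tree.py below) are needed on pandas >= 2.0, where the pd.np alias (deprecated since 1.0) was removed. A minimal illustration, nothing plugin-specific:

    # Importing numpy directly works on every supported pandas version.
    import numpy as np
    import pandas as pd

    value = np.nan
    assert pd.isna(value)

    # The old alias only exists on pandas < 2.0:
    #   pd.np.nan   # AttributeError on pandas >= 2.0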

python-tests/test_tree.py renamed to tests/python/unit/test_tree.py (+7, -6)

@@ -1,19 +1,20 @@
 import pandas as pd
+import numpy as np
 from dku_idtb_decision_tree.tree import InteractiveTree
 
 df = pd.DataFrame([
     [1, 5.5, "x", "n", "A"],
-    [2, 7.7, "y", pd.np.nan, "A"],
-    [pd.np.nan, 7, "z", pd.np.nan, "B"],
+    [2, 7.7, "y", np.nan, "A"],
+    [np.nan, 7, "z", np.nan, "B"],
     [3, 1.2, "z", "n", "B"],
-    [4, 7.1, "z", pd.np.nan, "C"],
+    [4, 7.1, "z", np.nan, "C"],
     [5, .4, "x", "p", "A"],
-    [6, 8, "z", pd.np.nan, "A"],
+    [6, 8, "z", np.nan, "A"],
     [7, 5.5, "y", "p", "B"],
     [8, 1.5, "z", "n", "B"],
     [9, 3, "y", "n", "C"],
-    [10, 7.5, "x", pd.np.nan, "B"],
-    [11, 6, "x", pd.np.nan, "B"]
+    [10, 7.5, "x", np.nan, "B"],
+    [11, 6, "x", np.nan, "B"]
 ], columns=("num_1", "num_2", "cat_1", "cat_2", "target"))
 
 def test_get_stats():

webapps/interactive-decision-tree-builder/webapp.json (+1)

@@ -5,6 +5,7 @@
         "icon": "icon-sitemap"
     },
 
+    "hideWebAppConfig": false,
     "baseType": "STANDARD",
     "standardWebAppLibraries": ["d3", "dataiku", "font_awesome"],
    "hasBackend": "true",
