Commit 2eac6b5

Merge pull request #16 from dataiku/fix-make

Fix Makefile commands, fix the webapp display, and fix an issue with pandas > 2.

2 parents: 887f6e3 + 476e569

12 files changed: +200 -35 lines

.gitignore (+164)

@@ -1 +1,165 @@
 *.zip
+
+state.json
+*.pyc
+.idea/
+.coverage
+.sonarlint
+htmlcov
+.DS_Store
+
+# DSS specific stuff
+dist/
+.wlock
+venv/
+.ts
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+unit.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Integration tests reports
+tests/allure_report/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+__pycache__
+.python-version
+.pyc
+
+setup.cfg
+.vscode
+
+*.ipynb
+forecast-env/

CHANGELOG.md (+5)

@@ -1,5 +1,10 @@
 # Changelog
 
+## Version 1.1.2 (2025-05-14)
+* Fix issue with webapp not displaying correctly
+* Fix bug in pandas > 2
+* Fix plugin make command
+
 ## Version 1.1.1 (2024-05-07)
 * Scoring recipe: Specify array content type for decision_rules in output schema
 
Makefile (+1, -18)

@@ -20,34 +20,17 @@ plugin:
 
 unit-tests:
 	@echo "Running unit tests..."
-	@( \
-		PYTHON_VERSION=`python3 -V 2>&1 | sed 's/[^0-9]*//g' | cut -c 1,2`; \
-		PYTHON_VERSION_IS_CORRECT=`cat code-env/python/desc.json | python3 -c "import sys, json; print(str($$PYTHON_VERSION) in [x[-2:] for x in json.load(sys.stdin)['acceptedPythonInterpreters']]);"`; \
-		if [ $$PYTHON_VERSION_IS_CORRECT == "False" ]; then echo "Python version $$PYTHON_VERSION is not in acceptedPythonInterpreters"; exit 1; else echo "Python version $$PYTHON_VERSION is in acceptedPythonInterpreters"; fi; \
-	)
 	@( \
 		rm -rf ./env/; \
 		python3 -m venv env/; \
 		source env/bin/activate; \
 		pip install --upgrade pip;\
 		pip install --no-cache-dir -r tests/python/unit/requirements.txt; \
-		pip install --no-cache-dir -r code-env/python/spec/requirements.txt; \
 		export PYTHONPATH="$(PYTHONPATH):$(PWD)/python-lib"; \
 		pytest tests/python/unit --alluredir=tests/allure_report || ret=$$?; exit $$ret \
 	)
 
-integration-tests:
-	@echo "Running integration tests..."
-	@( \
-		rm -rf ./env/; \
-		python3 -m venv env/; \
-		source env/bin/activate; \
-		pip3 install --upgrade pip;\
-		pip install --no-cache-dir -r tests/python/integration/requirements.txt; \
-		pytest tests/python/integration --alluredir=tests/allure_report || ret=$$?; exit $$ret \
-	)
-
-tests: unit-tests integration-tests
+tests: unit-tests
 
 dist-clean:
 	rm -rf dist

plugin.json (+1, -1)

@@ -1,6 +1,6 @@
 {
     "id" : "decision-tree-builder",
-    "version" : "1.1.1",
+    "version" : "1.1.2",
     "meta" : {
         "label" : "Interactive Decision Tree Builder",
         "description" : "Build and explore decision trees, and use them to score and evaluate data",

python-lib/dku_idtb_decision_tree/autosplit.py (+2, -1)

@@ -16,7 +16,8 @@ def cross_entropy(population_distrib, sample):
 def convert_categorical_columns(feature_col, target_col):
     target_distrib = target_col.value_counts(normalize=True)
     entropies = target_col.groupby(feature_col).apply(apply_cross_entropy(target_distrib))
-    return entropies.sort_index()
+    return entropies.sort_index().reset_index(drop=True)
+
 
 def autosplit(df, feature, target, numerical, max_splits):
     if len(df[target].unique()) < 2:
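
Note on the line changed above: Series.groupby(...).apply(...) returns a Series indexed by the group labels, so the added reset_index(drop=True) hands downstream code a plain positional index. A minimal sketch of that behaviour with made-up data (not the plugin's):

    # Toy data, purely illustrative; only the groupby/apply pattern matches the diff above.
    import pandas as pd

    feature_col = pd.Series(["a", "a", "b", "b"])
    target_col = pd.Series([0, 1, 1, 1])

    # One scalar per group: the result is a Series indexed by the group labels.
    per_group = target_col.groupby(feature_col).apply(lambda s: s.mean())
    print(per_group.index.tolist())   # ['a', 'b']

    # With the fix, callers see a clean 0..n-1 index instead of the labels.
    fixed = per_group.sort_index().reset_index(drop=True)
    print(fixed.index.tolist())       # [0, 1]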

python-lib/dku_idtb_decision_tree/tree.py (+1, -1)

@@ -168,7 +168,7 @@ def get_stats_numerical_node(self, column, target_column, mean):
 
         stats = {"bins": [], "mean": column.mean(), "max": column.max(), "min": column.min()}
         bins = pd.cut(column.fillna(mean), bins = min(10, column.nunique()), include_lowest = True, right = False)
-        target_grouped = target_column.groupby(bins)
+        target_grouped = target_column.groupby(bins, observed=False) # explicitly set observed=False to avoid deprecation warning
         target_distrib = target_grouped.apply(lambda x: x.value_counts())
         col_distrib = target_grouped.count()
         for interval, count in col_distrib.items():
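
For context on the observed=False change (toy values below, not the plugin's data): pd.cut returns a categorical grouper, and recent pandas versions warn when grouping by a categorical without an explicit observed= argument because the default is slated to change; observed=False keeps empty bins as zero-count rows, which the per-bin statistics above iterate over.

    # Toy series, purely illustrative; only the cut/groupby pattern mirrors the diff above.
    import pandas as pd

    column = pd.Series([1.0, 2.0, 9.0, 10.0])
    target_column = pd.Series(["A", "B", "A", "B"])

    # Three bins over the value range; the middle bin ends up empty.
    bins = pd.cut(column, bins=3, include_lowest=True, right=False)

    # observed=False keeps the empty bin as a zero-count row and avoids the
    # deprecation warning about the changing default for categorical groupers.
    counts = target_column.groupby(bins, observed=False).count()
    print(counts)   # 2, 0, 2 across the three intervals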
File renamed without changes.

tests/python/unit/requirements.txt (+8)

@@ -0,0 +1,8 @@
+pytest<8
+allure-pytest==2.8.29
+scikit-learn
+
+scipy==1.2.1; python_version < '3.8'
+scipy==1.10.1; python_version >= '3.8'
+statsmodels==0.12.2; python_version < '3.8'
+statsmodels==0.13.5; python_version >= '3.8'
File renamed without changes.

python-tests/test_score.py renamed to tests/python/unit/test_score.py (+10, -8)

@@ -1,8 +1,10 @@
 import pandas as pd
+import numpy as np
 from dku_idtb_scoring.score import add_scoring_columns, get_scored_df_schema
 from dku_idtb_decision_tree.tree import ScoringTree
 from pytest import raises
 
+
 nodes = {
     "0": {
         "id": 0,
@@ -71,42 +73,42 @@
 
 def get_input_df():
     return pd.DataFrame([[.2, "u", "A"],
-                        [7, pd.np.nan, "B"],
+                        [7, np.nan, "B"],
                         [4, "u", "A"],
                         [3, "v", "A"],
-                        [pd.np.nan, "u", "C"]], columns=("num", "cat", "target"))
+                        [np.nan, "u", "C"]], columns=("num", "cat", "target"))
 
 def test_score():
     df = get_input_df()
     add_scoring_columns(tree, df, True)
     expected_df = pd.DataFrame([
         [.2, "u", "A", .8, .2, "A", str(["num < 4"]), 1.0, "hello there"],
-        [7, pd.np.nan, "B", pd.np.nan, pd.np.nan, pd.np.nan, str(["4 ≤ num", "cat not in {}".format(["u", "v"])]), 4.0, "general Kenobi"],
+        [7, np.nan, "B", np.nan, np.nan, np.nan, str(["4 ≤ num", "cat not in {}".format(["u", "v"])]), 4.0, "general Kenobi"],
         [4, "u", "A", .25, .75, "B", str(["4 ≤ num", "cat in {}".format(["u", "v"])]), 3.0, None],
         [3, "v", "A", .8, .2, "A", str(["num < 4"]), 1.0, "hello there"],
-        [pd.np.nan, "u", "C", .8, .2, "A", str(["num < 4"]), 1.0, "hello there"]
+        [np.nan, "u", "C", .8, .2, "A", str(["num < 4"]), 1.0, "hello there"]
     ], columns=("num", "cat", "target", "proba_A", "proba_B", "prediction", "decision_rule", "leaf_id", "label"))
     pd.testing.assert_frame_equal(df, expected_df)
 
     df = get_input_df()
     add_scoring_columns(tree, df, False, True, False)
     expected_df = pd.DataFrame([
         [.2, "u", "A", "A", str(["num < 4"]), 1.0, "hello there"],
-        [7, pd.np.nan, "B", pd.np.nan, str(["4 ≤ num", "cat not in {}".format(["u", "v"])]), 4.0, "general Kenobi"],
+        [7, np.nan, "B", np.nan, str(["4 ≤ num", "cat not in {}".format(["u", "v"])]), 4.0, "general Kenobi"],
         [4, "u", "A", "B", str(["4 ≤ num", "cat in {}".format(["u", "v"])]), 3.0, None],
         [3, "v", "A", "A", str(["num < 4"]), 1.0, "hello there"],
-        [pd.np.nan, "u", "C", pd.np.nan, str(["num < 4"]), 1.0, "hello there"]
+        [np.nan, "u", "C", np.nan, str(["num < 4"]), 1.0, "hello there"]
     ], columns=("num", "cat", "target", "prediction", "decision_rule", "leaf_id", "label"))
     pd.testing.assert_frame_equal(df, expected_df)
 
     df = get_input_df()
     add_scoring_columns(tree, df, False, True, True)
     expected_df = pd.DataFrame([
         [.2, "u", "A", "A", True, str(["num < 4"]), 1.0, "hello there"],
-        [7, pd.np.nan, "B", pd.np.nan, pd.np.nan, str(["4 ≤ num", "cat not in {}".format(["u", "v"])]), 4.0, "general Kenobi"],
+        [7, np.nan, "B", np.nan, np.nan, str(["4 ≤ num", "cat not in {}".format(["u", "v"])]), 4.0, "general Kenobi"],
         [4, "u", "A", "B", False, str(["4 ≤ num", "cat in {}".format(["u", "v"])]), 3.0, None],
         [3, "v", "A", "A", True, str(["num < 4"]), 1.0, "hello there"],
-        [pd.np.nan, "u", "C", pd.np.nan, pd.np.nan, str(["num < 4"]), 1.0, "hello there"]
+        [np.nan, "u", "C", np.nan, np.nan, str(["num < 4"]), 1.0, "hello there"]
     ], columns=("num", "cat", "target", "prediction", "prediction_correct", "decision_rule", "leaf_id", "label"))
     pd.testing.assert_frame_equal(df, expected_df)
 
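
The pd.np → np replacements in this file (and in test_tree.py below) are needed on pandas >= 2.0, where the pd.np alias (deprecated since 1.0) was removed. A minimal illustration, nothing plugin-specific:

    # Importing numpy directly works on every supported pandas version.
    import numpy as np
    import pandas as pd

    value = np.nan
    assert pd.isna(value)

    # The old alias only exists on pandas < 2.0:
    #   pd.np.nan   # AttributeError on pandas >= 2.0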

python-tests/test_tree.py renamed to tests/python/unit/test_tree.py (+7, -6)

@@ -1,19 +1,20 @@
 import pandas as pd
+import numpy as np
 from dku_idtb_decision_tree.tree import InteractiveTree
 
 df = pd.DataFrame([
     [1, 5.5, "x", "n", "A"],
-    [2, 7.7, "y", pd.np.nan, "A"],
-    [pd.np.nan, 7, "z", pd.np.nan, "B"],
+    [2, 7.7, "y", np.nan, "A"],
+    [np.nan, 7, "z", np.nan, "B"],
     [3, 1.2, "z", "n", "B"],
-    [4, 7.1, "z", pd.np.nan, "C"],
+    [4, 7.1, "z", np.nan, "C"],
     [5, .4, "x", "p", "A"],
-    [6, 8, "z", pd.np.nan, "A"],
+    [6, 8, "z", np.nan, "A"],
     [7, 5.5, "y", "p", "B"],
     [8, 1.5, "z", "n", "B"],
     [9, 3, "y", "n", "C"],
-    [10, 7.5, "x", pd.np.nan, "B"],
-    [11, 6, "x", pd.np.nan, "B"]
+    [10, 7.5, "x", np.nan, "B"],
+    [11, 6, "x", np.nan, "B"]
 ], columns=("num_1", "num_2", "cat_1", "cat_2", "target"))
 
 def test_get_stats():

webapps/interactive-decision-tree-builder/webapp.json (+1)

@@ -5,6 +5,7 @@
         "icon": "icon-sitemap"
     },
 
+    "hideWebAppConfig": false,
     "baseType": "STANDARD",
     "standardWebAppLibraries": ["d3", "dataiku", "font_awesome"],
    "hasBackend": "true",
