Skip to content

Commit f3bc959

Browse files
fix: type schema not checking for empty columns (#1679)
* fix: type schema not checking for empty columns
* fix: remove alerts unused parameters
* fix: indicate user defined type on empty columns
* fix(linting): code formatting

---------

Co-authored-by: Azory YData Bot <azory@ydata.ai>
1 parent 1e8cb89 commit f3bc959

File tree

4 files changed

+32
-9
lines changed

4 files changed

+32
-9
lines changed

src/ydata_profiling/model/alerts.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -634,7 +634,7 @@ def supported_alerts(summary: dict) -> List[Alert]:
634634
return alerts
635635

636636

637-
def unsupported_alerts(summary: Dict[str, Any]) -> List[Alert]:
637+
def unsupported_alerts() -> List[Alert]:
638638
alerts: List[Alert] = [
639639
UnsupportedAlert(),
640640
RejectedAlert(),
@@ -657,7 +657,7 @@ def check_variable_alerts(config: Settings, col: str, description: dict) -> List
657657
alerts += generic_alerts(description)
658658

659659
if description["type"] == "Unsupported":
660-
alerts += unsupported_alerts(description)
660+
alerts += unsupported_alerts()
661661
else:
662662
alerts += supported_alerts(description)
663663

src/ydata_profiling/model/pandas/summary_pandas.py

Lines changed: 18 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,14 @@
1616
from ydata_profiling.utils.dataframe import sort_column_names
1717

1818

19+
def _is_cast_type_defined(typeset: VisionsTypeset, series: str) -> bool:
20+
return (
21+
isinstance(typeset, ProfilingTypeSet)
22+
and typeset.type_schema
23+
and series in typeset.type_schema
24+
)
25+
26+
1927
@describe_1d.register
2028
def pandas_describe_1d(
2129
config: Settings,
@@ -38,11 +46,10 @@ def pandas_describe_1d(
3846
# Make sure pd.NA is not in the series
3947
series = series.fillna(np.nan)
4048

41-
if (
42-
isinstance(typeset, ProfilingTypeSet)
43-
and typeset.type_schema
44-
and series.name in typeset.type_schema
45-
):
49+
has_cast_type = _is_cast_type_defined(typeset, series.name)
50+
cast_type = str(typeset.type_schema[series.name]) if has_cast_type else None
51+
52+
if has_cast_type and not series.isna().all():
4653
vtype = typeset.type_schema[series.name]
4754

4855
elif config.infer_dtypes:
@@ -55,7 +62,12 @@ def pandas_describe_1d(
5562
vtype = typeset.detect_type(series)
5663

5764
typeset.type_schema[series.name] = vtype
58-
return summarizer.summarize(config, series, dtype=vtype)
65+
summary = summarizer.summarize(config, series, dtype=vtype)
66+
# Cast type is only used on unsupported columns rendering pipeline
67+
# to indicate the correct variable type when inference is not possible
68+
summary["cast_type"] = cast_type
69+
70+
return summary
5971

6072

6173
@get_series_descriptions.register

src/ydata_profiling/report/structure/variables/render_generic.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ def render_generic(config: Settings, summary: dict) -> dict:
1212
info = VariableInfo(
1313
anchor_id=summary["varid"],
1414
alerts=summary["alerts"],
15-
var_type="Unsupported",
15+
var_type=summary["cast_type"] or "Unsupported",
1616
var_name=summary["varname"],
1717
description=summary["description"],
1818
style=config.html.style,

tests/unit/test_typeset_default.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -475,3 +475,14 @@ def test_type_schema(dataframe: pd.DataFrame, column: str, type_schema: dict):
475475
assert prof.typeset.type_schema[column] == prof.typeset._get_type(
476476
type_schema[column]
477477
)
478+
479+
480+
def test_type_schema_with_null_column():
481+
df = pd.DataFrame({"null_col": [None] * 100})
482+
prof = ProfileReport(df, type_schema={"null_col": "datetime"})
483+
description = prof.description_set
484+
assert description.variables["null_col"]["type"] == "Unsupported"
485+
486+
prof = ProfileReport(df, type_schema={"null_col": "numeric"})
487+
description = prof.description_set
488+
assert description.variables["null_col"]["type"] == "Unsupported"

0 commit comments

Comments
 (0)