Skip to content

Commit 4089cac

Browse files
author
TomAugspurger
committed
BUG: barplot with NaNs
1 parent 7800290 commit 4089cac

File tree

4 files changed

+29
-2
lines changed

4 files changed

+29
-2
lines changed

doc/source/v0.15.0.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -670,3 +670,4 @@ Bug Fixes
670670

671671
- Bug with kde plot and NaNs (:issue:`8182`)
672672
- Bug in ``GroupBy.count`` with float32 data type where NaN values were not excluded (:issue:`8169`).
673+
- Bug with stacked barplots and NaNs (:issue:`8175`).

doc/source/visualization.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,13 @@ To get horizontal bar plots, pass ``kind='barh'``:
203203
@savefig barh_plot_stacked_ex.png
204204
df2.plot(kind='barh', stacked=True);
205205
206+
Pandas tries to be pragmatic about plotting DataFrames or Series
207+
that contain missing data. When it makes sense, missing values will
208+
be filled with 0 (as is the case with bar plots). For other plots,
209+
like line plots, filling in 0 usually doesn't make sense, and so pandas doesn't
210+
try to guess. If you want to be explicit, you can always call
211+
:meth:`~pandas.DataFrame.fillna` before plotting.
212+
206213
.. _visualization.hist:
207214

208215
Histograms

pandas/tests/test_graphics.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1479,6 +1479,23 @@ def test_bar_bottom_left(self):
14791479
result = [p.get_x() for p in ax.patches]
14801480
self.assertEqual(result, [1] * 5)
14811481

1482+
@slow
1483+
def test_bar_nan(self):
1484+
df = DataFrame({'A': [10, np.nan, 20], 'B': [5, 10, 20],
1485+
'C': [1, 2, 3]})
1486+
ax = df.plot(kind='bar')
1487+
expected = [10, 0, 20, 5, 10, 20, 1, 2, 3]
1488+
result = [p.get_height() for p in ax.patches]
1489+
self.assertEqual(result, expected)
1490+
1491+
ax = df.plot(kind='bar', stacked=True)
1492+
result = [p.get_height() for p in ax.patches]
1493+
self.assertEqual(result, expected)
1494+
1495+
result = [p.get_y() for p in ax.patches]
1496+
expected = [0.0, 0.0, 0.0, 10.0, 0.0, 20.0, 15.0, 10.0, 40.0]
1497+
self.assertEqual(result, expected)
1498+
14821499
@slow
14831500
def test_plot_scatter(self):
14841501
df = DataFrame(randn(6, 4),

pandas/tools/plotting.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -870,9 +870,11 @@ def _validate_color_args(self):
870870
" use one or the other or pass 'style' "
871871
"without a color symbol")
872872

873-
def _iter_data(self, data=None, keep_index=False):
873+
def _iter_data(self, data=None, keep_index=False, fillna=None):
874874
if data is None:
875875
data = self.data
876+
if fillna is not None:
877+
data = data.fillna(fillna)
876878

877879
from pandas.core.frame import DataFrame
878880
if isinstance(data, (Series, np.ndarray, Index)):
@@ -1780,7 +1782,7 @@ def _make_plot(self):
17801782
pos_prior = neg_prior = np.zeros(len(self.data))
17811783
K = self.nseries
17821784

1783-
for i, (label, y) in enumerate(self._iter_data()):
1785+
for i, (label, y) in enumerate(self._iter_data(fillna=0)):
17841786
ax = self._get_ax(i)
17851787
kwds = self.kwds.copy()
17861788
kwds['color'] = colors[i % ncolors]

0 commit comments

Comments
 (0)