diff --git a/datascience/tables.py b/datascience/tables.py index bd93b6de..3f20b597 100644 --- a/datascience/tables.py +++ b/datascience/tables.py @@ -5290,7 +5290,8 @@ def hist(self, *columns, overlay=True, bins=None, bin_column=None, unit=None, co grouped by the values in this column, and a separate histogram is generated for each group. The histograms are overlaid or plotted separately depending on the overlay argument. If None, no such - grouping is done. + grouping is done. Note: `group` cannot be used together with `bin_column` or when plotting + multiple columns. An error will be raised in these cases. side_by_side (bool): Whether histogram bins should be plotted side by side (instead of directly overlaid). Makes sense only when diff --git a/docs/reference-nb/datascience-reference.ipynb b/docs/reference-nb/datascience-reference.ipynb index 17ab3e6f..6fcf705c 100644 --- a/docs/reference-nb/datascience-reference.ipynb +++ b/docs/reference-nb/datascience-reference.ipynb @@ -207,9 +207,7 @@ { "cell_type": "code", "execution_count": 32, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -2194,7 +2192,9 @@ { "cell_type": "code", "execution_count": 80, - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [ { "data": { @@ -2211,6 +2211,34 @@ "actors.hist(\"Gross\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using `group` with `Table.hist`\n", + "\n", + "You can also draw a separate histogram for each value of a categorical column by passing `group=`.\n", + "\n", + "`group` works only when a single column is plotted, and it cannot be combined with `bin_column`."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "students = Table().with_columns(\n", + " 'Score', np.concatenate([\n", + " np.random.normal(75, 10, 500), # Group X: higher average score\n", + " np.random.normal(65, 10, 500) # Group Y: lower average score\n", + " ]),\n", + " 'Group', ['X'] * 500 + ['Y'] * 500 # Assign 500 Xs and 500 Ys\n", + ")\n", + "\n", + "students.hist('Score', group='Group') # Plot histogram grouped by 'Group'\n" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -4234,9 +4262,7 @@ { "cell_type": "code", "execution_count": 100, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -4516,9 +4542,7 @@ { "cell_type": "code", "execution_count": 112, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -6469,7 +6493,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -6483,9 +6507,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.1" + "version": "3.13.3" } }, "nbformat": 4, - "nbformat_minor": 2 -} \ No newline at end of file + "nbformat_minor": 4 +} diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 2d37cafb..a7fa1945 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -258,6 +258,20 @@ Draw histograms with :meth:`~datascience.tables.Table.hist`: @savefig hist_overlay.png width=4in normal_data.hist(bins = range(-5, 10), overlay = True) +Draw grouped histograms with the ``group`` argument: + +.. ipython:: python + + grouped = Table().with_columns( + 'value', np.random.normal(size=100), + 'group', np.random.choice(['A', 'B'], size=100) + ) + + @savefig hist_group.png width=4in + grouped.hist('value', group='group') + +Note: ``group`` cannot be used together with ``bin_column``, and it does not support plotting multiple columns.
+ If we treat the ``normal_data`` table as a set of x-y points, we can :meth:`~datascience.tables.Table.plot` and :meth:`~datascience.tables.Table.scatter`: