1
+ import plotly .graph_objects as go
2
+ import pandas as pd
3
+ from matplotlib .colors import Colormap
4
+ import decimal as d
5
+ from typing import List , Dict
6
+ import math
7
+
8
+ from ..bhom .analytics import bhom_analytics
9
+
10
def set_dimensions(df: pd.DataFrame, tick_mark_count: int, dp: int) -> List[Dict[str, any]]:
    """Build the per-column dimension dictionaries for a parallel coordinate plot.

    Text columns (object or pandas string dtype) are converted to integer
    category codes, with the original text used as tick labels. Every other
    column is passed through unchanged and gets a ``range`` plus tick marks:
    one tick per unique value when there are fewer unique values than
    ``tick_mark_count``, otherwise evenly spaced ticks between the column
    minimum and maximum.

    Args:
        df (pd.DataFrame):
            The pandas DataFrame to plot. It is not modified.
        tick_mark_count (int):
            The maximum number of tick marks to show on each axis.
        dp (int):
            The number of decimal places the evenly spaced tick labels are
            rounded to.

    Returns:
        list[dict[str, any]]:
            One dictionary per column, in column order. Each has the keys
            ``label``, ``values``, ``tickvals`` and ``ticktext``; non-text
            columns also have ``range``.
    """
    # Work on a copy so that encoding text columns never touches the caller's frame.
    df_copy = df.copy()
    dimensions = []

    for column in df_copy.columns:
        dim = {"label": str(column)}
        dtype = df_copy[column].dtype

        # Object columns are always treated as categorical (as before); the
        # string-dtype check additionally catches pandas' dedicated string dtype.
        if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype):
            df_copy[column] = df_copy[column].astype("category").cat.codes

            dim["values"] = df_copy[column]
            # Both unique() calls keep order of first appearance, so code i
            # lines up with label i.
            dim["tickvals"] = df_copy[column].unique()
            dim["ticktext"] = df[column].unique()

            dimensions.append(dim)
            continue

        series = df_copy[column]
        lowest, highest = series.min(), series.max()
        dim["values"] = series
        dim["range"] = [lowest, highest]

        if series.nunique() < tick_mark_count:
            # Few distinct values: label each of them directly.
            dim["tickvals"] = dim["ticktext"] = series.unique()
            dimensions.append(dim)
            continue

        # Reduce the tick count when the value span is narrower than the
        # requested number of ticks. A local variable is used so the reduction
        # applies to this column only and does not leak into later columns.
        span = highest - lowest
        ticks = tick_mark_count
        if span + 1 < ticks:
            ticks = math.ceil(span) + 1

        if ticks > 1:
            dim["tickvals"] = [lowest + i * span / (ticks - 1) for i in range(ticks)]
        else:
            # Zero or one tick requested: avoid dividing by (ticks - 1) == 0.
            dim["tickvals"] = [lowest] * max(ticks, 0)
        dim["ticktext"] = [round(value, dp) for value in dim["tickvals"]]

        dimensions.append(dim)

    return dimensions
68
+
69
def _colormap_to_colorscale(cmap, samples: int = 32):
    """Return ``cmap`` as a value usable for a Plotly ``colorscale``.

    A matplotlib ``Colormap`` is sampled at ``samples`` evenly spaced positions
    and returned as ``[position, "rgb(r, g, b)"]`` pairs; anything else (for
    example a colorscale name) is returned unchanged.
    """
    if not isinstance(cmap, Colormap):
        return cmap

    colorscale = []
    for i in range(samples):
        position = i / (samples - 1)
        # Calling a Colormap with a float in [0, 1] yields RGBA floats; alpha is dropped.
        red, green, blue = (round(channel * 255) for channel in cmap(position)[:3])
        colorscale.append([position, f"rgb({red}, {green}, {blue})"])
    return colorscale


@bhom_analytics()
def parallel_coordinate_plot(
    df: pd.DataFrame = pd.DataFrame(),
    variables_to_show: list = None,
    decimal_places: int = 0,
    tick_mark_count: int = 11,
    colour_key: str = None,
    cmap: Colormap = "viridis",
    dimensions: List[dict] = None,
    plot_title: str = "",
    plot_bgcolour: str = 'black',
    paper_bgcolour: str = 'black',
    font_colour: str = 'white',
    **kwargs,
) -> go.Figure:
    """Create a parallel coordinate plot of a pandas DataFrame.

    Args:
        df (pd.DataFrame):
            The pandas DataFrame to plot.
        variables_to_show (list, optional):
            The variables to show on the parallel coordinate plot. Must be a subset of df.columns.
        decimal_places (int, optional):
            The number of decimal places to show on the tick marks. Defaults to 0.
        tick_mark_count (int, optional):
            The number of tick marks to show on the parallel coordinate plot. Defaults to 11.
        colour_key (str, optional):
            The column of df to use as the colour key. It does not have to be
            listed in variables_to_show. Defaults to None, in which case the
            last displayed column is used (no colour key when df is empty).
        cmap (Colormap or str, optional):
            The colormap to use for the colour key. Can be a matplotlib Colormap (converted to a
            Plotly colorscale) or a string representing a Plotly colorscale. Defaults to "viridis".
        dimensions (list[dict], optional):
            A list of dimensions to plot. If None, dimensions will be automatically generated based on the DataFrame. Defaults to None.
        plot_title (str, optional):
            The title of the plot. Defaults to an empty string.
        plot_bgcolour (str, optional):
            The background color of the plot. Defaults to 'black'.
        paper_bgcolour (str, optional):
            The background color of the paper. Defaults to 'black'.
        font_colour (str, optional):
            The color of the font used in the plot. Defaults to 'white'.
        **kwargs:
            Additional keyword arguments to pass to go.Parcoords().

    Returns:
        go.Figure:
            The populated go.Figure object.

    Raises:
        KeyError:
            If variables_to_show or colour_key name a column that is not in df.
    """
    # Keep a handle on the unfiltered frame so the colour column can still be
    # found when it is not one of the displayed variables.
    full_df = df

    if variables_to_show is not None:
        df = df[variables_to_show]

    if dimensions is None:
        dimensions = set_dimensions(df, tick_mark_count, decimal_places)

    if colour_key is None and not df.empty:
        colour_key = df.columns[-1]

    line = {"colorscale": _colormap_to_colorscale(cmap)}

    # With an empty frame there is no colour column; skipping the lookup avoids
    # a KeyError on df[None].
    if colour_key is not None:
        colour = full_df[colour_key]
        if not pd.api.types.is_numeric_dtype(colour):
            # Encode non-numeric colour columns as category codes, mirroring
            # what set_dimensions does for text axes.
            # NOTE(review): assumes line.color is meant to be numeric here;
            # a column of literal colour names would also be encoded.
            colour = colour.astype("category").cat.codes
        line["color"] = colour

    fig = go.Figure(
        data=go.Parcoords(
            line=line,
            dimensions=dimensions,
            **kwargs
        )
    )

    fig.update_layout(
        title=plot_title,
        plot_bgcolor=plot_bgcolour,
        paper_bgcolor=paper_bgcolour,
        font_color=font_colour
    )

    return fig
0 commit comments