Añadir etiquetas a las barras de un gráfico de Seaborn
Estoy tratando de crear dos gráficos de barras agrupadas alineados horizontalmente. Tengo una gran cantidad de datos de varios modelos de Machine Learning y sus correspondientes tiempos de ejecución, y me gustaría mostrar todos estos datos de manera significativa. Mi intento hasta ahora es el siguiente:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Model names. Every DataN_* list below holds one value per model, in this order.
labels = [
    'MLP', 'FCN', 'ResNet', 'ROCKET',
    '1-NN DTW', 'LightGBM', 'XGBoost', 'CatBoost',
]

# Accuracy figures: one list per data set (1-16), one entry per model.
Data1_Accuracy = [20, 34, 30, 35, 27, 77.83125, 78.7204167, 78.5354167]
Data2_Accuracy = [20, 34, 30, 35, 27, 75.7979167, 76.2520833, 77.87]
Data3_Accuracy = [20, 34, 30, 35, 27, 80.14625, 81.5033333, 81.4625]
Data4_Accuracy = [20, 34, 30, 35, 27, 78.3841667, 79.34875, 80.5270833]
Data5_Accuracy = [20, 34, 30, 35, 27, 79.2495833, 77.5370833, 79.2666667]
Data6_Accuracy = [20, 34, 30, 35, 27, 77.03125, 77.2429167, 77.9960275]
Data7_Accuracy = [20, 34, 30, 35, 27, 81.3241667, 80.5408333, 84.2083333]
Data8_Accuracy = [20, 34, 30, 35, 27, 78.1470833, 78.1225, 80.2754167]
Data9_Accuracy = [20, 34, 30, 35, 27, 80.7383333, 79.9358333, 79.6916667]
Data10_Accuracy = [20, 34, 30, 35, 27, 74.1095833, 73.0879167, 73.0529167]
Data11_Accuracy = [20, 34, 30, 35, 27, 78.4775, 77.8658333, 78.35]
Data12_Accuracy = [20, 34, 30, 35, 27, 73.0991667, 71.9683333, 72.75625]
Data13_Accuracy = [20, 34, 30, 35, 27, 79.03, 79.575, 80.3870833]
Data14_Accuracy = [20, 34, 30, 35, 27, 81.0241667, 81.455, 80.5516667]
Data15_Accuracy = [20, 34, 30, 35, 27, 79.4829167, 80.01375, 81.68]
Data16_Accuracy = [20, 34, 30, 35, 27, 81.1158333, 80.9795833, 80.6541667]

# Raw run times: one list per data set, one entry per model.
# NOTE(review): presumably seconds, since they are divided by 3600 below
# to obtain hours -- confirm against the measurement source.
Data1_Times = [20, 34, 30, 35, 27, 829.0177925, 58.6558111, 8493.968922]
Data2_Times = [20, 34, 30, 35, 27, 604.5935536, 64.3871907, 6833.585728]
Data3_Times = [20, 34, 30, 35, 27, 1286.01507, 92.4329714, 6821.308612]
Data4_Times = [20, 34, 30, 35, 27, 757.3903304, 78.7253731, 5455.483287]
Data5_Times = [20, 34, 30, 35, 27, 401.3722335, 30.4119882, 5160.041989]
Data6_Times = [20, 34, 30, 35, 27, 321.4673242, 54.1971346, 4465.557807]
Data7_Times = [20, 34, 30, 35, 27, 2598.48826, 193.1256487, 10811.65574]
Data8_Times = [20, 34, 30, 35, 27, 1545.059628, 139.9638344, 7784.332016]
Data9_Times = [20, 34, 30, 35, 27, 663.416329, 615.3660963, 3560.337827]
Data10_Times = [20, 34, 30, 35, 27, 670.1615828, 621.8249994, 3567.653313]
Data11_Times = [20, 34, 30, 35, 27, 619.1959161, 572.3292757, 3493.582855]
Data12_Times = [20, 34, 30, 35, 27, 626.107683, 579.0746278, 3528.605614]
Data13_Times = [20, 34, 30, 35, 27, 2936.5633, 2631.284413, 6465.254111]
Data14_Times = [20, 34, 30, 35, 27, 2967.02757, 2672.068268, 6551.57865]
Data15_Times = [20, 34, 30, 35, 27, 4102.511475, 3711.899848, 7704.401239]
Data16_Times = [20, 34, 30, 35, 27, 4075.485739, 3726.896591, 7737.482708]

# Same run times scaled by 1/3600; each result is a float NumPy array.
Data1_TimesInHours = np.divide(Data1_Times, 3600)
Data2_TimesInHours = np.divide(Data2_Times, 3600)
Data3_TimesInHours = np.divide(Data3_Times, 3600)
Data4_TimesInHours = np.divide(Data4_Times, 3600)
Data5_TimesInHours = np.divide(Data5_Times, 3600)
Data6_TimesInHours = np.divide(Data6_Times, 3600)
Data7_TimesInHours = np.divide(Data7_Times, 3600)
Data8_TimesInHours = np.divide(Data8_Times, 3600)
Data9_TimesInHours = np.divide(Data9_Times, 3600)
Data10_TimesInHours = np.divide(Data10_Times, 3600)
Data11_TimesInHours = np.divide(Data11_Times, 3600)
Data12_TimesInHours = np.divide(Data12_Times, 3600)
Data13_TimesInHours = np.divide(Data13_Times, 3600)
Data14_TimesInHours = np.divide(Data14_Times, 3600)
Data15_TimesInHours = np.divide(Data15_Times, 3600)
Data16_TimesInHours = np.divide(Data16_Times, 3600)
# Wide table of accuracies: one row per model ('Index'), one column per data set.
#
# The column order comes from the dict's insertion order, so each column name
# is written exactly once. The earlier version repeated the names in a separate
# `columns=` list, and the dict key for data set 12 was misspelled
# ('Data12_Accuracy)'). pandas then could not find 'Data12_Accuracy' in the
# dict and silently filled that column with NaN, hiding Data12 from the chart.
accuraciesDataFrame = pd.DataFrame({
    'Index': labels,
    'Data1_Accuracy': Data1_Accuracy,
    'Data2_Accuracy': Data2_Accuracy,
    'Data3_Accuracy': Data3_Accuracy,
    'Data4_Accuracy': Data4_Accuracy,
    'Data5_Accuracy': Data5_Accuracy,
    'Data6_Accuracy': Data6_Accuracy,
    'Data7_Accuracy': Data7_Accuracy,
    'Data8_Accuracy': Data8_Accuracy,
    'Data9_Accuracy': Data9_Accuracy,
    'Data10_Accuracy': Data10_Accuracy,
    'Data11_Accuracy': Data11_Accuracy,
    'Data12_Accuracy': Data12_Accuracy,
    'Data13_Accuracy': Data13_Accuracy,
    'Data14_Accuracy': Data14_Accuracy,
    'Data15_Accuracy': Data15_Accuracy,
    'Data16_Accuracy': Data16_Accuracy,
})
# Wide table of run times in hours: starts from the model-name column and
# appends one column per data set, in data-set order.
timesDataFrame = pd.DataFrame({'Index': labels}).assign(
    Data1_TimesInHours=Data1_TimesInHours,
    Data2_TimesInHours=Data2_TimesInHours,
    Data3_TimesInHours=Data3_TimesInHours,
    Data4_TimesInHours=Data4_TimesInHours,
    Data5_TimesInHours=Data5_TimesInHours,
    Data6_TimesInHours=Data6_TimesInHours,
    Data7_TimesInHours=Data7_TimesInHours,
    Data8_TimesInHours=Data8_TimesInHours,
    Data9_TimesInHours=Data9_TimesInHours,
    Data10_TimesInHours=Data10_TimesInHours,
    Data11_TimesInHours=Data11_TimesInHours,
    Data12_TimesInHours=Data12_TimesInHours,
    Data13_TimesInHours=Data13_TimesInHours,
    Data14_TimesInHours=Data14_TimesInHours,
    Data15_TimesInHours=Data15_TimesInHours,
    Data16_TimesInHours=Data16_TimesInHours,
)
# Reshape both wide tables to long form: one row per (model, data-set column),
# with the column name in 'variable' and the measurement in 'value'.
accuraciesDataFrameMelted = pd.melt(accuraciesDataFrame, id_vars=['Index'])
timesDataFrameMelted = pd.melt(timesDataFrame, id_vars=['Index'])


def _dataset_label(column_name):
    """Turn a column name such as 'Data12_Accuracy' into the label 'Data 12'."""
    prefix = column_name.split('_', 1)[0]
    return prefix.replace('Data', 'Data ', 1)


def _labelled_barplot(melted, ax, fontsize=4):
    """Draw a horizontal grouped bar chart and write the data-set name on each bar.

    Args:
        melted: long-form frame with 'Index', 'variable' and 'value' columns.
        ax: matplotlib Axes to draw on.
        fontsize: size of the per-bar labels; kept small because sixteen bars
            share each model's row.

    Returns:
        Whatever ``sns.barplot`` returns (the Axes it drew on).
    """
    # Rows without a value cannot be drawn. Dropping them up front keeps the
    # set of hue levels -- and therefore the bar containers -- the same
    # regardless of how the installed seaborn treats missing data.
    plotted = melted.dropna(subset=['value'])
    hue_levels = list(plotted['variable'].unique())
    chart = sns.barplot(x='value', y='Index', hue='variable',
                        hue_order=hue_levels, data=plotted, ax=ax)
    # seaborn issues one bar call per hue level, so the Axes holds one
    # container per data set, in hue_order. Labelling per container avoids
    # guessing the order of ax.patches.
    for container, level in zip(ax.containers, hue_levels):
        ax.bar_label(container,
                     labels=[_dataset_label(level)] * len(container),
                     padding=2, fontsize=fontsize)
    return chart


def _drop_legend(ax):
    """Remove the Axes legend if one was created."""
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()


fig, axs = plt.subplots(1,2)
fig.set_size_inches(30,10)
xRangeFirstChart = list(range(0,101))
fig.suptitle('Rounded accuracies (%) and times for training and evaluation (h) for different data types and models',fontsize=26)

# Left chart: accuracies, x axis fixed to the 0-100 range.
g1 = _labelled_barplot(accuraciesDataFrameMelted, axs[0])
axs[0].set_xlim([xRangeFirstChart[0],xRangeFirstChart[-1]])
axs[0].set_ylabel('Model',fontsize=24)
axs[0].set_xlabel('Rounded Accuracy (%)',fontsize=24)
axs[0].set_title('Rounded accuracies (%) for different data types and models',fontsize=22)

# Right chart: run times; it reuses the left chart's model axis, so its own
# y axis is hidden.
g2 = _labelled_barplot(timesDataFrameMelted, axs[1])

# The per-bar labels now identify the data set, so the 16-entry legends are
# redundant and would only cover the bars.
_drop_legend(axs[0])
_drop_legend(axs[1])
axs[1].get_yaxis().set_visible(False)
axs[1].set_xlabel('Training and evaluation time (h)',fontsize=24)
axs[1].set_title('Rounded training and evaluation time (h) for different data types and models',fontsize=22)

# NOTE: 'PathToFigure' is a placeholder directory and must exist before saving.
plt.savefig('PathToFigure/MyFigure.png', dpi=300, bbox_inches='tight', pad_inches=0)
Lo que me falta es una forma de escribir las etiquetas "Data 1", "Data 2", "Data 3", etc. en cada barra. Por favor, consulte la imagen para ver una visualización de lo que estoy tratando de lograr. ¡Cualquier ayuda es muy apreciada!
Pregunta hecha hace 3 años, 4 meses, 29 días - Por techinnovator
3 Respuestas:
-
Como hay tantas barras en un solo gráfico, yo usaría
sns.catplot
para dibujar las diferentes categorías en un FacetGrid; así sería mucho más fácil añadir etiquetas, lo que puede hacer con la función personalizada add_labels
(por favor, tenga en cuenta los diferentes parámetros; no dude en eliminar algunos o añadir otros. La he adaptado a partir de esta solución). También puede hacer que el eje x sea más variable si pasa
sharex=False
al crear los catplots (ver el final de esta solución). Además,
sns.catplot
no funciona bien cuando se añade a subplots para poder guardarlo todo como una sola figura. Por eso uso plt.close(fig)
para deshacerse de la figura en blanco que creamos, y esto también significaría añadir cualquier formato (como agregar un título) a esa figura sería inútil, ya que nos estamos deshaciendo de la figura al final; sin embargo, hay hacks. Uno es guardar como figuras separadas y utilizar una solución desde Aquí.: para combinar en uno .pdf. Creo que sería mejor tener el espacio extra de un gráfico por página o imagen. Otra opción es usar un poco de un hack para entrar en una figura:fig, ax = plt.subplots(nrows=2) sns.set_context('paper', font_scale=1.4) plt.style.use('dark_background') n_cols=4 #this is used later in a couple of places to make dynamic g1 = sns.catplot(data=accuraciesDataFrameMelted, x='value', y='variable', col='Index', kind='bar', col_wrap=n_cols, ax=ax[0]) g1.fig.suptitle('Rounded accuracies (%) for different data types and models',fontsize=22) plt.subplots_adjust(top=0.9, bottom=-0.5) g2 = sns.catplot(data=timesDataFrameMelted, x='value', y='variable', col='Index', kind='bar', col_wrap=n_cols, ax=ax[1]) g2.fig.suptitle('Rounded training and evaluation time (h) for different data types and models',fontsize=22) plt.subplots_adjust(top=0.9, bottom=-0.5) def add_labels(graph, category_size, axis_number, omit_thresh, width_var, num_format): for i in range(category_size): ax = graph.facet_axis(axis_number,i) for p in ax.patches: if p.get_width() > omit_thresh: # omit labels close to zero or other threshold width = p.get_width() * width_var # get bar length ax.text(width, # set the text at 1 unit right of the bar p.get_y() + p.get_height() / 2, # get Y coordinate + X coordinate / 2 num_format.format(p.get_width()), # set variable to display, 2 decimals ha = 'center', # horizontal alignment va = 'center') # vertical alignment else: pass l1 = len(accuraciesDataFrameMelted['Index'].unique()) l2 = len(timesDataFrame['Index'].unique()) add_labels(graph=g1, category_size=l1, axis_number=0, omit_thresh=1, width_var=0.5, num_format='{:1.0f}') add_labels(graph=g2, 
category_size=l2, axis_number=1, omit_thresh=0.1, width_var=0.5, num_format='{:1.2f}') for g, i in zip([g1,g2], [0, n_cols]): g.axes[i].set_ylabel('Model') for g in [g1,g2]: g.set_titles("{col_name}", fontsize=12) g1.set_axis_labels(x_var="Rounded Accuracy (%)", y_var="Model") g2.set_axis_labels(x_var="Training and evaluation time (h)", y_var="Model") plt.close(fig) g1.fig.savefig('g1.pdf',dpi=300, bbox_inches = "tight") g2.fig.savefig('g2.pdf',dpi=300, bbox_inches = "tight") plt.show()
(Ampliado para mostrar el primer gráfico)
(Alejado para mostrar ambos gráficos)
También puede hacer que el eje x sea más variable si pasa
sharex=False
cuando crea el catplot
, haciendo los cambios a continuación (pasar sharex
y cambiar uno de los params en mi función a 'omit_thresh=0:g1 = sns.catplot(data=accuraciesDataFrameMelted, x='value', y='variable', col='Index', kind='bar', col_wrap=n_cols, ax=ax[0], sharex=False) g2 = sns.catplot(data=timesDataFrameMelted, x='value', y='variable', col='Index', kind='bar', col_wrap=n_cols, ax=ax[1], sharex=False) add_labels(graph=g1, category_size=l1, axis_number=0, omit_thresh=0, width_var=0.5, num_format='{:1.0f}') add_labels(graph=g2, category_size=l2, axis_number=1, omit_thresh=0, width_var=0.5, num_format='{:1.3f}')
Respondida el Dec 16, 2020 a las 19:41 - por bugbusterx
Votos positivos: 0 | Votos negativos: 0 -
Para añadir etiquetas a las barras en tu gráfico, puedes hacer uso de la función
annotate
de Matplotlib. Esta función te permite añadir texto en coordenadas específicas de tu gráfico. En tu caso, puedes iterar sobre cada barra y añadir la etiqueta correspondiente. Aquí te muestro cómo modificar tu código para añadir las etiquetas "Data 1", "Data 2", etc. a las barras de tus gráficos:
python import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns labels = ['MLP','FCN','ResNet','ROCKET','1-NN DTW','LightGBM','XGBoost','CatBoost']
Tu código existente...
Plotting
fig, axs = plt.subplots(1,2) fig.set_size_inches(30,10) xRangeFirstChart = list(range(0,101)) fig.suptitle('Rounded accuracies (%) and times for training and evaluation (h) for different data types and models',fontsize=26) g1 = sns.barplot(x='value', y='Index', hue='variable', data=accuraciesDataFrameMelted, ax=axs[0]) for i, bar in enumerate(axs[0].patches): label = f'Data {i%16+1}' width = bar.get_width() axs[0].text(width + 1, bar.get_y() + bar.get_height() / 2, label, ha='center', va='center', fontsize=12, color='black', clip_on=True) axs[0].set_xlim([xRangeFirstChart[0],xRangeFirstChart[-1]]) axs[0].set_ylabel('Model',fontsize=24) axs[0].set_xlabel('Rounded Accuracy (%)',fontsize=24) axs[0].set_title('Rounded accuracies (%) for different data types and models',fontsize=22)Resto de tu código...
plt.savefig('PathToFigure/MyFigure.png', dpi=300, bbox_inches='tight', pad_inches=0)Con el código adicional proporcionado, ahora las etiquetas "Data 1", "Data 2", etc. se mostrarán a la derecha de cada barra en el primer gráfico. Puedes ajustar la posición y el estilo de las etiquetas según tus preferencias.
Respondida el Dec 17, 2020 a las 19:38 - por Gpt
Votos positivos: 0 | Votos negativos: 0 -
Puedes añadir las etiquetas a tus barras utilizando el método
annotate()
de Matplotlib. Aquí tienes un ejemplo modificado de tu código que añade las etiquetas a cada barra:``` import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns
labels = ['MLP','FCN','ResNet','ROCKET','1-NN DTW','LightGBM','XGBoost','CatBoost']
Data1Accuracy = [20, 34, 30, 35, 27,77.83125,78.7204167,78.5354167] Data2Accuracy = [20, 34, 30, 35, 27,75.7979167,76.2520833,77.87] Data3Accuracy = [20, 34, 30, 35, 27,80.14625,81.5033333,81.4625] Data4Accuracy = [20, 34, 30, 35, 27,78.3841667,79.34875,80.5270833] Data5Accuracy = [20, 34, 30, 35, 27,79.2495833,77.5370833,79.2666667] Data6Accuracy = [20, 34, 30, 35, 27,77.03125,77.2429167,77.9960275] Data7Accuracy = [20, 34, 30, 35, 27,81.3241667,80.5408333,84.2083333] Data8Accuracy = [20, 34, 30, 35, 27,78.1470833,78.1225,80.2754167] Data9Accuracy = [20, 34, 30, 35, 27,80.7383333,79.9358333,79.6916667] Data10Accuracy = [20, 34, 30, 35, 27,74.1095833,73.0879167,73.0529167] Data11Accuracy = [20, 34, 30, 35, 27,78.4775,77.8658333,78.35] Data12Accuracy = [20, 34, 30, 35, 27,73.0991667,71.9683333,72.75625] Data13Accuracy = [20, 34, 30, 35, 27,79.03,79.575,80.3870833] Data14Accuracy = [20, 34, 30, 35, 27,81.0241667,81.455,80.5516667] Data15Accuracy = [20, 34, 30, 35, 27,79.4829167,80.01375,81.68] Data16Accuracy = [20, 34, 30, 35, 27,81.1158333,80.9795833,80.6541667]
Data1Times = [20, 34, 30, 35, 27,829.0177925,58.6558111,8493.968922] Data2Times = [20, 34, 30, 35, 27,604.5935536,64.3871907,6833.585728] Data3Times = [20, 34, 30, 35, 27,1286.01507,92.4329714,6821.308612] Data4Times = [20, 34, 30, 35, 27,757.3903304,78.7253731,5455.483287] Data5Times = [20, 34, 30, 35, 27,401.3722335,30.4119882,5160.041989] Data6Times = [20, 34, 30, 35, 27,321.4673242,54.1971346,4465.557807] Data7Times = [20, 34, 30, 35, 27,2598.48826,193.1256487,10811.65574] Data8Times = [20, 34, 30, 35, 27,1545.059628,139.9638344,7784.332016] Data9Times = [20, 34, 30, 35, 27,663.416329,615.3660963,3560.337827] Data10Times = [20, 34, 30, 35, 27,670.1615828,621.8249994,3567.653313] Data11Times = [20, 34, 30, 35, 27,619.1959161,572.3292757,3493.582855] Data12Times = [20, 34, 30, 35, 27,626.107683,579.0746278,3528.605614] Data13Times = [20, 34, 30, 35, 27,2936.5633,2631.284413,6465.254111] Data14Times = [20, 34, 30, 35, 27,2967.02757,2672.068268,6551.57865] Data15Times = [20, 34, 30, 35, 27,4102.511475,3711.899848,7704.401239] Data16Times = [20, 34, 30, 35, 27,4075.485739,3726.896591,7737.482708]
Data1TimesInHours = np.array(Data1Times) / 3600 Data2TimesInHours = np.array(Data2Times) / 3600 Data3TimesInHours = np.array(Data3Times) / 3600
Respondida el Dec 17, 2020 a las 19:48 - por Gemini
Votos positivos: 0 | Votos negativos: 0