Submission statistic¶

Showing some particular statistic over user submission. The main aspects are an evolution in time and success rate. * number of submmission over time * succesful submission over time * leaderboard evolving - scores and ranking

The ANHIR challenge is hosted on https://anhir.grand-challenge.org.

These particular results are for ANHIR workshop hosted at ISBI 2019 in Venice, Italy. In case you want to get some further evaluation related to new submission, you may contact one of the ANHIR administrators because the full submission is required, see https://anhir.grand-challenge.org/Organizers.

[1]:

%matplotlib inline
%load_ext autoreload
%autoreload 2

import os, sys
import glob, json
import shutil

import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

sys.path += [os.path.abspath('.'), os.path.abspath('..')]  # Add path to root
from birl.utilities.data_io import update_path
from birl.utilities.drawing import draw_matrix_user_ranking
from birl.utilities.evaluate import grouping_cumulative, aggregate_user_score_timeline

Loading submission table¶

The required input table is exported from https://anhir.grand-challenge.org/evaluation/jobs/ which is accessible for ANHIR administrators.

[2]:

# temporary folder for figures submissions
PATH_TEMP = os.path.abspath(os.path.expanduser('~/Desktop/ANHIR_submissions'))
PATH_SUBMISSIONS = os.path.join(update_path('bm_ANHIR'), 'submissions.csv')
df = pd.read_csv(PATH_SUBMISSIONS)
# conver the data-time columns
for col in ['Created', 'Updated']:
    df[col] = pd.to_datetime(df[col])
df['Team'] = df['User'].apply(lambda txt: txt[txt.index(' (') + 2: txt.index(')')]
                                          if all(c in txt for c in '()') else None)
df['User'] = df['User'].apply(lambda txt: txt[:txt.index(' (')] if all(c in txt for c in '()') else txt)
# compute time remaining to deadline
df['Till_Deadline'] = - df['Created'].sub(pd.to_datetime('April 1, 2019, 9:00 a.m.'), axis=0)
df['Deadline_days'] = (df['Till_Deadline'] / np.timedelta64(1, 'D')).astype(int)
print ('loaded records: %i' % len(df))
df.head(3)

loaded records: 279

[2]:

	ID	Created	Updated	User	Status	Position	Comment	Hide/Publish	Team	Till_Deadline
0	104f40bb-7e23-484d-8383-4a476bff5684	2019-04-01 04:24:00	2019-04-01 04:26:00	Mohammed	Succeeded	9.0	NaN	NaN	NMPN	04:36:00
1	6c82a929-2073-4c4f-964c-8c03ff1aecb0	2019-04-01 04:18:00	2019-04-01 04:19:00	Mohammed	Succeeded	NaN	NaN	NaN	NMPN	04:42:00
2	0eaa70e6-0be9-44aa-843c-02cba8b4cae9	2019-04-01 03:59:00	2019-04-01 04:00:00	Mohammed	Succeeded	NaN	NaN	NaN	NMPN	05:01:00

Visualise participants info¶

Showing charts related to individual participants

[3]:

dfx = grouping_cumulative(df, col_index='User', col_column='Status')
ax = dfx.plot(kind='bar', stacked=True, figsize=(len(dfx) / 2, 4), grid=True, cmap='RdYlGn')
_= plt.ylabel('# submissions')
ax.get_figure().tight_layout()
ax.get_figure().savefig(os.path.join(PATH_TEMP, 'plot_team-submit-succes.pdf'))

# normalize over the sum of all cases
dfx = (dfx.T / dfx.T.sum().astype(float)).T
ax = dfx.plot(kind='bar', stacked=True, figsize=(len(dfx) / 2, 4), grid=True, cmap='RdYlGn')
ax.legend(loc='upper center', bbox_to_anchor=(1.15, 0.7), ncol=1)
_= plt.ylabel('% submissions')
ax.get_figure().tight_layout()
ax.get_figure().savefig(os.path.join(PATH_TEMP, 'bar_team-submit-succes.pdf'))

../_images/notebooks_ANHIR_submissions-stat_5_0.png

../_images/notebooks_ANHIR_submissions-stat_5_1.png

[4]:

dfx = grouping_cumulative(df, col_index='Deadline_days', col_column='Status')
_= dfx.plot(kind='area', cmap='RdYlGn', figsize=(len(dfx) / 3, 4), grid=True)
_= plt.xlim([len(dfx), 0]), plt.ylabel('# submissions')
ax.get_figure().tight_layout()
ax.get_figure().savefig(os.path.join(PATH_TEMP, 'plot_time-submit-succes.pdf'))

# normalize over the sum of all cases
dfx = (dfx.T / dfx.T.sum().astype(float)).T
ax = dfx.plot(kind='bar', stacked=True, cmap='RdYlGn', figsize=(len(dfx) / 3, 3), grid=True)
ax.legend(loc='upper center', bbox_to_anchor=(1.15, 0.7), ncol=1)
_= plt.xlim([len(dfx), 0]), plt.ylabel('% submissions')
ax.get_figure().tight_layout()
ax.get_figure().savefig(os.path.join(PATH_TEMP, 'bar_time-submit-succes.pdf'))

../_images/notebooks_ANHIR_submissions-stat_6_0.png

../_images/notebooks_ANHIR_submissions-stat_6_1.png

Visualise submissions info¶

Showing charts related to particular submissions

[5]:

dfx = grouping_cumulative(df, col_index='Deadline_days', col_column='User')
ax = dfx.plot(kind='area', cmap='nipy_spectral', figsize=(len(dfx) / 2, 4), grid=True)
_= plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05), ncol=int(np.ceil(len(dfx.columns) / 3)), fancybox=True, shadow=True)
_= plt.xlim([len(dfx), 0]), plt.ylabel('# submissions')
ax.get_figure().tight_layout()
ax.get_figure().savefig(os.path.join(PATH_TEMP, 'plot_time-submit-team.pdf'))

ax = dfx.plot(kind='bar', stacked=True, cmap='nipy_spectral', figsize=(len(dfx) / 2, 3), grid=True)
_= plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05), ncol=int(np.ceil(len(dfx.columns) / 3)), fancybox=True, shadow=True)
_= plt.xlim([len(dfx), 0]), plt.ylabel('# submissions')
ax.get_figure().tight_layout()
ax.get_figure().savefig(os.path.join(PATH_TEMP, 'bar_time-submit-team.pdf'))

../_images/notebooks_ANHIR_submissions-stat_8_0.png

../_images/notebooks_ANHIR_submissions-stat_8_1.png

[6]:

dfx = grouping_cumulative(df, col_index='Deadline_days', col_column='Team')
_= dfx.plot(kind='area', cmap='nipy_spectral', figsize=(len(dfx) / 2, 4), grid=True)
_= plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05), ncol=int(np.ceil(len(dfx.columns) / 2)), fancybox=True, shadow=True)
_= plt.xlim([len(dfx), 0]), plt.ylabel('# submissions')

_= dfx.plot(kind='bar', stacked=True, cmap='nipy_spectral', figsize=(len(dfx) / 2, 3), grid=True)
_= plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05), ncol=int(np.ceil(len(dfx.columns) / 2)), fancybox=True, shadow=True)
_= plt.xlim([len(dfx), 0]), plt.ylabel('# submissions')

../_images/notebooks_ANHIR_submissions-stat_9_0.png

../_images/notebooks_ANHIR_submissions-stat_9_1.png

Leader-board evolving¶

Evolving of the automatic leader board over time with respect to a particular score. Exporting on rob results of a particular evalation method is required, see for example https://anhir.grand-challenge.org/evaluation/methods/80cebd88-9133-4802-98cf-53d878e1bb30/ (to do so you have to be challnge admin)

[7]:

import yaml # https://stackoverflow.com/questions/988228

PATH_SUBMISSIONS = os.path.join(update_path('bm_ANHIR'), 'submission-results.csv')
df_res = pd.read_csv(PATH_SUBMISSIONS)
print ('loaded records: %i' % len(df_res))
df_res.head(3)

loaded records: 183

[7]:

	Created	Updated	Job	Submission	Status	Result
0	March 19, 2019, 12:02 p.m.	March 19, 2019, 12:04 p.m.	16ab9924-1d33-4a1e-84d7-6e55c6f201b6	e0845c22-8030-4227-a3ca-c5047d380eae	Succeeded	{'cases': 'not exported', 'computer': {'file':...
1	March 19, 2019, 12:23 p.m.	March 19, 2019, 12:25 p.m.	9cc3dd8f-5331-4748-841b-76db1fdd2534	f4e4453b-7b35-47e2-822a-adf04d2de0ae	Succeeded	{'cases': 'not exported', 'computer': {'file':...
2	March 19, 2019, 1 p.m.	March 19, 2019, 1:03 p.m.	a2325f59-0561-4883-bf89-08b2d15ed444	57d86dc8-bcfd-4661-abca-42ac5f71bf9a	Succeeded	{'cases': 'not exported', 'computer': {'file':...

Merge results with user submissions

[8]:

df2 = df.merge(df_res[['Job', 'Submission', 'Result']], how='right', left_on='ID', right_on='Job')
print ('loaded records: %i' % len(df2))
# get aggregated results
result_str = df2.iloc[0]['Result']
# parse keys from aggregated results
agg_cols = list(yaml.load(result_str)['aggregates'].keys())
# convert aggregated resulst to table columns
for col in agg_cols:
    df2[col] = df2['Result'].apply(lambda ar: yaml.load(ar)['aggregates'][col]
                                   if isinstance(ar, str) else None)
df2 = df2[['Created', 'User', 'Team', 'Till_Deadline', 'Deadline_days'] + agg_cols]
df2.head(3)

loaded records: 183

/home/jb/.local/lib/python3.6/site-packages/ipykernel_launcher.py:6: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.

/home/jb/.local/lib/python3.6/site-packages/ipykernel_launcher.py:10: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.
  # Remove the CWD from sys.path while we load stuff.

[8]:

	Created	User	Team	Till_Deadline	Average-Max-rTRE	Average-Norm-Time	Average-Robustness	Average-Median-rTRE	Average-Rank-Max-rTRE	Average-used-landmarks	Average-Max-rTRE-Robust	Average-Norm-Time-Robust	Average-Rank-Median-rTRE	Average-Median-rTRE-Robust
0	2019-04-01 04:24:00	Mohammed	NMPN	04:36:00	0.0750648	2.540539	0.959315	0.021120	None	1.029598	0.062872	2.549469	None	0.014818
1	2019-04-01 04:18:00	Mohammed	NMPN	04:42:00	0.0756789	2.487868	0.955414	0.021160	None	1.029467	0.062904	2.499697	None	0.014807
2	2019-04-01 03:59:00	Mohammed	NMPN	05:01:00	0.0774333	2.521503	0.951477	0.023891	None	1.029401	0.063235	2.539681	None	0.016295

Visualisations¶

Showing the evolving over time from perspective of the score and mean-time ranking

Timeline: Average Median rTRE

[9]:

dfx = aggregate_user_score_timeline(df2, 'Deadline_days', 'User', 'Average-Median-rTRE',
                                    lower_better=True, top_down=False, interp=True)
ax = dfx.plot(style='-', cmap='nipy_spectral', figsize=(len(dfx) / 2, 3), grid=True)
_= plt.legend(loc='upper center', bbox_to_anchor=(1.2, 1.1), ncol=1)
_= plt.xlim([len(dfx), 0]), plt.ylim([0, 0.1]),
_= plt.ylabel('Average-Median-rTRE'), plt.xlabel('Deadline_days')
ax.get_figure().tight_layout()
ax.get_figure().savefig(os.path.join(PATH_TEMP, 'plot_time-score_avg-median-rTRE.pdf'))

dfx.index = ['D-%i' % c for c in dfx.index]
fig = draw_matrix_user_ranking(dfx.T[dfx.index[::-1]])
fig.savefig(os.path.join(PATH_TEMP, 'chess_time-ranking_avg-median-rTRE.pdf'))

/home/jb/Dropbox/Workspace/BIRL/birl/utilities/evaluate.py:301: RuntimeWarning: All-NaN axis encountered
  mtx[aggrs.index(agg), users.index(usr)] = fn_best(dfgg[col_score])
/home/jb/Dropbox/Workspace/BIRL/birl/utilities/evaluate.py:309: RuntimeWarning: All-NaN slice encountered
  mtx[i, j] = fn_best(vals)
/home/jb/Dropbox/Workspace/BIRL/birl/utilities/evaluate.py:223: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.
  ranking = np.zeros(df_stat.as_matrix().shape)
/home/jb/Dropbox/Workspace/BIRL/birl/utilities/visualisation.py:454: FutureWarning: Method .as_matrix will be removed in a future version. Use .values instead.
  fig, _ = plt.subplots(figsize=np.array(df_stat.as_matrix().shape[::-1]) * 0.35)

../_images/notebooks_ANHIR_submissions-stat_16_1.png

../_images/notebooks_ANHIR_submissions-stat_16_2.png

Timeline: Execution time

[10]:

dfx = aggregate_user_score_timeline(df2, 'Deadline_days', 'User', 'Average-Norm-Time',
                                    lower_better=True, top_down=False, interp=True)
ax = dfx.plot(style='-', cmap='nipy_spectral', figsize=(len(dfx) / 2, 3), grid=True)
_= plt.legend(loc='upper center', bbox_to_anchor=(1.2, 1.1), ncol=1)
_= plt.xlim([len(dfx), 0]), plt.yscale('log')
_= plt.ylabel('Average-Norm-Time'), plt.xlabel('Deadline_days')
ax.get_figure().tight_layout()
ax.get_figure().savefig(os.path.join(PATH_TEMP, 'plot_time-score_avg-time.pdf'))

dfx.index = ['D-%i' % c for c in dfx.index]
fig = draw_matrix_user_ranking(dfx.T[dfx.index[::-1]])
fig.savefig(os.path.join(PATH_TEMP, 'chess_time-ranking_avg-time.pdf'))

../_images/notebooks_ANHIR_submissions-stat_18_0.png

../_images/notebooks_ANHIR_submissions-stat_18_1.png

Timeline: Robustness

[11]:

dfx = aggregate_user_score_timeline(df2, 'Deadline_days', 'User', 'Average-Robustness',
                                    lower_better=False, top_down=False, interp=True)
ax = dfx.plot(style='-', cmap='nipy_spectral', figsize=(len(dfx) / 2, 3), grid=True)
_= plt.legend(loc='upper center', bbox_to_anchor=(1.2, 1.1), ncol=1)
_= plt.xlim([len(dfx), 0]), plt.ylim([0.8, 1.0]),
_= plt.ylabel('Average-Robustness'), plt.xlabel('Deadline_days')
ax.get_figure().tight_layout()
ax.get_figure().savefig(os.path.join(PATH_TEMP, 'plot_time-score_avg-robust.pdf'))

dfx.index = ['D-%i' % c for c in dfx.index]
fig = draw_matrix_user_ranking(dfx.T[dfx.index[::-1]], higher_better=True)
fig.savefig(os.path.join(PATH_TEMP, 'chess_time-ranking_avg-robust.pdf'))

../_images/notebooks_ANHIR_submissions-stat_20_0.png

../_images/notebooks_ANHIR_submissions-stat_20_1.png

[ ]: