Commit 1ef5130c authored by Paul Primus's avatar Paul Primus
Browse files

add submission package

parent bd88a8b7
%% Cell type:code id: tags:
``` python
from pymongo import MongoClient
from matplotlib import pyplot as plt
import numpy as np
from dcase2020_task2.data_sets.mcm_dataset import INVERSE_CLASS_MAP, TRAINING_ID_MAP, CLASS_MAP
from scipy.stats import rankdata
# Baseline system performance per machine type / machine id (AUC),
# as published for the DCASE 2020 task 2 baseline.
baseline_auc = {
    'name': 'baseline',
    0: {0: 0.5441, 2: 0.7340, 4: 0.6161, 6: 0.7392},
    1: {0: 0.6715, 2: 0.6153, 4: 0.8833, 6: 0.7455},
    2: {0: 0.9619, 2: 0.7897, 4: 0.9430, 6: 0.6959},
    3: {1: 0.8136, 2: 0.8597, 3: 0.6330, 4: 0.8445},
    4: {1: 0.7807, 2: 0.6416, 3: 0.7535},
    5: {0: 0.6876, 2: 0.6818, 4: 0.7430, 6: 0.5390}
}
# Baseline partial AUC (pAUC) with the same layout as baseline_auc.
baseline_pauc = {
    'name': 'baseline',
    0: {0: 0.4937, 2: 0.5481, 4: 0.5326, 6: 0.5235},
    1: {0: 0.5674, 2: 0.5810, 4: 0.6710, 6: 0.5802},
    2: {0: 0.8144, 2: 0.6368, 4: 0.7198, 6: 0.4902},
    3: {1: 0.6840, 2: 0.7772, 3: 0.5521, 4: 0.6897},
    4: {1: 0.6425, 2: 0.5601, 3: 0.6103},
    5: {0: 0.5170, 2: 0.5183, 4: 0.5197, 6: 0.4843}
}
# Merge both metrics into one structure:
# baseline_both[machine_type][machine_id] -> np.array([auc, pauc]).
baseline_both = {
    key: 'baseline' if key == 'name' else {
        machine_id: np.array([baseline_auc[key][machine_id],
                              baseline_pauc[key][machine_id]])
        for machine_id in baseline_auc[key]
    }
    for key in baseline_auc
}
def get_experiment(runs, name):
    """Collect per-machine results for the run configuration called `name`.

    Args:
        runs: iterable of experiment-run documents (dicts) as stored in
            MongoDB; each has a 'config' dict and optionally a 'result'.
        name: value of config['id'] identifying one grid-search configuration.

    Returns:
        dict mapping machine_type (0..5) -> {machine_id: [auc, pauc]},
        plus the key 'name' holding `name`. Missing results yield [0, 0].
    """
    experiment_dict = {i: dict() for i in range(6)}
    experiment_dict['name'] = name
    for experiment in runs:
        if experiment['config'].get('id') != name:
            continue
        machine_dict = experiment_dict.get(experiment['config']['machine_type'])
        result = experiment.get('result')
        machine_type = INVERSE_CLASS_MAP[experiment['config']['machine_type']]
        machine_id = experiment['config']['machine_id']
        if result:
            # Results are nested as
            # {machine_type: {'json://<id>': {'py/tuple': (auc, pauc, ...)}}}.
            # BUG FIX: the original used -1 as the fallback for a missing
            # machine id, which then crashed on the chained .get() (int has
            # no .get). Fall back to {} so a missing entry yields [0, 0].
            machine_dict[machine_id] = result.get(
                machine_type, {}
            ).get(
                f'json://{machine_id}', {}
            ).get('py/tuple', [0, 0])[:2]
        else:
            machine_dict[machine_id] = np.array([0, 0])
    return experiment_dict
def get_record(experiment):
    """Flatten an experiment dict into a fixed-length list of metric pairs.

    Iterates machine types 0..5 and, for each, the training machine ids in
    TRAINING_ID_MAP, producing one np.array([auc, pauc]) per (type, id) pair.

    Args:
        experiment: dict as produced by `get_experiment` (or `baseline_both`).

    Returns:
        (name, record) where record is a list of 23 np.array([auc, pauc]);
        entries missing from the experiment default to [0, 0].
    """
    record = []
    for machine_type in range(6):
        # Loop-invariant lookup hoisted out of the inner loop.
        machine_results = experiment.get(machine_type)
        for machine_id in TRAINING_ID_MAP[machine_type]:
            if machine_results:
                value = machine_results.get(machine_id, [0, 0])
            else:
                value = np.array([0, 0])
            record.append(np.array(value))
    # The development set has 23 (machine_type, machine_id) combinations.
    assert len(record) == 23
    return experiment['name'], record
```
%% Cell type:code id: tags:
``` python
# Connect to the MongoDB instance that stores the experiment runs.
client = MongoClient('mongodb://student2.cp.jku.at:27017/')
# Fetch all runs of the classification experiment from the grid-search database.
experiments = [r for r in client.resnet_gridsearch.runs.find({"experiment.name": "dcase2020_task2_ClassificationExperiment"})]
print(f'Loaded {len(experiments)} runs.')
```
%% Output
Loaded 405 runs.
%% Cell type:code id: tags:
``` python
# Collect the distinct grid-search configuration ids among all loaded runs.
descriptors = list({experiment['config']['id'] for experiment in experiments})
print(f'Loaded {len(descriptors)} distinct experiments.')
```
%% Output
Loaded 10 distinct experiments.
%% Cell type:code id: tags:
``` python
# Extract results: start with the baseline record, then append one record
# per grid-search configuration.
n, m = get_record(baseline_both)
names = [n]
metrics = [np.array(m)]
for descriptor in descriptors:
    experiment = get_experiment(experiments, descriptor)
    n, m = get_record(experiment)
    names.append(n)
    metrics.append(np.array(m))
```
%% Cell type:code id: tags:
``` python
# Rank all models: per machine type, average AUC / pAUC over its machine
# ids, remember the best model, and rank every model on each metric.
data = np.array(metrics)
auc_ranks = []
pauc_ranks = []
# Boundaries of each machine type's entries within the 23-element records.
idxes = [0, 4, 8, 12, 16, 19, 23]
best_idxes = []
for type_, (lo, hi) in enumerate(zip(idxes[:-1], idxes[1:])):
    average_auc = data[:, lo:hi, 0].mean(axis=1)
    average_pauc = data[:, lo:hi, 1].mean(axis=1)
    best_idx = np.argmax(average_auc + average_pauc)
    best_idxes.append((best_idx, names[best_idx]))
    print(f'Best Model for Machine Type {type_}: {best_idxes[-1]}')
    # rankdata ranks ascending, so negate to put the best model at rank 1.
    auc_ranks.append(rankdata(-average_auc))
    pauc_ranks.append(rankdata(-average_pauc))
# Average the AUC and pAUC ranks per model, then average over machine types.
ranks = np.stack(
    [np.array(list(zip(*auc_ranks))), np.array(list(zip(*pauc_ranks)))],
    axis=-1
).mean(axis=-1).mean(axis=-1)
sorted_model_indices = list(np.argsort(ranks))
names = np.array(names)
for i, (n, r, j) in enumerate(zip(names[sorted_model_indices], ranks[sorted_model_indices], sorted_model_indices)):
    print(f'{i}-{r}: ID-{j} {n}')
```
%% Output
Best Model for Machine Type 0: (9, 'resnet_gridsearch_normal_loose_1e-4_100_BCE')
Best Model for Machine Type 1: (6, 'resnet_gridsearch_a_bit_larger_loose_1e-4_100_BCE')
Best Model for Machine Type 2: (6, 'resnet_gridsearch_a_bit_larger_loose_1e-4_100_BCE')
Best Model for Machine Type 3: (5, 'resnet_gridsearch_a_bit_smaller_loose_1e-4_100_BCE')
Best Model for Machine Type 4: (8, 'resnet_gridsearch_normal_loose_1e-5_100_AUC')
Best Model for Machine Type 5: (1, 'resnet_gridsearch_a_bit_smaller_loose_1e-4_100_AUC')
0-3.75: ID-6 resnet_gridsearch_a_bit_larger_loose_1e-4_100_BCE
1-4.416666666666667: ID-3 resnet_gridsearch_a_bit_larger_loose_1e-4_100_AUC
2-4.833333333333333: ID-4 resnet_gridsearch_a_bit_larger_loose_1e-5_100_BCE
3-5.083333333333333: ID-5 resnet_gridsearch_a_bit_smaller_loose_1e-4_100_BCE
4-5.166666666666667: ID-10 resnet_gridsearch_a_bit_larger_loose_1e-5_100_AUC
5-5.583333333333333: ID-9 resnet_gridsearch_normal_loose_1e-4_100_BCE
6-5.916666666666667: ID-2 resnet_gridsearch_normal_loose_1e-5_100_BCE
7-6.083333333333333: ID-7 resnet_gridsearch_normal_loose_1e-4_100_AUC
8-6.916666666666667: ID-1 resnet_gridsearch_a_bit_smaller_loose_1e-4_100_AUC
9-7.416666666666667: ID-8 resnet_gridsearch_normal_loose_1e-5_100_AUC
10-10.833333333333334: ID-0 baseline
%% Cell type:code id: tags:
``` python
# Create Submission 1
```
%% Cell type:code id: tags:
``` python
from dcase2020_task2.data_sets import INVERSE_CLASS_MAP, EVALUATION_ID_MAP
import os
from shutil import copyfile
# Submission 1: copy the anomaly-score CSVs of the overall best-ranked model
# into the submission package, zero-padding the machine id in the filenames.
best_model_folder = names[sorted_model_indices[0]]
# Loop-invariant source/destination directories hoisted out of the loops.
src_path = os.path.join('..', 'experiment_logs', best_model_folder)
dst_path = os.path.join('..', 'submission_package', 'task2', 'Primus_CP-JKU_task2_1')
for machine_type in range(6):
    machine_type_str = INVERSE_CLASS_MAP[machine_type]
    for model_id in EVALUATION_ID_MAP[machine_type]:
        src = os.path.join(src_path, f'anomaly_score_{machine_type_str}_id_{model_id}_mean.csv')
        # BUG FIX: the original assigned `dst` twice; the unpadded first
        # assignment was dead code, overwritten by the zero-padded one.
        dst = os.path.join(dst_path, f'anomaly_score_{machine_type_str}_id_{model_id:02d}.csv')
        copyfile(src, dst)
```
%% Cell type:code id: tags:
``` python
# Create Submission 2
```
%% Cell type:code id: tags:
``` python
from dcase2020_task2.data_sets import INVERSE_CLASS_MAP, EVALUATION_ID_MAP
import os
from shutil import copyfile
# Submission 2: per machine type, copy the anomaly-score CSVs of that
# type's individually best model into the submission package.
for machine_type, (idx, folder_name) in enumerate(best_idxes):
    machine_type_str = INVERSE_CLASS_MAP[machine_type]
    # Directories depend only on the machine type, not the machine id.
    src_path = os.path.join('..', 'experiment_logs', folder_name)
    dst_path = os.path.join('..', 'submission_package', 'task2', 'Primus_CP-JKU_task2_2')
    for model_id in EVALUATION_ID_MAP[machine_type]:
        src = os.path.join(src_path, f'anomaly_score_{machine_type_str}_id_{model_id}_mean.csv')
        # BUG FIX: the original assigned `dst` twice; the unpadded first
        # assignment was dead code, overwritten by the zero-padded one.
        dst = os.path.join(dst_path, f'anomaly_score_{machine_type_str}_id_{model_id:02d}.csv')
        copyfile(src, dst)
```
%% Cell type:code id: tags:
``` python
# Display the per-machine-type (model index, run name) pairs selected above.
best_idxes
```
%% Output
[(9, 'resnet_gridsearch_normal_loose_1e-4_100_BCE'),\n (6, 'resnet_gridsearch_a_bit_larger_loose_1e-4_100_BCE'),\n (6, 'resnet_gridsearch_a_bit_larger_loose_1e-4_100_BCE'),\n (5, 'resnet_gridsearch_a_bit_smaller_loose_1e-4_100_BCE'),\n (8, 'resnet_gridsearch_normal_loose_1e-5_100_AUC'),\n (1, 'resnet_gridsearch_a_bit_smaller_loose_1e-4_100_AUC')]
%% Cell type:code id: tags:
``` python
```
......
......@@ -5,16 +5,16 @@ submission:
# Label is used to index submissions.
# Generate your label following way to avoid overlapping codes among submissions:
# [Last name of corresponding author]_[Abbreviation of institute of the corresponding author]_task[task number]_[index number of your submission (1-4)]
label: Primus_JKU_task2_1
label: Primus_CP-JKU_task2_1
# Submission name
# This name will be used in the results tables when space permits.
name: Outlier Exposed Convolutional Classifier
name: Vanilla Outlier Exposed ResNet
# Submission name abbreviated
# This abbreviated name will be used in the results table when space is tight.
# Use a maximum of 10 characters.
abbreviation: OECC
abbreviation: OER
# Authors of the submitted system.
# Mark authors in the order you want them to appear in submission lists.
......@@ -102,7 +102,8 @@ system:
# In case of ensemble approaches, add up parameters for all subsystems.
# In case embeddings are used, add up parameter count of the embedding extraction networks and classification network.
# Use numerical value.
total_parameters: 269992
# TODO
total_parameters: 20000000
# List of external datasets used in the submission.
# Development dataset is used here only as an example, list only external datasets
......@@ -116,7 +117,8 @@ system:
# URL to the source code of the system [optional, highly recommended]
# Reproducibility will be used to evaluate submitted systems.
source_code: https://github.com/y-kawagu/dcase2020_task2_baseline
# TODO
source_code: !!null
# System results
results:
......@@ -128,6 +130,7 @@ results:
# Average of AUCs over all Machine IDs [%]
# No need to round numbers
# TODO:
ToyCar:
averaged_auc: 78.77
averaged_pauc: 67.58
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment