Commit 483f75aa authored by Paul Primus
Browse files

add final submission package

parent dfebc6ca
%% Cell type:code id: tags:
``` python
from pymongo import MongoClient
from matplotlib import pyplot as plt
import numpy as np
from dcase2020_task2.data_sets.mcm_dataset import INVERSE_CLASS_MAP, TRAINING_ID_MAP, CLASS_MAP
from scipy.stats import rankdata
# AUC values of the baseline system, keyed by machine type index and then by
# machine ID. The extra 'name' entry labels the system.
baseline_auc = {
    'name': 'baseline',
    0: {0: 0.5441, 2: 0.7340, 4: 0.6161, 6: 0.7392},
    1: {0: 0.6715, 2: 0.6153, 4: 0.8833, 6: 0.7455},
    2: {0: 0.9619, 2: 0.7897, 4: 0.9430, 6: 0.6959},
    3: {1: 0.8136, 2: 0.8597, 3: 0.6330, 4: 0.8445},
    4: {1: 0.7807, 2: 0.6416, 3: 0.7535},
    5: {0: 0.6876, 2: 0.6818, 4: 0.7430, 6: 0.5390},
}
# pAUC values of the baseline system, same layout as baseline_auc.
baseline_pauc = {
    'name': 'baseline',
    0: {0: 0.4937, 2: 0.5481, 4: 0.5326, 6: 0.5235},
    1: {0: 0.5674, 2: 0.5810, 4: 0.6710, 6: 0.5802},
    2: {0: 0.8144, 2: 0.6368, 4: 0.7198, 6: 0.4902},
    3: {1: 0.6840, 2: 0.7772, 3: 0.5521, 4: 0.6897},
    4: {1: 0.6425, 2: 0.5601, 3: 0.6103},
    5: {0: 0.5170, 2: 0.5183, 4: 0.5197, 6: 0.4843},
}

# Merge both tables into one: every machine ID maps to a two-element array
# [auc, pauc]. The 'name' entry is carried over as a plain string.
baseline_both = {}
for type_idx, auc_by_id in baseline_auc.items():
    if type_idx == 'name':
        baseline_both[type_idx] = 'baseline'
        continue
    baseline_both[type_idx] = {
        machine_id: np.array([auc, baseline_pauc[type_idx][machine_id]])
        for machine_id, auc in auc_by_id.items()
    }
def get_experiment(runs, name):
    """Collect the per-machine results of every run that belongs to one experiment.

    Args:
        runs: iterable of run documents (dicts). Each needs a 'config' dict;
            matching runs must provide 'machine_type' and 'machine_id' in it
            and may provide a 'result' dict.
        name: experiment identifier, compared against ``config['id']``.

    Returns:
        dict with the keys 0..5 (machine type index), each mapping
        machine ID -> first two entries of the stored 'py/tuple'
        (presumably AUC and pAUC -- confirm against the experiment code),
        plus the key 'name' holding ``name``. Runs without a result, or whose
        result lacks the expected entry, contribute ``[0, 0]``.
    """
    experiment_dict = {type_idx: {} for type_idx in range(6)}
    experiment_dict['name'] = name
    for run in runs:
        config = run['config']
        if config.get('id') != name:
            continue
        machine_dict = experiment_dict.get(config['machine_type'])
        machine_type = INVERSE_CLASS_MAP[config['machine_type']]
        machine_id = config['machine_id']
        result = run.get('result')
        if result:
            # Every level falls back to an empty dict so that a missing
            # entry ends in the [0, 0] default instead of calling .get on
            # a non-dict placeholder.
            machine_dict[machine_id] = (
                result.get(machine_type, {})
                .get(f'json://{machine_id}', {})
                .get('py/tuple', [0, 0])[:2]
            )
        else:
            machine_dict[machine_id] = np.array([0, 0])
    return experiment_dict
def get_record(experiment):
    """Flatten an experiment dict into an ordered list of per-machine metrics.

    Machines are visited by machine type index 0..5 and, within a type, in
    the order given by TRAINING_ID_MAP. A machine type that is missing or
    empty, or a machine ID without an entry, contributes ``[0, 0]``.

    Returns:
        Tuple ``(experiment['name'], record)`` where ``record`` is a list of
        23 numpy arrays.
    """
    record = []
    for type_idx in range(6):
        for machine_id in TRAINING_ID_MAP[type_idx]:
            per_type = experiment.get(type_idx)
            value = per_type.get(machine_id, [0, 0]) if per_type else np.array([0, 0])
            record.append(np.array(value))
    # Sanity check: one entry per training machine.
    assert len(record) == 23
    return experiment['name'], record
```
%% Cell type:code id: tags:
``` python
# Fetch every run of the classification experiment from the grid-search
# database.
client = MongoClient('mongodb://student2.cp.jku.at:27017/')
run_filter = {"experiment.name": "dcase2020_task2_ClassificationExperiment"}
experiments = list(client.resnet_gridsearch.runs.find(run_filter))
print(f'Loaded {len(experiments)} runs.')
```
%%%% Output: stream
Loaded 563 runs.
Loaded 572 runs.
%% Cell type:code id: tags:
``` python
# Gather the distinct experiment ids found in the loaded runs ...
descriptors = set()
for experiment in experiments:
    descriptors.add(experiment['config']['id'])
# ... and keep only those whose last underscore-separated token is not 'rerun'.
descriptors = [d for d in descriptors if d.rsplit('_', 1)[-1] != 'rerun']
print(f'Loaded {len(descriptors)} distinct experiments.')
```
%%%% Output: stream
Loaded 13 distinct experiments.
%% Cell type:code id: tags:
``` python
# Build two parallel lists: experiment names and their flattened metric
# records. The baseline always occupies index 0.
n, m = get_record(baseline_both)
names = [n]
metrics = [np.array(m)]
for descriptor in descriptors:
    experiment_results = get_experiment(experiments, descriptor)
    n, m = get_record(experiment_results)
    names.append(n)
    metrics.append(np.array(m))
```
%% Cell type:code id: tags:
``` python
# data is indexed as [model, machine, metric]; metric 0 is used as AUC and
# metric 1 as pAUC below.
data = np.array(metrics)
auc_ranks = []
pauc_ranks = []
# Boundaries of the six machine types inside the 23-entry record.
idxes = [0, 4, 8, 12, 16, 19, 23]
for type_, (start, stop) in enumerate(zip(idxes, idxes[1:])):
    average_auc = data[:, start:stop, 0].mean(axis=1)
    average_pauc = data[:, start:stop, 1].mean(axis=1)
    # Model indices ordered from highest to lowest combined score.
    best_first = np.argsort(average_auc + average_pauc)[::-1]
    print(f'Best Model for Machine Type {type_}: {best_first}')
    # Negate so that the highest average receives rank 1.
    auc_ranks.append(rankdata(-average_auc))
    pauc_ranks.append(rankdata(-average_pauc))
```
%%%% Output: stream
Best Model for Machine Type 0: [ 1 6 5 12 11 9 2 10 3 7 13 4 8 0]
Best Model for Machine Type 1: [ 1 10 6 9 3 5 13 2 11 12 7 8 4 0]
Best Model for Machine Type 2: [10 5 12 1 11 3 2 13 9 6 4 8 7 0]
Best Model for Machine Type 3: [ 2 12 4 5 11 6 10 3 1 13 9 8 7 0]
Best Model for Machine Type 4: [13 3 12 11 10 9 7 6 8 4 5 2 0 1]
Best Model for Machine Type 5: [ 4 8 2 7 10 5 9 6 13 11 12 3 0 1]
Best Model for Machine Type 4: [13 3 12 11 10 9 7 6 8 4 5 2 1 0]
Best Model for Machine Type 5: [ 1 4 8 2 7 10 5 9 6 13 11 12 3 0]
%% Cell type:code id: tags:
``` python
# Arrange the per-type ranks as [model, machine type, metric] and average over
# the metric axis first, then over the machine types.
rank_cube = np.stack([np.array(auc_ranks).T, np.array(pauc_ranks).T], axis=-1)
ranks = rank_cube.mean(axis=-1).mean(axis=-1)
# Lowest mean rank first, i.e. best model first.
indices = list(np.argsort(ranks))
names = np.array(names)
for i, j in enumerate(indices):
    print(f'{i}: ID-{j} {names[j]}')
```
%%%% Output: stream
0: ID-10 resnet_gridsearch_a_bit_larger_loose_1e-4_100_BCE
1: ID-5 resnet_gridsearch_a_bit_larger_loose_1e-4_100_AUC
2: ID-12 resnet_gridsearch_a_bit_larger_loose_1e-5_100_BCE
3: ID-2 resnet_gridsearch_a_bit_smaller_loose_1e-4_100_BCE
4: ID-11 resnet_gridsearch_a_bit_larger_loose_1e-5_100_AUC
5: ID-6 resnet_gridsearch_normal_loose_1e-4_100_BCE
6: ID-1 resnet_gridsearch_2_a_bit_larger_loose_1e-4_0.99_100_BCE
0: ID-1 resnet_gridsearch_2_a_bit_larger_loose_1e-4_0.99_100_BCE
1: ID-10 resnet_gridsearch_a_bit_larger_loose_1e-4_100_BCE
2: ID-5 resnet_gridsearch_a_bit_larger_loose_1e-4_100_AUC
3: ID-12 resnet_gridsearch_a_bit_larger_loose_1e-5_100_BCE
4: ID-2 resnet_gridsearch_a_bit_smaller_loose_1e-4_100_BCE
5: ID-11 resnet_gridsearch_a_bit_larger_loose_1e-5_100_AUC
6: ID-6 resnet_gridsearch_normal_loose_1e-4_100_BCE
7: ID-3 resnet_gridsearch_normal_loose_1e-5_100_BCE
8: ID-9 resnet_gridsearch_normal_loose_1e-4_100_AUC
9: ID-4 resnet_gridsearch_a_bit_smaller_loose_1e-4_100_AUC
10: ID-13 resnet_gridsearch_normal_loose_1e-5_100_AUC
11: ID-7 resnet_gridsearch_a_bit_smaller_loose_1e-5_100_AUC
12: ID-8 resnet_gridsearch_a_bit_smaller_loose_1e-5_100_BCE
13: ID-0 baseline
%% Cell type:code id: tags:
``` python
# Display the model indices ordered by ascending mean rank (best model first).
indices
```
%%%% Output: execute_result
[10, 5, 12, 2, 11, 6, 1, 3, 9, 4, 13, 7, 8, 0]
[1, 10, 5, 12, 2, 11, 6, 3, 9, 4, 13, 7, 8, 0]
%% Cell type:code id: tags:
``` python
# Grouped bar chart of the per-machine scores for the baseline and a
# hand-picked set of models.
metric = 'auroc_mean'
bar_width = 0.6
bar_spacing = 0.00
top = 10
# Last axis of data: 0 is selected for 'auroc_mean', 1 for anything else.
metric_idx = 0 if metric == 'auroc_mean' else 1
# Models 10 and 1 are hard-coded here; the [:top] cut has no effect while
# fewer than `top` rows are selected.
top_k_data = data[[10, 1], :, metric_idx][:top]
baseline_data = data[0:1, :, metric_idx]
to_visualize = np.concatenate([baseline_data, top_k_data])

plt.figure(figsize=(20, 10))
plt.rcParams.update({'font.size': 22})
plt.title(f'{metric}')

# One tick label per training machine: shortened type name plus machine ID.
labels = [
    "{}, {}".format(INVERSE_CLASS_MAP[i][:6], j)
    for i in range(6)
    for j in TRAINING_ID_MAP[i]
]

# Each model gets an equal share of the bar group drawn at every tick.
positions = np.arange(len(labels))
single_width = bar_width / len(to_visualize)
for i, d in enumerate(to_visualize):
    plt.bar(positions + i * (single_width + bar_spacing), d, single_width)

plt.xticks(positions, labels, rotation='vertical')
plt.ylim(0.5, 1.)
plt.yticks(np.arange(0.5, 1., 0.1))
plt.grid()
plt.savefig(f'top_{top}_auc.png')
plt.show()
data.shape
```
%%%% Output: display_data
![]()
%%%% Output: execute_result
(14, 23, 2)
%% Cell type:code id: tags:
``` python
# Grouped bar chart of the per-machine pAUC for the baseline and the `top`
# best-ranked models.
metric = 'pauroc_mean'
bar_width = 0.6
bar_spacing = 0.00
top = 10
# Last axis of data: 0 is selected for 'auroc_mean', 1 for anything else.
metric_idx = 0 if metric == 'auroc_mean' else 1
top_k_data = data[indices, :, metric_idx][:top]
baseline_data = data[0:1, :, metric_idx]
to_visualize = np.concatenate([baseline_data, top_k_data])

plt.figure(figsize=(20, 10))
plt.rcParams.update({'font.size': 22})
plt.title(f'{metric}')

# One tick label per training machine: shortened type name plus machine ID.
labels = [
    "{}, {}".format(INVERSE_CLASS_MAP[i][:6], j)
    for i in range(6)
    for j in TRAINING_ID_MAP[i]
]

# Each model gets an equal share of the bar group drawn at every tick.
positions = np.arange(len(labels))
single_width = bar_width / len(to_visualize)
for i, d in enumerate(to_visualize):
    plt.bar(positions + i * (single_width + bar_spacing), d, single_width)

plt.xticks(positions, labels, rotation='vertical')
plt.ylim(0.45, 1.)
plt.yticks(np.arange(0.5, 1., 0.1))
plt.grid()
# Use a pAUC-specific file name so this figure does not overwrite the
# 'top_{top}_auc.png' written by the AUC plot.
plt.savefig(f'top_{top}_pauc.png')
plt.show()
data.shape
```
%%%% Output: display_data
![]()
%%%% Output: execute_result
(11, 23, 2)
%% Cell type:code id: tags:
``` python
```
......
This diff is collapsed.
%% Cell type:code id: tags:
``` python
from pymongo import MongoClient
from matplotlib import pyplot as plt
import numpy as np
from dcase2020_task2.data_sets.mcm_dataset import INVERSE_CLASS_MAP, TRAINING_ID_MAP, EVALUATION_ID_MAP, CLASS_MAP
from scipy.stats import rankdata
import os
from shutil import copyfile
import pandas as pd
# AUC values of the baseline system, keyed by machine type index and then by
# machine ID. The extra 'name' entry labels the system.
baseline_auc = {
    'name': 'baseline',
    0: {0: 0.5441, 2: 0.7340, 4: 0.6161, 6: 0.7392},
    1: {0: 0.6715, 2: 0.6153, 4: 0.8833, 6: 0.7455},
    2: {0: 0.9619, 2: 0.7897, 4: 0.9430, 6: 0.6959},
    3: {1: 0.8136, 2: 0.8597, 3: 0.6330, 4: 0.8445},
    4: {1: 0.7807, 2: 0.6416, 3: 0.7535},
    5: {0: 0.6876, 2: 0.6818, 4: 0.7430, 6: 0.5390},
}
# pAUC values of the baseline system, same layout as baseline_auc.
baseline_pauc = {
    'name': 'baseline',
    0: {0: 0.4937, 2: 0.5481, 4: 0.5326, 6: 0.5235},
    1: {0: 0.5674, 2: 0.5810, 4: 0.6710, 6: 0.5802},
    2: {0: 0.8144, 2: 0.6368, 4: 0.7198, 6: 0.4902},
    3: {1: 0.6840, 2: 0.7772, 3: 0.5521, 4: 0.6897},
    4: {1: 0.6425, 2: 0.5601, 3: 0.6103},
    5: {0: 0.5170, 2: 0.5183, 4: 0.5197, 6: 0.4843},
}

# Merge both tables into one: every machine ID maps to a two-element array
# [auc, pauc]. The 'name' entry is carried over as a plain string.
baseline_both = {}
for type_idx, auc_by_id in baseline_auc.items():
    if type_idx == 'name':
        baseline_both[type_idx] = 'baseline'
        continue
    baseline_both[type_idx] = {
        machine_id: np.array([auc, baseline_pauc[type_idx][machine_id]])
        for machine_id, auc in auc_by_id.items()
    }
def get_experiment(runs, name):
    """Collect the per-machine results of every run that belongs to one experiment.

    Args:
        runs: iterable of run documents (dicts). Each needs a 'config' dict;
            matching runs must provide 'machine_type' and 'machine_id' in it
            and may provide a 'result' dict.
        name: experiment identifier, compared against ``config['id']``.

    Returns:
        dict with the keys 0..5 (machine type index), each mapping
        machine ID -> first two entries of the stored 'py/tuple'
        (presumably AUC and pAUC -- confirm against the experiment code),
        plus the key 'name' holding ``name``. Runs without a result, or whose
        result lacks the expected entry, contribute ``[0, 0]``.
    """
    experiment_dict = {type_idx: {} for type_idx in range(6)}
    experiment_dict['name'] = name
    for run in runs:
        config = run['config']
        if config.get('id') != name:
            continue
        machine_dict = experiment_dict.get(config['machine_type'])
        machine_type = INVERSE_CLASS_MAP[config['machine_type']]
        machine_id = config['machine_id']
        result = run.get('result')
        if result:
            # Every level falls back to an empty dict so that a missing
            # entry ends in the [0, 0] default instead of calling .get on
            # a non-dict placeholder.
            machine_dict[machine_id] = (
                result.get(machine_type, {})
                .get(f'json://{machine_id}', {})
                .get('py/tuple', [0, 0])[:2]
            )
        else:
            machine_dict[machine_id] = np.array([0, 0])
    return experiment_dict
def get_record(experiment):
    """Flatten an experiment dict into an ordered list of per-machine metrics.

    Machines are visited by machine type index 0..5 and, within a type, in
    the order given by TRAINING_ID_MAP. A machine type that is missing or
    empty, or a machine ID without an entry, contributes ``[0, 0]``.

    Returns:
        Tuple ``(experiment['name'], record)`` where ``record`` is a list of
        23 numpy arrays.
    """
    record = []
    for type_idx in range(6):
        for machine_id in TRAINING_ID_MAP[type_idx]:
            per_type = experiment.get(type_idx)
            value = per_type.get(machine_id, [0, 0]) if per_type else np.array([0, 0])
            record.append(np.array(value))
    # Sanity check: one entry per training machine.
    assert len(record) == 23
    return experiment['name'], record
```
%% Cell type:code id: tags:
``` python
# Fetch every run of the classification experiment from the grid-search
# database.
client = MongoClient('mongodb://student2.cp.jku.at:27017/')
run_filter = {"experiment.name": "dcase2020_task2_ClassificationExperiment"}
experiments = list(client.resnet_gridsearch.runs.find(run_filter))
print(f'Loaded {len(experiments)} runs.')
```
%%%% Output: stream
Loaded 563 runs.
Loaded 572 runs.
%% Cell type:code id: tags:
``` python
# Gather the distinct experiment ids found in the loaded runs.
descriptors = set()
for experiment in experiments:
    descriptors.add(experiment['config']['id'])
descriptors = list(descriptors)
print(f'Loaded {len(descriptors)} distinct experiments.')
```
%%%% Output: stream
Loaded 25 distinct experiments.
%% Cell type:code id: tags:
``` python
# Keep an experiment id only if its last underscore-separated token is not
# 'rerun' and its third token is not '2'.
descriptors = [
    d for d in descriptors
    if d.split('_')[-1] != 'rerun' and d.split('_')[2] != '2'
]
print(f'Loaded {len(descriptors)} distinct experiments, without reruns.')
```
%%%% Output: stream
Loaded 12 distinct experiments, without reruns.
Loaded 13 distinct experiments, without reruns.
%% Cell type:code id: tags:
``` python
# Build two parallel lists: experiment names and their flattened metric
# records. The baseline always occupies index 0.
n, m = get_record(baseline_both)
names = [n]
metrics = [np.array(m)]
for descriptor in descriptors:
    experiment_results = get_experiment(experiments, descriptor)
    n, m = get_record(experiment_results)
    names.append(n)
    metrics.append(np.array(m))
```
%% Cell type:code id: tags:
``` python
# data is indexed as [model, machine, metric]; metric 0 is used as AUC and
# metric 1 as pAUC below.
data = np.array(metrics)
auc_ranks = []
pauc_ranks = []
# Boundaries of the six machine types inside the 23-entry record.
idxes = [0, 4, 8, 12, 16, 19, 23]
best_idxes = []
for type_, (start, stop) in enumerate(zip(idxes, idxes[1:])):
    average_auc = data[:, start:stop, 0].mean(axis=1)
    average_pauc = data[:, start:stop, 1].mean(axis=1)
    # Model indices ordered from highest to lowest combined score.
    best_first = np.argsort(average_auc + average_pauc)[::-1]
    best_idxes.append(best_first)
    print(f'Best Model for Machine Type {type_}: {best_first}')
    # Negate so that the highest average receives rank 1.
    auc_ranks.append(rankdata(-average_auc))
    pauc_ranks.append(rankdata(-average_pauc))

# Arrange the per-type ranks as [model, machine type, metric] and average over
# the metric axis first, then over the machine types.
rank_cube = np.stack([np.array(auc_ranks).T, np.array(pauc_ranks).T], axis=-1)
ranks = rank_cube.mean(axis=-1).mean(axis=-1)
# Lowest mean rank first, i.e. best model first.
sorted_model_indices = list(np.argsort(ranks))
names = np.array(names)
for i, j in enumerate(sorted_model_indices):
    print(f'{i:02d}: ID-{j:02d} {names[j]}')
```
%%%% Output: stream
Best Model for Machine Type 0: [ 2 9 1 5 6 10 4 8 7 12 11 3 0]
Best Model for Machine Type 1: [ 4 2 6 8 9 12 10 5 1 7 3 11 0]
Best Model for Machine Type 2: [ 4 9 1 5 8 10 12 6 2 11 3 7 0]
Best Model for Machine Type 3: [10 1 11 9 5 2 4 8 12 6 3 7 0]
Best Model for Machine Type 4: [12 8 1 5 4 6 7 2 3 11 9 10 0]
Best Model for Machine Type 5: [11 3 10 7 4 9 6 2 12 5 1 8 0]
Best Model for Machine Type 0: [ 8 2 10 1 5 6 11 4 9 7 13 12 3 0]
Best Model for Machine Type 1: [ 8 4 2 6 9 10 13 11 5 1 7 3 12 0]
Best Model for Machine Type 2: [ 4 10 1 8 5 9 11 13 6 2 12 3 7 0]
Best Model for Machine Type 3: [11 1 12 10 5 2 4 9 8 13 6 3 7 0]
Best Model for Machine Type 4: [13 9 1 5 4 6 7 2 3 12 10 11 8 0]
Best Model for Machine Type 5: [ 8 12 3 11 7 4 10 6 2 13 5 1 9 0]
00: ID-04 resnet_gridsearch_a_bit_larger_loose_1e-4_100_BCE
01: ID-09 resnet_gridsearch_a_bit_larger_loose_1e-4_100_AUC
02: ID-01 resnet_gridsearch_a_bit_larger_loose_1e-5_100_BCE
03: ID-10 resnet_gridsearch_a_bit_smaller_loose_1e-4_100_BCE
04: ID-05 resnet_gridsearch_a_bit_larger_loose_1e-5_100_AUC
05: ID-02 resnet_gridsearch_normal_loose_1e-4_100_BCE
06: ID-08 resnet_gridsearch_normal_loose_1e-5_100_BCE
07: ID-06 resnet_gridsearch_normal_loose_1e-4_100_AUC
08: ID-12 resnet_gridsearch_normal_loose_1e-5_100_AUC
09: ID-11 resnet_gridsearch_a_bit_smaller_loose_1e-4_100_AUC
10: ID-07 resnet_gridsearch_a_bit_smaller_loose_1e-5_100_AUC
11: ID-03 resnet_gridsearch_a_bit_smaller_loose_1e-5_100_BCE
12: ID-00 baseline
01: ID-08 resnet_gridsearch_2_a_bit_larger_loose_1e-4_0.99_100_BCE
02: ID-10 resnet_gridsearch_a_bit_larger_loose_1e-4_100_AUC
03: ID-01 resnet_gridsearch_a_bit_larger_loose_1e-5_100_BCE
04: ID-11 resnet_gridsearch_a_bit_smaller_loose_1e-4_100_BCE
05: ID-05 resnet_gridsearch_a_bit_larger_loose_1e-5_100_AUC
06: ID-02 resnet_gridsearch_normal_loose_1e-4_100_BCE
07: ID-09 resnet_gridsearch_normal_loose_1e-5_100_BCE
08: ID-06 resnet_gridsearch_normal_loose_1e-4_100_AUC
09: ID-12 resnet_gridsearch_a_bit_smaller_loose_1e-4_100_AUC
10: ID-13 resnet_gridsearch_normal_loose_1e-5_100_AUC
11: ID-07 resnet_gridsearch_a_bit_smaller_loose_1e-5_100_AUC
12: ID-03 resnet_gridsearch_a_bit_smaller_loose_1e-5_100_BCE
13: ID-00 baseline
%% Cell type:code id: tags:
``` python
import sklearn
# Import the submodule explicitly: `import sklearn` on its own does not
# guarantee that `sklearn.metrics` is available as an attribute.
from sklearn.metrics import roc_auc_score


def compute_auc(src):
    """Compute the AUC and the partial AUC (max_fpr=0.1) for one score file.

    Args:
        src: path of a header-less CSV file with the two columns
            file name and anomaly score.

    Returns:
        Tuple ``(auc, pauc)``. A row counts as anomalous (label 1) when the
        part of its file name before the first underscore is 'anomaly',
        otherwise as normal (label 0).
    """
    # Parse the file once and take both columns from the same table.
    table = pd.read_csv(src, names=['file_name', 'score'], index_col=False)
    scores = table['score'].to_numpy()
    labels = np.array([
        1 if file_name.split('_')[0] == 'anomaly' else 0
        for file_name in table['file_name']
    ])
    return roc_auc_score(labels, scores), roc_auc_score(labels, scores, max_fpr=0.1)


# The experiment names are used as run ids by the cells that follow.
run_ids = names
```
%% Cell type:code id: tags:
``` python
# Create Submission 1
```
%% Cell type:code id: tags:
``` python
# Copy the anomaly score files of the best-ranked model into the folder of
# submission 1. Neither the source nor the destination folder depends on the
# machine, so both are resolved once up front.
best_model_folder = run_ids[sorted_model_indices[0]]
src_path = os.path.join('..', 'experiment_logs', best_model_folder)
dst_path = os.path.join('..', 'submission_package', 'task2', 'Primus_CP-JKU_task2_1')
# copyfile does not create missing directories, so make sure the target exists.
os.makedirs(dst_path, exist_ok=True)
for machine_type in range(6):
    type_name = INVERSE_CLASS_MAP[machine_type]
    for machine_id in EVALUATION_ID_MAP[machine_type]:
        # The source files carry a '_mean' suffix and an unpadded id; the
        # destination files use a two-digit id and no suffix.
        src = os.path.join(src_path, f'anomaly_score_{type_name}_id_{machine_id}_mean.csv')
        dst = os.path.join(dst_path, f'anomaly_score_{type_name}_id_{machine_id:02d}.csv')
        copyfile(src, dst)
```
%% Cell type:code id: tags:
``` python
for machine_type in range(6):
auc = []
pauc = []
for machine_id in TRAINING_ID_MAP[machine_type]:
best_model_folder = run_ids[sorted_model_indices[0]]
src_path = os.path.join('..', 'experiment_logs', best_model_folder)
src = os.path.join(src_path, f'anomaly_score_{INVERSE_CLASS_MAP[machine_type]}_id_{machine_id}_mean.csv')
a, p = compute_auc(src)
<