Commit c6f82fba authored by Paul Primus's avatar Paul Primus
Browse files

add final submission package

parent 483f75aa
%% Cell type:code id: tags:
``` python
from pymongo import MongoClient
from matplotlib import pyplot as plt
import numpy as np
from dcase2020_task2.data_sets.mcm_dataset import INVERSE_CLASS_MAP, TRAINING_ID_MAP, EVALUATION_ID_MAP, CLASS_MAP
from scipy.stats import rankdata
import os
from shutil import copyfile
import pandas as pd
# Published DCASE2020 Task 2 baseline scores, keyed as
# {machine_type: {machine_id: value}} (machine types presumably follow
# INVERSE_CLASS_MAP ordering — confirm against the data set module).
baseline_auc = {
    'name': 'baseline',
    0: {0: 0.5441, 2: 0.7340, 4: 0.6161, 6: 0.7392},
    1: {0: 0.6715, 2: 0.6153, 4: 0.8833, 6: 0.7455},
    2: {0: 0.9619, 2: 0.7897, 4: 0.9430, 6: 0.6959},
    3: {1: 0.8136, 2: 0.8597, 3: 0.6330, 4: 0.8445},
    4: {1: 0.7807, 2: 0.6416, 3: 0.7535},
    5: {0: 0.6876, 2: 0.6818, 4: 0.7430, 6: 0.5390},
}
baseline_pauc = {
    'name': 'baseline',
    0: {0: 0.4937, 2: 0.5481, 4: 0.5326, 6: 0.5235},
    1: {0: 0.5674, 2: 0.5810, 4: 0.6710, 6: 0.5802},
    2: {0: 0.8144, 2: 0.6368, 4: 0.7198, 6: 0.4902},
    3: {1: 0.6840, 2: 0.7772, 3: 0.5521, 4: 0.6897},
    4: {1: 0.6425, 2: 0.5601, 3: 0.6103},
    5: {0: 0.5170, 2: 0.5183, 4: 0.5197, 6: 0.4843},
}
# Merge the two tables into {machine_type: {machine_id: array([auc, pauc])}},
# preserving the special 'name' entry.
baseline_both = {}
for machine_type, auc_by_id in baseline_auc.items():
    if machine_type == 'name':
        baseline_both[machine_type] = 'baseline'
    else:
        baseline_both[machine_type] = {
            machine_id: np.array([auc, baseline_pauc[machine_type][machine_id]])
            for machine_id, auc in auc_by_id.items()
        }
def get_experiment(runs, name):
    """Collect per-machine (AUC, pAUC) results for the experiment `name`.

    Parameters
    ----------
    runs : iterable of sacred-style run documents (dicts with 'config'/'result').
    name : the experiment id (``config['id']``) to select.

    Returns
    -------
    dict mapping machine_type (0-5) -> {machine_id: [auc, pauc]}, plus a
    'name' entry. Machine types 0-5 are always present, possibly empty.
    """
    experiment_dict = {i: dict() for i in range(6)}
    experiment_dict['name'] = name
    for experiment in runs:
        if experiment['config'].get('id') != name:
            continue
        machine_dict = experiment_dict.get(experiment['config']['machine_type'])
        machine_id = experiment['config']['machine_id']
        result = experiment.get('result')
        if result:
            # Only resolve the machine-type name when a result exists.
            machine_type = INVERSE_CLASS_MAP[experiment['config']['machine_type']]
            # Default to {} at every level (the original used -1, which made a
            # missing machine entry raise AttributeError on the chained .get()).
            machine_dict[machine_id] = result.get(
                machine_type, {}
            ).get(
                f'json://{machine_id}', {}
            ).get('py/tuple', [0, 0])[:2]
        else:
            machine_dict[machine_id] = np.array([0, 0])
    return experiment_dict
def get_record(experiment):
    """Flatten an experiment dict into (name, list of 23 [auc, pauc] arrays).

    Rows are ordered by machine type 0-5 and, within a type, by the ids in
    TRAINING_ID_MAP; missing entries become [0, 0].
    """
    record = []
    for machine_type in range(6):
        per_machine = experiment.get(machine_type)
        for machine_id in TRAINING_ID_MAP[machine_type]:
            if per_machine:
                entry = per_machine.get(machine_id, [0, 0])
            else:
                entry = np.array([0, 0])
            record.append(np.array(entry))
    # 5 machine types with 4 ids + 1 with 3 ids = 23 rows total.
    assert len(record) == 23
    return experiment['name'], record
```
%% Cell type:code id: tags:
``` python
# Fetch every classification-experiment run document from the grid-search DB.
client = MongoClient('mongodb://student2.cp.jku.at:27017/')
query = {"experiment.name": "dcase2020_task2_ClassificationExperiment"}
experiments = list(client.resnet_gridsearch.runs.find(query))
print(f'Loaded {len(experiments)} runs.')
```
%%%% Output: stream
Loaded 572 runs.
%% Cell type:code id: tags:
``` python
# Collect the distinct experiment descriptors (config ids) across all runs.
descriptors = list({run['config']['id'] for run in experiments})
print(f'Loaded {len(descriptors)} distinct experiments.')
```
%%%% Output: stream
Loaded 25 distinct experiments.
%% Cell type:code id: tags:
``` python
# Drop descriptors whose last underscore-separated token marks them as reruns.
descriptors = [name for name in descriptors if name.split('_')[-1] != 'rerun']
print(f'Loaded {len(descriptors)} distinct experiments, without reruns.')
```
%%%% Output: stream
Loaded 13 distinct experiments, without reruns.
%% Cell type:code id: tags:
``` python
# Extract one record (23 [auc, pauc] rows) per model, baseline first,
# followed by every grid-search descriptor.
sources = [baseline_both]
sources += [get_experiment(experiments, descriptor) for descriptor in descriptors]
names = []
metrics = []
for source in sources:
    n, m = get_record(source)
    names.append(n)
    metrics.append(np.array(m))
```
%% Cell type:code id: tags:
``` python
# Rank every model (baseline + grid-search runs) per machine type and
# average the ranks into one global model ordering.
data = np.array(metrics)
auc_ranks = []
pauc_ranks = []
# Row offsets into the 23 records: machine types own 4,4,4,4,3,4 rows each.
idxes = [0, 4, 8, 12, 16, 19, 23]
best_idxes = []
for type_, (i, j) in enumerate(zip(idxes[:-1], idxes[1:])):
# Mean AUC / pAUC over this machine type's ids, one value per model.
average_auc = data[:, i:j, 0].mean(axis=1)
average_pauc = data[:, i:j, 1].mean(axis=1)
# Model indices sorted best-first by AUC + pAUC for this machine type.
best_idxes.append(
np.argsort(average_auc + average_pauc)[::-1]
)
print(f'Best Model for Machine Type {type_}: {best_idxes[-1]}')
# rankdata on the negated metric so rank 1 is the best model.
auc_ranks.append(rankdata(-average_auc))
pauc_ranks.append(rankdata(-average_pauc))
# Per model: average its AUC rank and pAUC rank, then average over the six
# machine types; lower final rank = better overall model.
ranks = np.stack([np.array(list(zip(*auc_ranks))), np.array(list(zip(*pauc_ranks)))], axis=-1).mean(axis=-1).mean(axis=-1)
sorted_model_indices = list(np.argsort(ranks))
names = np.array(names)
for i, (n, r, j) in enumerate(zip(names[sorted_model_indices], ranks[sorted_model_indices], sorted_model_indices)):
print(f'{i:02d}: ID-{j:02d} {n}')
```
%%%% Output: stream
Best Model for Machine Type 0: [ 8 2 10 1 5 6 11 4 9 7 13 12 3 0]
Best Model for Machine Type 1: [ 8 4 2 6 9 10 13 11 5 1 7 3 12 0]
Best Model for Machine Type 2: [ 4 10 1 8 5 9 11 13 6 2 12 3 7 0]
Best Model for Machine Type 3: [11 1 12 10 5 2 4 9 8 13 6 3 7 0]
Best Model for Machine Type 4: [13 9 1 5 4 6 7 2 3 12 10 11 8 0]
Best Model for Machine Type 5: [ 8 12 3 11 7 4 10 6 2 13 5 1 9 0]
00: ID-04 resnet_gridsearch_a_bit_larger_loose_1e-4_100_BCE
01: ID-08 resnet_gridsearch_2_a_bit_larger_loose_1e-4_0.99_100_BCE
02: ID-10 resnet_gridsearch_a_bit_larger_loose_1e-4_100_AUC
03: ID-01 resnet_gridsearch_a_bit_larger_loose_1e-5_100_BCE
04: ID-11 resnet_gridsearch_a_bit_smaller_loose_1e-4_100_BCE
Best Model for Machine Type 0: [ 8 9 11 6 5 3 7 12 10 1 13 4 2 0]
Best Model for Machine Type 1: [ 8 12 9 3 10 11 13 7 5 6 1 2 4 0]
Best Model for Machine Type 2: [12 11 6 8 5 10 7 13 3 9 4 2 1 0]
Best Model for Machine Type 3: [ 7 6 4 11 5 9 12 10 8 13 3 2 1 0]
Best Model for Machine Type 4: [13 10 6 5 12 3 1 9 2 4 11 7 8 0]
Best Model for Machine Type 5: [ 8 4 2 7 1 12 11 3 9 13 5 6 10 0]
00: ID-08 resnet_gridsearch_2_a_bit_larger_loose_1e-4_0.99_100_BCE
01: ID-12 resnet_gridsearch_a_bit_larger_loose_1e-4_100_BCE
02: ID-11 resnet_gridsearch_a_bit_larger_loose_1e-4_100_AUC
03: ID-06 resnet_gridsearch_a_bit_larger_loose_1e-5_100_BCE
04: ID-07 resnet_gridsearch_a_bit_smaller_loose_1e-4_100_BCE
05: ID-05 resnet_gridsearch_a_bit_larger_loose_1e-5_100_AUC
06: ID-02 resnet_gridsearch_normal_loose_1e-4_100_BCE
07: ID-09 resnet_gridsearch_normal_loose_1e-5_100_BCE
08: ID-06 resnet_gridsearch_normal_loose_1e-4_100_AUC
09: ID-12 resnet_gridsearch_a_bit_smaller_loose_1e-4_100_AUC
06: ID-09 resnet_gridsearch_normal_loose_1e-4_100_BCE
07: ID-10 resnet_gridsearch_normal_loose_1e-5_100_BCE
08: ID-03 resnet_gridsearch_normal_loose_1e-4_100_AUC
09: ID-04 resnet_gridsearch_a_bit_smaller_loose_1e-4_100_AUC
10: ID-13 resnet_gridsearch_normal_loose_1e-5_100_AUC
11: ID-07 resnet_gridsearch_a_bit_smaller_loose_1e-5_100_AUC
12: ID-03 resnet_gridsearch_a_bit_smaller_loose_1e-5_100_BCE
11: ID-01 resnet_gridsearch_a_bit_smaller_loose_1e-5_100_AUC
12: ID-02 resnet_gridsearch_a_bit_smaller_loose_1e-5_100_BCE
13: ID-00 baseline
%% Cell type:code id: tags:
``` python
import sklearn

def compute_auc(src):
    """Compute (AUC, pAUC@FPR<=0.1) for one anomaly-score CSV.

    The file at `src` has rows ``file_name,score`` and no header; a file
    name whose first '_'-token is 'anomaly' is the positive class.
    """
    # Parse the CSV once (the original read and parsed the same file twice).
    table = pd.read_csv(src, names=['file_name', 'score'], index_col=False).to_numpy()
    scores = table[:, 1]
    labels = np.array([1 if name.split('_')[0] == 'anomaly' else 0 for name in table[:, 0]])
    return sklearn.metrics.roc_auc_score(labels, scores), sklearn.metrics.roc_auc_score(labels, scores, max_fpr=0.1)

run_ids = names
```
%% Cell type:code id: tags:
``` python
# Create Submission 1
```
%% Cell type:code id: tags:
``` python
# Submission 1: copy the overall best model's evaluation-set score files.
best_model_folder = run_ids[sorted_model_indices[0]]
print(best_model_folder)
src_path = os.path.join('..', 'experiment_logs', best_model_folder)
dst_path = os.path.join('..', 'submission_package', 'task2', 'Primus_CP-JKU_task2_1')
for machine_type in range(6):
    for machine_id in EVALUATION_ID_MAP[machine_type]:
        src = os.path.join(src_path, f'anomaly_score_{INVERSE_CLASS_MAP[machine_type]}_id_{machine_id}_mean.csv')
        dst = os.path.join(dst_path, f'anomaly_score_{INVERSE_CLASS_MAP[machine_type]}_id_{machine_id:02d}.csv')
        copyfile(src, dst)
```
%%%% Output: stream
resnet_gridsearch_2_a_bit_larger_loose_1e-4_0.99_100_BCE
%% Cell type:code id: tags:
``` python
# Report dev-set AUC/pAUC of the overall best model, averaged per machine type.
best_model_folder = run_ids[sorted_model_indices[0]]
print(best_model_folder)
src_path = os.path.join('..', 'experiment_logs', best_model_folder)
for machine_type in range(6):
    auc = []
    pauc = []
    for machine_id in TRAINING_ID_MAP[machine_type]:
        src = os.path.join(src_path, f'anomaly_score_{INVERSE_CLASS_MAP[machine_type]}_id_{machine_id}_mean.csv')
        a, p = compute_auc(src)
        auc.append(a)
        pauc.append(p)
    print(f'\t{INVERSE_CLASS_MAP[machine_type]}:\n\t\taveraged_auc: {np.mean(auc)}\n\t\taveraged_pauc: {np.mean(pauc)}')
```
%%%% Output: stream
resnet_gridsearch_2_a_bit_larger_loose_1e-4_0.99_100_BCE
fan:
averaged_auc: 0.9226685446121019
averaged_pauc: 0.8230212986543588
averaged_auc: 0.9344751194772806
averaged_pauc: 0.8364269016605993
pump:
averaged_auc: 0.9297781495399142
averaged_pauc: 0.8722867745313565
averaged_auc: 0.9300024538524538
averaged_pauc: 0.8738435129147204
slider:
averaged_auc: 0.9894779962546816
averaged_pauc: 0.9454464813719693
averaged_auc: 0.9836423220973782
averaged_pauc: 0.9154346540508576
ToyCar:
averaged_auc: 0.9489897900841298
averaged_pauc: 0.8752479934828495
averaged_auc: 0.931301008739688
averaged_pauc: 0.8765960785325235
ToyConveyor:
averaged_auc: 0.83764533636162
averaged_pauc: 0.7279798253268154
averaged_auc: 0.798378625478885
averaged_pauc: 0.7003532276454104
valve:
averaged_auc: 0.9424583333333334
averaged_pauc: 0.8903508771929824
averaged_auc: 0.9858539915966387
averaged_pauc: 0.9664464469998526
%% Cell type:code id: tags:
``` python
# Create Submission 2
```
%% Cell type:code id: tags:
``` python
# Submission 2: per machine type, copy the scores of that type's best model.
dst_path = os.path.join('..', 'submission_package', 'task2', 'Primus_CP-JKU_task2_2')
for machine_type, idxes in enumerate(best_idxes):
    # idxes is sorted best-first, so idxes[0] is this type's top model.
    best_model_folder = run_ids[idxes[0]]
    src_path = os.path.join('..', 'experiment_logs', best_model_folder)
    for machine_id in EVALUATION_ID_MAP[machine_type]:
        src = os.path.join(src_path, f'anomaly_score_{INVERSE_CLASS_MAP[machine_type]}_id_{machine_id}_mean.csv')
        dst = os.path.join(dst_path, f'anomaly_score_{INVERSE_CLASS_MAP[machine_type]}_id_{machine_id:02d}.csv')
        copyfile(src, dst)
```
%% Cell type:code id: tags:
``` python
# Report dev-set AUC/pAUC of each machine type's best model.
for machine_type, idxes in enumerate(best_idxes):
    auc = []
    pauc = []
    best_model_folder = run_ids[idxes[0]]
    src_path = os.path.join('..', 'experiment_logs', best_model_folder)
    for machine_id in TRAINING_ID_MAP[machine_type]:
        src = os.path.join(src_path, f'anomaly_score_{INVERSE_CLASS_MAP[machine_type]}_id_{machine_id}_mean.csv')
        a, p = compute_auc(src)
        auc.append(a)
        pauc.append(p)
    print(f'\t{INVERSE_CLASS_MAP[machine_type]}:\n\t\taveraged_auc: {np.mean(auc)}\n\t\taveraged_pauc: {np.mean(pauc)}')
```
%%%% Output: stream
fan:
averaged_auc: 0.9286317167841518
averaged_pauc: 0.8352913487070679
averaged_auc: 0.9344751194772806
averaged_pauc: 0.8364269016605993
pump:
averaged_auc: 0.9297781495399142
averaged_pauc: 0.8722867745313565
averaged_auc: 0.9300024538524538
averaged_pauc: 0.8738435129147204
slider:
averaged_auc: 0.9894779962546816
averaged_pauc: 0.9454464813719693
ToyCar:
averaged_auc: 0.9566950093931226
averaged_pauc: 0.8961968600747151
ToyConveyor:
averaged_auc: 0.8526503235962499
averaged_pauc: 0.7259891865658302
valve:
averaged_auc: 0.9776656162464985
averaged_pauc: 0.9357400855078873
averaged_auc: 0.9858539915966387
averaged_pauc: 0.9664464469998526
%% Cell type:code id: tags:
``` python
# Create Submission 3 # median ensemble
```
%% Cell type:code id: tags:
``` python
# Submission 3: median ensemble over each machine type's five best models.
for machine_type, idxes in enumerate(best_idxes):
    for machine_id in EVALUATION_ID_MAP[machine_type]:
        file_names = []
        scores = []
        for idx in idxes[:5]:
            best_model_folder = run_ids[idx]
            src_path = os.path.join('..', 'experiment_logs', best_model_folder)
            src = os.path.join(src_path, f'anomaly_score_{INVERSE_CLASS_MAP[machine_type]}_id_{machine_id}_mean.csv')
            # Parse each CSV once (the original read every file twice).
            table = pd.read_csv(src, names=['file_name', 'score'], index_col=False).to_numpy()
            scores.append(table[:, 1])
            file_names.append(table[:, 0])
        # Element-wise median over the five models' per-file scores.
        scores = list(np.median(np.array(scores).T, axis=-1).reshape(-1))
        dst_path = os.path.join('..', 'submission_package', 'task2', 'Primus_CP-JKU_task2_3')
        dst = os.path.join(dst_path, f'anomaly_score_{INVERSE_CLASS_MAP[machine_type]}_id_{machine_id:02d}.csv')
        pd.DataFrame(list(zip(file_names[0], scores))).to_csv(dst, index=False, header=False)
```
%% Cell type:code id: tags:
``` python
# Evaluate the top-5 median ensemble on the dev data, per machine type.
for machine_type, idxes in enumerate(best_idxes):
    auc = []
    pauc = []
    for machine_id in TRAINING_ID_MAP[machine_type]:
        file_names = []
        scores = []
        for idx in idxes[:5]:
            best_model_folder = run_ids[idx]
            src_path = os.path.join('..', 'experiment_logs', best_model_folder)
            src = os.path.join(src_path, f'anomaly_score_{INVERSE_CLASS_MAP[machine_type]}_id_{machine_id}_mean.csv')
            # Parse each CSV once (the original read every file twice).
            table = pd.read_csv(src, names=['file_name', 'score'], index_col=False).to_numpy()
            scores.append(table[:, 1])
            file_names.append(table[:, 0])
        scores = list(np.median(np.array(scores).T, axis=-1).reshape(-1))
        # Ground truth: files named 'anomaly_*' are the positive class.
        file_names = np.array([1 if name.split('_')[0] == 'anomaly' else 0 for name in file_names[0]])
        a, p = sklearn.metrics.roc_auc_score(file_names, scores), sklearn.metrics.roc_auc_score(file_names, scores, max_fpr=0.1)
        auc.append(a)
        pauc.append(p)
    print(f'\t{INVERSE_CLASS_MAP[machine_type]}:\n\t\taveraged_auc: {np.mean(auc)}\n\t\taveraged_pauc: {np.mean(pauc)}')
```
%%%% Output: stream
fan:
averaged_auc: 0.9281888985937587
averaged_pauc: 0.8283523606556178
pump:
averaged_auc: 0.9209936334730452
averaged_pauc: 0.8705862272890137
slider:
averaged_auc: 0.9858871722846442
averaged_pauc: 0.9268061304947763
ToyCar:
averaged_auc: 0.9547106714040676
averaged_pauc: 0.8913650442572985
ToyConveyor:
averaged_auc: 0.8514805262648587
averaged_pauc: 0.7374989794507775
valve:
averaged_auc: 0.9696039915966386
averaged_pauc: 0.9118411838419578
%% Cell type:code id: tags:
``` python
# Create Submission 4 # mean ensemble
```
%% Cell type:code id: tags:
``` python
# Submission 4: ensemble over each machine type's 13 best models.
# NOTE(review): the cell is labelled "mean ensemble" but aggregates with
# np.median (as did the scores actually written) — confirm which aggregator
# was intended before regenerating the submission.
for machine_type, idxes in enumerate(best_idxes):
    for machine_id in EVALUATION_ID_MAP[machine_type]:
        file_names = []
        scores = []
        for idx in idxes[:13]:
            best_model_folder = run_ids[idx]
            src_path = os.path.join('..', 'experiment_logs', best_model_folder)
            src = os.path.join(src_path, f'anomaly_score_{INVERSE_CLASS_MAP[machine_type]}_id_{machine_id}_mean.csv')
            # Parse each CSV once (the original read every file twice).
            table = pd.read_csv(src, names=['file_name', 'score'], index_col=False).to_numpy()
            scores.append(table[:, 1])
            file_names.append(table[:, 0])
        scores = list(np.median(np.array(scores).T, axis=-1).reshape(-1))
        dst_path = os.path.join('..', 'submission_package', 'task2', 'Primus_CP-JKU_task2_4')
        dst = os.path.join(dst_path, f'anomaly_score_{INVERSE_CLASS_MAP[machine_type]}_id_{machine_id:02d}.csv')
        pd.DataFrame(list(zip(file_names[0], scores))).to_csv(dst, index=False, header=False)
```
%% Cell type:code id: tags:
``` python
# Evaluate the top-13 ensemble on the dev data, per machine type.
# NOTE(review): labelled "mean ensemble" upstream but computed with np.median —
# confirm the intended aggregator.
for machine_type, idxes in enumerate(best_idxes):
    auc = []
    pauc = []
    for machine_id in TRAINING_ID_MAP[machine_type]:
        file_names = []
        scores = []
        for idx in idxes[:13]:
            best_model_folder = run_ids[idx]
            src_path = os.path.join('..', 'experiment_logs', best_model_folder)
            src = os.path.join(src_path, f'anomaly_score_{INVERSE_CLASS_MAP[machine_type]}_id_{machine_id}_mean.csv')
            # Parse each CSV once (the original read every file twice).
            table = pd.read_csv(src, names=['file_name', 'score'], index_col=False).to_numpy()
            scores.append(table[:, 1])
            file_names.append(table[:, 0])
        scores = list(np.median(np.array(scores).T, axis=-1).reshape(-1))
        # Ground truth: files named 'anomaly_*' are the positive class.
        file_names = np.array([1 if name.split('_')[0] == 'anomaly' else 0 for name in file_names[0]])
        a, p = sklearn.metrics.roc_auc_score(file_names, scores), sklearn.metrics.roc_auc_score(file_names, scores, max_fpr=0.1)
        auc.append(a)
        pauc.append(p)
    print(f'\t{INVERSE_CLASS_MAP[machine_type]}:\n\t\taveraged_auc: {np.mean(auc)}\n\t\taveraged_pauc: {np.mean(pauc)}')
```
%%%% Output: stream
fan:
averaged_auc: 0.9230309586203593
averaged_pauc: 0.8285210664235381
pump:
averaged_auc: 0.9146627457465693
averaged_pauc: 0.8678065821022478
slider:
averaged_auc: 0.9822893258426966
averaged_pauc: 0.9108762073723635
ToyCar:
averaged_auc: 0.9504648370497427
averaged_pauc: 0.8890240180210388
ToyConveyor:
averaged_auc: 0.8254031447576784
averaged_pauc: 0.7026976317730064
valve:
averaged_auc: 0.93825
averaged_pauc: 0.8792763157894736
%% Cell type:code id: tags:
``` python
```
......
id_01_00000000.wav,1.1210983991622925
id_01_00000001.wav,1.1466665267944336
id_01_00000002.wav,0.8824521899223328
id_01_00000003.wav,1.0552852153778076
id_01_00000004.wav,-1.183948278427124
id_01_00000005.wav,0.6197622418403625
id_01_00000006.wav,-1.167053461074829
id_01_00000007.wav,-1.1847805976867676
id_01_00000008.wav,0.9049229621887207
id_01_00000009.wav,-1.1753199100494385
id_01_00000010.wav,0.5300949811935425
id_01_00000011.wav,0.5063624978065491
id_01_00000012.wav,1.0821229219436646
id_01_00000013.wav,-1.1448915004730225
id_01_00000014.wav,-1.1791231632232666
id_01_00000015.wav,0.3743613362312317
id_01_00000016.wav,-1.100714087486267
id_01_00000017.wav,-1.1817433834075928
id_01_00000018.wav,0.6172735691070557
id_01_00000019.wav,1.1466976404190063
id_01_00000020.wav,-0.5077235698699951
id_01_00000021.wav,-1.1287075281143188
id_01_00000022.wav,0.5645463466644287
id_01_00000023.wav,1.1332900524139404
id_01_00000024.wav,-1.1781843900680542
id_01_00000025.wav,-1.1658868789672852
id_01_00000026.wav,0.8868559002876282
id_01_00000027.wav,-1.161880373954773
id_01_00000028.wav,-1.1402015686035156
id_01_00000029.wav,0.7164785861968994
id_01_00000030.wav,1.0656511783599854
id_01_00000031.wav,-1.1550978422164917
id_01_00000032.wav,-1.178971767425537
id_01_00000033.wav,-0.30746325850486755
id_01_00000034.wav,1.1352827548980713
id_01_00000035.wav,1.099847674369812
id_01_00000036.wav,-1.1172916889190674
id_01_00000037.wav,0.45853546261787415
id_01_00000038.wav,1.1095504760742188
id_01_00000039.wav,1.1442252397537231
id_01_00000040.wav,-1.1692746877670288
id_01_00000041.wav,1.1507630348205566
id_01_00000042.wav,-1.173393964767456
id_01_00000043.wav,1.123020887374878
id_01_00000044.wav,-1.1788341999053955
id_01_00000045.wav,0.13773688673973083
id_01_00000046.wav,0.15781493484973907
id_01_00000047.wav,0.34982699155807495
id_01_00000048.wav,0.9027346968650818
id_01_00000049.wav,-1.1752989292144775
id_01_00000050.wav,0.8768077492713928
id_01_00000051.wav,0.0644167959690094
id_01_00000052.wav,0.3088078200817108
id_01_00000053.wav,1.1015907526016235