{ "cells": [ { "cell_type": "code", "execution_count": 2, "outputs": [], "source": [ "from pymongo import MongoClient\n", "from matplotlib import pyplot as plt\n", "import numpy as np\n", "from dcase2020_task2.data_sets.mcm_dataset import INVERSE_CLASS_MAP, TRAINING_ID_MAP, CLASS_MAP\n", "from scipy.stats import rankdata\n", "\n", "baseline_auc = {\n", " 'name': 'baseline',\n", " 0: {0: 0.5441, 2: 0.7340, 4: 0.6161, 6: 0.7392},\n", " 1: {0: 0.6715, 2: 0.6153, 4: 0.8833, 6: 0.7455},\n", " 2: {0: 0.9619, 2: 0.7897, 4: 0.9430, 6: 0.6959},\n", " 3: {1: 0.8136, 2: 0.8597, 3: 0.6330, 4: 0.8445},\n", " 4: {1: 0.7807, 2: 0.6416, 3: 0.7535},\n", " 5: {0: 0.6876, 2: 0.6818, 4: 0.7430, 6: 0.5390}\n", " }\n", " \n", "baseline_pauc = {\n", " 'name': 'baseline',\n", " 0: {0: 0.4937, 2: 0.5481, 4: 0.5326, 6: 0.5235},\n", " 1: {0: 0.5674, 2: 0.5810, 4: 0.6710, 6: 0.5802},\n", " 2: {0: 0.8144, 2: 0.6368, 4: 0.7198, 6: 0.4902},\n", " 3: {1: 0.6840, 2: 0.7772, 3: 0.5521, 4: 0.6897},\n", " 4: {1: 0.6425, 2: 0.5601, 3: 0.6103},\n", " 5: {0: 0.5170, 2: 0.5183, 4: 0.5197, 6: 0.4843}\n", "}\n", "\n", "baseline_both = {}\n", "for t in baseline_auc:\n", " if t == 'name':\n", " baseline_both[t] = 'baseline'\n", " continue\n", " else:\n", " baseline_both[t] = {}\n", " for i in baseline_auc[t]:\n", " baseline_both[t][i] = np.array([baseline_auc[t][i], baseline_pauc[t][i]])\n", "\n", "\n", "def get_experiment(runs, name):\n", " experiment_dict = dict()\n", " for i in range(6):\n", " experiment_dict[i] = dict()\n", " \n", " experiment_dict['name'] = name\n", " \n", " for experiment in runs:\n", " if experiment['config'].get('id') == name:\n", " machine_dict = experiment_dict.get(experiment['config']['machine_type'])\n", " result = experiment.get('result')\n", " machine_type = INVERSE_CLASS_MAP[experiment['config']['machine_type']]\n", " machine_id = experiment['config']['machine_id']\n", " \n", " if result:\n", " machine_dict[experiment['config']['machine_id']] = result.get(\n", " machine_type, {}\n", " ).get(\n", " f'json://{machine_id}', -1\n", " ).get('py/tuple', [0, 0])[:2]\n", " else:\n", " machine_dict[experiment['config']['machine_id']] = np.array([0, 0])\n", " return experiment_dict\n", "\n", "\n", "def get_record(experiment):\n", " record = []\n", " for i in range(6):\n", " for j in TRAINING_ID_MAP[i]:\n", " v = experiment.get(i)\n", " if v:\n", " v = v.get(j, [0, 0])\n", " else:\n", " v = np.array([0, 0])\n", " record.append(np.array(v))\n", " assert len(record) == 23\n", " return experiment['name'], record" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n", "is_executing": false } } }, { "cell_type": "code", "execution_count": 3, "outputs": [ { "name": "stdout", "text": [ "Loaded 405 runs.\n" ], "output_type": "stream" } ], "source": [ "client = MongoClient('mongodb://student2.cp.jku.at:27017/')\n", "experiments = [r for r in client.resnet_gridsearch.runs.find({\"experiment.name\": \"dcase2020_task2_ClassificationExperiment\"})]\n", "print(f'Loaded {len(experiments)} runs.')" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n", "is_executing": false } } }, { "cell_type": "code", "execution_count": 4, "outputs": [ { "name": "stdout", "text": [ "Loaded 10 distinct experiments.\n" ], "output_type": "stream" } ], "source": [ "descriptors = set()\n", "for experiment in experiments:\n", " descriptors = descriptors.union(set([experiment['config']['id']]))\n", "descriptors = list(descriptors)\n", "print(f'Loaded {len(descriptors)} distinct experiments.')" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n", "is_executing": false } } }, { "cell_type": "code", "execution_count": 5, "outputs": [], "source": [ "# Extract Results\n", "# Concatenate Baseline Results\n", "n, m = get_record(baseline_both)\n", "names = [n]\n", "metrics = [np.array(m)]\n", "\n", "for descriptor in descriptors:\n", " n, m = get_record(\n", " get_experiment(\n", " experiments, \n", " descriptor\n", " )\n", " )\n", " names.append(n)\n", " metrics.append(np.array(m))" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n", "is_executing": false } } }, { "cell_type": "code", "execution_count": 33, "outputs": [ { "name": "stdout", "text": [ "Best Model for Machine Type 0: (9, 'resnet_gridsearch_normal_loose_1e-4_100_BCE')\n", "Best Model for Machine Type 1: (6, 'resnet_gridsearch_a_bit_larger_loose_1e-4_100_BCE')\n", "Best Model for Machine Type 2: (6, 'resnet_gridsearch_a_bit_larger_loose_1e-4_100_BCE')\n", "Best Model for Machine Type 3: (5, 'resnet_gridsearch_a_bit_smaller_loose_1e-4_100_BCE')\n", "Best Model for Machine Type 4: (8, 'resnet_gridsearch_normal_loose_1e-5_100_AUC')\n", "Best Model for Machine Type 5: (1, 'resnet_gridsearch_a_bit_smaller_loose_1e-4_100_AUC')\n", "0-3.75: ID-6 resnet_gridsearch_a_bit_larger_loose_1e-4_100_BCE\n", "1-4.416666666666667: ID-3 resnet_gridsearch_a_bit_larger_loose_1e-4_100_AUC\n", "2-4.833333333333333: ID-4 resnet_gridsearch_a_bit_larger_loose_1e-5_100_BCE\n", "3-5.083333333333333: ID-5 resnet_gridsearch_a_bit_smaller_loose_1e-4_100_BCE\n", "4-5.166666666666667: ID-10 resnet_gridsearch_a_bit_larger_loose_1e-5_100_AUC\n", "5-5.583333333333333: ID-9 resnet_gridsearch_normal_loose_1e-4_100_BCE\n", "6-5.916666666666667: ID-2 resnet_gridsearch_normal_loose_1e-5_100_BCE\n", "7-6.083333333333333: ID-7 resnet_gridsearch_normal_loose_1e-4_100_AUC\n", "8-6.916666666666667: ID-1 resnet_gridsearch_a_bit_smaller_loose_1e-4_100_AUC\n", "9-7.416666666666667: ID-8 resnet_gridsearch_normal_loose_1e-5_100_AUC\n", "10-10.833333333333334: ID-0 baseline\n" ], "output_type": "stream" } ], "source": [ "data = np.array(metrics)\n", "auc_ranks = []\n", "pauc_ranks = []\n", "idxes = [0, 4, 8, 12, 16, 19, 23]\n", "best_idxes = []\n", "for type_, (i, j) in enumerate(zip(idxes[:-1], idxes[1:])):\n", " average_auc = data[:, i:j, 0].mean(axis=1)\n", " average_pauc = data[:, i:j, 1].mean(axis=1)\n", " best_idx = np.argmax(average_auc + average_pauc)\n", " best_idxes.append(\n", " (best_idx, names[best_idx])\n", " )\n", " print(f'Best Model for Machine Type {type_}: {best_idxes[-1]}')\n", " auc_ranks.append(rankdata(-average_auc))\n", " pauc_ranks.append(rankdata(-average_pauc))\n", "\n", "\n", "ranks = np.stack([np.array(list(zip(*auc_ranks))), np.array(list(zip(*pauc_ranks)))], axis=-1).mean(axis=-1).mean(axis=-1)\n", "\n", "sorted_model_indices = list(np.argsort(ranks))\n", "names = np.array(names)\n", "for i, (n, r, j) in enumerate(zip(names[sorted_model_indices], ranks[sorted_model_indices], sorted_model_indices)):\n", " print(f'{i}-{r}: ID-{j} {n}')\n", " " ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n", "is_executing": false } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "# Create Submission 1" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 34, "outputs": [], "source": [ "from dcase2020_task2.data_sets import INVERSE_CLASS_MAP, EVALUATION_ID_MAP\n", "import os\n", "from shutil import copyfile\n", "\n", "best_model_folder = names[sorted_model_indices[0]]\n", "\n", "for machine_type in range(6):\n", " for model_id in EVALUATION_ID_MAP[machine_type]:\n", " machine_type_str = INVERSE_CLASS_MAP[machine_type]\n", " \n", " src_path = os.path.join('..', 'experiment_logs', best_model_folder)\n", " src = os.path.join(src_path, f'anomaly_score_{machine_type_str}_id_{model_id}_mean.csv')\n", " \n", " dst_path = os.path.join('..', 'submission_package', 'task2', 'Primus_CP-JKU_task2_1')\n", " dst = os.path.join(dst_path, f'anomaly_score_{machine_type_str}_id_{model_id}.csv')\n", "\n", " copyfile(src, dst)\n" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n", "is_executing": false } } }, { "cell_type": "code", "execution_count": 13, "outputs": [], "source": [ "# Create Submission 2" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n", "is_executing": false } } }, { "cell_type": "code", "execution_count": 29, "outputs": [], "source": [ "from dcase2020_task2.data_sets import INVERSE_CLASS_MAP, EVALUATION_ID_MAP\n", "import os\n", "from shutil import copyfile\n", "\n", "for machine_type, (idx, folder_name) in enumerate(best_idxes):\n", " \n", " for model_id in EVALUATION_ID_MAP[machine_type]:\n", " machine_type_str = INVERSE_CLASS_MAP[machine_type]\n", " \n", " src_path = os.path.join('..', 'experiment_logs', folder_name)\n", " src = os.path.join(src_path, f'anomaly_score_{machine_type_str}_id_{model_id}_mean.csv')\n", " \n", " dst_path = os.path.join('..', 'submission_package', 'task2', 'Primus_CP-JKU_task2_2')\n", " dst = os.path.join(dst_path, f'anomaly_score_{machine_type_str}_id_{model_id}.csv')\n", "\n", " copyfile(src, dst)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n", "is_executing": false } } }, { "cell_type": "code", "execution_count": 30, "outputs": [ { "data": { "text/plain": "[(9, 'resnet_gridsearch_normal_loose_1e-4_100_BCE'),\n (6, 'resnet_gridsearch_a_bit_larger_loose_1e-4_100_BCE'),\n (6, 'resnet_gridsearch_a_bit_larger_loose_1e-4_100_BCE'),\n (5, 'resnet_gridsearch_a_bit_smaller_loose_1e-4_100_BCE'),\n (8, 'resnet_gridsearch_normal_loose_1e-5_100_AUC'),\n (1, 'resnet_gridsearch_a_bit_smaller_loose_1e-4_100_AUC')]" }, "metadata": {}, "output_type": "execute_result", "execution_count": 30 } ], "source": [ "\n", "\n", "best_idxes" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n", "is_executing": false } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "\n" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } } ], "metadata": { "kernelspec": { "name": "python3", "language": "python", "display_name": "Python 3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" }, "pycharm": { "stem_cell": { "cell_type": "raw", "source": [], "metadata": { "collapsed": false } } } }, "nbformat": 4, "nbformat_minor": 0 }