GemsLab
diff --git a/‎Example.ipynb‎
Lines changed: 312 additions & 0 deletions b/‎Example.ipynb‎
Lines changed: 312 additions & 0 deletions
@@ -0,0 +1,312 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load Embedding Methods and Datasets "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "graphwave\n",
+      "degree2\n",
+      "drne\n",
+      "node2vec\n",
+      "degree\n",
+      "role2vec\n",
+      "line\n",
+      "degree1\n",
+      "struc2vec\n",
+      "xnetmf\n",
+      "multilens\n",
+      "segk\n",
+      "riwalk\n"
+     ]
+    }
+   ],
+   "source": [
+    "from semb.methods import load as load_method\n",
+    "from semb.methods import get_method_ids\n",
+    "for mid in get_method_ids():\n",
+    "    print(mid)\n",
+    "    load_method(mid)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "airports\n"
+     ]
+    }
+   ],
+   "source": [
+    "from semb.datasets import load as load_dataset\n",
+    "from semb.datasets import get_dataset_ids\n",
+    "for did in get_dataset_ids():\n",
+    "    print(did)\n",
+    "    load_dataset(did)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Get Embedding Result Using struc2vec"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "rm /Users/mark/GoogleDrive/UM/S4/GEMS/Git/StrucEmbeddingLibrary/semb/methods/struc2vec/pickles/weights_distances-layer-*.pickle\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Load the struc2vec embedding method class by its method id\n",
+    "EmbMethodClass = load_method(\"struc2vec\")\n",
+    "\n",
+    "# Load the airports data provider and use the first dataset it lists as the Brazil airport graph\n",
+    "AirportDataProvider = load_dataset(\"airports\")\n",
+    "airport_datasets = AirportDataProvider().get_datasets()\n",
+    "brazil_airport_graph = AirportDataProvider().load_dataset(airport_datasets[0])\n",
+    "\n",
+    "# Instantiate the embedding method on the graph with the chosen parameters\n",
+    "struc2vec = EmbMethodClass(brazil_airport_graph, \n",
+    "                           num_walks=10, \n",
+    "                           walk_length=80, \n",
+    "                           window_size=10, \n",
+    "                           dim=128, \n",
+    "                           opt1=True, opt2=True, opt3=True, until_layer=2)\n",
+    "struc2vec.train()\n",
+    "\n",
+    "# Retrieve the embeddings with the get_embeddings() method.\n",
+    "# The return value is a dictionary mapping each node_id to its embedding\n",
+    "dict_struc2vec_emb = struc2vec.get_embeddings()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'dim': 128,\n",
+       " 'walk_length': 80,\n",
+       " 'num_walks': 10,\n",
+       " 'window_size': 10,\n",
+       " 'until_layer': None,\n",
+       " 'iter': 5,\n",
+       " 'workers': 1,\n",
+       " 'weighted': False,\n",
+       " 'directed': False,\n",
+       " 'opt1': False,\n",
+       " 'opt2': False,\n",
+       " 'opt3': False}"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Show the tunable parameters of the loaded embedding method class\n",
+    "EmbMethodClass.__PARAMS__"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Load Evaluation Library and Perform Evaluation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from semb.evaluations.classification import *\n",
+    "from semb.evaluations.clustering import *\n",
+    "from semb.evaluations.utils import *"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Perform Classification"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Read in 131 node labels.\n",
+      ">>> Label 0 appears 32 times\n",
+      ">>> Label 1 appears 32 times\n",
+      ">>> Label 3 appears 35 times\n",
+      ">>> Label 2 appears 32 times\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Read the node labels from the label file with the get_label(fn) function\n",
+    "dict_labels = get_label(\"./sample-data/labels/airport_Brazil_label.txt\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'overall': {'accuracy': {'mean': 0.7633, 'std': 0.0787},\n",
+       "  'f1_macro': {'mean': 0.7548, 'std': 0.0765},\n",
+       "  'f1_micro': {'mean': 0.7633, 'std': 0.0787},\n",
+       "  'auc_micro': {'mean': 0.9182, 'std': 0.0327},\n",
+       "  'auc_macro': {'mean': 0.9224, 'std': 0.0301}},\n",
+       " 'detailed': {0: {'accuracy': 0.7778,\n",
+       "   'f1_macro': 0.7515,\n",
+       "   'f1_micro': 0.7778,\n",
+       "   'auc_micro': 0.9204,\n",
+       "   'auc_macro': 0.9298},\n",
+       "  1: {'accuracy': 0.6154,\n",
+       "   'f1_macro': 0.6209,\n",
+       "   'f1_micro': 0.6154,\n",
+       "   'auc_micro': 0.858,\n",
+       "   'auc_macro': 0.866},\n",
+       "  2: {'accuracy': 0.7692,\n",
+       "   'f1_macro': 0.7448,\n",
+       "   'f1_micro': 0.7692,\n",
+       "   'auc_micro': 0.9413,\n",
+       "   'auc_macro': 0.926},\n",
+       "  3: {'accuracy': 0.8462,\n",
+       "   'f1_macro': 0.8421,\n",
+       "   'f1_micro': 0.8462,\n",
+       "   'auc_micro': 0.9527,\n",
+       "   'auc_macro': 0.9561},\n",
+       "  4: {'accuracy': 0.8077,\n",
+       "   'f1_macro': 0.8148,\n",
+       "   'f1_micro': 0.8077,\n",
+       "   'auc_micro': 0.9186,\n",
+       "   'auc_macro': 0.9339}}}"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "perform_classification(dict_struc2vec_emb, dict_labels)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Perform Clustering"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/sklearn/metrics/cluster/supervised.py:859: FutureWarning: The behavior of NMI will change in version 0.22. To match the behavior of 'v_measure_score', NMI will use average_method='arithmetic' by default.\n",
+      "  FutureWarning)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'overall': {'purity': [0.6412213740458015], 'nmi': [0.4771373196787525]}}"
+      ]
+     },
+     "execution_count": 32,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "perform_clustering(dict_struc2vec_emb, dict_labels)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Perform Centrality Correlation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from semb.evaluations.centrality_correlation import *\n",
+    "centrality_correlation(brazil_airport_graph, \n",
+    "                       dict_struc2vec_emb, \n",
+    "                       centrality='clustering_coeff', \n",
+    "                       similarity='euclidean')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "SEMB",
+   "language": "python",
+   "name": "semb"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}