Best-SVM.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import seaborn as sn\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.manifold import TSNE\n",
    "from mpl_toolkits.mplot3d import Axes3D\n",
    "import seaborn as sns\n",
    "from sklearn.neural_network import MLPClassifier\n",
    "from sklearn.model_selection import train_test_split\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>samplecodenumber</th>\n",
       "      <th>clumpthickness</th>\n",
       "      <th>cellsize</th>\n",
       "      <th>cellshape</th>\n",
       "      <th>marginaladhesion</th>\n",
       "      <th>singleepithelialcellsize</th>\n",
       "      <th>barenuclei</th>\n",
       "      <th>blandchromatin</th>\n",
       "      <th>normalnucleoli</th>\n",
       "      <th>mitoses</th>\n",
       "      <th>class</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1000025</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>1002945</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>7</td>\n",
       "      <td>10</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>1015425</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>1016277</td>\n",
       "      <td>6</td>\n",
       "      <td>8</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>1017023</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   samplecodenumber  clumpthickness  cellsize  cellshape  marginaladhesion  \\\n",
       "0           1000025               5         1          1                 1   \n",
       "1           1002945               5         4          4                 5   \n",
       "2           1015425               3         1          1                 1   \n",
       "3           1016277               6         8          8                 1   \n",
       "4           1017023               4         1          1                 3   \n",
       "\n",
       "   singleepithelialcellsize barenuclei  blandchromatin  normalnucleoli  \\\n",
       "0                         2          1               3               1   \n",
       "1                         7         10               3               2   \n",
       "2                         2          2               3               1   \n",
       "3                         3          4               3               7   \n",
       "4                         2          1               3               1   \n",
       "\n",
       "   mitoses  class  \n",
       "0        1      2  \n",
       "1        1      2  \n",
       "2        1      2  \n",
       "3        1      2  \n",
       "4        1      2  "
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "##Standaridzed preprocessign with rest of group\n",
    "\n",
    "df=pd.read_csv('breast-cancer-wisconsin.csv')\n",
    "features = ['samplecodenumber', 'clumpthickness', 'cellsize', 'cellshape', 'marginaladhesion','singleepithelialcellsize','barenuclei','blandchromatin','normalnucleoli','mitoses']\n",
    "\n",
    "#impute missing values (all of which are in barenuclei) with mean of barenuclei\n",
    "df=df.replace('?',3.54465593)\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[-0.11623873  0.20693572 -0.69999505 ... -0.17966213 -0.61182504\n",
      "  -0.34391178]\n",
      " [-0.1115035   0.20693572  0.28384518 ... -0.17966213 -0.28411186\n",
      "  -0.34391178]\n",
      " [-0.09126525 -0.50386559 -0.69999505 ... -0.17966213 -0.61182504\n",
      "  -0.34391178]\n",
      " ...\n",
      " [-0.29657482  0.20693572  2.25152563 ...  1.87236122  2.33759359\n",
      "   0.23956962]\n",
      " [-0.28254589 -0.14846494  1.59563215 ...  2.69317056  1.02674087\n",
      "  -0.34391178]\n",
      " [-0.28254589 -0.14846494  1.59563215 ...  2.69317056  0.37131451\n",
      "  -0.34391178]]\n"
     ]
    }
   ],
   "source": [
    "# Separating out the features\n",
    "\n",
    "x = df.loc[:, features].values# Separating out the target\n",
    "y = df.loc[:,['class']].values# Standardizing the features\n",
    "x = StandardScaler().fit_transform(x)\n",
    "print(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\sruth\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
      "  y = column_or_1d(y, warn=True)\n",
      "C:\\Users\\sruth\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
      "  \"avoid this warning.\", FutureWarning)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n",
       "    decision_function_shape='ovr', degree=3, gamma='auto_deprecated',\n",
       "    kernel='rbf', max_iter=-1, probability=False, random_state=None,\n",
       "    shrinking=True, tol=0.001, verbose=False)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#RBF Kernel had best result\n",
    "from sklearn.svm import SVC\n",
    "svc_model=SVC()\n",
    "svc_model.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_predict = svc_model.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Classification Report\n",
      "              precision    recall  f1-score   support\n",
      "\n",
      "           2       0.99      0.99      0.99       137\n",
      "           4       0.97      0.97      0.97        73\n",
      "\n",
      "    accuracy                           0.98       210\n",
      "   macro avg       0.98      0.98      0.98       210\n",
      "weighted avg       0.98      0.98      0.98       210\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from sklearn.metrics import classification_report, confusion_matrix\n",
    "print(\"Classification Report\")\n",
    "print(classification_report(y_test, y_predict))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "            0         1         2         3         4         5         6  \\\n",
      "0    0.236782  0.206936 -0.699995 -0.743299  1.119088 -0.097628 -0.706991   \n",
      "1    0.272864 -1.214667 -0.699995 -0.743299 -0.633247 -0.549561 -0.706991   \n",
      "2   -1.165331 -0.148465 -0.699995 -0.743299 -0.282780 -0.549561 -0.706991   \n",
      "3   -0.387798 -0.148465 -0.372048 -0.069850  0.768621 -0.097628  1.237845   \n",
      "4    0.176966 -0.503866 -0.699995 -0.743299 -0.633247 -0.097628 -0.706991   \n",
      "..        ...       ...       ...       ...       ...       ...       ...   \n",
      "205 -0.751713  1.983939  0.283845 -0.069850  2.520955  0.354305  1.793513   \n",
      "206 -1.635161  1.628538 -0.699995 -0.406574  1.119088  0.354305  1.793513   \n",
      "207 -0.610736 -1.214667 -0.699995 -0.743299 -0.633247 -0.549561 -0.706991   \n",
      "208  0.156741  1.273138 -0.372048 -0.069850 -0.633247  1.258172 -0.151324   \n",
      "209  0.426435 -1.214667 -0.699995 -0.743299 -0.633247 -0.549561 -0.706991   \n",
      "\n",
      "            7         8         9  \n",
      "0   -1.000471 -0.611825 -0.343912  \n",
      "1   -0.590067 -0.611825 -0.343912  \n",
      "2   -1.000471 -0.611825 -0.343912  \n",
      "3    1.461957  1.026741 -0.343912  \n",
      "4   -0.590067 -0.611825 -0.343912  \n",
      "..        ...       ...       ...  \n",
      "205  2.693171 -0.611825 -0.343912  \n",
      "206  1.461957  1.354454  0.239570  \n",
      "207 -0.590067 -0.611825 -0.343912  \n",
      "208  1.461957 -0.611825 -0.343912  \n",
      "209 -1.000471 -0.611825 -0.343912  \n",
      "\n",
      "[210 rows x 10 columns]\n"
     ]
    }
   ],
   "source": [
    "datafeatures = np.array(X_test)\n",
    "fin_results = pd.DataFrame(data = datafeatures)\n",
    "print(fin_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "            0\n",
      "0    0.236782\n",
      "1    0.272864\n",
      "2   -1.165331\n",
      "3   -0.387798\n",
      "4    0.176966\n",
      "..        ...\n",
      "205 -0.751713\n",
      "206 -1.635161\n",
      "207 -0.610736\n",
      "208  0.156741\n",
      "209  0.426435\n",
      "\n",
      "[210 rows x 1 columns]\n"
     ]
    }
   ],
   "source": [
    "fin_results.drop(fin_results.columns[1:], axis=1, inplace=True)\n",
    "print(fin_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2 2 2 4 2 2 4 2 2 2 4 4 2 4 4 4 2 2 2 2 2 4 4 2 4 2 4 2 2 2 2 4 4 4 4 4 2\n",
      " 4 2 2 2 2 2 2 2 4 2 2 2 2 2 2 4 2 4 4 4 2 2 4 2 2 2 2 2 2 2 4 2 2 2 2 2 2\n",
      " 4 4 2 2 2 2 4 4 2 4 2 2 2 4 4 2 4 2 2 2 2 4 2 2 2 2 2 2 4 2 2 4 4 2 4 2 4\n",
      " 2 2 2 2 2 2 2 2 2 2 4 4 4 2 4 2 4 2 2 2 4 4 2 4 2 2 2 4 4 2 4 2 2 2 4 2 4\n",
      " 2 2 2 2 2 4 2 2 2 2 2 2 2 4 2 2 4 4 2 2 2 2 2 4 4 2 2 4 4 4 4 4 4 2 2 2 2\n",
      " 4 2 2 2 2 2 2 2 4 2 4 4 4 4 2 2 2 2 2 4 4 4 2 4 2]\n"
     ]
    }
   ],
   "source": [
    "y_test = np.array(y_test)\n",
    "y_test = y_test.ravel()\n",
    "np.array(y_test)\n",
    "print(y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "            0  y_test\n",
      "0    0.236782       2\n",
      "1    0.272864       2\n",
      "2   -1.165331       2\n",
      "3   -0.387798       4\n",
      "4    0.176966       2\n",
      "..        ...     ...\n",
      "205 -0.751713       4\n",
      "206 -1.635161       4\n",
      "207 -0.610736       2\n",
      "208  0.156741       4\n",
      "209  0.426435       2\n",
      "\n",
      "[210 rows x 2 columns]\n"
     ]
    }
   ],
   "source": [
    "fin_results.insert(1, 'y_test', y_test, allow_duplicates = True) \n",
    "print(fin_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2 2 2 4 2 2 4 2 2 2 4 4 2 4 4 4 2 2 2 2 2 4 4 2 4 2 4 4 2 2 2 4 4 4 4 4 2\n",
      " 4 2 2 2 2 2 2 2 4 2 2 2 2 2 2 4 2 4 2 4 2 2 4 2 2 2 2 2 2 2 4 2 2 2 2 2 2\n",
      " 2 4 2 2 2 2 4 4 2 4 2 2 2 4 4 2 4 2 2 2 2 4 2 2 2 2 2 2 4 2 2 4 4 2 4 2 4\n",
      " 2 2 2 2 2 2 2 2 2 2 4 4 4 2 4 2 4 2 2 2 4 4 2 4 2 2 2 4 4 2 4 2 2 2 4 2 4\n",
      " 2 2 2 2 2 4 2 2 2 2 2 2 2 4 2 2 4 4 2 2 2 2 2 4 4 2 2 4 4 4 4 4 4 2 2 2 2\n",
      " 4 2 2 4 2 2 2 2 4 2 4 4 4 4 2 2 2 2 2 4 4 4 2 4 2]\n"
     ]
    }
   ],
   "source": [
    "y_predict = np.array(y_predict)\n",
    "y_predict = y_predict.ravel()\n",
    "np.array(y_predict)\n",
    "print(y_predict)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "            0  y_predict  y_test\n",
      "0    0.236782          2       2\n",
      "1    0.272864          2       2\n",
      "2   -1.165331          2       2\n",
      "3   -0.387798          4       4\n",
      "4    0.176966          2       2\n",
      "..        ...        ...     ...\n",
      "205 -0.751713          4       4\n",
      "206 -1.635161          4       4\n",
      "207 -0.610736          2       2\n",
      "208  0.156741          4       4\n",
      "209  0.426435          2       2\n",
      "\n",
      "[210 rows x 3 columns]\n"
     ]
    }
   ],
   "source": [
    "fin_results.insert(1, 'y_predict', y_predict, allow_duplicates = True) \n",
    "print(fin_results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "fin_results.to_csv(\"svmp_rediction\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import seaborn as sn\n",
	"import numpy as np\n",
	"import matplotlib.pyplot as plt\n",
	"from sklearn.preprocessing import StandardScaler\n",
	"from sklearn.decomposition import PCA\n",
	"from sklearn.manifold import TSNE\n",
	"from mpl_toolkits.mplot3d import Axes3D\n",
	"import seaborn as sns\n",
	"from sklearn.neural_network import MLPClassifier\n",
	"from sklearn.model_selection import train_test_split\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 19,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style scoped>\n",
	" .dataframe tbody tr th:only-of-type {\n",
	" vertical-align: middle;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: right;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>samplecodenumber</th>\n",
	" <th>clumpthickness</th>\n",
	" <th>cellsize</th>\n",
	" <th>cellshape</th>\n",
	" <th>marginaladhesion</th>\n",
	" <th>singleepithelialcellsize</th>\n",
	" <th>barenuclei</th>\n",
	" <th>blandchromatin</th>\n",
	" <th>normalnucleoli</th>\n",
	" <th>mitoses</th>\n",
	" <th>class</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <td>0</td>\n",
	" <td>1000025</td>\n",
	" <td>5</td>\n",
	" <td>1</td>\n",
	" <td>1</td>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" <td>1</td>\n",
	" <td>3</td>\n",
	" <td>1</td>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>1</td>\n",
	" <td>1002945</td>\n",
	" <td>5</td>\n",
	" <td>4</td>\n",
	" <td>4</td>\n",
	" <td>5</td>\n",
	" <td>7</td>\n",
	" <td>10</td>\n",
	" <td>3</td>\n",
	" <td>2</td>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>2</td>\n",
	" <td>1015425</td>\n",
	" <td>3</td>\n",
	" <td>1</td>\n",
	" <td>1</td>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" <td>2</td>\n",
	" <td>3</td>\n",
	" <td>1</td>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>3</td>\n",
	" <td>1016277</td>\n",
	" <td>6</td>\n",
	" <td>8</td>\n",
	" <td>8</td>\n",
	" <td>1</td>\n",
	" <td>3</td>\n",
	" <td>4</td>\n",
	" <td>3</td>\n",
	" <td>7</td>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <td>4</td>\n",
	" <td>1017023</td>\n",
	" <td>4</td>\n",
	" <td>1</td>\n",
	" <td>1</td>\n",
	" <td>3</td>\n",
	" <td>2</td>\n",
	" <td>1</td>\n",
	" <td>3</td>\n",
	" <td>1</td>\n",
	" <td>1</td>\n",
	" <td>2</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"</div>"
	],
	"text/plain": [
	" samplecodenumber clumpthickness cellsize cellshape marginaladhesion \\\n",
	"0 1000025 5 1 1 1 \n",
	"1 1002945 5 4 4 5 \n",
	"2 1015425 3 1 1 1 \n",
	"3 1016277 6 8 8 1 \n",
	"4 1017023 4 1 1 3 \n",
	"\n",
	" singleepithelialcellsize barenuclei blandchromatin normalnucleoli \\\n",
	"0 2 1 3 1 \n",
	"1 7 10 3 2 \n",
	"2 2 2 3 1 \n",
	"3 3 4 3 7 \n",
	"4 2 1 3 1 \n",
	"\n",
	" mitoses class \n",
	"0 1 2 \n",
	"1 1 2 \n",
	"2 1 2 \n",
	"3 1 2 \n",
	"4 1 2 "
	]
	},
	"execution_count": 19,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"##Standaridzed preprocessign with rest of group\n",
	"\n",
	"df=pd.read_csv('breast-cancer-wisconsin.csv')\n",
	"features = ['samplecodenumber', 'clumpthickness', 'cellsize', 'cellshape', 'marginaladhesion','singleepithelialcellsize','barenuclei','blandchromatin','normalnucleoli','mitoses']\n",
	"\n",
	"#impute missing values (all of which are in barenuclei) with mean of barenuclei\n",
	"df=df.replace('?',3.54465593)\n",
	"\n",
	"df.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 20,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[[-0.11623873 0.20693572 -0.69999505 ... -0.17966213 -0.61182504\n",
	" -0.34391178]\n",
	" [-0.1115035 0.20693572 0.28384518 ... -0.17966213 -0.28411186\n",
	" -0.34391178]\n",
	" [-0.09126525 -0.50386559 -0.69999505 ... -0.17966213 -0.61182504\n",
	" -0.34391178]\n",
	" ...\n",
	" [-0.29657482 0.20693572 2.25152563 ... 1.87236122 2.33759359\n",
	" 0.23956962]\n",
	" [-0.28254589 -0.14846494 1.59563215 ... 2.69317056 1.02674087\n",
	" -0.34391178]\n",
	" [-0.28254589 -0.14846494 1.59563215 ... 2.69317056 0.37131451\n",
	" -0.34391178]]\n"
	]
	}
	],
	"source": [
	"# Separating out the features\n",
	"\n",
	"x = df.loc[:, features].values# Separating out the target\n",
	"y = df.loc[:,['class']].values# Standardizing the features\n",
	"x = StandardScaler().fit_transform(x)\n",
	"print(x)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 21,
	"metadata": {},
	"outputs": [],
	"source": [
	"from sklearn.model_selection import train_test_split\n",
	"X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)\n"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"metadata": {},
	"outputs": [
	{
	"name": "stderr",
	"output_type": "stream",
	"text": [
	"C:\\Users\\sruth\\Anaconda3\\lib\\site-packages\\sklearn\\utils\\validation.py:724: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n",
	" y = column_or_1d(y, warn=True)\n",
	"C:\\Users\\sruth\\Anaconda3\\lib\\site-packages\\sklearn\\svm\\base.py:193: FutureWarning: The default value of gamma will change from 'auto' to 'scale' in version 0.22 to account better for unscaled features. Set gamma explicitly to 'auto' or 'scale' to avoid this warning.\n",
	" \"avoid this warning.\", FutureWarning)\n"
	]
	},
	{
	"data": {
	"text/plain": [
	"SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,\n",
	" decision_function_shape='ovr', degree=3, gamma='auto_deprecated',\n",
	" kernel='rbf', max_iter=-1, probability=False, random_state=None,\n",
	" shrinking=True, tol=0.001, verbose=False)"
	]
	},
	"execution_count": 22,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"#RBF Kernel had best result\n",
	"from sklearn.svm import SVC\n",
	"svc_model=SVC()\n",
	"svc_model.fit(X_train, y_train)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 23,
	"metadata": {},
	"outputs": [],
	"source": [
	"y_predict = svc_model.predict(X_test)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 24,
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Classification Report\n",
	" precision recall f1-score support\n",
	"\n",
	" 2 0.99 0.99 0.99 137\n",
	" 4 0.97 0.97 0.97 73\n",
	"\n",
	" accuracy 0.98 210\n",
	" macro avg 0.98 0.98 0.98 210\n",
	"weighted avg 0.98 0.98 0.98 210\n",
	"\n"
	]
	}
	],
	"source": [
	"from sklearn.metrics import classification_report, confusion_matrix\n",
	"print(\"Classification Report\")\n",
	"print(classification_report(y_test, y_predict))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 25,
	"metadata": {
	"scrolled": false
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	" 0 1 2 3 4 5 6 \\\n",
	"0 0.236782 0.206936 -0.699995 -0.743299 1.119088 -0.097628 -0.706991 \n",
	"1 0.272864 -1.214667 -0.699995 -0.743299 -0.633247 -0.549561 -0.706991 \n",
	"2 -1.165331 -0.148465 -0.699995 -0.743299 -0.282780 -0.549561 -0.706991 \n",
	"3 -0.387798 -0.148465 -0.372048 -0.069850 0.768621 -0.097628 1.237845 \n",
	"4 0.176966 -0.503866 -0.699995 -0.743299 -0.633247 -0.097628 -0.706991 \n",
	".. ... ... ... ... ... ... ... \n",
	"205 -0.751713 1.983939 0.283845 -0.069850 2.520955 0.354305 1.793513 \n",
	"206 -1.635161 1.628538 -0.699995 -0.406574 1.119088 0.354305 1.793513 \n",
	"207 -0.610736 -1.214667 -0.699995 -0.743299 -0.633247 -0.549561 -0.706991 \n",
	"208 0.156741 1.273138 -0.372048 -0.069850 -0.633247 1.258172 -0.151324 \n",
	"209 0.426435 -1.214667 -0.699995 -0.743299 -0.633247 -0.549561 -0.706991 \n",
	"\n",
	" 7 8 9 \n",
	"0 -1.000471 -0.611825 -0.343912 \n",
	"1 -0.590067 -0.611825 -0.343912 \n",
	"2 -1.000471 -0.611825 -0.343912 \n",
	"3 1.461957 1.026741 -0.343912 \n",
	"4 -0.590067 -0.611825 -0.343912 \n",
	".. ... ... ... \n",
	"205 2.693171 -0.611825 -0.343912 \n",
	"206 1.461957 1.354454 0.239570 \n",
	"207 -0.590067 -0.611825 -0.343912 \n",
	"208 1.461957 -0.611825 -0.343912 \n",
	"209 -1.000471 -0.611825 -0.343912 \n",
	"\n",
	"[210 rows x 10 columns]\n"
	]
	}
	],
	"source": [
	"datafeatures = np.array(X_test)\n",
	"fin_results = pd.DataFrame(data = datafeatures)\n",
	"print(fin_results)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 26,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	" 0\n",
	"0 0.236782\n",
	"1 0.272864\n",
	"2 -1.165331\n",
	"3 -0.387798\n",
	"4 0.176966\n",
	".. ...\n",
	"205 -0.751713\n",
	"206 -1.635161\n",
	"207 -0.610736\n",
	"208 0.156741\n",
	"209 0.426435\n",
	"\n",
	"[210 rows x 1 columns]\n"
	]
	}
	],
	"source": [
	"fin_results.drop(fin_results.columns[1:], axis=1, inplace=True)\n",
	"print(fin_results)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 27,
	"metadata": {
	"scrolled": true
	},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[2 2 2 4 2 2 4 2 2 2 4 4 2 4 4 4 2 2 2 2 2 4 4 2 4 2 4 2 2 2 2 4 4 4 4 4 2\n",
	" 4 2 2 2 2 2 2 2 4 2 2 2 2 2 2 4 2 4 4 4 2 2 4 2 2 2 2 2 2 2 4 2 2 2 2 2 2\n",
	" 4 4 2 2 2 2 4 4 2 4 2 2 2 4 4 2 4 2 2 2 2 4 2 2 2 2 2 2 4 2 2 4 4 2 4 2 4\n",
	" 2 2 2 2 2 2 2 2 2 2 4 4 4 2 4 2 4 2 2 2 4 4 2 4 2 2 2 4 4 2 4 2 2 2 4 2 4\n",
	" 2 2 2 2 2 4 2 2 2 2 2 2 2 4 2 2 4 4 2 2 2 2 2 4 4 2 2 4 4 4 4 4 4 2 2 2 2\n",
	" 4 2 2 2 2 2 2 2 4 2 4 4 4 4 2 2 2 2 2 4 4 4 2 4 2]\n"
	]
	}
	],
	"source": [
	"y_test = np.array(y_test)\n",
	"y_test = y_test.ravel()\n",
	"np.array(y_test)\n",
	"print(y_test)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 28,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	" 0 y_test\n",
	"0 0.236782 2\n",
	"1 0.272864 2\n",
	"2 -1.165331 2\n",
	"3 -0.387798 4\n",
	"4 0.176966 2\n",
	".. ... ...\n",
	"205 -0.751713 4\n",
	"206 -1.635161 4\n",
	"207 -0.610736 2\n",
	"208 0.156741 4\n",
	"209 0.426435 2\n",
	"\n",
	"[210 rows x 2 columns]\n"
	]
	}
	],
	"source": [
	"fin_results.insert(1, 'y_test', y_test, allow_duplicates = True) \n",
	"print(fin_results)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 29,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"[2 2 2 4 2 2 4 2 2 2 4 4 2 4 4 4 2 2 2 2 2 4 4 2 4 2 4 4 2 2 2 4 4 4 4 4 2\n",
	" 4 2 2 2 2 2 2 2 4 2 2 2 2 2 2 4 2 4 2 4 2 2 4 2 2 2 2 2 2 2 4 2 2 2 2 2 2\n",
	" 2 4 2 2 2 2 4 4 2 4 2 2 2 4 4 2 4 2 2 2 2 4 2 2 2 2 2 2 4 2 2 4 4 2 4 2 4\n",
	" 2 2 2 2 2 2 2 2 2 2 4 4 4 2 4 2 4 2 2 2 4 4 2 4 2 2 2 4 4 2 4 2 2 2 4 2 4\n",
	" 2 2 2 2 2 4 2 2 2 2 2 2 2 4 2 2 4 4 2 2 2 2 2 4 4 2 2 4 4 4 4 4 4 2 2 2 2\n",
	" 4 2 2 4 2 2 2 2 4 2 4 4 4 4 2 2 2 2 2 4 4 4 2 4 2]\n"
	]
	}
	],
	"source": [
	"y_predict = np.array(y_predict)\n",
	"y_predict = y_predict.ravel()\n",
	"np.array(y_predict)\n",
	"print(y_predict)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 30,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	" 0 y_predict y_test\n",
	"0 0.236782 2 2\n",
	"1 0.272864 2 2\n",
	"2 -1.165331 2 2\n",
	"3 -0.387798 4 4\n",
	"4 0.176966 2 2\n",
	".. ... ... ...\n",
	"205 -0.751713 4 4\n",
	"206 -1.635161 4 4\n",
	"207 -0.610736 2 2\n",
	"208 0.156741 4 4\n",
	"209 0.426435 2 2\n",
	"\n",
	"[210 rows x 3 columns]\n"
	]
	}
	],
	"source": [
	"fin_results.insert(1, 'y_predict', y_predict, allow_duplicates = True) \n",
	"print(fin_results)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 31,
	"metadata": {},
	"outputs": [],
	"source": [
	"fin_results.to_csv(\"svmp_rediction\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.7.4"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}