From b41d511d0e8d9c5d07f038ee902df2c6a07265d4 Mon Sep 17 00:00:00 2001 From: vladislavovich-d <48733961+vladislavovich-d@users.noreply.github.com> Date: Mon, 13 May 2019 12:18:37 +0300 Subject: [PATCH] Add files via upload --- Face_Recognition/Python_FaceRec.ipynb | 757 ++++++++++++++++++++++++++ 1 file changed, 757 insertions(+) create mode 100644 Face_Recognition/Python_FaceRec.ipynb diff --git a/Face_Recognition/Python_FaceRec.ipynb b/Face_Recognition/Python_FaceRec.ipynb new file mode 100644 index 0000000..562f77e --- /dev/null +++ b/Face_Recognition/Python_FaceRec.ipynb @@ -0,0 +1,757 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 205, + "metadata": {}, + "outputs": [], + "source": [ + "import glob,os\n", + "\n", + "import numpy as np\n", + "\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from PIL import Image " + ] + }, + { + "cell_type": "code", + "execution_count": 239, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Input your start number of photo: 1\n", + "Number of images: 400\n", + "Size of picture:(112, 92)\n", + "Sample: 0\n", + "Number of photo: 1\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "def preprocesing (path):\n", + " #counting the number of pictures\n", + " path = \"faces/s*/*.pgm\"\n", + " start_photo = int(input('Input your start number of photo: '))\n", + " index1 = start_photo - 1\n", + " files = glob.glob(path)\n", + " strings = len(files)\n", + " print(f'Number of images: {strings}')\n", + "\n", + " #counting the size of picture\n", + " fname = files[index]\n", + " picture = np.array(Image.open(fname))\n", + " columns = picture.shape[0]*picture.shape[1]\n", + " height = picture.shape[0]\n", + " width = picture.shape[1]\n", + " picture_size = picture.shape\n", + " print(f'Size of picture:{picture_size}')\n", + " print(f'Sample: {index1}\\nNumber of photo: {start_photo}')\n", + " plt.imshow(picture, 'gray')\n", + " return files,strings,columns,picture_size,height,width,index1\n", + "\n", + "#picture test \n", + "files, strings, columns, picture_size,height,width, index1 = preprocesing(path)\n", + "pic_size = picture_size" + ] + }, + { + "cell_type": "code", + "execution_count": 240, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset shape: (400, 10304)\n", + "Labels shape: (400, 1)\n", + "Pic size (112, 92)\n" + ] + } + ], + "source": [ + "def create_matrix (files, strings, columns):\n", + " string = 0\n", + " labels = []\n", + " data = np.zeros((strings, columns))\n", + " for f in files:\n", + " pctr = np.array(Image.open(f))\n", + " pctr = pctr.reshape(1, pctr.shape[0] * pctr.shape[1])\n", + " \n", + " #create dataset\n", + " data[string, :] = pctr\n", + " string += 1 \n", + " \n", + " #create labels for pictures\n", + " fname,ext = os.path.splitext(f)\n", + " _,name,indx, = fname.split('/')\n", + " labels.append(int(name[1:]))\n", + " labels=np.array(labels).reshape(len(labels),1)\n", + " return data, labels\n", + "\n", + "#creating dataset from pictures\n", + 
"data, labels = create_matrix (files,strings,columns)\n", + "print('Dataset shape: ', data.shape)\n", + "print('Labels shape: ', labels.shape)\n", + "print('Pic size ',pic_size)" + ] + }, + { + "cell_type": "code", + "execution_count": 241, + "metadata": {}, + "outputs": [], + "source": [ + "def vectorization(v, w = (height,width)):\n", + " picture = v.reshape(w[0],w[1])\n", + " return picture\n", + "\n", + "def print_picture(ind, data, labels ,size = (height,width)):\n", + " picture = vectorization(data[ind], size)\n", + " plt.imshow(picture, 'gray')\n", + " plt.title(f'label:{(np.squeeze(labels)[ind])}')" + ] + }, + { + "cell_type": "code", + "execution_count": 242, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "print_picture(index1, data, labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 243, + "metadata": {}, + "outputs": [], + "source": [ + "def choice_ind(ind1, ind2):\n", + " \n", + " #correct indet while 1st elem has 0 \n", + " ind1 -= 1\n", + " ind2 -= 1\n", + " \n", + " #select index\n", + " ind1 = [i + 10 for i in range(ind1 - 10,390,10)]\n", + " ind2 = [i + 10 for i in range(ind2 - 10,390,10)]\n", + " test_index = np.concatenate((ind1, ind2))\n", + " \n", + " return test_index" + ] + }, + { + "cell_type": "code", + "execution_count": 244, + "metadata": {}, + "outputs": [], + "source": [ + "def split(data, ind1 = 9 , ind2 = 10):\n", + " \n", + " #create index\n", + " index = choice_ind(ind1, ind2)\n", + " \n", + " #create test dataset\n", + " test_set = data[index,:]\n", + " test_labels = labels[index]\n", + " \n", + " #create train dataset\n", + " train_set = np.delete(data, index, axis = 0)\n", + " train_labels = np.delete(labels, index)\n", + " train_labels = train_labels.reshape(len(train_labels),1)\n", + " \n", + " return train_set, train_labels, test_set, test_labels\n" + ] + }, + { + "cell_type": "code", + "execution_count": 245, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train set shape>> (320, 10304) train labels shape>> (320, 1)\n", + "Test set shape>> (80, 10304) test labels shape>> (80, 1)\n" + ] + } + ], + "source": [ + "train_set, train_labels, test_set, test_labels = split(data)\n", + "print(f'Train set shape>> {train_set.shape} train labels shape>> {train_labels.shape}')\n", + "print(f'Test set shape>> {test_set.shape} test labels shape>> {test_labels.shape}')\n", + "train_origin=train_set\n", + "test_origin=test_set" + ] + }, + { + "cell_type": "code", + "execution_count": 246, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + 
"text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#test trains_dataset\n", + "print_picture(index1, train_set, train_labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 247, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "#test dataset of test samples\n", + "print_picture(index1, test_set, test_labels)" + ] + }, + { + "cell_type": "code", + "execution_count": 248, + "metadata": {}, + "outputs": [], + "source": [ + "def identify_face(train_set, unknown_set):\n", + " minimal_dist = []\n", + " \n", + " #create vector of indexes\n", + " for ind in range (unknown_set.shape[0]):\n", + " dist = np.linalg.norm(train_set - unknown_set[ind,:], axis = 1, keepdims = True)\n", + " minimal_dist.append(np.argmin(dist))\n", + " pred_index = np.array(minimal_dist).reshape(len(minimal_dist), 1)\n", + " return pred_index\n", + " \n", + "def accuracy(train_labels, test_labels, pred_index, index = 1):\n", + " if index: \n", + " yhat = np.squeeze(train_labels[pred_index])\n", + " else:\n", + " yhat = pred_index\n", + " ytest = np.squeeze(test_labels)\n", + " compare = yhat == ytest\n", + " err_index = np.where(compare == False)\n", + " true_index = np.where(compare == True)\n", + " true_ans = sum(compare)\n", + " accur = (true_ans / test_labels.shape[0]) * 100\n", + " return accur, err_index, true_index\n" + ] + }, + { + "cell_type": "code", + "execution_count": 278, + "metadata": {}, + "outputs": [], + "source": [ + "def recognition(train_set, train_labels, test_set, test_labels, figsize = (10, 10), \n", + " index = 1, norm = 0, axis = 0, pca_flag = False, pca_nrg = 0.98, log = True, lbp_flag = False):\n", + " \n", + " if lbp_flag:\n", + " train_set = create_lbp_set(train_set)\n", + " test_set = create_lbp_set(test_set)\n", + " \n", + " #Normaliz\n", + " if norm == 1:\n", + " train_set = subtract_mean(train_set, axis = axis)\n", + " test_set = subtract_mean(test_set, axis = axis)\n", + " elif norm == 2:\n", + " train_set = standart_norm(train_set, axis = axis)\n", + " test_set = standart_norm(test_set, axis = axis)\n", + " \n", + " if pca_flag:\n", + " if norm == 0:\n", + " print('You 
need normalization!')\n", + " train_set, transform_pca = pca(train_set, pca_nrg = pca_nrg, log = log)\n", + " test_set = np.dot(transform_pca, test_set.T).T\n", + " \n", + " pred_index = identify_face(train_set, test_set)\n", + " \n", + " accur, err_index, true_index = accuracy(train_labels, test_labels, pred_index, index = index)\n", + " if log:\n", + " print(f'Accuracy >> {accur}%', )\n", + " \n", + " return accur" + ] + }, + { + "cell_type": "code", + "execution_count": 280, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Accuracy >> 96.25%\n" + ] + }, + { + "data": { + "text/plain": [ + "96.25" + ] + }, + "execution_count": 280, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Simple recognition result\n", + "recognition(train_set, train_labels, test_set, test_labels)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Normalization & Optimization" + ] + }, + { + "cell_type": "code", + "execution_count": 281, + "metadata": {}, + "outputs": [], + "source": [ + "def subtract_mean(x, axis=0):\n", + " return x - x.mean(axis = axis,keepdims = True)\n", + "\n", + "def standart_norm(X, axis = 0):\n", + " A = (X - X.mean(axis = axis, keepdims = True)) / X.std( axis = axis, keepdims = True)\n", + " return A" + ] + }, + { + "cell_type": "code", + "execution_count": 282, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normalization by features(subtract mean)\n", + "Accuracy >> 96.25%\n" + ] + }, + { + "data": { + "text/plain": [ + "96.25" + ] + }, + "execution_count": 282, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print('Normalization by features(subtract mean)')\n", + "recognition(train_set, train_labels, test_set, test_labels,norm = 1, axis = 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 283, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "Normalization by samples(subtract mean)\n", + "Accuracy >> 95.0%\n" + ] + }, + { + "data": { + "text/plain": [ + "95.0" + ] + }, + "execution_count": 283, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print('Normalization by samples(subtract mean)')\n", + "recognition(train_set, train_labels, test_set, test_labels,norm = 1, axis = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 284, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normalization by features(standart norm)\n", + "Accuracy >> 96.25%\n" + ] + }, + { + "data": { + "text/plain": [ + "96.25" + ] + }, + "execution_count": 284, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print('Normalization by features(standart norm)')\n", + "recognition(train_set, train_labels, test_set, test_labels,norm = 2, axis = 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 285, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normalization by samples(standart norm)\n", + "Accuracy >> 95.0%\n" + ] + }, + { + "data": { + "text/plain": [ + "95.0" + ] + }, + "execution_count": 285, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print('Normalization by samples(standart norm)')\n", + "recognition(train_set, train_labels, test_set, test_labels,norm = 2, axis = 1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### *Accuracy did not improve after either the standard or the mean-subtraction normalization method!*" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# PCA method" + ] + }, + { + "cell_type": "code", + "execution_count": 293, + "metadata": {}, + "outputs": [], + "source": [ + "def pca(X, pca_nrg = 0.98, log = False):\n", + " #Create covariance matrix\n", + " C = np.dot(X, X.T)\n", + " \n", + " #Find out eigenvalues and eigenvectors\n", + " eig, eig_v = 
np.linalg.eig(C)\n", + " \n", + " #Choose type of pca model\n", + " if (type(pca_nrg) == float or type(pca_nrg) == np.float64) and (pca_nrg < 1):\n", + " n = X.shape[1]\n", + " p = 1\n", + " i = 0\n", + " while p>pca_nrg:\n", + " i += 1\n", + " ind = np.argsort(-eig)[:n - i]\n", + " p = np.sum(eig[ind]) / np.sum(eig)\n", + " important_v = eig_v[ind] \n", + " elif pca_nrg == 1:\n", + " important_v = eig_v \n", + " else:\n", + " ind_big = np.argsort(-eig)[:int(pca_nrg)]\n", + " important_v = eig_v[ind_big]\n", + " W = np.dot(important_v,X)\n", + " X_pca=np.dot(W,X.T)\n", + " if log:\n", + " print(f'pca_energy >> {pca_nrg}, the number of components >> {X_pca.T.shape[1]}')\n", + " print(f'shape train dataset after projection into PCA subspace {X_pca.T.shape}') \n", + " return X_pca.T,W" + ] + }, + { + "cell_type": "code", + "execution_count": 297, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normalization by features(subtract_mean)\n", + "pca_energy >> 0.95, the number of components >> 161\n", + "shape train dataset after projection into PCA subspace (320, 161)\n", + "Accuracy >> 95.0%\n", + "\n", + "\n", + "pca_energy >> 0.96, the number of components >> 180\n", + "shape train dataset after projection into PCA subspace (320, 180)\n", + "Accuracy >> 95.0%\n", + "\n", + "\n", + "pca_energy >> 0.97, the number of components >> 202\n", + "shape train dataset after projection into PCA subspace (320, 202)\n", + "Accuracy >> 96.25%\n", + "\n", + "\n", + "pca_energy >> 0.98, the number of components >> 229\n", + "shape train dataset after projection into PCA subspace (320, 229)\n", + "Accuracy >> 96.25%\n", + "\n", + "\n", + "pca_energy >> 0.99, the number of components >> 264\n", + "shape train dataset after projection into PCA subspace (320, 264)\n", + "Accuracy >> 96.25%\n", + "\n", + "\n" + ] + } + ], + "source": [ + "print(\"Normalization by features(subtract_mean)\")\n", + "for pca_nrg in [0.95, 0.96, 0.97, 0.98, 
0.99]:\n", + " acc = recognition(train_set,train_labels,test_set,test_labels, norm=1, axis=0, pca_flag=True, pca_nrg = pca_nrg)\n", + " print(f'\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": 298, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normalization by features(standart_norm)\n", + "pca_energy >> 0.95, the number of components >> 163\n", + "shape train dataset after projection into PCA subspace (320, 163)\n", + "Accuracy >> 95.0%\n", + "\n", + "\n", + "pca_energy >> 0.96, the number of components >> 182\n", + "shape train dataset after projection into PCA subspace (320, 182)\n", + "Accuracy >> 95.0%\n", + "\n", + "\n", + "pca_energy >> 0.97, the number of components >> 204\n", + "shape train dataset after projection into PCA subspace (320, 204)\n", + "Accuracy >> 95.0%\n", + "\n", + "\n", + "pca_energy >> 0.98, the number of components >> 231\n", + "shape train dataset after projection into PCA subspace (320, 231)\n", + "Accuracy >> 95.0%\n", + "\n", + "\n", + "pca_energy >> 0.99, the number of components >> 265\n", + "shape train dataset after projection into PCA subspace (320, 265)\n", + "Accuracy >> 95.0%\n", + "\n", + "\n" + ] + } + ], + "source": [ + "print(\"Normalization by features(standart_norm)\")\n", + "for pca_nrg in [0.95, 0.96, 0.97, 0.98, 0.99]:\n", + " acc = recognition(train_set,train_labels,test_set,test_labels, norm=2, axis=0, pca_flag=True, pca_nrg = pca_nrg)\n", + " print(f'\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": 299, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normalization by samples(subtract_mean)\n", + "pca_energy >> 0.95, the number of components >> 125\n", + "shape train dataset after projection into PCA subspace (320, 125)\n", + "Accuracy >> 88.75%\n", + "\n", + "\n", + "pca_energy >> 0.96, the number of components >> 146\n", + "shape train dataset after projection into PCA subspace 
(320, 146)\n", + "Accuracy >> 88.75%\n", + "\n", + "\n", + "pca_energy >> 0.97, the number of components >> 171\n", + "shape train dataset after projection into PCA subspace (320, 171)\n", + "Accuracy >> 87.5%\n", + "\n", + "\n", + "pca_energy >> 0.98, the number of components >> 203\n", + "shape train dataset after projection into PCA subspace (320, 203)\n", + "Accuracy >> 88.75%\n", + "\n", + "\n", + "pca_energy >> 0.99, the number of components >> 247\n", + "shape train dataset after projection into PCA subspace (320, 247)\n", + "Accuracy >> 88.75%\n", + "\n", + "\n" + ] + } + ], + "source": [ + "print(\"Normalization by samples(subtract_mean)\")\n", + "for pca_nrg in [0.95, 0.96, 0.97, 0.98, 0.99]:\n", + " acc = recognition(train_set,train_labels,test_set,test_labels, norm=1, axis=1, pca_flag=True, pca_nrg = pca_nrg)\n", + " print(f'\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": 300, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Normalization by samples(standart_norm)\n", + "pca_energy >> 0.95, the number of components >> 129\n", + "shape train dataset after projection into PCA subspace (320, 129)\n", + "Accuracy >> 88.75%\n", + "\n", + "\n", + "pca_energy >> 0.96, the number of components >> 150\n", + "shape train dataset after projection into PCA subspace (320, 150)\n", + "Accuracy >> 87.5%\n", + "\n", + "\n", + "pca_energy >> 0.97, the number of components >> 175\n", + "shape train dataset after projection into PCA subspace (320, 175)\n", + "Accuracy >> 90.0%\n", + "\n", + "\n", + "pca_energy >> 0.98, the number of components >> 207\n", + "shape train dataset after projection into PCA subspace (320, 207)\n", + "Accuracy >> 90.0%\n", + "\n", + "\n", + "pca_energy >> 0.99, the number of components >> 250\n", + "shape train dataset after projection into PCA subspace (320, 250)\n", + "Accuracy >> 88.75%\n", + "\n", + "\n" + ] + } + ], + "source": [ + "print(\"Normalization by 
samples(standart_norm)\")\n", + "for pca_nrg in [0.95, 0.96, 0.97, 0.98, 0.99]:\n", + " acc = recognition(train_set,train_labels,test_set,test_labels, norm=2, axis=1, pca_flag=True, pca_nrg = pca_nrg)\n", + " print(f'\\n')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}