-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathpreprocess.py
122 lines (94 loc) · 3.88 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import numpy as np
import array
import random
import os
import scipy.ndimage.filters
import scipy.io
import sklearn.preprocessing
import laplacian_pyramid
import matplotlib.pyplot as plt
def extract_images(dataset='vanhateran', num_images=2000, image_dim=32*32,
center=False, rescale=False, remove_scale=False,
normalize=False, lowpass=True, bandpass=False, whiten=False,
patches_per_image=25, pad=False):
""" Extract images from vanhateran dataset """
image_side = int(np.sqrt(image_dim))
border = 4
IMAGES = np.zeros((num_images,image_dim))
image_dir = '/data/sets/images/' + dataset + '/'
filenames = filter( lambda f: not f.startswith('.'), os.listdir(image_dir))
num_images = num_images/patches_per_image
filenames = random.sample(filenames, num_images)
for n,f in enumerate(filenames):
img = load_file(dataset, image_dir + f)
(full_image_side, _) = np.shape(img)
if bandpass:
"""Return the bottom layer of the Laplacian Pyramid to remove low range structure"""
img = laplacian_pyramid.build(img, scales=2)[1]
if lowpass:
"""Low pass filter and downsample highest octave to remove noise at highest frequencies"""
img = scipy.ndimage.filters.gaussian_filter(img, sigma=1)
img = img[::2, ::2]
(full_image_side, _) = np.shape(img)
if remove_scale:
"""Remove top layer laplacian pyramid from image"""
# print "Removing scale: ", remove_scale
top = laplacian_pyramid.build(img.reshape(1, img.shape[0], img.shape[1]),
scales=remove_scale)[0]
for s in range(remove_scale-1):
top = laplacian_pyramid.expand(top, upscale=2)
img = img - top.squeeze()
if patches_per_image == 1:
IMAGES[n] = img.reshape(image_dim)
else:
for i in range(patches_per_image):
r = int(border + np.ceil((full_image_side-image_side-2*border) * random.uniform(0,1)))
c = int(border + np.ceil((full_image_side-image_side-2*border) * random.uniform(0,1)))
IMAGES[n*patches_per_image+i] = img[r:r+image_side, c:c+image_side].reshape(image_dim)
if center:
""" Mean subtract image patches """
IMAGES = sklearn.preprocessing.scale(IMAGES, axis=1, with_std=False)
if rescale:
""" Unit normalize image patches """
IMAGES = sklearn.preprocessing.normalize(IMAGES, axis=1, norm='l2')
if pad != False:
""" Pad image with border """
IMAGES = IMAGES.reshape(num_images*patches_per_image, image_side, image_side)
IMAGES = np.pad(IMAGES, ((0, 0), (pad, pad), (pad, pad)), mode='constant')
IMAGES = IMAGES.reshape(num_images*patches_per_image, (image_side+2*pad)**2)
if whiten != False:
print "Whitening"
IMAGES = IMAGES.dot(whiten)
return IMAGES
def load_file(dataset, filename):
if dataset == 'vanhateran':
""" Function to read Van Hateran. Also crops to center 1024x1024 and takes log intensity"""
R = 1024
C = 1536
extra = (C-R)/2
with open(filename, 'rb') as handle:
s = handle.read()
arr = array.array('H', s)
arr.byteswap()
img = np.array(arr, dtype='uint16').reshape(R, C)
img = img[:,extra-1:C-extra-1]
return np.log(img+1) # log intensity
elif dataset == 'cardiacmri':
""" Function to read Cardiac MRI Images """
img = scipy.io.loadmat(filename)
img = img['sol_yxzt']
img = img[:, :, 3, random.randint(0, 19)] # slice 3 is in middle
return img
def whitening_matrix(X,fudge=10^50):
# the matrix X should be observations-by-components
Xcov = np.dot(X.T,X)
# eigenvalue decomposition of the covariance matrix
d,V = np.linalg.eigh(Xcov)
# a fudge factor can be used so that eigenvectors associated with
# small eigenvalues do not get overamplified.
D = np.diag(1./np.sqrt(d+fudge))
# whitening matrix
W = np.dot(np.dot(V,D),V.T)
# multiply by the whitening matrix
X = np.dot(X,W)
return d, X, W