Skip to content

Commit

Permalink
Merge pull request #32 from KrishnaswamyLab/dev
Browse files Browse the repository at this point in the history
Update default parameters and gamma potential in MATLAB
  • Loading branch information
scottgigante authored Jun 8, 2018
2 parents decb351 + 0a0acc4 commit fc93cef
Show file tree
Hide file tree
Showing 11 changed files with 173 additions and 208 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,6 @@ Python/doc/build/

# Mac
.DS_Store

# Matlab
*.m~
12 changes: 2 additions & 10 deletions Matlab/compute_kernel_sparse.m
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
k = 5;
npca = [];
distfun = 'euclidean';
gamma = [];

% get the input parameters
if ~isempty(varargin)
for j = 1:length(varargin)
Expand All @@ -27,10 +27,6 @@
if strcmp(varargin{j}, 'distfun')
distfun = varargin{j+1};
end
% gamma
if strcmp(varargin{j}, 'gamma')
gamma = varargin{j+1};
end
end
end

Expand All @@ -57,11 +53,7 @@
W = sparse(i, j, ones(size(j)));

disp ' Symmetrize affinities'
if isempty(gamma)
W = W + W';
else
W = gamma * min(W,W') + (1-gamma) * max(W,W');
end
W = W + W';

disp ' Done computing kernel'

70 changes: 46 additions & 24 deletions Matlab/phate.m
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,13 @@
% 'ndim' - number of (output) embedding dimensions. Common values are 2
% or 3. Defaults to 2.
%
% 'k' - number of nearest neighbors of the knn graph. Defaults to 15.
% 'k' - number of nearest neighbors for bandwidth of adaptive alpha
% decaying kernel or, when a=[], number of nearest neighbors of the knn
% graph. For the unweighted kernel we recommend k to be a bit larger,
% e.g. 10 or 15. Defaults to 5.
%
% 'a' - alpha of alpha decaying kernel. when a=[] knn (unweighted) kernel
% is used. Defaults to 15.
%
% 't' - number of diffusion steps. Defaults to [] wich autmatically picks
% the optimal t.
Expand Down Expand Up @@ -44,11 +50,16 @@
% 'sqrt' - sqrt(P). (not default but often produces superior
% embeddings)
%
% 'plogp' - P*log(P).
% 'gamma' - 2/(1-\gamma)*P^((1-\gamma)/2)
%
% 'gamma' - gamma value for gamma potential method. Value between -1 and
% 1. -1 is diffusion distance. 1 is log potential. 0 is sqrt. Smaller
% gamma is a more locally sensitive embedding whereas larger gamma is a
% more globally sensitive embedding. Defaults to 0.5.
%
% 'pot_eps' - epsilon value added to diffusion operator prior to
% computing potential. Only used for 'pot_method' is 'log', i.e.:
% -log(P + pot_eps). Defaults to 1e-3.
% -log(P + pot_eps). Defaults to 1e-7.
%
% 'n_landmarks' - number of landmarks for fast and scalable PHATE. [] or
% n_landmarks = npoints does no landmarking, which is slower. More
Expand All @@ -64,7 +75,7 @@
% supplied input data can be empty ([]). Defaults to [].

npca = 100;
k = 15;
k = 5;
nsvd = 100;
n_landmarks = 2000;
ndim = 2;
Expand All @@ -74,22 +85,22 @@
distfun_mds = 'euclidean';
pot_method = 'log';
K = [];
a = 15;
Pnm = [];
t_max = 100;
% a = [];
pot_eps = 1e-3;
% alpha_th = 1e-4;
pot_eps = 1e-7;
gamma = 0.5;

% get input parameters
for i=1:length(varargin)
% k for knn adaptive sigma
if(strcmp(varargin{i},'k'))
k = lower(varargin{i+1});
end
% % a for alpha decaying kernel
% if(strcmp(varargin{i},'a'))
% a = lower(varargin{i+1});
% end
% a (alpha) for alpha decaying kernel
if(strcmp(varargin{i},'a'))
a = lower(varargin{i+1});
end
% diffusion time
if(strcmp(varargin{i},'t'))
t = lower(varargin{i+1});
Expand Down Expand Up @@ -126,22 +137,29 @@
if(strcmp(varargin{i},'n_landmarks'))
n_landmarks = lower(varargin{i+1});
end
% potential method: log, sqrt
% potential method: log, sqrt, gamma
if(strcmp(varargin{i},'pot_method'))
pot_method = lower(varargin{i+1});
end
% kernel
if(strcmp(varargin{i},'kernel'))
K = lower(varargin{i+1});
end
% kernel
if(strcmp(varargin{i},'gamma'))
gamma = lower(varargin{i+1});
end
% pot_eps
if(strcmp(varargin{i},'pot_eps'))
pot_eps = lower(varargin{i+1});
end
% % alpha_th
% if(strcmp(varargin{i},'alpha_th'))
% alpha_th = lower(varargin{i+1});
% end
end

if isempty(a) && k <=5
disp '======================================================================='
disp 'Make sure k is not too small when using an unweighted knn kernel (a=[])'
disp(['Currently k = ' numstr(k) ', which may be too small']);
disp '======================================================================='
end

tt_pca = 0;
Expand Down Expand Up @@ -170,11 +188,13 @@
end
% kernel
tic;
% if ~isempty(a)
% K = compute_alpha_kernel_sparse(pc, 'k', k, 'distfun', distfun, 'a', a, 'th', alpha_th);
% else
if isempty(a)
disp 'using unweighted knn kernel'
K = compute_kernel_sparse(pc, 'k', k, 'distfun', distfun);
% end
else
disp 'using alpha decaying kernel'
K = compute_alpha_kernel_sparse(pc, 'k', k, 'a', a, 'distfun', distfun);
end
tt_kernel = toc;
disp(['Computing kernel took ' num2str(tt_kernel) ' seconds']);
else
Expand Down Expand Up @@ -241,11 +261,13 @@
case 'sqrt'
disp 'using sqrt(P) potential distance'
Pot = sqrt(P_t);
case 'plogp'
disp 'using P*log(P) potential distance'
Pot = P_t .* log(P_t + eps);
case 'gamma'
disp 'Pot = 2/(1-\gamma)*P^((1-\gamma)/2)'
disp(['gamma = ' num2str(gamma)]);
gamma = min(gamma, 0.95);
Pot = 2/(1-gamma)*P_t.^((1-gamma)/2);
otherwise
disp 'potential method unknown'
error 'potential method unknown'
end
PDX = squareform(pdist(Pot, distfun_mds));
tt_pdx = toc;
Expand Down
4 changes: 2 additions & 2 deletions Matlab/run_phate_DLA_tree.m
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
ylabel 'PCA2'

%% PHATE 2D
Y_PHATE_2D = phate(M, 't', 45);
Y_PHATE_2D = phate(M, 't', 20, 'gamma', 0);

%% plot PHATE 2D
figure;
Expand All @@ -36,7 +36,7 @@
ylabel 'PHATE2'

%% PHATE 3D
Y_PHATE_3D = phate(M, 'ndim', 3, 't', 45);
Y_PHATE_3D = phate(M, 'ndim', 3, 't', 20);

%% plot PHATE 3D
figure;
Expand Down
4 changes: 2 additions & 2 deletions Matlab/run_phate_EB.m
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
ylabel(h, 'time');

%% PHATE 2D
Y_PHATE_2D = phate(data, 'k', 7, 'ndim', 2, 't', 20, 'pot_eps', eps);
Y_PHATE_2D = phate(data, 't', 20);

%% plot PHATE 2D
figure;
Expand All @@ -49,7 +49,7 @@
ylabel(h, 'time');

%% PHATE 3D
Y_PHATE_3D = phate(data, 'k', 7, 'ndim', 3, 't', 20, 'pot_eps', eps);
Y_PHATE_3D = phate(data, 't', 20, 'ndim', 3);

%% plot PHATE 3D
figure;
Expand Down
11 changes: 6 additions & 5 deletions Python/phate/phate.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ class PHATE(BaseEstimator):
n_components : int, optional, default: 2
number of dimensions in which the data will be embedded
k : int, optional, default: 10
k : int, optional, default: 5
number of nearest neighbors on which to build kernel
a : int, optional, default: 10
a : int, optional, default: 15
sets decay rate of kernel tails.
If None, alpha decaying kernel is not used
Expand Down Expand Up @@ -147,7 +147,7 @@ class PHATE(BaseEstimator):
`BioRxiv <http://biorxiv.org/content/early/2017/03/24/120378>`_.
"""

def __init__(self, n_components=2, k=10, a=10,
def __init__(self, n_components=2, k=5, a=15,
n_landmark=2000, t='auto', gamma=1,
n_pca=100, knn_dist='euclidean', mds_dist='euclidean',
mds='metric', n_jobs=1, random_state=None, verbose=1,
Expand Down Expand Up @@ -284,10 +284,10 @@ def set_params(self, **params):
n_components : int, optional, default: 2
number of dimensions in which the data will be embedded
k : int, optional, default: 10
k : int, optional, default: 5
number of nearest neighbors on which to build kernel
a : int, optional, default: 10
a : int, optional, default: 15
sets decay rate of kernel tails.
If None, alpha decaying kernel is not used
Expand Down Expand Up @@ -544,6 +544,7 @@ def fit(self, X):
precomputed = "distance"
else:
precomputed = "affinity"
log_info("Using precomputed {} matrix...".format(precomputed))
n_pca = None
else:
precomputed = None
Expand Down
25 changes: 20 additions & 5 deletions Python/phate/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,20 @@ def library_size_normalize(data, verbose=False):
if verbose:
print("Normalizing library sizes for %s cells" % (data.shape[0]))

# pandas support
columns, index = None, None
try:
if isinstance(data, pd.SparseDataFrame):
if isinstance(data, pd.SparseDataFrame) or \
pd.api.types.is_sparse(data):
columns, index = data.columns, data.index
data = data.to_coo()
elif pd.api.types.is_sparse(data):
data = data.to_coo()
except NameError:
pass
elif isinstance(data, pd.DataFrame):
columns, index = data.columns, data.index
except NameError as e:
if not str(e) == "name 'pd' is not defined":
raise
else:
pass
except AttributeError as e:
warnings.warn("{}: is pandas out of date? ({})".format(
str(e), pd.__version__), ImportWarning)
Expand Down Expand Up @@ -76,4 +83,12 @@ def library_size_normalize(data, verbose=False):
# axis = 1 independently normalizes each sample

data_norm = data_norm * median_transcript_count
if columns is not None:
# pandas dataframe
if sparse.issparse(data_norm):
data_norm = pd.SparseDataFrame(data_norm, default_fill_value=0)
else:
data_norm = pd.DataFrame(data_norm)
data_norm.columns = columns
data_norm.index = index
return data_norm
Loading

0 comments on commit fc93cef

Please sign in to comment.