-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
20 changed files
with
671 additions
and
182 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
%% Comparison script for Streaming r-truncated SVD (Moses, PM, GROUSE) | ||
%% Comparison script for Streaming r-truncated SVD (Moses, PM, FD, & GROUSE) | ||
% | ||
% Description: | ||
% This code is supplied as additional material alongside our paper: | ||
|
@@ -11,30 +11,31 @@ | |
% | ||
% The script is segmented into four main categories: | ||
% | ||
% -- Synthetic data evaluation: bench PM, MOSES, & GROUSE using | ||
% -- Synthetic data evaluation: bench PM, MOSES, FD, RFD, & GROUSE using | ||
% synthetic datasets | ||
% -- Real data evaluation: bench PM, MOSES, & GROUSE using real datasets | ||
% -- Real data evaluation: bench PM, MOSES, FD, RFD, & GROUSE using real | ||
% datasets | ||
% -- Speed tests: compare the execution speed of MOSES when compared | ||
% with PM & GROUSE | ||
% with PM, FD, RFD, & GROUSE | ||
% -- MOSES scaling tests: compare the performance of MOSES, in terms of | ||
% error across different parameters of | ||
% block size (b), rank (r), and ambient dim. (n) | ||
% | ||
% Author: Andreas Grammenos ([email protected]) | ||
% | ||
% Last touched date 06/06/2018 | ||
% Last touched date: 30/12/2018 | ||
% | ||
% License: | ||
% code: GPLv3 | ||
% paper: A. Eftekhari, R. A. Hauser, and A. Grammenos retain their | ||
% respective copyrights (link: https://arxiv.org/abs/1806.01304) | ||
% code: GPLv3, author: A. Grammenos | ||
% paper: A. Eftekhari, R. Hauser, and A. Grammenos retain their respective | ||
% copyrights (pre-print link: https://arxiv.org/abs/1806.01304) | ||
% | ||
% | ||
|
||
%% Initialisation | ||
|
||
% clear/close everything | ||
clc; clear all; close all; | ||
clc; clear; close all; | ||
|
||
% enable for reproducibility, comment for (slightly) different | ||
% (~random) results | ||
|
@@ -45,6 +46,7 @@ | |
global datasetPath | ||
global use_fast_moses_only | ||
global use_offline_svds | ||
global use_fdr | ||
global use_blk_err | ||
global pdf_print | ||
global fig_print | ||
|
@@ -60,8 +62,8 @@ | |
global run_exp3 | ||
|
||
% experiments to run | ||
run_synthetic = 0; % run synthetic evaluation (set 0 to skip) | ||
run_real = 1; % run real data evaluation (set 0 to skip) | ||
run_synthetic = 1; % run synthetic evaluation (set 0 to skip) | ||
run_real = 0; % run real data evaluation (set 0 to skip) | ||
run_speed_test = 0; % run the calc. speed tests (set 0 to skip) | ||
run_moses_scaling = 0; % run the scaling moses tests (set 0 to skip) | ||
|
||
|
@@ -76,10 +78,13 @@ | |
use_fast_moses_only = 1;% speed up by using fast moses <-- USE IT :) | ||
use_offline_svds = 0; % drastically speed up execution by disabling | ||
% offline svds calculation WARNING THIS OPTION IS | ||
% PAINFULLY SLOW. <- DEf. DISABLE IT :) | ||
% PAINFULLY SLOW. <- DEF. DISABLE IT :) | ||
use_fdr = 0; % use robust fd -- same as fd but on the recon. | ||
% we normalise using a*Id; using the shifted | ||
% subspace by a*Id does not work well in our case. | ||
use_blk_err = 0; % calc. errors per block not per column | ||
% provides a DRASTIC improvement in speed but less | ||
% granular error reporting. For GROUSE it is 100 | ||
% granular error reporting. For GROUSE & FD it is 100 | ||
% for PM and MOSES is equal to their respective | ||
% block sizes for each run. <- Prob. use it | ||
|
||
|
@@ -156,7 +161,6 @@ | |
else | ||
fprintf("\n ** Running algorithm speed evaluation **\n"); | ||
|
||
|
||
% power law distribution params | ||
alpha = 1; | ||
% no. of trials | ||
|
@@ -178,6 +182,11 @@ | |
fprintf("\n !! Testing fat-r recovery n > r, with r=%d !!\n", r); | ||
speed_test(n_arr, r, alpha, trials) | ||
|
||
r = 100; % target rank | ||
fprintf("\n !! Testing super fat-r recovery n > r, with r=%d !!\n", r); | ||
speed_test(n_arr, r, alpha, trials) | ||
|
||
|
||
fprintf("\n ** Finished algorithm speed evaluation **\n"); | ||
end | ||
|
||
|
@@ -192,14 +201,12 @@ | |
|
||
n_arr = 200:200:1200; % ambient dimension array | ||
r_arr = 5:5:25; % r-rank | ||
m_blk_mul = 1:1:15; % block multiplier (we are bound by 2*r) | ||
m_blk_mul = 1:1:15; % block multiplier (we are bound by r) | ||
|
||
% Execute the scaling test | ||
moses_scaling(n_arr, r_arr, m_blk_mul); | ||
|
||
fprintf("\n ** Finished MOSES scaling evaluation **\n"); | ||
end | ||
|
||
%% Comparison script end. | ||
|
||
|
||
%% Comparison script end. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
function [Bout, ErrFro, T, Yr, t] = fd(Y, ell, no_err)
%FD Build a Frequent Directions sketch of Y, consuming it one row at a time.
%
% FD is based on Liberty et al.: https://arxiv.org/abs/1501.01711.pdf
%
% Inputs:
%   Y      - data matrix; each row is treated as one streamed sample.
%   ell    - number of sketch rows that are kept and returned.
%   no_err - (optional, default 1) set to 0 to record the reconstruction
%            error while streaming; any other value skips that work.
%
% Outputs:
%   Bout   - the first ell rows of the internal (2*ell)-row buffer.
%   ErrFro - recorded errors: sum of squared residuals over the rows seen
%            so far divided by the number of rows seen. Entries that were
%            never computed stay NaN.
%   T      - row indices at which ErrFro entries were recorded.
%   Yr     - Y * (Bout' * Bout), evaluated once after the stream ends.
%   t      - elapsed time as reported by my_toc.
%
% Globals:
%   use_blk_err - when equal to 1, errors are recorded every 100 rows
%                 instead of after every row.
%
% Author: Andreas Grammenos ([email protected])
%
% Last touched date: 30/12/2018
%
% License: GPLv3
%
  fprintf('\n ** Running regular FD...\n');

  % bring the shared error-granularity switch into scope
  global use_blk_err

  % error tracking is switched off unless the caller asks for it
  if nargin < 3
    no_err = 1;
  end

  % working buffer: twice the requested sketch height, as wide as Y
  [n_rows, n_cols] = size(Y);
  buf_rows = 2 * ell;
  sketch = zeros(buf_rows, n_cols);
  next_free = 1;

  % granularity used for block-wise error reporting
  blk_size = 100;
  blk_idx = 1;

  % pre-allocate the error trace and its matching index vector
  if use_blk_err == 1
    n_blks = floor(n_rows / blk_size);
    ErrFro = nan(1, n_blks);
    T = nan(1, n_blks);
  else
    ErrFro = nan(1, n_rows);
    T = 1:n_rows;
  end

  % start timing
  ts = tic;

  % stream the rows of Y through the buffer
  for k = 1:n_rows
    % once the write position reaches the last buffer row, shrink the
    % sketch to make room again
    if next_free >= buf_rows
      [sketch, next_free, ~] = fd_rotate_sketch(sketch, ell);
    end

    % store the incoming row and advance the write position
    sketch(next_free, :) = Y(k, :);
    next_free = next_free + 1;

    % record the running reconstruction error, if requested
    if no_err == 0
      if use_blk_err == 1
        % block mode: only sample at multiples of blk_size
        if mod(k, blk_size) == 0
          ErrFro(blk_idx) = fd_recon_err(Y, sketch, ell, k);
          T(blk_idx) = k;
          blk_idx = blk_idx + 1;
        end
      else
        % per-row mode: sample after every row
        ErrFro(k) = fd_recon_err(Y, sketch, ell, k);
      end
    end
  end

  % final outputs are derived from the top ell rows of the buffer
  top_rows = sketch(1:ell, :);
  Yr = Y * (top_rows' * top_rows);
  Bout = top_rows;

  % elapsed time for this run
  t = my_toc(ts);
end

function err = fd_recon_err(Y, sketch, ell, k)
%FD_RECON_ERR Squared residual of the first k rows of Y against their
%image under (B' * B), with B the top ell sketch rows, divided by k.
  seen = Y(1:k, :);
  approx = seen * (sketch(1:ell, :)' * sketch(1:ell, :));
  err = sum(sum((seen - approx).^2, 1)) / k;
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
function [B_out, nz_row, alpha] = fd_rotate_sketch(B_in, ell, alpha_prev)
%FD_ROTATE_SKETCH the main shrink and rotation step that is performed when
%a new row arrives from the stream and the sketch buffer is full.
%
% Inputs:
%   B_in       - current sketch buffer (rows are buffered samples).
%   ell        - number of directions to keep after shrinking.
%   alpha_prev - (optional, default 0) running regulariser value that the
%                returned alpha is accumulated onto.
%
% Outputs:
%   B_out  - buffer of the same size as B_in (grown by one row only if
%            nz_row would otherwise fall outside it) whose leading rows
%            hold the shrunk sketch and whose rows from nz_row onwards
%            are zero.
%   nz_row - index of the first free (zeroed) row in B_out.
%   alpha  - alpha_prev plus half of the squared ell-th singular value
%            that was subtracted; unchanged when no shrinking happened.
%
% Author: Andreas Grammenos ([email protected])
%
% Last touched date: 30/12/2018
%
% License: GPLv3
%
  % initialise output
  B_out = B_in;
  % default alpha value is zero, if we don't use it
  if nargin < 3
    alpha_prev = 0;
  end

  % economy-size SVD: B_in = U * S * V', with S square. Note that svd
  % returns V (right singular vectors as COLUMNS), not its transpose.
  [~, S, V] = svd(B_in, 'econ');
  Sd = diag(S);
  sd_rows = numel(Sd);

  if sd_rows >= ell
    % squared ell-th singular value: the amount removed from every
    % retained direction
    delta = Sd(ell)^2;
    shrunk_sketch = sqrt(Sd(1:ell).^2 - delta);
    % rebuild the sketch rows from the leading right singular vectors
    B_out(1:ell, :) = diag(shrunk_sketch) * V(:, 1:ell)';
    nz_row = ell + 1;
  else
    % fewer singular values than requested directions: nothing can be
    % shrunk, so keep every direction at full weight
    delta = 0;
    B_out(1:sd_rows, :) = S * V';
    nz_row = sd_rows + 1;
  end

  % clear everything from the first free row onwards so no stale samples
  % survive; max() keeps the original behaviour of always providing a
  % zeroed row at nz_row even if that grows the buffer by one row
  last_row = max(nz_row, size(B_out, 1));
  B_out(nz_row:last_row, :) = 0;

  % calculate the new regulariser value
  alpha = alpha_prev + delta / 2;
end
Oops, something went wrong.