This repository has been archived by the owner on Oct 8, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 153
news20 binary classification AdaGradRDA AdaGrad AdaDelta
myui edited this page Sep 9, 2014
·
7 revisions
-- Add the Hivemall jar (the with-dependencies build) to this Hive session.
add jar ./tmp/hivemall-with-dependencies.jar;
-- Run the define-all.hive script. Presumably it registers the Hivemall functions used below (TODO confirm).
source ./tmp/define-all.hive;
-- Make news20 the current database for every statement that follows.
use news20;
#[AdaGradRDA]
Note that AdaGradRDA can only be applied to classification, not to regression.
-- Train the AdaGradRDA model and materialize it as news20b_adagrad_rda_model1.
-- The inner query calls train_adagrad_rda on addBias(features) and label for the
-- rows of news20b_train_x3 and emits (feature, weight) pairs. The outer query
-- combines the weights of each feature with voted_avg and stores the result as float.
-- The if exists clause keeps the script re-runnable when the table is absent,
-- independent of the hive.exec.drop.ignorenonexistent setting.
drop table if exists news20b_adagrad_rda_model1;
create table news20b_adagrad_rda_model1 as
select
  feature,
  cast(voted_avg(weight) as float) as weight
from (
  select
    train_adagrad_rda(addBias(features), label) as (feature, weight)
  from
    news20b_train_x3
) t
group by
  feature;
-- Score every test row with the AdaGradRDA model.
-- Each row of news20b_test_exploded is left-outer-joined to the model weight of its
-- feature, and the products weight * value are summed per rowid into total_weight.
-- label is 1 when that sum is greater than 0.0 and -1 otherwise. A rowid whose
-- features all miss the model gets a NULL sum and so falls into the else branch.
create or replace view news20b_adagrad_rda_predict1 as
select
  te.rowid,
  sum(mdl.weight * te.value) as total_weight,
  case
    when sum(mdl.weight * te.value) > 0.0 then 1
    else -1
  end as label
from
  news20b_test_exploded te
  left outer join news20b_adagrad_rda_model1 mdl
    on (te.feature = mdl.feature)
group by
  te.rowid;
-- Pair the label stored in news20b_test (actual) with the AdaGradRDA prediction
-- for the same rowid (predicted).
create or replace view news20b_adagrad_rda_submit1 as
select
  truth.label as actual,
  guess.label as predicted
from
  news20b_test truth
  join news20b_adagrad_rda_predict1 guess
    on (truth.rowid = guess.rowid);
-- Accuracy of AdaGradRDA: number of rows where actual equals predicted, divided by 4996.
-- NOTE(review): 4996 is presumably the row count of news20b_test (TODO confirm).
select
  count(1) / 4996
from
  news20b_adagrad_rda_submit1
where
  actual = predicted;
SCW1 0.9661729383506805
ADAGRAD+RDA 0.9677742193755005
#[AdaGrad]
Note that AdaGrad is better suited for regression problems.
-- Train the AdaGrad model and materialize it as news20b_adagrad_model1.
-- The inner query calls adagrad on addBias(features) and convert_label(label) for
-- the rows of news20b_train_x3 and emits (feature, weight) pairs. The outer query
-- combines the weights of each feature with voted_avg and stores the result as float.
-- NOTE(review): convert_label presumably remaps the -1/1 labels for the regressor (TODO confirm).
-- The if exists clause keeps the script re-runnable when the table is absent,
-- independent of the hive.exec.drop.ignorenonexistent setting.
drop table if exists news20b_adagrad_model1;
create table news20b_adagrad_model1 as
select
  feature,
  cast(voted_avg(weight) as float) as weight
from (
  select
    adagrad(addBias(features), convert_label(label)) as (feature, weight)
  from
    news20b_train_x3
) t
group by
  feature;
-- Score every test row with the AdaGrad model.
-- Each row of news20b_test_exploded is left-outer-joined to the model weight of its
-- feature, the products weight * value are summed per rowid, and the sum is passed
-- through sigmoid. label is 1 when the sigmoid output is at least 0.5 and -1 otherwise.
create or replace view news20b_adagrad_predict1 as
select
  te.rowid,
  case
    when sigmoid(sum(mdl.weight * te.value)) >= 0.5 then 1
    else -1
  end as label
from
  news20b_test_exploded te
  left outer join news20b_adagrad_model1 mdl
    on (te.feature = mdl.feature)
group by
  te.rowid;
-- Pair the label stored in news20b_test (actual) with the AdaGrad prediction
-- for the same rowid (predicted).
create or replace view news20b_adagrad_submit1 as
select
  truth.label as actual,
  guess.label as predicted
from
  news20b_test truth
  join news20b_adagrad_predict1 guess
    on (truth.rowid = guess.rowid);
-- Accuracy of AdaGrad: number of rows where actual equals predicted, divided by 4996.
-- NOTE(review): 4996 is presumably the row count of news20b_test (TODO confirm).
select
  count(1) / 4996
from
  news20b_adagrad_submit1
where
  actual = predicted;
0.9549639711769415 (adagrad)
#[AdaDelta]
Note that AdaDelta is better suited for regression problems.
-- Train the AdaDelta model and materialize it as news20b_adadelta_model1.
-- The inner query calls adadelta on addBias(features) and convert_label(label) for
-- the rows of news20b_train_x3 and emits (feature, weight) pairs. The outer query
-- combines the weights of each feature with voted_avg and stores the result as float.
-- NOTE(review): convert_label presumably remaps the -1/1 labels for the regressor (TODO confirm).
-- The if exists clause keeps the script re-runnable when the table is absent,
-- independent of the hive.exec.drop.ignorenonexistent setting.
drop table if exists news20b_adadelta_model1;
create table news20b_adadelta_model1 as
select
  feature,
  cast(voted_avg(weight) as float) as weight
from (
  select
    adadelta(addBias(features), convert_label(label)) as (feature, weight)
  from
    news20b_train_x3
) t
group by
  feature;
-- Score every test row with the AdaDelta model.
-- Each row of news20b_test_exploded is left-outer-joined to the model weight of its
-- feature, the products weight * value are summed per rowid, and the sum is passed
-- through sigmoid. label is 1 when the sigmoid output is at least 0.5 and -1 otherwise.
create or replace view news20b_adadelta_predict1 as
select
  te.rowid,
  case
    when sigmoid(sum(mdl.weight * te.value)) >= 0.5 then 1
    else -1
  end as label
from
  news20b_test_exploded te
  left outer join news20b_adadelta_model1 mdl
    on (te.feature = mdl.feature)
group by
  te.rowid;
-- Pair the label stored in news20b_test (actual) with the AdaDelta prediction
-- for the same rowid (predicted).
create or replace view news20b_adadelta_submit1 as
select
  truth.label as actual,
  guess.label as predicted
from
  news20b_test truth
  join news20b_adadelta_predict1 guess
    on (truth.rowid = guess.rowid);
-- Accuracy of AdaDelta: number of rows where actual equals predicted, divided by 4996.
-- NOTE(review): 4996 is presumably the row count of news20b_test (TODO confirm).
select
  count(1) / 4996
from
  news20b_adadelta_submit1
where
  actual = predicted;
0.9549639711769415 (adagrad)
0.9545636509207366 (adadelta)
Note that AdaDelta often performs better than AdaGrad, although on this dataset AdaGrad (0.9550) scores marginally higher than AdaDelta (0.9546).