Merge pull request #44 from Qihoo360/zhangys
show decay refine
zhangys-lucky authored Dec 25, 2020
2 parents b899dca + 3548339 commit 43521f7
Showing 12 changed files with 90 additions and 194 deletions.
138 changes: 34 additions & 104 deletions core/main/py_wrapper.cc
@@ -33,6 +33,15 @@ using std::string;

using namespace tensornet;

#define PYDICT_PARSE_KWARGS(kwargs, name, default_value) \
opt->name = default_value; \
{ \
PyObject* item = PyDict_GetItemString(kwargs.ptr(), #name); \
if (NULL != item) { \
opt->name = PyFloat_AsDouble(item); \
} \
}

PYBIND11_MODULE(_pywrap_tn, m) {
m.def("init", []() {
PsCluster* cluster = PsCluster::Instance();
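The new PYDICT_PARSE_KWARGS macro above replaces the hand-written "look up the key, keep the default if absent" blocks that the removed lines below repeat for every hyperparameter; like the real macro, it assumes a pointer named `opt` is in scope and stringizes the field name as the dict key. A minimal, self-contained sketch of the same pattern, using a plain std::map in place of the pybind11 kwargs dict (FakeOpt and PARSE_KWARG are illustrative stand-ins, not tensornet types):

```cpp
#include <iostream>
#include <map>
#include <string>

// Stand-in for an optimizer whose fields the kwargs parsing fills in.
struct FakeOpt {
    double learning_rate = 0.0;
    double show_decay_rate = 0.0;
};

// Same shape as one PYDICT_PARSE_KWARGS expansion: assign the default first,
// then overwrite it only if the key is present in the kwargs dict.
// Like the real macro, this relies on a pointer named `opt` being in scope.
#define PARSE_KWARG(kwargs, name, default_value)   \
    opt->name = default_value;                     \
    {                                              \
        auto it = (kwargs).find(#name);            \
        if (it != (kwargs).end()) {                \
            opt->name = it->second;                \
        }                                          \
    }

int main() {
    std::map<std::string, double> kwargs = {{"learning_rate", 0.05}};
    FakeOpt* opt = new FakeOpt();

    PARSE_KWARG(kwargs, learning_rate, 0.01);    // key present -> 0.05
    PARSE_KWARG(kwargs, show_decay_rate, 0.98);  // key absent  -> default 0.98

    std::cout << opt->learning_rate << " " << opt->show_decay_rate << "\n";
    delete opt;
    return 0;
}
```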
@@ -54,130 +63,51 @@ PYBIND11_MODULE(_pywrap_tn, m) {
return true;
})
.def("AdaGrad", [](py::kwargs kwargs) {
float learning_rate = 0.01;
float initial_g2sum = 0;
float initial_scale = 1;
float epsilon = 1e-8;
float grad_decay_rate = 1.0;
float mom_decay_rate = 1.0;
float show_decay_rate = 0.98;

PyObject* item = PyDict_GetItemString(kwargs.ptr(), "learning_rate");
if (NULL != item) {
learning_rate = PyFloat_AsDouble(item);
}
auto opt = new AdaGrad();

item = PyDict_GetItemString(kwargs.ptr(), "initial_g2sum");
if (NULL != item) {
initial_g2sum = PyFloat_AsDouble(item);
}
PYDICT_PARSE_KWARGS(kwargs, learning_rate, 0.01);
PYDICT_PARSE_KWARGS(kwargs, show_decay_rate, 0.98);
PYDICT_PARSE_KWARGS(kwargs, feature_drop_show, 1 - opt->show_decay_rate);

item = PyDict_GetItemString(kwargs.ptr(), "initial_scale");
if (NULL != item) {
initial_scale = PyFloat_AsDouble(item);
}
item = PyDict_GetItemString(kwargs.ptr(), "epsilon");
if (NULL != item) {
epsilon = PyFloat_AsDouble(item);
}
item = PyDict_GetItemString(kwargs.ptr(), "grad_decay_rate");
if (NULL != item) {
grad_decay_rate = PyFloat_AsDouble(item);
}
item = PyDict_GetItemString(kwargs.ptr(), "mom_decay_rate");
if (NULL != item) {
mom_decay_rate = PyFloat_AsDouble(item);
}
item = PyDict_GetItemString(kwargs.ptr(), "show_decay_rate");
if (NULL != item) {
show_decay_rate = PyFloat_AsDouble(item);
}

auto opt = new AdaGrad(learning_rate, initial_g2sum, initial_scale, epsilon,
grad_decay_rate, mom_decay_rate, show_decay_rate);
PYDICT_PARSE_KWARGS(kwargs, initial_g2sum, 0);
PYDICT_PARSE_KWARGS(kwargs, initial_scale, 1);
PYDICT_PARSE_KWARGS(kwargs, epsilon, 1e-8);
PYDICT_PARSE_KWARGS(kwargs, grad_decay_rate, 1.0);
PYDICT_PARSE_KWARGS(kwargs, mom_decay_rate, 1.0);

// NOTICE! opt will not be deleted until the system exits
PyObject* obj = PyCapsule_New(opt, nullptr, nullptr);

return py::reinterpret_steal<py::object>(obj);
})
.def("Adam", [](py::kwargs kwargs) {
float learning_rate = 0.001;
float beta1 = 0.9;
float beta2 = 0.999;
float epsilon = 1e-8;
float initial_scale = 1.0;

PyObject* item = PyDict_GetItemString(kwargs.ptr(), "learning_rate");
if (NULL != item) {
learning_rate = PyFloat_AsDouble(item);
}

item = PyDict_GetItemString(kwargs.ptr(), "beta1");
if (NULL != item) {
beta1 = PyFloat_AsDouble(item);
}

item = PyDict_GetItemString(kwargs.ptr(), "beta2");
if (NULL != item) {
beta2 = PyFloat_AsDouble(item);
}
auto opt = new Adam();

item = PyDict_GetItemString(kwargs.ptr(), "epsilon");
if (NULL != item) {
epsilon = PyFloat_AsDouble(item);
}
item = PyDict_GetItemString(kwargs.ptr(), "initial_scale");
if (NULL != item) {
initial_scale = PyFloat_AsDouble(item);
}
PYDICT_PARSE_KWARGS(kwargs, learning_rate, 0.001);
PYDICT_PARSE_KWARGS(kwargs, show_decay_rate, 0.98);
PYDICT_PARSE_KWARGS(kwargs, feature_drop_show, 1 - opt->show_decay_rate);

auto opt = new Adam(learning_rate, beta1, beta2, epsilon, initial_scale);
PYDICT_PARSE_KWARGS(kwargs, beta1, 0.9);
PYDICT_PARSE_KWARGS(kwargs, beta2, 0.999);
PYDICT_PARSE_KWARGS(kwargs, epsilon, 1e-8);
PYDICT_PARSE_KWARGS(kwargs, initial_scale, 1.0);

// NOTICE! opt will not be deleted until the system exits
PyObject* obj = PyCapsule_New(opt, nullptr, nullptr);

return py::reinterpret_steal<py::object>(obj);
})
.def("Ftrl", [](py::kwargs kwargs) {
float learning_rate = 0.05;
float initial_range = 0;
float beta = 1;
float lambda1 = 0.1;
float lambda2 = 1;
float show_decay_rate = 0.98;

PyObject* item = PyDict_GetItemString(kwargs.ptr(), "learning_rate");
if (NULL != item) {
learning_rate = PyFloat_AsDouble(item);
}
auto opt = new Ftrl();

item = PyDict_GetItemString(kwargs.ptr(), "initial_range");
if (NULL != item) {
initial_range = PyFloat_AsDouble(item);
}

item = PyDict_GetItemString(kwargs.ptr(), "beta");
if (NULL != item) {
beta = PyFloat_AsDouble(item);
}

item = PyDict_GetItemString(kwargs.ptr(), "lambda1");
if (NULL != item) {
lambda1 = PyFloat_AsDouble(item);
}

item = PyDict_GetItemString(kwargs.ptr(), "lambda2");
if (NULL != item) {
lambda2 = PyFloat_AsDouble(item);
}

item = PyDict_GetItemString(kwargs.ptr(), "show_decay_rate");
if (NULL != item) {
show_decay_rate = PyFloat_AsDouble(item);
}
PYDICT_PARSE_KWARGS(kwargs, learning_rate, 0.05);
PYDICT_PARSE_KWARGS(kwargs, show_decay_rate, 0.98);
PYDICT_PARSE_KWARGS(kwargs, feature_drop_show, 1 - opt->show_decay_rate);

auto opt = new Ftrl(learning_rate, initial_range, beta, lambda1, lambda2, show_decay_rate);
PYDICT_PARSE_KWARGS(kwargs, beta, 1);
PYDICT_PARSE_KWARGS(kwargs, lambda1, 0.1);
PYDICT_PARSE_KWARGS(kwargs, lambda2, 1);
PYDICT_PARSE_KWARGS(kwargs, initial_scale, 1.0);

// NOTICE! opt will not be deleted until the system exits
PyObject* obj = PyCapsule_New(opt, nullptr, nullptr);
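Each optimizer factory above wraps the newly constructed object in a PyCapsule with a null name and null destructor, which is why the NOTICE comment warns that the object is never freed. A hedged sketch of how the pointer can be recovered from such a capsule on the C++ side (UnwrapOptimizer and OptimizerBase are illustrative names, not tensornet's actual consumer):

```cpp
#include <Python.h>

struct OptimizerBase;  // opaque placeholder for whatever type the capsule carries

// Illustrative only: recover the raw pointer stored by
// PyCapsule_New(opt, nullptr, nullptr). The name passed here must match the
// name used at creation time, which is nullptr in py_wrapper.cc.
OptimizerBase* UnwrapOptimizer(PyObject* capsule) {
    void* raw = PyCapsule_GetPointer(capsule, nullptr);
    if (raw == nullptr) {
        PyErr_Print();  // PyCapsule_GetPointer sets an exception on mismatch
        return nullptr;
    }
    return static_cast<OptimizerBase*>(raw);
}
```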
10 changes: 3 additions & 7 deletions core/ps/optimizer/ada_grad_kernel.cc
@@ -89,7 +89,7 @@ SparseAdaGradValue::SparseAdaGradValue(int dim, const AdaGrad* opt) {
}

void SparseAdaGradValue::Apply(const AdaGrad* opt, SparseGradInfo& grad_info, int dim) {
show_ += grad_info.batch_show;
delta_show += grad_info.batch_show;

float* w = Weight();

@@ -112,7 +112,7 @@ void SparseAdaGradValue::Serialize(std::ostream& os, int dim) {
}

os << g2sum_ << "\t";
os << show_;
os << show;
}

void SparseAdaGradValue::DeSerialize(std::istream& is, int dim) {
@@ -121,11 +121,7 @@ void SparseAdaGradValue::DeSerialize(std::istream& is, int dim) {
}

is >> g2sum_;
is >> show_;
}

void SparseAdaGradValue::ShowDecay(const AdaGrad* opt) {
show_ *= opt->show_decay_rate;
is >> show;
}

} // namespace tensornet
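Serialize and DeSerialize above write the per-dimension weights, g2sum and the now-inherited show counter as tab-separated text. A small round-trip sketch of that format using a toy struct (ToyValue is illustrative; the real class handles a dim-sized weight array through its trailing data_ storage):

```cpp
#include <iostream>
#include <sstream>

// Toy stand-in that mirrors the tab-separated text format used by
// SparseAdaGradValue::Serialize / DeSerialize.
struct ToyValue {
    float w[2] = {0, 0};
    float g2sum = 0;
    float show = 0;

    void Serialize(std::ostream& os) const {
        for (float x : w) { os << x << "\t"; }
        os << g2sum << "\t";
        os << show;
    }

    void DeSerialize(std::istream& is) {
        for (float& x : w) { is >> x; }
        is >> g2sum;
        is >> show;
    }
};

int main() {
    ToyValue a;
    a.w[0] = 0.5f; a.w[1] = -0.25f; a.g2sum = 1.5f; a.show = 3.0f;

    std::stringstream ss;
    a.Serialize(ss);

    ToyValue b;
    b.DeSerialize(ss);
    std::cout << b.w[0] << " " << b.w[1] << " " << b.g2sum << " " << b.show << "\n";
    return 0;
}
```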
6 changes: 2 additions & 4 deletions core/ps/optimizer/ada_grad_kernel.h
@@ -53,7 +53,8 @@ class DenseAdaGradValue {
std::ostream& operator<<(std::ostream& os, const DenseAdaGradValue& value);
std::istream& operator>>(std::istream& is, DenseAdaGradValue& value);

struct alignas(4) SparseAdaGradValue {
struct alignas(4) SparseAdaGradValue
: public SparseOptValue {
public:
SparseAdaGradValue(int dim, const AdaGrad* opt);

@@ -73,15 +74,12 @@ struct alignas(4) SparseAdaGradValue {

void Apply(const AdaGrad* opt, SparseGradInfo& grad_info, int dim);

void ShowDecay(const AdaGrad* opt);

void Serialize(std::ostream& os, int dim);

void DeSerialize(std::istream& is, int dim);

private:
float g2sum_;
float show_ = 0.0;
float data_[0];
};
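SparseAdaGradValue (like the Adam and Ftrl values below) ends with a zero-length data_[0] array, a GCC/Clang extension: the fixed header fields, including the show/delta_show members now inherited from SparseOptValue, are followed directly by dim per-dimension floats in the same allocation. A hedged sketch of how such a value is typically created (ValueWithTail, AllocValue and FreeValue are illustrative, not tensornet's allocator):

```cpp
#include <new>

// Minimal analogue of a sparse value with a zero-length trailing array.
// data_[0] is a GCC/Clang extension; the payload lives right after the header.
struct alignas(4) ValueWithTail {
    float g2sum = 0.0f;
    float data_[0];

    float* Weight() { return data_; }
};

// Hypothetical helper: reserve the header plus dim trailing floats in one
// block, then construct the header in place.
ValueWithTail* AllocValue(int dim) {
    void* mem = ::operator new(sizeof(ValueWithTail) + sizeof(float) * dim);
    return new (mem) ValueWithTail();
}

void FreeValue(ValueWithTail* value) {
    value->~ValueWithTail();
    ::operator delete(value);
}
```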

6 changes: 3 additions & 3 deletions core/ps/optimizer/adam_kernel.cc
@@ -104,7 +104,7 @@ SparseAdamValue::SparseAdamValue(int dim, const Adam* opt) {
}

void SparseAdamValue::Apply(const Adam* opt, SparseGradInfo& grad_info, int dim) {
show_ += grad_info.batch_show;
delta_show += grad_info.batch_show;

float* w = Weight();
float* m = M(dim);
@@ -129,7 +129,7 @@ void SparseAdamValue::Serialize(std::ostream& os, int dim) {
os << v[i] << "\t";
}

os << show_;
os << show;
}

void SparseAdamValue::DeSerialize(std::istream& is, int dim) {
@@ -143,7 +143,7 @@ void SparseAdamValue::DeSerialize(std::istream& is, int dim) {
is >> v[i];
}

is >> show_;
is >> show;
}

} // namespace tensornet
6 changes: 2 additions & 4 deletions core/ps/optimizer/adam_kernel.h
@@ -56,7 +56,8 @@ class DenseAdamValue {
std::ostream& operator<<(std::ostream& os, const DenseAdamValue& value);
std::istream& operator>>(std::istream& is, DenseAdamValue& value);

struct alignas(4) SparseAdamValue {
struct alignas(4) SparseAdamValue
: public SparseOptValue {
public:
SparseAdamValue(int dim, const Adam* opt);
~SparseAdamValue() = default;
@@ -75,8 +76,6 @@ struct alignas(4) SparseAdamValue {

void Apply(const Adam* opt, SparseGradInfo& grad_info, int dim);

void ShowDecay(const Adam* opt) {}

void Serialize(std::ostream& os, int dim);

void DeSerialize(std::istream& is, int dim);
@@ -100,7 +99,6 @@ struct alignas(4) SparseAdamValue {
}

private:
float show_ = 0.0;
float data_[0];
};

10 changes: 10 additions & 0 deletions core/ps/optimizer/data_struct.h
@@ -22,6 +22,16 @@ struct SparseGradInfo {
int batch_show;
};

struct alignas(4) SparseOptValue {
float show = 0.0;
int delta_show = 0;

void ShowDecay(float decay_rate) {
show = (1 - decay_rate) * delta_show + decay_rate * show;
delta_show = 0;
}
};

} // namespace tensornet

#endif // !TENSORNET_OPTIMIZER_DATA_STRUCT_H_
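The new SparseOptValue base centralizes the show statistics: Apply() in each optimizer only accumulates delta_show, and ShowDecay folds that interval count into an exponentially decayed running show. A self-contained sketch of the update with made-up batch counts (for illustration the decay is applied after every batch; in the real code it is triggered separately):

```cpp
#include <iostream>

// Same update as SparseOptValue::ShowDecay: an exponential moving average of
// the show counts accumulated since the previous decay step.
struct ShowStat {
    float show = 0.0f;
    int delta_show = 0;

    void ShowDecay(float decay_rate) {
        show = (1 - decay_rate) * delta_show + decay_rate * show;
        delta_show = 0;
    }
};

int main() {
    ShowStat stat;
    const float decay_rate = 0.98f;          // default show_decay_rate in py_wrapper.cc
    const int batch_shows[] = {100, 50, 0};  // made-up per-batch show counts

    for (int batch_show : batch_shows) {
        stat.delta_show += batch_show;  // what each optimizer's Apply() now does
        stat.ShowDecay(decay_rate);     // 0.02 * delta_show + 0.98 * show
        std::cout << "show = " << stat.show << "\n";  // 2, 2.96, then ~2.9008
    }
    return 0;
}
```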
12 changes: 5 additions & 7 deletions core/ps/optimizer/ftrl_kernel.cc
@@ -50,13 +50,15 @@ SparseFtrlValue::SparseFtrlValue(int dim, const Ftrl* opt) {
float* n = N(dim);

for (int i = 0; i < dim; ++i) {
w[i] = distribution(reng) * opt->initial_range;
w[i] = distribution(reng) * opt->initial_scale;
z[i] = 0;
n[i] = 0;
}
}

void SparseFtrlValue::Apply(const Ftrl* opt, SparseGradInfo& grad_info, int dim) {
delta_show += grad_info.batch_show;

float* w = Weight();
float* z = Z(dim);
float* n = N(dim);
@@ -90,7 +92,7 @@ void SparseFtrlValue::Serialize(std::ostream& os, int dim) {
os << n[i] << "\t";
}

os << show_;
os << show;
}

void SparseFtrlValue::DeSerialize(std::istream& is, int dim) {
@@ -104,11 +106,7 @@ void SparseFtrlValue::DeSerialize(std::istream& is, int dim) {
is >> n[i];
}

is >> show_;
}

void SparseFtrlValue::ShowDecay(const Ftrl* opt) {
show_ *= opt->show_decay_rate;
is >> show;
}

} // namespace tensornet
6 changes: 2 additions & 4 deletions core/ps/optimizer/ftrl_kernel.h
@@ -52,7 +52,8 @@ class DenseFtrlValue {
std::ostream& operator<<(std::ostream& os, const DenseFtrlValue& value);
std::istream& operator>>(std::istream& is, DenseFtrlValue& value);

struct alignas(4) SparseFtrlValue {
struct alignas(4) SparseFtrlValue
: public SparseOptValue {
public:
SparseFtrlValue(int dim, const Ftrl* opt);

@@ -72,8 +73,6 @@ struct alignas(4) SparseFtrlValue {

void Apply(const Ftrl* opt, SparseGradInfo& grad_info, int dim);

void ShowDecay(const Ftrl* opt);

void Serialize(std::ostream& os, int dim);

void DeSerialize(std::istream& is, int dim);
@@ -96,7 +95,6 @@ struct alignas(4) SparseFtrlValue {
}

private:
float show_ = 0.0;
float data_[0];
};
