This repository has been archived by the owner on Jan 24, 2024. It is now read-only.

Update release v03 (#1528)
* op unittest for cbrt/ceil/cholesky/concat/constant/fill_constant (#1495)

* op unittest for cbrt

* op unittest for ceil

* op unittest for cholesky

* op unittest for concat

* op unittest for constant

* add 4d test for constant op

* fix ci

* op unittest for fill_constant

* op unittest for fill_constant

* refine

* fix(schedule): fix SimpleComputeAt primitive (#1504)

* Fix reduce cast schedule bug (#1512)

* fix(fuse): fix reduce cast schedule bug

* test(fuse): add unittest for reduce_cast subgroup

* Refactor some op tests and fix bugs (#1515)

* Add depthwise_conv2d op test

* Refactor log op test

* Refactor round op test and fix bugs

* Only test depthwise_conv2d in cuda_cudnn

* op unittest for repeat/arange/reverse/elementwise_add_grad/flip (#1514)

* op unittest for repeat op

* add repeat frontend

* op unittest for repeat

* op unittest for arange

* op unittest for reverse

* format & remove old add op test

* op unittest for flip && remove redundant flip implementation

* remove test_add_op_new.py

* update reverse

* Refactor some op tests and fix bugs (#1513)

* Refactor op isclose test

* Refactor op logical_right_shift and add more dtypes support

* Refactor pow op test and fix bugs

* Refactor lookup_table op test

* Add logical_right_shift host function proto

* Improve isclose test case

* Fixed jitify commit to prevent header file conflicts (#1522)

* Fixed jitify commit to prevent header file conflicts

* Set random seed for debug floor_divide

* Avoid oom error

* Just for debug ci

* Fix floor_divide error when input dtype is int

* Fix bugs and add more tests for floor_divide

* Experimental PR for the first OP to clean old schedule (#1524)

---------

Co-authored-by: zzk0 <[email protected]>
Co-authored-by: Fisher <[email protected]>
Co-authored-by: Huihuang Zheng <[email protected]>
4 people authored Jun 19, 2023
1 parent df023dd commit 416712a
Showing 43 changed files with 3,150 additions and 1,408 deletions.
17 changes: 1 addition & 16 deletions cinn/frontend/net_builder.cc
@@ -246,17 +246,6 @@ Placeholder NetBuilder::CreateInput(const Variable& var) {
return Placeholder(var);
}

Variable NetBuilder::FillConstant(
const std::vector<int>& shape, float value, const std::string& name, const std::string& dtype, bool force_cpu) {
auto out =
CustomInstr("fill_constant", {}, {{"shape", shape}, {"value", value}, {"dtype", dtype}, {"force_cpu", force_cpu}})
.front();
if (!name.empty()) {
out.set_id(cinn::utils::TransValidVarName(name));
}
return out;
}

Variable NetBuilder::FillConstant(const std::vector<int>& shape,
const std::string& str_value,
const std::string& name,
@@ -827,11 +816,7 @@ Variable NetBuilder::Arange(const float start, const float stop, const float ste
}

Variable NetBuilder::Flip(const Variable& operand, const std::vector<int>& axes) {
Instruction instr("flip", {operand});
instr.SetAttr("axes", axes);
InferShape(instr);
AppendInstruction(instr);
return instr.GetOutput(0);
return CustomInstr("reverse", {operand}, {{"axis", utils::GetPositiveAxes(axes, operand->shape.size())}}).front();
}

Variable NetBuilder::Matmul(const Variable& x, const Variable& y, bool trans_x, bool trans_y, float alpha) {
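The net_builder.cc change above reimplements `Flip` as a thin wrapper over the existing `reverse` instruction, after normalizing the axes to non-negative values. A minimal standalone sketch of that behaviour (plain C++; `GetPositiveAxes` and the flip loop below are illustrative stand-ins, not the CINN implementation):

```cpp
// flip_as_reverse.cc -- standalone illustration, not CINN code.
// Flipping along a set of axes is the same as reversing the index on each
// listed axis, once negative axes are mapped into [0, rank).
#include <algorithm>
#include <cassert>
#include <vector>

// Analogue of utils::GetPositiveAxes: map negative axes into [0, rank).
std::vector<int> GetPositiveAxes(const std::vector<int>& axes, int rank) {
  std::vector<int> out;
  for (int a : axes) out.push_back(a < 0 ? a + rank : a);
  return out;
}

// Reverse (flip) a row-major [C, H, W] buffer along the listed axes.
std::vector<float> Flip3D(const std::vector<float>& in, int C, int H, int W,
                          std::vector<int> axes) {
  axes = GetPositiveAxes(axes, /*rank=*/3);
  auto on = [&](int axis) {
    return std::find(axes.begin(), axes.end(), axis) != axes.end();
  };
  std::vector<float> out(in.size());
  for (int c = 0; c < C; ++c)
    for (int h = 0; h < H; ++h)
      for (int w = 0; w < W; ++w) {
        int sc = on(0) ? C - 1 - c : c;
        int sh = on(1) ? H - 1 - h : h;
        int sw = on(2) ? W - 1 - w : w;
        out[(c * H + h) * W + w] = in[(sc * H + sh) * W + sw];
      }
  return out;
}

int main() {
  std::vector<float> x = {0, 1, 2, 3, 4, 5, 6, 7};  // shape [2, 2, 2]
  auto y = Flip3D(x, 2, 2, 2, {-3});                // -3 normalizes to axis 0
  assert(y[0] == 4 && y[7] == 3);                   // planes along axis 0 swapped
  return 0;
}
```

The removed `program_execute_flip` test in net_builder_test.cc below checked the same input/output relation.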
23 changes: 18 additions & 5 deletions cinn/frontend/net_builder.h
@@ -350,7 +350,7 @@ class NetBuilder {
const std::string& id_hint = "");

/**
* @brief Create constant tensor with the specific value/vector and type, the type is infered from value.
* @brief Create constant tensor with the specific value/vector and type
* @param value The constant value to be set.
* @param name The name of output variable.
* @return The result variable.
@@ -408,11 +408,21 @@
* @param force_cpu Whether the variable should force placed in cpu, default in device memory. Default is false.
* @return The result variable.
*/
template <typename T = float>
Variable FillConstant(const cinn::utils::ShapeType& shape,
float value,
T value,
const std::string& name,
const std::string& dtype,
bool force_cpu = false);
bool force_cpu = false) {
auto out =
CustomInstr(
"fill_constant", {}, {{"shape", shape}, {"value", value}, {"dtype", dtype}, {"force_cpu", force_cpu}})
.front();
if (!name.empty()) {
out.set_id(cinn::utils::TransValidVarName(name));
}
return out;
}

/**
* @brief The op return a variable with the specific string value, shape and type.
@@ -442,7 +452,7 @@
T value,
const std::string& name = "",
bool force_cpu = false) {
return FillConstant(shape, static_cast<float>(value), name, common::Type2Str(common::type_of<T>()), force_cpu);
return FillConstant<T>(shape, value, name, common::Type2Str(common::type_of<T>()), force_cpu);
}

/**
@@ -891,7 +901,10 @@
const std::string& padding_algorithm = "EXPLICIT");

/**
* This API flipes the Variable x along the given axis.
* @brief This API reverse the Variable x along the given axis.
* @param x An N-D variable.
* @param axis Specify the axis to operate on the input reverse.
* @return A reversed variable with the same data type as x.
*/
Variable Flip(const Variable& operand, const std::vector<int>& axes);

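The net_builder.h diff above turns `FillConstant` into a template on the value type, so the convenience overload now forwards `value` as `T` instead of `static_cast<float>(value)`. One effect worth noting (my inference, not stated in the commit): a float detour silently corrupts integer constants above 2^24. A standalone check of that failure mode, using hypothetical names unrelated to CINN:

```cpp
// fill_constant_precision.cc -- standalone illustration of why forwarding the
// constant as T can matter, compared with routing it through float first.
#include <cstdint>
#include <iostream>

int main() {
  int64_t value = 16777217;  // 2^24 + 1, not exactly representable as float
  float as_float = static_cast<float>(value);
  int64_t round_tripped = static_cast<int64_t>(as_float);

  std::cout << "original:      " << value << "\n";          // 16777217
  std::cout << "via float:     " << round_tripped << "\n";  // 16777216, value lost
  std::cout << "kept as int64: " << value << "\n";          // exact when kept as T
  return 0;
}
```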
70 changes: 0 additions & 70 deletions cinn/frontend/net_builder_test.cc
@@ -984,76 +984,6 @@ TEST(net_build, program_execute_arange_int) {
}
}

TEST(net_build, program_execute_flip) {
const int C = 2;
const int H = 2;
const int W = 2;
const std::vector<int> axes{0};

NetBuilder builder("net_builder");
Placeholder input = builder.CreateInput(Float(32), {C, H, W}, "Img");
Variable output = builder.Flip(input, axes);
auto program = builder.Build();

#ifdef CINN_WITH_CUDA
Target target = common::DefaultNVGPUTarget();
#else
Target target = common::DefaultHostTarget();
#endif
std::unordered_set<std::string> fetch_ids;
auto graph = Optimize(&program, fetch_ids, target);

auto scope = BuildScope(target, graph);
hlir::framework::GraphCompiler gc(target, scope, graph);
auto runtime_program = gc.Build();

scope->Var<hlir::framework::Tensor>(std::string(input.id()));
scope->Var<hlir::framework::Tensor>(std::string(output->id));

auto input_tensor = scope->GetTensor(std::string(input.id()));
SetRandData<float>(input_tensor, target);
std::vector<float> input_data = GetTensorData<float>(input_tensor, target);

runtime_program->Execute();
auto output_tensor = scope->GetTensor(std::string(output->id));
const std::vector<int>& output_shape = output_tensor->shape().data();
EXPECT_EQ(output_tensor->type(), Float(32));
EXPECT_EQ(output_shape.size(), 3UL);
EXPECT_EQ(output_shape[0], C);
EXPECT_EQ(output_shape[1], H);
EXPECT_EQ(output_shape[2], W);

std::vector<float> output_data = GetTensorData<float>(output_tensor, target);
VLOG(6) << "Visualize flip input_data";
for (int c = 0; c < C; c++) {
for (int h = 0; h < H; h++) {
std::string line;
for (int w = 0; w < W; w++) {
int index = c * (H * W) + h * W + w;
line += (std::to_string(index) + ": " + std::to_string(input_data[index]) + ", ");
}
VLOG(6) << line;
}
}

VLOG(6) << "Visualize flip output_data";
for (int c = 0; c < C; c++) {
int flip_c = std::find(axes.begin(), axes.end(), 0) == axes.end() ? c : C - c - 1;
for (int h = 0; h < H; h++) {
std::string line;
int flip_h = std::find(axes.begin(), axes.end(), 1) == axes.end() ? h : H - h - 1;
for (int w = 0; w < W; w++) {
int flip_w = std::find(axes.begin(), axes.end(), 2) == axes.end() ? w : W - w - 1;
int flip_index = flip_c * H * W + flip_h * W + flip_w;
int index = c * (H * W) + h * W + w;
line += (std::to_string(index) + ": " + std::to_string(output_data[index]) + ", ");
EXPECT_EQ(input_data[index], output_data[flip_index]);
}
VLOG(6) << line;
}
}
}

TEST(net_build, program_argmax_case1) {
const int N = 4;
const int IN_C = 3;
63 changes: 59 additions & 4 deletions cinn/hlir/framework/op_lowering_util.cc
@@ -935,10 +935,10 @@ void LoopAssignReduce(ir::IRSchedule& ir_sch,
};

auto node_shape = shape_dict.at(node_data->id());
// node output is same shape with reduce output.
// The output shape of node is different from that of reduce node
if (std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>()) !=
std::accumulate(node_shape.begin(), node_shape.end(), 1, std::multiplies<int>())) {
// split loop to assign master loop
// get loop factors of reduce node
int extend = 1;
std::vector<int> factors;
loops = ir_sch.GetLoops(node_data->id());
@@ -953,8 +953,63 @@
factors.push_back(loop.As<ir::For>()->extent.as_int32());
}

ir_sch.Split(loops.back(), factors);
loops = ir_sch.GetLoops(node_data->id());
// If there are IfThenElse stmt in loop, we need to find out the indices in condition,
// and special treatment should be applied to loops with these indices.
// We apply two step split on loop of src node to align the loop of reduce node.
std::unordered_set<int> loop_index_in_if;
auto first_reduce_loop = rloops.front();
// collect if
auto if_checker = [](const Expr* x) { return x->As<ir::IfThenElse>(); };
auto if_set = ir::CollectIRNodesWithoutTensor(first_reduce_loop.As<ir::For>()->body, if_checker);
std::string reduce_block_name = reducer_data->id();
for (auto if_expr : if_set) {
auto checker = [reduce_block_name](const Expr* x) {
return x->As<ir::ScheduleBlockRealize>() &&
x->As<ir::ScheduleBlockRealize>()->schedule_block.As<ir::ScheduleBlock>()->name == reduce_block_name;
};
auto blocks_in_if = ir::CollectIRNodesWithoutTensor(if_expr, checker);
if (!blocks_in_if.empty()) {
ir::Expr condition = if_expr.As<ir::IfThenElse>()->condition;
auto indices_in_if =
ir::CollectIRNodesWithoutTensor(condition, [](const Expr* x) { return x->As<ir::_Var_>(); });
for (int i = 0; i < rloops.size(); ++i) {
std::string var_name = rloops[i].As<ir::For>()->loop_var->name;
auto find_var_iter = std::find_if(indices_in_if.begin(), indices_in_if.end(), [&var_name](const ir::Expr& x) {
return x.As<ir::_Var_>()->name == var_name;
});
if (find_var_iter != indices_in_if.end()) {
loop_index_in_if.insert(i);
}
}
break;
}
}

// prepare factors of two step split
std::vector<int> first_step_factors;
std::vector<int> second_step_factors;
int second_start_loop_index;
for (int i = 0; i < factors.size(); ++i) {
if (loop_index_in_if.count(i) == 0) {
first_step_factors.push_back(factors[i]);
} else if (loop_index_in_if.count(i) != 0 && second_step_factors.empty()) {
first_step_factors.push_back(-1);
second_step_factors.push_back(factors[i]);
second_start_loop_index = i;
} else if (loop_index_in_if.count(i) != 0 && !second_step_factors.empty()) {
second_step_factors.push_back(factors[i]);
}
}
// do two step split
if (!first_step_factors.empty()) {
ir_sch.Split(loops.back(), first_step_factors);
loops = ir_sch.GetLoops(node_data->id());
}
if (!second_step_factors.empty()) {
ir_sch.Split(loops.at(second_start_loop_index), second_step_factors);
loops = ir_sch.GetLoops(node_data->id());
}

// copy loop info form rloops.
copy_loop_info(loops, rloops);
return;
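The op_lowering_util.cc hunk above splits the source loop in two steps: factors whose loop indices do not appear in an `IfThenElse` condition go into the first split, a single -1 placeholder stands in for the run of condition-bound factors, and those condition-bound factors are then split out of the placeholder loop in a second pass. A standalone sketch of just that factor bookkeeping, assuming the same `factors` vector and set of condition-bound loop indices the diff works with (not CINN code):

```cpp
// two_step_split_factors.cc -- standalone illustration of the factor
// preparation for the two-step split in LoopAssignReduce.
#include <cassert>
#include <unordered_set>
#include <vector>

struct SplitPlan {
  std::vector<int> first_step;   // factors for the first Split call
  std::vector<int> second_step;  // factors for the second Split call
  int second_start_index = -1;   // loop index where the second split begins
};

SplitPlan PlanTwoStepSplit(const std::vector<int>& factors,
                           const std::unordered_set<int>& loop_index_in_if) {
  SplitPlan plan;
  for (int i = 0; i < static_cast<int>(factors.size()); ++i) {
    if (loop_index_in_if.count(i) == 0) {
      plan.first_step.push_back(factors[i]);
    } else if (plan.second_step.empty()) {
      // First condition-bound factor: leave a -1 placeholder so the first
      // split keeps the condition-bound extents fused in one loop.
      plan.first_step.push_back(-1);
      plan.second_step.push_back(factors[i]);
      plan.second_start_index = i;
    } else {
      plan.second_step.push_back(factors[i]);
    }
  }
  return plan;
}

int main() {
  // Reduce loops with extents {8, 16, 32, 4}; loops 1 and 2 appear in the if.
  auto plan = PlanTwoStepSplit({8, 16, 32, 4}, {1, 2});
  assert((plan.first_step == std::vector<int>{8, -1, 4}));
  assert((plan.second_step == std::vector<int>{16, 32}));
  assert(plan.second_start_index == 1);
  return 0;
}
```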
2 changes: 0 additions & 2 deletions cinn/hlir/op/contrib/CMakeLists.txt
@@ -2,7 +2,6 @@ core_gather_headers()

gather_srcs(cinnapi_src SRCS
gather_nd.cc
flip.cc
sort.cc
argmin.cc
argmax.cc
@@ -24,7 +23,6 @@ cc_test(test_gather_nd SRCS gather_nd_test.cc DEPS cinncore)
cc_test(test_sort SRCS sort_test.cc DEPS cinncore)
cc_test(test_argmin SRCS argmin_test.cc DEPS cinncore)
cc_test(test_argmax SRCS argmax_test.cc DEPS cinncore)
cc_test(test_flip SRCS flip_test.cc DEPS cinncore)
cc_test(test_repeat SRCS repeat_test.cc DEPS cinncore)
cc_test(test_one_hot SRCS one_hot_test.cc DEPS cinncore)
cc_test(test_lookup_table SRCS lookup_table_test.cc DEPS cinncore)
63 changes: 26 additions & 37 deletions cinn/hlir/op/contrib/argmin.cc
@@ -113,18 +113,15 @@ std::shared_ptr<framework::OpStrategy> StrategyForArgmin(const framework::NodeAt
framework::CINNCompute argmin_compute([=](lang::Args args, lang::RetValue *ret) {
CHECK(!args.empty()) << "The input argument of argmin compute is empty! Please check.";
common::CINNValuePack pack_args = args[0];
std::string tensor_name = UniqName("Argmin_out");
CHECK_GE(pack_args.size(), 1U) << "There should be 1 input args for argmax compute";
Expr in_expr = pack_args[0];
CHECK(in_expr.as_tensor());
Tensor in_tensor = in_expr.as_tensor_ref();
auto stages = CreateStages({in_tensor});
if (FLAGS_cinn_ir_schedule) {
CHECK_EQ(pack_args.size(), 2U);
CHECK(pack_args[1].is_string());
tensor_name = pack_args[1].operator std::string();
}
auto out_tensor = Argmin(in_tensor, target, stages, axis, keep_dims, tensor_name);
CHECK_EQ(pack_args.size(), 2U);
CHECK(pack_args[1].is_string());
std::string tensor_name = pack_args[1].operator std::string();
auto out_tensor = Argmin(in_tensor, target, stages, axis, keep_dims, tensor_name);

stages->InsertLazily(out_tensor[0]);
std::vector<CINNValue> cinn_values{
@@ -133,38 +130,30 @@
});

framework::CINNSchedule argmin_schedule([=](lang::Args args, lang::RetValue *ret) {
if (FLAGS_cinn_ir_schedule) {
CHECK(!args.empty()) << "The input argument of arange_schedule is empty! Please check.\n";
common::CINNValuePack arg_pack = args[0];
std::vector<Expr> vec_ast;
for (int i = 0; i < arg_pack.size(); i++) {
if (arg_pack[i].is_expr()) {
Expr temp = arg_pack[i];
vec_ast.emplace_back(temp);
}
}
CHECK(!vec_ast.empty());
ir::ModuleExpr mod_expr(vec_ast);
ir::IRSchedule ir_sch(mod_expr);
ir_sch.MergeExprs();
auto blocks = ir_sch.GetAllBlocks();
// TODO: It needs to be rewritten according to the reduction_min operator to improve performance.
// Do not use local variables, because the size will exceed the limit.
ir_sch.SetBuffer(blocks[0], "local");
ir_sch.SetBuffer(blocks[1], "local");
long prod_size = std::accumulate(output_shapes[0].begin(), output_shapes[0].end(), 1, std::multiplies<int>());
if (prod_size > 1 && target.arch == Target::Arch::X86) {
pe::IRScheduleInjectiveCPU(ir_sch, output_shapes.front(), target, true);
CHECK(!args.empty()) << "The input argument of arange_schedule is empty! Please check.\n";
common::CINNValuePack arg_pack = args[0];
std::vector<Expr> vec_ast;
for (int i = 0; i < arg_pack.size(); i++) {
if (arg_pack[i].is_expr()) {
Expr temp = arg_pack[i];
vec_ast.emplace_back(temp);
}
std::vector<common::CINNValue> res{common::CINNValue(ir_sch.GetModule().GetExprs().at(0))};
*ret = common::CINNValuePack{res};
} else {
CHECK(!args.empty()) << "The input argument of arange_schedule is empty! Please check.\n";
common::CINNValuePack arg_pack = args[0];
Expr out = arg_pack[0];
CHECK(out.as_tensor());
*ret = arg_pack;
}
CHECK(!vec_ast.empty());
ir::ModuleExpr mod_expr(vec_ast);
ir::IRSchedule ir_sch(mod_expr);
ir_sch.MergeExprs();
auto blocks = ir_sch.GetAllBlocks();
// TODO: It needs to be rewritten according to the reduction_min operator to improve performance.
// Do not use local variables, because the size will exceed the limit.
ir_sch.SetBuffer(blocks[0], "local");
ir_sch.SetBuffer(blocks[1], "local");
long prod_size = std::accumulate(output_shapes[0].begin(), output_shapes[0].end(), 1, std::multiplies<int>());
if (prod_size > 1 && target.arch == Target::Arch::X86) {
pe::IRScheduleInjectiveCPU(ir_sch, output_shapes.front(), target, true);
}
std::vector<common::CINNValue> res{common::CINNValue(ir_sch.GetModule().GetExprs().at(0))};
*ret = common::CINNValuePack{res};
});

auto strategy = std::make_shared<framework::OpStrategy>();