diff --git a/procedures/CMakeLists.txt b/procedures/CMakeLists.txt index 0a2852a309..bd62a0ca24 100644 --- a/procedures/CMakeLists.txt +++ b/procedures/CMakeLists.txt @@ -62,6 +62,7 @@ add_standalone(clce) add_standalone(cn) add_standalone(kcore) add_standalone(hits) +add_standalone(range_triangle) add_standalone(triangle) add_standalone(fast_triangle_counting) add_standalone(louvain) @@ -98,6 +99,7 @@ add_embed(clce) add_embed(cn) add_embed(kcore) add_embed(hits) +add_embed(range_triangle) add_embed(triangle) add_embed(fast_triangle_counting) add_embed(louvain) diff --git a/procedures/algo_cpp/algo.h b/procedures/algo_cpp/algo.h index b65bf725ee..1654cb770c 100644 --- a/procedures/algo_cpp/algo.h +++ b/procedures/algo_cpp/algo.h @@ -338,6 +338,16 @@ void SybilRankCore(OlapBase& graph, ParallelVector& trust_seeds, */ size_t TriangleCore(OlapBase& graph, ParallelVector& num_triangle); + /** + * @brief Compute the Triangle Counting algorithm with Range Algorithm (KDD'21). + * + * @param[in] graph The graph to compute on. + * @param[in,out] num_triangle The ParallelVector to store the number of triangles of each vertex. + * + * @return The number of triangles of the whole graph. + */ +size_t RangeTriangleCore(OlapBase& graph, ParallelVector& num_triangle); + /** * @brief Compute the strongly connected components algorithm. * diff --git a/procedures/algo_cpp/range_triangle_core.cpp b/procedures/algo_cpp/range_triangle_core.cpp new file mode 100644 index 0000000000..7ecaba2321 --- /dev/null +++ b/procedures/algo_cpp/range_triangle_core.cpp @@ -0,0 +1,128 @@ +/** + * Copyright 2022 AntGroup CO., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +#include "lgraph/olap_base.h" +#include "./algo.h" + +using namespace lgraph_api; +using namespace lgraph_api::olap; + +size_t binarySearch(size_t _key, const AdjUnit *_list, size_t _left, size_t _right) + { // 一定返回的是最小的当前值或第一个大于k的值 + int _mid; + while (_left < _right) + { + _mid = (_right + _left) / 2; + if ((_list+_mid)->neighbour < _key) + _left = _mid + 1; + else + _right = _mid; + } + return _left; + } + +size_t RangeTriangleCountCommon(ParallelVector& num_triangle, + AdjList &list_a, AdjList &list_b, size_t a, size_t b) { + size_t local_count = 0; + AdjUnit *ptr_a = list_a.begin(); + AdjUnit *ptr_b = list_b.begin(); + size_t b1 = 0, f1 = list_a.end()-list_a.begin(), b2 = 0, f2 = list_b.end()-list_b.begin(); + if(f1==0||f2==0||(ptr_a+b1)->neighbour>(ptr_b+f2-1)->neighbour || (ptr_b+b2)->neighbour>(ptr_a+f1-1)->neighbour) + return 0; + if ((ptr_a+b1)->neighbour < (ptr_b+b2)->neighbour) + b1 = binarySearch((ptr_b+b2)->neighbour, ptr_a, b1, f1); + else if ((ptr_a+b1)->neighbour > (ptr_b+b2)->neighbour) + b2 = binarySearch((ptr_a+b1)->neighbour, ptr_b, b2, f2); + if ((ptr_a+f1-1)->neighbour > (ptr_b+f2-1)->neighbour) + f1 = binarySearch((ptr_b+f2-1)->neighbour+1, ptr_a, b1, f1); + else if ((ptr_a+f1-1)->neighbour < (ptr_b+f2-1)->neighbour) + f2 = binarySearch((ptr_a+f1-1)->neighbour+1, ptr_b, b2, f2); + + size_t pre_a = -1; + size_t pre_b = -1; + size_t threshold = (a < b) ? 
a : b; + ptr_a += b1; + ptr_b += b2; + while (ptr_a != list_a.begin()+f1 && ptr_b != list_b.begin()+f2 + && ptr_a->neighbour < threshold && ptr_b->neighbour < threshold) { + if (pre_a == ptr_a->neighbour) { + ptr_a++; + continue; + } + if (pre_b == ptr_b->neighbour) { + ptr_b++; + continue; + } + if (ptr_a->neighbour < ptr_b->neighbour) { + pre_a = ptr_a->neighbour; + ptr_a++; + } else if (ptr_a->neighbour > ptr_b->neighbour) { + pre_b = ptr_b->neighbour; + ptr_b++; + } else { + pre_a = ptr_a->neighbour; + pre_b = ptr_b->neighbour; + write_add(&num_triangle[a], (size_t)1); + write_add(&num_triangle[b], (size_t)1); + write_add(&num_triangle[pre_a], (size_t)1); + local_count++; + ptr_a++; + ptr_b++; + } + } + return local_count; +} + +int range_triangle_compare(const void *a, const void *b) { + const AdjUnit *ptr_a = (const AdjUnit *)a; + const AdjUnit *ptr_b = (const AdjUnit *)b; + return ptr_a->neighbour > ptr_b->neighbour ? 1 : -1; +} + +size_t RangeTriangleCore(OlapBase& graph, ParallelVector& num_triangle) { + auto active = graph.AllocVertexSubset(); + active.Fill(); + graph.ProcessVertexActive( + [&](size_t vtx) { + auto edges = graph.OutEdges(vtx); + qsort(edges.begin(), graph.OutDegree(vtx), + sizeof(AdjUnit), range_triangle_compare); + return 1; + }, // 升序排列邻居 + active); + printf("sorted\n"); + size_t discovered_triangles = graph.ProcessVertexActive( + [&](size_t src) { + size_t local_count = 0; + AdjList src_adj = graph.OutEdges(src); + size_t pre = -1; + for (auto edge : src_adj) { + size_t dst = edge.neighbour; + if (pre == dst) { + continue; + } else { + pre = dst; + } + if (src < dst) { + AdjList neighbour_adj = graph.OutEdges(dst); + local_count += RangeTriangleCountCommon(num_triangle, + src_adj, neighbour_adj, src, dst); + } + } + return local_count; + }, + active); + printf("discovered %lu triangles\n", discovered_triangles); + return discovered_triangles; +} diff --git a/procedures/algo_cpp/range_triangle_procedure.cpp 
b/procedures/algo_cpp/range_triangle_procedure.cpp new file mode 100644 index 0000000000..53f4863cbe --- /dev/null +++ b/procedures/algo_cpp/range_triangle_procedure.cpp @@ -0,0 +1,70 @@ +/** + * Copyright 2022 AntGroup CO., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +#include "lgraph/olap_on_db.h" +#include "tools/json.hpp" +#include "./algo.h" + +using namespace lgraph_api; +using namespace lgraph_api::olap; +using json = nlohmann::json; + +extern "C" bool Process(GraphDB& db, const std::string& request, std::string& response) { + double start_time; + + // prepare + start_time = get_time(); + int make_symmetric = 1; + try { + json input = json::parse(request); + parse_from_json(make_symmetric, "make_symmetric", input); + } catch (std::exception& e) { + response = "json parse error: " + std::string(e.what()); + std::cout << response << std::endl; + return false; + } + auto txn = db.CreateReadTxn(); + size_t construct_param = SNAPSHOT_PARALLEL; + if (make_symmetric != 0) { + construct_param = SNAPSHOT_PARALLEL | SNAPSHOT_UNDIRECTED; + } + OlapOnDB olapondb(db, txn, construct_param); + auto prepare_cost = get_time() - start_time; + + // core + start_time = get_time(); + auto num_triangle = olapondb.AllocVertexArray(); + num_triangle.Fill(0); + auto discovered_triangles = RangeTriangleCore(olapondb, num_triangle); + auto core_cost = get_time() - start_time; + + // output + start_time = get_time(); + // TODO(any): write numbers of triangle back to graph + auto output_cost = get_time() - start_time; + + // return + { + json output; + 
output["discovered_triangles"] = discovered_triangles; + output["num_vertices"] = olapondb.NumVertices(); + output["num_edges"] = olapondb.NumEdges(); + output["prepare_cost"] = prepare_cost; + output["core_cost"] = core_cost; + output["output_cost"] = output_cost; + output["total_cost"] = prepare_cost + core_cost + output_cost; + response = output.dump(); + } + return true; +} diff --git a/procedures/algo_cpp/range_triangle_standalone.cpp b/procedures/algo_cpp/range_triangle_standalone.cpp new file mode 100644 index 0000000000..1b0f5a8fd0 --- /dev/null +++ b/procedures/algo_cpp/range_triangle_standalone.cpp @@ -0,0 +1,94 @@ +/** + * Copyright 2022 AntGroup CO., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ */ + +#include "olap/olap_on_disk.h" +#include "tools/json.hpp" +#include "./algo.h" + +class MyConfig : public ConfigBase { + public: + std::string name = "range_triangle"; + int make_symmetric = 0; + void AddParameter(fma_common::Configuration & config) { + ConfigBase::AddParameter(config); + config.Add(make_symmetric, "make_symmetric", true) + .Comment("To make input graph undirected or not"); + } + + void Print() { + ConfigBase::Print(); + std::cout << " name: " << name << std::endl; + std::cout << " make_symmetric: " << make_symmetric << std::endl; + } + + MyConfig(int &argc, char** &argv): ConfigBase(argc, argv) { + fma_common::Configuration config; + AddParameter(config); + config.ExitAfterHelp(true); + try { + config.ParseAndFinalize(argc, argv); + } catch (std::exception& e) { + std::cerr << e.what() << std::endl; + std::exit(-1); + } + Print(); + } +}; + +int main(int argc, char **argv) { + auto start_time = get_time(); + MemUsage memUsage; + memUsage.startMemRecord(); + + // prepare + MyConfig config(argc, argv); + int make_symmetric = config.make_symmetric; + + OlapOnDisk graph; + if (make_symmetric == 0) { + graph.Load(config, INPUT_SYMMETRIC); + } else { + graph.Load(config, MAKE_SYMMETRIC); + } + memUsage.print(); + memUsage.reset(); + std::cout << " num_vertices = " << graph.NumVertices() << std::endl; + std::cout << " num_edges = " << graph.NumEdges() << std::endl; + auto prepare_cost = get_time() - start_time; + + // core + start_time = get_time(); + auto num_triangle = graph.AllocVertexArray(); + num_triangle.Fill(0); + auto discovered_triangles = RangeTriangleCore(graph, num_triangle); + memUsage.print(); + memUsage.reset(); + auto core_cost = get_time() - start_time; + + // output + start_time = get_time(); + if (config.output_dir != "") { + graph.Write(config, num_triangle, graph.NumVertices(), config.name); + } + printf("discovered %lu triangles\n", discovered_triangles); + auto output_cost = get_time() - start_time; + + 
printf("prepare_cost = %.2lf(s)\n", prepare_cost); + printf("core_cost = %.2lf(s)\n", core_cost); + printf("output_cost = %.2lf(s)\n", output_cost); + printf("total_cost = %.2lf(s)\n", prepare_cost + core_cost + output_cost); + printf("ALL DONE.\n"); + + return 0; +}