Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add triangle counting algorithm using Range #758

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions procedures/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ add_standalone(clce)
add_standalone(cn)
add_standalone(kcore)
add_standalone(hits)
add_standalone(range_triangle)
add_standalone(triangle)
add_standalone(fast_triangle_counting)
add_standalone(louvain)
Expand Down Expand Up @@ -98,6 +99,7 @@ add_embed(clce)
add_embed(cn)
add_embed(kcore)
add_embed(hits)
add_embed(range_triangle)
add_embed(triangle)
add_embed(fast_triangle_counting)
add_embed(louvain)
Expand Down
10 changes: 10 additions & 0 deletions procedures/algo_cpp/algo.h
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,16 @@ void SybilRankCore(OlapBase<Empty>& graph, ParallelVector<size_t>& trust_seeds,
*/
size_t TriangleCore(OlapBase<Empty>& graph, ParallelVector<size_t>& num_triangle);

/**
 * @brief    Compute the Triangle Counting algorithm with Range Algorithm (KDD'21).
 *
 * @param[in,out]   graph           The graph to compute on. The implementation sorts each
 *                                  vertex's out-edge list by neighbour id before counting.
 * @param[in,out]   num_triangle    The ParallelVector that receives the number of triangles of
 *                                  each vertex. Counts are added to the existing values, so the
 *                                  caller should zero-fill it before the call.
 *
 * @return   return the number of triangles of the whole graph.
 */
size_t RangeTriangleCore(OlapBase<Empty>& graph, ParallelVector<size_t>& num_triangle);

/**
* @brief Compute the strongly connected components algorithm.
*
Expand Down
128 changes: 128 additions & 0 deletions procedures/algo_cpp/range_triangle_core.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
/**
* Copyright 2022 AntGroup CO., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/

#include "lgraph/olap_base.h"
#include "./algo.h"

using namespace lgraph_api;
using namespace lgraph_api::olap;

/**
 * @brief    Lower-bound binary search over a neighbour list.
 *
 * The list is assumed to be sorted in ascending neighbour order; the search
 * window is the half-open index range [_left, _right).
 *
 * @param    _key    Neighbour id to search for.
 * @param    _list   Pointer to the first element of the adjacency list.
 * @param    _left   First index of the search window.
 * @param    _right  One past the last index of the search window.
 *
 * @return   Index of the first element in the window whose neighbour is not
 *           less than _key (i.e. the first occurrence of _key, or the first
 *           larger value); _right when every element is smaller than _key.
 */
size_t binarySearch(size_t _key, const AdjUnit<Empty> *_list, size_t _left, size_t _right) {
    while (_left < _right) {
        // Keep the midpoint in size_t (an int would truncate on very long
        // lists) and compute it without the intermediate sum overflowing.
        const size_t mid = _left + (_right - _left) / 2;
        if (_list[mid].neighbour < _key) {
            _left = mid + 1;
        } else {
            _right = mid;
        }
    }
    return _left;
}

/**
 * @brief    Count the common neighbours of two vertices that are smaller than both of them.
 *
 * Both adjacency lists are expected to be sorted in ascending neighbour order
 * (RangeTriangleCore sorts them before calling this function). The overlapping
 * value range of the two lists is first narrowed with binarySearch, then the
 * two narrowed ranges are merged. Repeated neighbour ids inside a list are
 * skipped so that a common neighbour is counted once.
 *
 * @param[in,out]   num_triangle    Per-vertex triangle counters; for every common neighbour
 *                                  found, the entries of a, b and that neighbour are
 *                                  incremented through write_add.
 * @param[in]       list_a          Out-edge list of vertex a.
 * @param[in]       list_b          Out-edge list of vertex b.
 * @param[in]       a               Id of the first vertex.
 * @param[in]       b               Id of the second vertex.
 *
 * @return   Number of common neighbours found that are smaller than min(a, b).
 */
size_t RangeTriangleCountCommon(ParallelVector<size_t>& num_triangle,
                                AdjList<Empty> &list_a, AdjList<Empty> &list_b,
                                size_t a, size_t b) {
    AdjUnit<Empty> *const base_a = list_a.begin();
    AdjUnit<Empty> *const base_b = list_b.begin();
    size_t lo_a = 0;
    size_t hi_a = list_a.end() - list_a.begin();
    size_t lo_b = 0;
    size_t hi_b = list_b.end() - list_b.begin();

    // Nothing to intersect when a list is empty or the value ranges are disjoint.
    if (hi_a == 0 || hi_b == 0) {
        return 0;
    }
    if (base_a[lo_a].neighbour > base_b[hi_b - 1].neighbour) {
        return 0;
    }
    if (base_b[lo_b].neighbour > base_a[hi_a - 1].neighbour) {
        return 0;
    }

    // Advance the start of whichever list begins with the smaller value.
    const size_t first_a = base_a[lo_a].neighbour;
    const size_t first_b = base_b[lo_b].neighbour;
    if (first_a < first_b) {
        lo_a = binarySearch(first_b, base_a, lo_a, hi_a);
    } else if (first_a > first_b) {
        lo_b = binarySearch(first_a, base_b, lo_b, hi_b);
    }

    // Pull in the end of whichever list finishes with the larger value.
    const size_t last_a = base_a[hi_a - 1].neighbour;
    const size_t last_b = base_b[hi_b - 1].neighbour;
    if (last_a > last_b) {
        hi_a = binarySearch(last_b + 1, base_a, lo_a, hi_a);
    } else if (last_a < last_b) {
        hi_b = binarySearch(last_a + 1, base_b, lo_b, hi_b);
    }

    AdjUnit<Empty> *cur_a = base_a + lo_a;
    AdjUnit<Empty> *cur_b = base_b + lo_b;
    AdjUnit<Empty> *const stop_a = list_a.begin() + hi_a;
    AdjUnit<Empty> *const stop_b = list_b.begin() + hi_b;
    // Only neighbours smaller than both endpoints are considered.
    const size_t limit = (a < b) ? a : b;
    // Last value consumed from each list; -1 wraps to the maximum size_t, used as "none yet".
    size_t seen_a = -1;
    size_t seen_b = -1;
    size_t found = 0;

    for (;;) {
        if (cur_a == stop_a || cur_b == stop_b) {
            break;
        }
        const size_t val_a = cur_a->neighbour;
        const size_t val_b = cur_b->neighbour;
        if (val_a >= limit || val_b >= limit) {
            break;
        }
        // Skip repeated entries of a value that has already been consumed.
        if (val_a == seen_a) {
            ++cur_a;
            continue;
        }
        if (val_b == seen_b) {
            ++cur_b;
            continue;
        }
        if (val_a == val_b) {
            seen_a = val_a;
            seen_b = val_b;
            write_add(&num_triangle[a], (size_t)1);
            write_add(&num_triangle[b], (size_t)1);
            write_add(&num_triangle[seen_a], (size_t)1);
            ++found;
            ++cur_a;
            ++cur_b;
        } else if (val_a < val_b) {
            seen_a = val_a;
            ++cur_a;
        } else {
            seen_b = val_b;
            ++cur_b;
        }
    }
    return found;
}

int range_triangle_compare(const void *a, const void *b) {
const AdjUnit<Empty> *ptr_a = (const AdjUnit<Empty> *)a;
const AdjUnit<Empty> *ptr_b = (const AdjUnit<Empty> *)b;
return ptr_a->neighbour > ptr_b->neighbour ? 1 : -1;
}

/**
 * @brief    Count triangles with the range-based intersection algorithm.
 *
 * Runs two passes over all vertices: the first sorts every out-edge list by
 * ascending neighbour id, the second intersects the out-edge lists of each
 * pair (src, dst) with src < dst through RangeTriangleCountCommon.
 *
 * @param[in,out]   graph           The graph to compute on; its out-edge lists are sorted
 *                                  through the pointers returned by OutEdges().
 * @param[in,out]   num_triangle    Per-vertex triangle counters, incremented by
 *                                  RangeTriangleCountCommon.
 *
 * @return   Sum of the counts returned for every processed vertex.
 */
size_t RangeTriangleCore(OlapBase<Empty>& graph, ParallelVector<size_t>& num_triangle) {
    auto active = graph.AllocVertexSubset();
    active.Fill();

    // Pass 1: sort the neighbours of every vertex in ascending order.
    graph.ProcessVertexActive<size_t>(
        [&](size_t vtx) {
            auto edges = graph.OutEdges(vtx);
            qsort(edges.begin(), graph.OutDegree(vtx),
                  sizeof(AdjUnit<Empty>), range_triangle_compare);
            return 1;
        },
        active);
    printf("sorted\n");

    // Pass 2: intersect adjacency lists of neighbouring vertex pairs.
    size_t discovered_triangles = graph.ProcessVertexActive<size_t>(
        [&](size_t src) {
            size_t local_count = 0;
            AdjList<Empty> src_adj = graph.OutEdges(src);
            // Previously handled destination; -1 wraps to the maximum size_t ("none yet").
            size_t pre = -1;
            for (auto edge : src_adj) {
                size_t dst = edge.neighbour;
                // The list is sorted, so repeated destinations are adjacent; handle each once.
                if (pre == dst) {
                    continue;
                }
                pre = dst;
                // Only the (smaller, larger) orientation of a pair is processed.
                if (src < dst) {
                    AdjList<Empty> neighbour_adj = graph.OutEdges(dst);
                    local_count += RangeTriangleCountCommon(num_triangle,
                                                            src_adj, neighbour_adj, src, dst);
                }
            }
            return local_count;
        },
        active);
    // %zu is the portable conversion for size_t (%lu is only correct where
    // size_t happens to be unsigned long).
    printf("discovered %zu triangles\n", discovered_triangles);
    return discovered_triangles;
}
70 changes: 70 additions & 0 deletions procedures/algo_cpp/range_triangle_procedure.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/**
* Copyright 2022 AntGroup CO., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/

#include "lgraph/olap_on_db.h"
#include "tools/json.hpp"
#include "./algo.h"

using namespace lgraph_api;
using namespace lgraph_api::olap;
using json = nlohmann::json;

/**
 * @brief    Procedure entry point (C linkage) running range triangle counting on a GraphDB.
 *
 * The request is parsed as JSON; the optional integer field "make_symmetric"
 * (default 1) selects whether SNAPSHOT_UNDIRECTED is added to the snapshot flags.
 *
 * @param[in]   db          Database to read from; a read transaction is created on it.
 * @param[in]   request     JSON text with the procedure parameters.
 * @param[out]  response    JSON text with the triangle count, graph size and timings, or
 *                          an error message when the request cannot be parsed.
 *
 * @return   false when parsing the request throws, true otherwise.
 */
extern "C" bool Process(GraphDB& db, const std::string& request, std::string& response) {
    // ---- prepare: read parameters and build the in-memory snapshot
    double phase_begin = get_time();
    int make_symmetric = 1;
    try {
        json params = json::parse(request);
        parse_from_json(make_symmetric, "make_symmetric", params);
    } catch (std::exception& err) {
        response = "json parse error: " + std::string(err.what());
        std::cout << response << std::endl;
        return false;
    }
    auto txn = db.CreateReadTxn();
    size_t snapshot_flags = SNAPSHOT_PARALLEL;
    if (make_symmetric != 0) {
        snapshot_flags |= SNAPSHOT_UNDIRECTED;
    }
    OlapOnDB<Empty> olapondb(db, txn, snapshot_flags);
    auto prepare_cost = get_time() - phase_begin;

    // ---- core: run the algorithm on zero-initialised per-vertex counters
    phase_begin = get_time();
    auto num_triangle = olapondb.AllocVertexArray<size_t>();
    num_triangle.Fill(0);
    auto discovered_triangles = RangeTriangleCore(olapondb, num_triangle);
    auto core_cost = get_time() - phase_begin;

    // ---- output
    phase_begin = get_time();
    // TODO(any): write numbers of triangle back to graph
    auto output_cost = get_time() - phase_begin;

    // ---- return: serialise the summary
    json summary;
    summary["discovered_triangles"] = discovered_triangles;
    summary["num_vertices"] = olapondb.NumVertices();
    summary["num_edges"] = olapondb.NumEdges();
    summary["prepare_cost"] = prepare_cost;
    summary["core_cost"] = core_cost;
    summary["output_cost"] = output_cost;
    summary["total_cost"] = prepare_cost + core_cost + output_cost;
    response = summary.dump();
    return true;
}
94 changes: 94 additions & 0 deletions procedures/algo_cpp/range_triangle_standalone.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/**
* Copyright 2022 AntGroup CO., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/

#include "olap/olap_on_disk.h"
#include "tools/json.hpp"
#include "./algo.h"

/**
 * @brief    Command-line configuration of the standalone range triangle counting program.
 *
 * Extends ConfigBase<Empty> with the "make_symmetric" option.
 */
class MyConfig : public ConfigBase<Empty> {
 public:
    // Algorithm name; printed by Print() and read by main() through config.name.
    std::string name = "range_triangle";
    // Non-zero requests that the input graph be made undirected when loaded.
    int make_symmetric = 0;

    /**
     * @brief    Parse the command line, exit with status -1 on a parse error, then print
     *           the resulting configuration.
     *
     * @param[in,out]   argc    Argument count, forwarded to ConfigBase and the parser.
     * @param[in,out]   argv    Argument vector, forwarded to ConfigBase and the parser.
     */
    MyConfig(int &argc, char** &argv) : ConfigBase<Empty>(argc, argv) {
        fma_common::Configuration parser;
        AddParameter(parser);
        parser.ExitAfterHelp(true);
        try {
            parser.ParseAndFinalize(argc, argv);
        } catch (std::exception& err) {
            std::cerr << err.what() << std::endl;
            std::exit(-1);
        }
        Print();
    }

    /**
     * @brief    Register the base-class options plus "make_symmetric".
     *
     * @param[in,out]   config  Parser that receives the option definitions.
     */
    void AddParameter(fma_common::Configuration & config) {
        ConfigBase<Empty>::AddParameter(config);
        config.Add(make_symmetric, "make_symmetric", true)
            .Comment("To make input graph undirected or not");
    }

    /**
     * @brief    Print the base-class settings followed by name and make_symmetric.
     */
    void Print() {
        ConfigBase<Empty>::Print();
        std::cout << " name: " << name << std::endl
                  << " make_symmetric: " << make_symmetric << std::endl;
    }
};

/**
 * @brief    Standalone driver: load a graph from disk, run range triangle counting,
 *           optionally write the per-vertex counts, and print timings.
 *
 * @param    argc    Argument count, forwarded to MyConfig.
 * @param    argv    Argument vector, forwarded to MyConfig.
 *
 * @return   0 on completion.
 */
int main(int argc, char **argv) {
    auto start_time = get_time();
    MemUsage memUsage;
    memUsage.startMemRecord();

    // prepare
    MyConfig config(argc, argv);
    int make_symmetric = config.make_symmetric;

    OlapOnDisk<Empty> graph;
    // make_symmetric == 0 loads with INPUT_SYMMETRIC, anything else with MAKE_SYMMETRIC.
    if (make_symmetric == 0) {
        graph.Load(config, INPUT_SYMMETRIC);
    } else {
        graph.Load(config, MAKE_SYMMETRIC);
    }
    memUsage.print();
    memUsage.reset();
    std::cout << " num_vertices = " << graph.NumVertices() << std::endl;
    std::cout << " num_edges = " << graph.NumEdges() << std::endl;
    auto prepare_cost = get_time() - start_time;

    // core
    start_time = get_time();
    auto num_triangle = graph.AllocVertexArray<size_t>();
    // Counters are accumulated into, so start them at zero.
    num_triangle.Fill(0);
    auto discovered_triangles = RangeTriangleCore(graph, num_triangle);
    memUsage.print();
    memUsage.reset();
    auto core_cost = get_time() - start_time;

    // output
    start_time = get_time();
    // Per-vertex counts are written only when an output directory was given.
    if (config.output_dir != "") {
        graph.Write<size_t>(config, num_triangle, graph.NumVertices(), config.name);
    }
    // %zu is the portable conversion for the size_t result.
    printf("discovered %zu triangles\n", discovered_triangles);
    auto output_cost = get_time() - start_time;

    printf("prepare_cost = %.2lf(s)\n", prepare_cost);
    printf("core_cost = %.2lf(s)\n", core_cost);
    printf("output_cost = %.2lf(s)\n", output_cost);
    printf("total_cost = %.2lf(s)\n", prepare_cost + core_cost + output_cost);
    printf("ALL DONE.\n");

    return 0;
}
Loading