-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgithub-commits.sql
102 lines (92 loc) · 2.83 KB
/
github-commits.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
DROP TABLE IF EXISTS github_commits;
DROP TABLE IF EXISTS daily_github_commits;
CREATE TABLE github_commits (
event_id bigint,
repo_id bigint,
repo_name text,
pusher_login text,
branch text,
created_at timestamp with time zone,
author_name text,
sha text,
message text,
comment text
);
CREATE INDEX commit_event_id_idx ON github_commits USING brin (event_id);
CREATE INDEX commit_repo_name_idx ON github_commits USING btree (repo_name);
CREATE INDEX commit_repo_id_idx ON github_commits USING btree (repo_id);
CREATE INDEX commit_sha_idx ON github_commits USING btree (sha);
CREATE INDEX commit_created_at_idx ON github_commits USING brin (created_at);
CREATE TABLE daily_github_commits (
repo_id bigint,
repo_name text,
day date,
num_commits bigint,
PRIMARY KEY (repo_id, day)
);
SET citus.shard_count TO 160;
SELECT create_distributed_table('github_commits', 'repo_id', colocate_with := 'github_events');
SELECT create_distributed_table('daily_github_commits', 'repo_id', colocate_with := 'github_events');
INSERT INTO rollups VALUES ('github_commits', 'github_events', 'github_events_event_id_seq')
ON CONFLICT (name) DO UPDATE SET last_aggregated_id = 0;
CREATE OR REPLACE FUNCTION extract_commits(OUT start_id bigint, OUT end_id bigint)
RETURNS record
LANGUAGE plpgsql
AS $function$
BEGIN
SELECT window_start, window_end INTO start_id, end_id
FROM incremental_rollup_window('github_commits');
IF start_id > end_id THEN RETURN; END IF;
INSERT INTO
github_commits (event_id, repo_id, repo_name, pusher_login, branch, created_at, author_name, sha, message)
SELECT
event_id,
repo_id,
repo_name,
actor_login,
branch,
created_at,
cmt->'author'->>'name' author_name,
cmt->>'sha' sha,
cmt->>'message' message
FROM (
SELECT
event_id,
repo_id,
repo_name,
actor_login,
payload->>'ref' branch,
created_at,
jsonb_array_elements(payload->'commits') cmt
FROM (
SELECT
event_id,
repo_id,
(data->'repo'->>'name') repo_name,
(data->>'created_at')::timestamptz created_at,
(data->'actor'->>'login')::text actor_login,
data->'payload' payload
FROM
github_events
WHERE
data->>'type' = 'PushEvent' AND event_id BETWEEN start_id AND end_id
) events
) commits;
INSERT INTO
daily_github_commits
SELECT
repo_id,
min(repo_name),
created_at::date,
count(*)
FROM
github_commits
WHERE
event_id BETWEEN start_id AND end_id
GROUP BY
1, 3
ON CONFLICT (repo_id, day) DO UPDATE SET
num_commits = daily_github_commits.num_commits + EXCLUDED.num_commits,
repo_name = EXCLUDED.repo_name;
END;
$function$;