Skip to content

Commit

Permalink
Update Grafana dashboard
Browse files Browse the repository at this point in the history
  • Loading branch information
slowli committed Sep 27, 2023
1 parent d69d96a commit 41a3f88
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 29 deletions.
44 changes: 22 additions & 22 deletions grafana_dashboard.json
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "avg(rate(network_rpc_message_size_sum{test_id=\"$test_id\",type=~\"req_sent|resp_sent\"}[1m])) by (type,method,submethod)",
"expr": "avg(rate(network_rpc_message_size_bytes_sum{test_id=\"$test_id\",type=~\"req_sent|resp_sent\"}[1m])) by (type,method,submethod)",
"legendFormat": "__auto",
"range": true,
"refId": "A"
Expand All @@ -352,7 +352,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "quantile(0.1, rate(concurrency_net_tcp__bytes_sent{test_id=\"$test_id\"}[1m]))",
"expr": "quantile(0.1, rate(concurrency_net_tcp_sent_bytes{test_id=\"$test_id\"}[1m]))",
"hide": false,
"legendFormat": "total, 10th percentile",
"range": true,
Expand All @@ -364,7 +364,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "quantile(0.5, rate(concurrency_net_tcp__bytes_sent{test_id=\"$test_id\"}[1m]))",
"expr": "quantile(0.5, rate(concurrency_net_tcp_sent_bytes{test_id=\"$test_id\"}[1m]))",
"hide": false,
"legendFormat": "total, 50th percentile",
"range": true,
Expand All @@ -376,7 +376,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "quantile(0.9, rate(concurrency_net_tcp__bytes_sent{test_id=\"$test_id\"}[1m]))",
"expr": "quantile(0.9, rate(concurrency_net_tcp_sent_bytes{test_id=\"$test_id\"}[1m]))",
"hide": false,
"legendFormat": "total, 90th percentile",
"range": true,
Expand All @@ -388,7 +388,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "quantile(1., rate(concurrency_net_tcp__bytes_sent{test_id=\"$test_id\"}[1m]))",
"expr": "quantile(1., rate(concurrency_net_tcp_sent_bytes{test_id=\"$test_id\"}[1m]))",
"hide": false,
"legendFormat": "total, max",
"range": true,
Expand Down Expand Up @@ -482,7 +482,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "rate(avg(network_rpc_latency_count{test_id=\"$test_id\",type=\"client_send_recv\"}) by (method,submethod) [1m])",
"expr": "rate(avg(network_rpc_latency_seconds_count{test_id=\"$test_id\",type=\"client_send_recv\"}) by (method,submethod) [1m])",
"legendFormat": "__auto",
"range": true,
"refId": "A"
Expand Down Expand Up @@ -569,7 +569,7 @@
},
"editorMode": "code",
"exemplar": false,
"expr": "sum by(le) (rate(network_rpc_latency_bucket{test_id=\"$test_id\",method=\"consensus\",type=\"client_send_recv\",submethod=\"LeaderPrepare\"}[1m]))",
"expr": "sum by(le) (rate(network_rpc_latency_seconds_bucket{test_id=\"$test_id\",method=\"consensus\",type=\"client_send_recv\",submethod=\"LeaderPrepare\"}[1m]))",
"format": "heatmap",
"instant": false,
"interval": "",
Expand Down Expand Up @@ -659,7 +659,7 @@
},
"editorMode": "code",
"exemplar": false,
"expr": "sum by(le) (rate(network_rpc_latency_bucket{test_id=\"$test_id\",method=\"ping\",type=\"client_send_recv\"}[1m]))",
"expr": "sum by(le) (rate(network_rpc_latency_seconds_bucket{test_id=\"$test_id\",method=\"ping\",type=\"client_send_recv\"}[1m]))",
"format": "heatmap",
"instant": false,
"interval": "",
Expand Down Expand Up @@ -755,7 +755,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "rate(max(executor_lib_io__finalized_block_number{test_id=\"$test_id\"})[1m])",
"expr": "rate(max(executor_finalized_block_number{test_id=\"$test_id\"})[1m])",
"legendFormat": "__auto",
"range": true,
"refId": "A"
Expand Down Expand Up @@ -847,7 +847,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "max(executor_lib_io__finalized_block_number{test_id=\"$test_id\"})",
"expr": "max(executor_finalized_block_number{test_id=\"$test_id\"})",
"legendFormat": "__auto",
"range": true,
"refId": "max"
Expand All @@ -858,7 +858,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "min(executor_lib_io__finalized_block_number{test_id=\"$test_id\"})",
"expr": "min(executor_finalized_block_number{test_id=\"$test_id\"})",
"hide": false,
"legendFormat": "__auto",
"range": true,
Expand Down Expand Up @@ -953,7 +953,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "avg(consensus_replica__view_timeout{test_id=\"$test_id\"})",
"expr": "avg(consensus_replica_view_timeout_seconds{test_id=\"$test_id\"})",
"legendFormat": "avg",
"range": true,
"refId": "A"
Expand All @@ -964,7 +964,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "max(consensus_replica__view_timeout{test_id=\"$test_id\"})",
"expr": "max(consensus_replica_view_timeout_seconds{test_id=\"$test_id\"})",
"hide": false,
"legendFormat": "max",
"range": true,
Expand Down Expand Up @@ -1044,7 +1044,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "sum(rate(consensus_leader__commit_phase_latency_bucket{test_id=\"$test_id\"}[1m])) by (le)",
"expr": "sum(rate(consensus_leader_commit_phase_latency_seconds_bucket{test_id=\"$test_id\"}[1m])) by (le)",
"format": "heatmap",
"legendFormat": "__auto",
"range": true,
Expand Down Expand Up @@ -1139,7 +1139,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "rate(sum (consensus_leader__processing_latency_sum{test_id=\"$test_id\"}) by (type)[1m])/rate(sum (consensus_leader__processing_latency_count{test_id=\"$test_id\"}) by (type)[1m])",
"expr": "rate(sum (consensus_leader_processing_latency_seconds_sum{test_id=\"$test_id\"}) by (type)[1m])/rate(sum (consensus_leader_processing_latency_seconds_count{test_id=\"$test_id\"}) by (type)[1m])",
"legendFormat": "__auto",
"range": true,
"refId": "A"
Expand All @@ -1150,7 +1150,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "rate(sum (consensus_replica__processing_latency_sum{test_id=\"$test_id\"}) by (type)[1m])/rate(sum (consensus_replica__processing_latency_count{test_id=\"$test_id\"}) by (type)[1m])",
"expr": "rate(sum (consensus_replica_processing_latency_seconds_sum{test_id=\"$test_id\"}) by (type)[1m])/rate(sum (consensus_replica_processing_latency_seconds_count{test_id=\"$test_id\"}) by (type)[1m])",
"hide": false,
"legendFormat": "__auto",
"range": true,
Expand Down Expand Up @@ -1527,7 +1527,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "sum(rate(concurrency_net_tcp__established{test_id=\"$test_id\",direction=\"inbound\"}[1m]))",
"expr": "sum(rate(concurrency_net_tcp_established{test_id=\"$test_id\",direction=\"inbound\"}[1m]))",
"legendFormat": "new inbound",
"range": true,
"refId": "A"
Expand Down Expand Up @@ -1646,7 +1646,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "avg((min(histogram_quantile(0.8,rate(network_rpc_latency_bucket{test_id=\"$test_id\",method=\"ping\",type=\"client_send_recv\"}[1m]))) by (instance))/(min(rate(consensus_leader__commit_phase_latency_sum{test_id=\"$test_id\"}[1m])/rate(consensus_leader__commit_phase_latency_count{test_id=\"$test_id\"}[1m])) by (instance)))",
"expr": "avg((min(histogram_quantile(0.8,rate(network_rpc_latency_seconds_bucket{test_id=\"$test_id\",method=\"ping\",type=\"client_send_recv\"}[1m]))) by (instance))/(min(rate(consensus_leader_commit_phase_latency_seconds_sum{test_id=\"$test_id\"}[1m])/rate(consensus_leader_commit_phase_latency_seconds_count{test_id=\"$test_id\"}[1m])) by (instance)))",
"hide": false,
"legendFormat": "actual commit phase rate",
"range": true,
Expand All @@ -1658,7 +1658,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "2*avg(histogram_quantile(0.8,rate(network_rpc_latency_bucket{test_id=\"$test_id\",method=\"ping\",type=\"client_send_recv\"}[1m])))*rate(max(executor_lib_io__finalized_block_number{test_id=\"$test_id\"})[1m])",
"expr": "2*avg(histogram_quantile(0.8,rate(network_rpc_latency_seconds_bucket{test_id=\"$test_id\",method=\"ping\",type=\"client_send_recv\"}[1m])))*rate(max(executor_finalized_block_number{test_id=\"$test_id\"})[1m])",
"hide": false,
"legendFormat": "actual block rate",
"range": true,
Expand All @@ -1670,7 +1670,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "avg(2*histogram_quantile(0.8,rate(network_rpc_latency_bucket{test_id=\"$test_id\",method=\"ping\",type=\"client_send_recv\"}[1m]))/(3*histogram_quantile(0.66,rate(network_rpc_latency_bucket{test_id=\"$test_id\",method=\"ping\",type=\"client_send_recv\"}[1m]))))",
"expr": "avg(2*histogram_quantile(0.8,rate(network_rpc_latency_seconds_bucket{test_id=\"$test_id\",method=\"ping\",type=\"client_send_recv\"}[1m]))/(3*histogram_quantile(0.66,rate(network_rpc_latency_seconds_bucket{test_id=\"$test_id\",method=\"ping\",type=\"client_send_recv\"}[1m]))))",
"hide": false,
"legendFormat": "theoretical optimum for 2/3 signatures 3 roundtrips",
"range": true,
Expand All @@ -1694,7 +1694,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "1/(\n 1/(avg(histogram_quantile(0.8,rate(network_rpc_latency_bucket{test_id=\"$test_id\",method=\"ping\",type=\"client_send_recv\"}[1m])))*rate(max(executor_lib_io__finalized_block_number{test_id=\"$test_id\"})[1m]))\n-\n 1/avg((min(histogram_quantile(0.8,rate(network_rpc_latency_bucket{test_id=\"$test_id\",method=\"ping\",type=\"client_send_recv\"}[1m]))) by (instance))/(min(rate(consensus_leader__commit_phase_latency_sum{test_id=\"$test_id\"}[1m])/rate(consensus_leader__commit_phase_latency_count{test_id=\"$test_id\"}[1m])) by (instance)))\n)",
"expr": "1/(\n 1/(avg(histogram_quantile(0.8,rate(network_rpc_latency_seconds_bucket{test_id=\"$test_id\",method=\"ping\",type=\"client_send_recv\"}[1m])))*rate(max(executor_finalized_block_number{test_id=\"$test_id\"})[1m]))\n-\n 1/avg((min(histogram_quantile(0.8,rate(network_rpc_latency_seconds_bucket{test_id=\"$test_id\",method=\"ping\",type=\"client_send_recv\"}[1m]))) by (instance))/(min(rate(consensus_leader_commit_phase_latency_seconds_sum{test_id=\"$test_id\"}[1m])/rate(consensus_leader_commit_phase_latency_seconds_count{test_id=\"$test_id\"}[1m])) by (instance)))\n)",
"hide": false,
"legendFormat": "actual prepare phase rate (approx)",
"range": true,
Expand Down Expand Up @@ -1789,7 +1789,7 @@
"uid": "ebcc7fad-20b5-44f0-a8ab-7ba2195ef2c0"
},
"editorMode": "code",
"expr": "(avg(rate(network_rpc_latency_sum{test_id=\"$test_id\",type=~\"client_send_recv\"}[30s])/rate(network_rpc_latency_count{test_id=\"$test_id\",type=~\"client_send_recv\",method!=\"sync_validator_addrs\"}[30s])) by (method,submethod,type))",
"expr": "(avg(rate(network_rpc_latency_seconds_sum{test_id=\"$test_id\",type=~\"client_send_recv\"}[30s])/rate(network_rpc_latency_seconds_count{test_id=\"$test_id\",type=~\"client_send_recv\",method!=\"sync_validator_addrs\"}[30s])) by (method,submethod,type))",
"legendFormat": "__auto",
"range": true,
"refId": "A"
Expand Down
6 changes: 3 additions & 3 deletions node/libs/concurrency/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pub(crate) enum Direction {

/// Metrics reported for TCP connections.
#[derive(Debug, Metrics)]
#[metrics(prefix = "concurrency_tcp")]
#[metrics(prefix = "concurrency_net_tcp")]
pub(crate) struct TcpMetrics {
/// Total bytes sent over all TCP connections.
#[metrics(unit = Unit::Bytes)]
Expand All @@ -41,9 +41,9 @@ pub(crate) struct TcpMetrics {
#[metrics(unit = Unit::Bytes)]
pub(crate) received: Counter,
/// TCP connections established since the process started.
pub(crate) established_connections: Family<Direction, Counter>,
pub(crate) established: Family<Direction, Counter>,
/// Number of currently active TCP connections.
pub(crate) active_connections: Family<Direction, Gauge>,
pub(crate) active: Family<Direction, Gauge>,
}

/// TCP metrics instance.
Expand Down
8 changes: 4 additions & 4 deletions node/libs/concurrency/src/net/tcp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@ pub type Listener = tokio::net::TcpListener;
/// Accepts an INBOUND listener connection.
pub async fn accept(ctx: &ctx::Ctx, this: &mut Listener) -> ctx::OrCanceled<io::Result<Stream>> {
Ok(ctx.wait(this.accept()).await?.map(|stream| {
metrics::TCP_METRICS.established_connections[&Direction::Inbound].inc();
metrics::TCP_METRICS.established[&Direction::Inbound].inc();

// We are the only owner of the correctly opened
// socket at this point so `set_nodelay` should
// always succeed.
stream.0.set_nodelay(true).unwrap();
Stream {
stream: stream.0,
_active: metrics::TCP_METRICS.active_connections[&Direction::Inbound]
_active: metrics::TCP_METRICS.active[&Direction::Inbound]
.clone()
.into(),
}
Expand All @@ -55,14 +55,14 @@ pub async fn connect(
.wait(tokio::net::TcpStream::connect(addr))
.await?
.map(|stream| {
metrics::TCP_METRICS.established_connections[&Direction::Outbound].inc();
metrics::TCP_METRICS.established[&Direction::Outbound].inc();
// We are the only owner of the correctly opened
// socket at this point so `set_nodelay` should
// always succeed.
stream.set_nodelay(true).unwrap();
Stream {
stream,
_active: metrics::TCP_METRICS.active_connections[&Direction::Outbound]
_active: metrics::TCP_METRICS.active[&Direction::Outbound]
.clone()
.into(),
}
Expand Down

0 comments on commit 41a3f88

Please sign in to comment.