Skip to content

Commit

Permalink
Add support for exposing linux kernel TLS metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
defect committed Mar 7, 2024
1 parent e82d5cf commit 8be3641
Show file tree
Hide file tree
Showing 4 changed files with 180 additions and 0 deletions.
37 changes: 37 additions & 0 deletions collector/fixtures/e2e-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1340,6 +1340,42 @@ node_ksmd_run 1
# HELP node_ksmd_sleep_seconds ksmd 'sleep_millisecs' file.
# TYPE node_ksmd_sleep_seconds gauge
node_ksmd_sleep_seconds 0.02
# HELP node_ktls_tls_curr_rx_device number of RX sessions currently installed where NIC handles cryptography
# TYPE node_ktls_tls_curr_rx_device gauge
node_ktls_tls_curr_rx_device 0
# HELP node_ktls_tls_curr_rx_sw number of RX sessions currently installed where host handles cryptography
# TYPE node_ktls_tls_curr_rx_sw gauge
node_ktls_tls_curr_rx_sw 5
# HELP node_ktls_tls_curr_tx_device number of TX sessions currently installed where NIC handles cryptography
# TYPE node_ktls_tls_curr_tx_device gauge
node_ktls_tls_curr_tx_device 0
# HELP node_ktls_tls_curr_tx_sw number of TX sessions currently installed where host handles cryptography
# TYPE node_ktls_tls_curr_tx_sw gauge
node_ktls_tls_curr_tx_sw 5
# HELP node_ktls_tls_decrypt_error_total record decryption failed (e.g. due to incorrect authentication tag)
# TYPE node_ktls_tls_decrypt_error_total counter
node_ktls_tls_decrypt_error_total 0
# HELP node_ktls_tls_decrypt_retry_total number of RX records which had to be re-decrypted due to TLS_RX_EXPECT_NO_PAD mis-prediction
# TYPE node_ktls_tls_decrypt_retry_total counter
node_ktls_tls_decrypt_retry_total 0
# HELP node_ktls_tls_no_pad_violation_total number of data RX records which had to be re-decrypted due to TLS_RX_EXPECT_NO_PAD mis-prediction
# TYPE node_ktls_tls_no_pad_violation_total counter
node_ktls_tls_no_pad_violation_total 0
# HELP node_ktls_tls_rx_device_resync_total number of RX resyncs sent to NICs handling cryptography
# TYPE node_ktls_tls_rx_device_resync_total counter
node_ktls_tls_rx_device_resync_total 0
# HELP node_ktls_tls_rx_device_total number of RX sessions opened with NIC cryptograph
# TYPE node_ktls_tls_rx_device_total counter
node_ktls_tls_rx_device_total 0
# HELP node_ktls_tls_rx_sw_total number of RX sessions opened with host cryptography
# TYPE node_ktls_tls_rx_sw_total counter
node_ktls_tls_rx_sw_total 178
# HELP node_ktls_tls_tx_device_total number of TX sessions opened with NIC cryptograph
# TYPE node_ktls_tls_tx_device_total counter
node_ktls_tls_tx_device_total 0
# HELP node_ktls_tls_tx_sw_total number of TX sessions opened with host cryptography
# TYPE node_ktls_tls_tx_sw_total counter
node_ktls_tls_tx_sw_total 161
# HELP node_lnstat_allocs_total linux network cache stats
# TYPE node_lnstat_allocs_total counter
node_lnstat_allocs_total{cpu="0",subsystem="arp_cache"} 1
Expand Down Expand Up @@ -2936,6 +2972,7 @@ node_scrape_collector_success{collector="infiniband"} 1
node_scrape_collector_success{collector="interrupts"} 1
node_scrape_collector_success{collector="ipvs"} 1
node_scrape_collector_success{collector="ksmd"} 1
node_scrape_collector_success{collector="ktls"} 1
node_scrape_collector_success{collector="lnstat"} 1
node_scrape_collector_success{collector="loadavg"} 1
node_scrape_collector_success{collector="mdadm"} 1
Expand Down
12 changes: 12 additions & 0 deletions collector/fixtures/proc/net/tls_stat
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
TlsCurrTxSw 5
TlsCurrRxSw 5
TlsCurrTxDevice 0
TlsCurrRxDevice 0
TlsTxSw 161
TlsRxSw 178
TlsTxDevice 0
TlsRxDevice 0
TlsDecryptError 0
TlsRxDeviceResync 0
TlsDecryptRetry 0
TlsRxNoPadViolation 0
130 changes: 130 additions & 0 deletions collector/ktls_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Copyright 2024 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !noktls
// +build !noktls

package collector

import (
"fmt"

"github.com/go-kit/log"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/procfs"
)

// ktlsCollector is the Collector implementation registered under the name
// "ktls". Its Update method reads kernel TLS statistics through fs and
// publishes them as Prometheus metrics.
type ktlsCollector struct {
	// fs is the procfs handle opened in NewKTLSCollector; Update calls
	// fs.NewTLSStat() on it for every scrape.
	fs procfs.FS
	// logger is the logger handed to NewKTLSCollector.
	logger log.Logger
}

// init registers the kTLS collector with the collector registry. It is
// registered as defaultDisabled, so it has to be enabled explicitly.
func init() {
	const collectorName = "ktls"

	registerCollector(collectorName, defaultDisabled, NewKTLSCollector)
}

// NewKTLSCollector returns a new Collector exposing kTLS stats.
//
// It opens the proc filesystem rooted at *procPath and keeps the handle, plus
// the supplied logger, on the returned collector. An error is returned when
// the proc filesystem cannot be opened.
func NewKTLSCollector(logger log.Logger) (Collector, error) {
	procFS, err := procfs.NewFS(*procPath)
	if err != nil {
		return nil, fmt.Errorf("failed to open procfs: %w", err)
	}

	collector := &ktlsCollector{
		logger: logger,
		fs:     procFS,
	}
	return collector, nil
}

// Update reads the kernel TLS statistics via procfs and sends one constant
// metric per statistic on ch.
//
// The four "curr" statistics are exported as gauges; all other statistics are
// exported as counters with a "_total" suffix. Metrics are sent in the order
// of the table below.
//
// It returns a wrapped error when the statistics cannot be read, and nil
// otherwise.
func (c *ktlsCollector) Update(ch chan<- prometheus.Metric) error {
	stat, err := c.fs.NewTLSStat()
	if err != nil {
		return fmt.Errorf("failed to read TLS stats: %w", err)
	}

	// One row per exported metric. Keeping name, help text, value type and
	// source field side by side makes a mismatched field (such as exporting
	// the TX value under the RX metric name) easy to spot.
	//
	// NOTE(review): the help strings are reproduced verbatim, including the
	// "cryptograph" spelling, because the e2e fixture output mirrors them.
	metrics := []struct {
		name      string
		help      string
		valueType prometheus.ValueType
		value     float64
	}{
		{"tls_curr_tx_sw", "number of TX sessions currently installed where host handles cryptography", prometheus.GaugeValue, float64(stat.TLSCurrTxSw)},
		// Fixed: this metric previously reported stat.TLSCurrTxSw.
		{"tls_curr_rx_sw", "number of RX sessions currently installed where host handles cryptography", prometheus.GaugeValue, float64(stat.TLSCurrRxSw)},
		{"tls_curr_tx_device", "number of TX sessions currently installed where NIC handles cryptography", prometheus.GaugeValue, float64(stat.TLSCurrTxDevice)},
		{"tls_curr_rx_device", "number of RX sessions currently installed where NIC handles cryptography", prometheus.GaugeValue, float64(stat.TLSCurrRxDevice)},
		{"tls_tx_sw_total", "number of TX sessions opened with host cryptography", prometheus.CounterValue, float64(stat.TLSTxSw)},
		{"tls_rx_sw_total", "number of RX sessions opened with host cryptography", prometheus.CounterValue, float64(stat.TLSRxSw)},
		{"tls_tx_device_total", "number of TX sessions opened with NIC cryptograph", prometheus.CounterValue, float64(stat.TLSTxDevice)},
		{"tls_rx_device_total", "number of RX sessions opened with NIC cryptograph", prometheus.CounterValue, float64(stat.TLSRxDevice)},
		{"tls_decrypt_error_total", "record decryption failed (e.g. due to incorrect authentication tag)", prometheus.CounterValue, float64(stat.TLSDecryptError)},
		{"tls_rx_device_resync_total", "number of RX resyncs sent to NICs handling cryptography", prometheus.CounterValue, float64(stat.TLSRxDeviceResync)},
		{"tls_decrypt_retry_total", "number of RX records which had to be re-decrypted due to TLS_RX_EXPECT_NO_PAD mis-prediction", prometheus.CounterValue, float64(stat.TLSDecryptRetry)},
		{"tls_no_pad_violation_total", "number of data RX records which had to be re-decrypted due to TLS_RX_EXPECT_NO_PAD mis-prediction", prometheus.CounterValue, float64(stat.TLSRxNoPadViolation)},
	}

	for _, m := range metrics {
		desc := prometheus.NewDesc(
			prometheus.BuildFQName(namespace, "ktls", m.name),
			m.help,
			nil, nil,
		)
		ch <- prometheus.MustNewConstMetric(desc, m.valueType, m.value)
	}

	// Every failure path has already returned; report success explicitly
	// instead of returning the (always nil) err variable.
	return nil
}
1 change: 1 addition & 0 deletions end-to-end-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ enabled_collectors=$(cat << COLLECTORS
interrupts
ipvs
ksmd
ktls
lnstat
loadavg
mdadm
Expand Down

0 comments on commit 8be3641

Please sign in to comment.