Skip to content

Commit

Permalink
add a monitor test for installer pod timeline
Browse files Browse the repository at this point in the history
  • Loading branch information
tkashem committed Dec 17, 2024
1 parent b7c61a7 commit 16844b1
Show file tree
Hide file tree
Showing 5 changed files with 207 additions and 0 deletions.
11 changes: 11 additions & 0 deletions e2echart/e2e-chart-template.html
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,10 @@ <h5 class="modal-title">Resource</h5>
return (eventInterval.source === "APIUnreachableFromClient")
}

// Reports whether the given event interval was emitted with the
// "InstallerPodMonitor" source.
function isInstallerPodActivity(eventInterval) {
    var installerPodSource = "InstallerPodMonitor"
    return eventInterval.source === installerPodSource
}

function isEndpointConnectivity(eventInterval) {
if (eventInterval.message.reason !== "DisruptionBegan" && eventInterval.message.reason !== "DisruptionSamplerOutageBegan") {
return false
Expand Down Expand Up @@ -272,6 +276,10 @@ <h5 class="modal-title">Resource</h5>
return [buildLocatorDisplayString(item.locator), "", "APIUnreachableFromClientMetrics"]
}

// Builds the three-element timeline value for an installer pod interval:
// the locator display string, an empty string, and the interval's message
// reason.
// NOTE(review): presumably the third element selects the timeline
// category/colour, as with the sibling *Value helpers - confirm against
// createTimelineData.
function isInstallerPodValue(item) {
    var label = buildLocatorDisplayString(item.locator)
    var reason = item.message.reason
    return [label, "", reason]
}

function disruptionValue(item) {
// We classify these disruption samples with this message if it thinks
// it looks like a problem in the CI cluster running the tests, not the cluster under test.
Expand Down Expand Up @@ -487,6 +495,9 @@ <h5 class="modal-title">Resource</h5>
timelineGroups.push({group: "api-unreachable", data: []})
createTimelineData(isAPIUnreachableFromClientValue, timelineGroups[timelineGroups.length - 1].data, eventIntervals, isAPIUnreachableFromClientActivity, regex)

timelineGroups.push({group: "installer-pod", data: []})
createTimelineData(isInstallerPodValue, timelineGroups[timelineGroups.length - 1].data, eventIntervals, isInstallerPodActivity, regex)

timelineGroups.push({ group: "etcd-leaders", data: [] })
createTimelineData(etcdLeadershipLogsValue, timelineGroups[timelineGroups.length - 1].data, eventIntervals, isEtcdLeadershipAndNotEmpty, regex)

Expand Down
2 changes: 2 additions & 0 deletions pkg/defaultmonitortests/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/openshift/origin/pkg/monitortests/kubeapiserver/disruptionnewapiserver"
"github.com/openshift/origin/pkg/monitortests/kubeapiserver/faultyloadbalancer"
"github.com/openshift/origin/pkg/monitortests/kubeapiserver/generationanalyzer"
"github.com/openshift/origin/pkg/monitortests/kubeapiserver/installerpod"
"github.com/openshift/origin/pkg/monitortests/kubeapiserver/legacykubeapiservermonitortests"
"github.com/openshift/origin/pkg/monitortests/machines/watchmachines"
"github.com/openshift/origin/pkg/monitortests/monitoring/disruptionmetricsapi"
Expand Down Expand Up @@ -133,6 +134,7 @@ func newDefaultMonitorTests(info monitortestframework.MonitorTestInitializationI
monitorTestRegistry.AddMonitorTestOrDie("metrics-api-availability", "Monitoring", disruptionmetricsapi.NewAvailabilityInvariant())
monitorTestRegistry.AddMonitorTestOrDie(apiunreachablefromclientmetrics.MonitorName, "kube-apiserver", apiunreachablefromclientmetrics.NewMonitorTest())
monitorTestRegistry.AddMonitorTestOrDie(faultyloadbalancer.MonitorName, "kube-apiserver", faultyloadbalancer.NewMonitorTest())
monitorTestRegistry.AddMonitorTestOrDie(installerpod.MonitorName, "kube-apiserver", installerpod.NewInstallerPodMonitorTest())

return monitorTestRegistry
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/monitor/monitorapi/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,8 @@ const (
SourceMachine IntervalSource = "MachineMonitor"

SourceGenerationMonitor IntervalSource = "GenerationMonitor"

SourceInstallerPodMonitor IntervalSource = "InstallerPodMonitor"
)

type Interval struct {
Expand Down
181 changes: 181 additions & 0 deletions pkg/monitortests/kubeapiserver/installerpod/monitortest.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
package installerpod

import (
"context"
"fmt"
"strings"
"time"

"github.com/openshift/origin/pkg/monitor/monitorapi"
"github.com/openshift/origin/pkg/monitortestframework"
"github.com/openshift/origin/pkg/test/ginkgo/junitapi"

"k8s.io/client-go/rest"
"k8s.io/kubernetes/test/e2e/framework"
)

// MonitorName is the name of the installer pod monitor test. It is also used
// as the prefix of every log line emitted by this package.
const MonitorName = "installer-pod-monitor"

// NewInstallerPodMonitorTest returns a monitor test that derives a timeline
// from pod lifecycle events recorded in the openshift-etcd namespace.
//
// The returned test starts with an empty pod table and a filter that accepts
// an interval only when its namespace locator key is "openshift-etcd" and its
// message reason is one of "Created", "Started", "Killing" or
// "StaticPodInstallerCompleted".
//
// NOTE(review): the filter does not look at the pod name, so it appears to
// accept events from every pod in the namespace, not only installer pods -
// confirm this is intended.
func NewInstallerPodMonitorTest() monitortestframework.MonitorTest {
	accept := func(interval monitorapi.Interval) bool {
		// A missing namespace key yields the zero value, which is rejected too.
		if interval.Locator.Keys[monitorapi.LocatorNamespaceKey] != "openshift-etcd" {
			return false
		}

		reason := interval.Message.Reason
		return reason == "Created" ||
			reason == "Started" ||
			reason == "Killing" ||
			reason == "StaticPodInstallerCompleted"
	}

	tracker := &installerPodMonitor{
		pods: map[string]*podInfo{},
	}

	return &monitorTest{
		monitor: tracker,
		filter:  accept,
	}
}

// monitorTest is the monitor test implementation returned by
// NewInstallerPodMonitorTest. All of its work happens in
// ConstructComputedIntervals; the remaining methods are no-ops.
type monitorTest struct {
	// monitor holds the per-pod state and turns recorded intervals into
	// computed ones.
	monitor *installerPodMonitor

	// filter reports whether a recorded interval should be handed to monitor.
	filter func(interval monitorapi.Interval) bool
}

// StartCollection is a no-op and always returns nil; this monitor test only
// works on intervals that have already been recorded.
func (mt *monitorTest) StartCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
	// Nothing to start.
	return nil
}

// CollectData is a no-op: it returns no intervals, no JUnit test cases and a
// nil error.
func (mt *monitorTest) CollectData(ctx context.Context, storageDir string, beginning, end time.Time) (monitorapi.Intervals, []*junitapi.JUnitTestCase, error) {
	// Nothing is collected by this monitor test.
	return nil, nil, nil
}

// ConstructComputedIntervals runs the installer pod monitor over
// startingIntervals, using the configured filter to select the relevant
// ones, and returns the intervals it computes. The error is always nil.
func (mt *monitorTest) ConstructComputedIntervals(ctx context.Context, startingIntervals monitorapi.Intervals, recordedResources monitorapi.ResourcesMap, beginning, end time.Time) (monitorapi.Intervals, error) {
	return mt.monitor.process(startingIntervals, mt.filter), nil
}

// EvaluateTestsFromConstructedIntervals is a no-op: it produces no JUnit test
// cases and returns a nil error.
func (mt *monitorTest) EvaluateTestsFromConstructedIntervals(ctx context.Context, finalIntervals monitorapi.Intervals) ([]*junitapi.JUnitTestCase, error) {
	// No test cases are derived from the computed intervals.
	return nil, nil
}

// WriteContentToStorage is a no-op and always returns nil; this monitor test
// writes nothing to storageDir.
func (mt *monitorTest) WriteContentToStorage(ctx context.Context, storageDir, timeSuffix string, finalIntervals monitorapi.Intervals, finalResourceState monitorapi.ResourcesMap) error {
	// Nothing to persist.
	return nil
}

// Cleanup is currently a no-op and always returns nil.
func (mt *monitorTest) Cleanup(ctx context.Context) error {
	// TODO: wire up the start to a context we can kill here.
	return nil
}

// podInfo is the state tracked for a single pod between its "Started" event
// and its terminating event.
type podInfo struct {
	// node is the host name the pod was observed on.
	node string
	// namespace is the namespace locator value of the "Started" event.
	namespace string
	// reason is the reason of the last terminating event seen for the pod
	// ("Killing" or "StaticPodInstallerCompleted"); it stays empty until then.
	reason string
	// startedAt is the start time of the "Started" event.
	startedAt time.Time
	// endedAt is the start time of the last terminating event; it is the zero
	// time while no such event has been seen.
	endedAt time.Time
}

// installerPodMonitor accumulates pod lifecycle state from recorded
// intervals and converts it into computed intervals.
type installerPodMonitor struct {
	// pods maps a pod name (the "pod" locator key) to the state tracked for
	// that pod.
	pods map[string]*podInfo
}

// process selects the intervals accepted by filter, folds them one by one
// into the per-pod state held in m.pods, and returns one computed interval
// per tracked pod.
//
// Each computed interval spans from the pod's "Started" event to its last
// terminating event. Its level is Error when no terminating event was seen
// (the interval then has zero length, ending at startedAt) or when the last
// terminating reason is "Killing"; otherwise it is Info.
//
// m.pods is not reset here, so state carries over between calls. The order of
// the returned intervals follows map iteration and is therefore unspecified.
func (m *installerPodMonitor) process(intervals monitorapi.Intervals, filter func(interval monitorapi.Interval) bool) monitorapi.Intervals {
	interested := make(monitorapi.Intervals, 0)
	for _, interval := range intervals {
		if filter(interval) {
			interested = append(interested, interval)
		}
	}

	// The format has two verbs; both arguments must be supplied, otherwise
	// the count is printed through %s and %d reports a missing operand.
	framework.Logf("monitor[%s]: processing %d events", MonitorName, len(interested))
	for _, interval := range interested {
		m.processOne(interval)
	}

	computed := make(monitorapi.Intervals, 0, len(m.pods))
	for podName, info := range m.pods {
		level := monitorapi.Info
		endedAt := info.endedAt
		if endedAt.IsZero() {
			// No terminating event was seen: emit a zero-length interval and
			// flag it as an error.
			endedAt = info.startedAt
			level = monitorapi.Error
		}
		if info.reason == "Killing" {
			level = monitorapi.Error
		}
		computed = append(computed,
			monitorapi.NewInterval(monitorapi.SourceInstallerPodMonitor, level).
				Locator(monitorapi.NewLocator().NodeFromName(info.node)).
				Message(monitorapi.NewMessage().
					HumanMessage(fmt.Sprintf("%s", podName)).
					Reason(monitorapi.IntervalReason(info.reason)),
				).
				Display().
				Build(info.startedAt, endedAt),
		)
	}

	return computed
}

// processOne folds a single interval into m.pods.
//
// Intervals for which no host name can be determined are logged and dropped.
// A "Started" interval registers the pod, unless it is already tracked. A
// "Killing" or "StaticPodInstallerCompleted" interval records the reason and
// end time on an already tracked pod. Intervals with any other reason leave
// the state untouched.
func (m *installerPodMonitor) processOne(interval monitorapi.Interval) {
	node := host(interval)
	if node == "" {
		framework.Logf("monitor[%s]: no host name for interval: %+v", MonitorName, interval)
		return
	}

	podName := interval.Locator.Keys[monitorapi.LocatorKey("pod")]
	tracked, known := m.pods[podName]
	reason := interval.Message.Reason

	if reason == "Started" {
		if known {
			framework.Logf("monitor[%s]: unexpected, seeing Started twice for the same event: %+v", MonitorName, interval)
			return
		}
		m.pods[podName] = &podInfo{
			node:      node,
			namespace: interval.Locator.Keys[monitorapi.LocatorNamespaceKey],
			startedAt: interval.From,
		}
		// TODO: are any other installer pods active on a different node?
		return
	}

	if reason == "Killing" || reason == "StaticPodInstallerCompleted" {
		if !known {
			framework.Logf("monitor[%s]: unexpected, not seen Started before - event: %+v", MonitorName, interval)
			return
		}
		// A later terminating event overwrites an earlier one.
		tracked.reason = string(reason)
		tracked.endedAt = interval.From
	}
}

// host returns the name of the node the interval's pod belongs to, or "" when
// it cannot be determined.
//
// The node locator key is used when it is present and non-empty (kubelet
// events). Otherwise the node name is parsed out of the "pod" locator key,
// which is how StaticPodInstallerCompleted events are handled: they are
// reported by the installer pod and carry no source information.
func host(interval monitorapi.Interval) string {
	// kubelet events
	if node, ok := interval.Locator.Keys[monitorapi.LocatorNodeKey]; ok && len(node) > 0 {
		return node
	}

	return nodeFromInstallerPodName(interval.Locator.Keys[monitorapi.LocatorKey("pod")])
}

// nodeFromInstallerPodName extracts the node name from an installer pod name,
// returning "" when the name does not have one of the expected formats:
//   - installer-5-retry-1-ci-op-cn7ykf7p-b9a0c-bxxcm-master-2
//   - installer-7-ci-op-cn7ykf7p-b9a0c-bxxcm-master-2
func nodeFromInstallerPodName(name string) string {
	if len(name) == 0 {
		return ""
	}

	// Retry form: what follows "-retry-" is "<attempt>-<node>".
	if _, after, found := strings.Cut(name, "-retry-"); found {
		if _, node, ok := strings.Cut(after, "-"); ok {
			return node
		}
		return ""
	}

	// Plain form: "installer-<revision>-<node>". The full name must be split
	// here; strings.Cut leaves its "after" result empty when the separator is
	// absent, so splitting that would never yield a node.
	if split := strings.SplitN(name, "-", 3); len(split) == 3 {
		return split[2]
	}
	return ""
}
11 changes: 11 additions & 0 deletions test/extended/testdata/bindata.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 16844b1

Please sign in to comment.