Skip to content

Commit

Permalink
Merge pull request #1443 from cybozu/bmc-log-collector4
Browse files Browse the repository at this point in the history
Bumpup bmc-log-collector  0.9.1  fix bugs
  • Loading branch information
takara9 authored Oct 17, 2024
2 parents 83a1f8e + 4d0a263 commit 604d668
Show file tree
Hide file tree
Showing 12 changed files with 156 additions and 87 deletions.
2 changes: 1 addition & 1 deletion bmc-log-collector/TAG
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.9.0
0.9.1
76 changes: 40 additions & 36 deletions bmc-log-collector/log-collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,6 @@ type selCollector struct {
}

func (c *selCollector) collectSystemEventLog(ctx context.Context, m Machine, logWriter bmcLogWriter) {
const layout = "2006-01-02T15:04:05Z07:00"
var createUnixtime int64
var lastId int
filePath := path.Join(c.ptrDir, m.Serial)

err := checkAndCreatePointerFile(filePath)
Expand Down Expand Up @@ -90,66 +87,73 @@ func (c *selCollector) collectSystemEventLog(ctx context.Context, m Machine, log

err = updateLastPointer(lastPtr, filePath)
if err != nil {
slog.Error("failed to write a pointer file.", "err", err, "serial", m.Serial, "createUnixtime", createUnixtime, "LastReadId", lastId, "ptrDir", c.ptrDir)
slog.Error("failed to write a pointer file.", "err", err, "serial", m.Serial, "ptrDir", c.ptrDir)
}
return
}

// Increment the success counter
counterRequestSuccess.WithLabelValues(m.Serial).Inc()

var members RedfishJsonSchema
if err := json.Unmarshal(byteJSON, &members); err != nil {
var response RedfishJsonSchema
if err := json.Unmarshal(byteJSON, &response); err != nil {
slog.Error("failed to translate JSON to go struct.", "err", err, "serial", m.Serial, "ptrDir", c.ptrDir)
return
}

for i := len(members.Sel) - 1; i >= 0; i-- {
t, _ := time.Parse(layout, members.Sel[i].Create)
createUnixtime = t.Unix()
lastId, _ = strconv.Atoi(members.Sel[i].Id)
members.Sel[i].Serial = m.Serial
members.Sel[i].BmcIP = m.BmcIP
members.Sel[i].NodeIP = m.NodeIP

if lastPtr.LastReadId < lastId {
// Normal case without iDRAC log reset.
bmcByteJsonLog, err := json.Marshal(members.Sel[i])
createTime, err := time.Parse(time.RFC3339, response.Sel[len(response.Sel)-1].Create)
if err != nil {
slog.Error("failed to parse for time", "err", err, "serial", m.Serial)
return
}
firstCreateTime := createTime.Unix()

for i := len(response.Sel) - 1; i >= 0; i-- {
currentId, err := strconv.Atoi(response.Sel[i].Id)
if err != nil {
slog.Error("failed to strconv", "err", err, "serial", m.Serial, "LastReadId", currentId, "ptrDir", c.ptrDir)
continue
}
// Add the information to identify of the node
response.Sel[i].Serial = m.Serial
response.Sel[i].BmcIP = m.BmcIP
response.Sel[i].NodeIP = m.NodeIP

if lastPtr.LastReadId < currentId {
bmcByteJsonLog, err := json.Marshal(response.Sel[i])
if err != nil {
slog.Error("failed to marshal the system event log", "err", err, "serial", m.Serial, "createUnixtime", createUnixtime, "LastReadId", lastId, "ptrDir", c.ptrDir)
slog.Error("failed to marshal the system event log", "err", err, "serial", m.Serial, "lastPtr.LastReadId", lastPtr.LastReadId, "currentLastReadId", currentId, "ptrDir", c.ptrDir)
}

err = logWriter.write(string(bmcByteJsonLog), m.Serial)
if err != nil {
slog.Error("failed to output log", "err", err, "serial", m.Serial, "createUnixtime", createUnixtime, "LastReadId", lastId, "ptrDir", c.ptrDir)
slog.Error("failed to output log", "err", err, "serial", m.Serial, "bmcByteJsonLog", string(bmcByteJsonLog), "currentLastReadId", currentId, "ptrDir", c.ptrDir)
}
lastPtr.LastReadId = lastId
lastPtr.LastReadTime = createUnixtime

lastPtr.LastReadId = currentId
lastPtr.LastError = nil
} else {
// If the log is reset in iDRAC, the ID starts from 1.
// In that case, determine if the log has already been
// issued based on the time it was generated.
if lastPtr.LastReadTime < createUnixtime {
bmcByteJsonLog, err := json.Marshal(members.Sel[i])
// In that case, determine if generated time been changed to identify log reseted.
if lastPtr.FirstCreateTime != firstCreateTime {
bmcByteJsonLog, err := json.Marshal(response.Sel[i])
if err != nil {
slog.Error("failed to convert JSON", "err", err, "serial", m.Serial, "createUnixtime", createUnixtime, "LastReadId", lastId, "ptrDir", c.ptrDir)
slog.Error("failed to convert JSON", "err", err, "serial", m.Serial, "i", i, "Event", response.Sel[i], "currentLastReadId", currentId)
}

err = logWriter.write(string(bmcByteJsonLog), m.Serial)
if err != nil {
slog.Error("failed to output log", "err", err, "serial", m.Serial, "createUnixtime", createUnixtime, "LastReadId", lastId, "ptrDir", c.ptrDir)
slog.Error("failed to output log", "err", err, "serial", m.Serial, "bmcByteJsonLog", string(bmcByteJsonLog), "currentLastReadId", currentId)
}
lastPtr.LastReadId = lastId
lastPtr.LastReadTime = createUnixtime
lastPtr.LastReadId = currentId
lastPtr.LastError = nil
}
}
}

err = updateLastPointer(LastPointer{
LastReadTime: createUnixtime,
LastReadId: lastId,
LastError: nil,
}, filePath)
lastPtr.FirstCreateTime = firstCreateTime
err = updateLastPointer(lastPtr, filePath)
if err != nil {
slog.Error("failed to write a pointer file.", "err", err, "serial", m.Serial, "createUnixtime", createUnixtime, "LastReadId", lastId, "ptrDir", c.ptrDir)
slog.Error("failed to write a pointer file.", "err", err, "serial", m.Serial, "firstCreateTime", firstCreateTime, "filePath", filePath)
return
}
}
2 changes: 1 addition & 1 deletion bmc-log-collector/log-pointer.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ import (
)

type LastPointer struct {
LastReadTime int64
LastReadId int
LastError error
LastHttpStatusCode int
FirstCreateTime int64
}

func checkAndCreatePointerFile(filePath string) error {
Expand Down
47 changes: 4 additions & 43 deletions bmc-log-collector/log-pointer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,14 @@ var _ = Describe("Get Machines List", Ordered, func() {
// Create pointer file for delete test
fd, _ := os.Create(path.Join(testPointerDir, serialForDelete))
lptr := LastPointer{
LastReadTime: 0,
LastReadId: 0,
LastReadId: 0,
FirstCreateTime: 0,
}
byteJSON, _ := json.Marshal(lptr)
_, err = fd.WriteString(string(byteJSON))
Expect(err).NotTo(HaveOccurred())
fd.Close()

//file, _ = os.Create(path.Join(testPointerDir, "_"+serialForDelete))
//_, err = file.WriteString(string(byteJSON))
//Expect(err).NotTo(HaveOccurred())
//file.Close()

// Create machines list for delete test
m0 := Machine{
Serial: "ABCDEF",
Expand All @@ -50,7 +45,6 @@ var _ = Describe("Get Machines List", Ordered, func() {
})

Context("create the pointer file", func() {
//var filePath string
filePath := path.Join(testPointerDir, serialNormal)

It("check and create pointer file", func() {
Expand All @@ -60,12 +54,11 @@ var _ = Describe("Get Machines List", Ordered, func() {
It("read ptr file", func() {
ptr, err = readLastPointer(filePath)
Expect(err).NotTo(HaveOccurred())
Expect(ptr.LastReadTime).To(Equal(int64(0)))
Expect(ptr.LastReadId).To(Equal(0))
Expect(ptr.FirstCreateTime).To(Equal(int64(0)))
GinkgoWriter.Println(ptr)
})
It("update ptr", func() {
ptr.LastReadTime = 1
ptr.LastReadId = 1
err := updateLastPointer(ptr, filePath)
Expect(err).NotTo(HaveOccurred())
Expand All @@ -82,49 +75,17 @@ var _ = Describe("Get Machines List", Ordered, func() {
It("read ptr file", func() {
ptr, err = readLastPointer(filePath)
Expect(err).NotTo(HaveOccurred())
Expect(ptr.LastReadTime).To(Equal(int64(1)))
Expect(ptr.LastReadId).To(Equal(1))
Expect(ptr.FirstCreateTime).To(Equal(int64(0)))
GinkgoWriter.Println(ptr)
})
It("update ptr", func() {
ptr.LastReadTime = 2
ptr.LastReadId = 2
err := updateLastPointer(ptr, filePath)
Expect(err).NotTo(HaveOccurred())
})
})

/*
Context("recover from the pointer file lost", func() {
filePath := path.Join(testPointerDir, serialNormal)
It("Remove the primary pointer file", func() {
err := os.Remove(filePath)
Expect(err).NotTo(HaveOccurred())
})
It("check and create pointer file", func() {
filePath = path.Join(testPointerDir, serialNormal)
err := checkAndCreatePointerFile(filePath)
Expect(err).NotTo(HaveOccurred())
})
It("read ptr file", func() {
ptr, err = readLastPointer(filePath)
Expect(err).NotTo(HaveOccurred())
Expect(ptr.LastReadTime).To(Equal(int64(2)))
Expect(ptr.LastReadId).To(Equal(2))
GinkgoWriter.Println(ptr)
})
It("update ptr", func() {
ptr.LastReadTime = 3
ptr.LastReadId = 3
err := updateLastPointer(ptr, filePath)
Expect(err).NotTo(HaveOccurred())
})
})
*/

Context("delete retired server ptr file", func() {
It("do delete", func() {
fmt.Println("ML=", ml)
Expand Down
107 changes: 103 additions & 4 deletions bmc-log-collector/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,10 @@ import (
var _ = Describe("Collecting iDRAC Logs", Ordered, func() {
var testOutputDir = "testdata/output_main_test"
var testPointerDir = "testdata/pointers_main_test"
var serial1 = "683FPQ3"
var serial2 = "HN3CLP3"
var serial3 = "J7N6MW3"
var serial1 = "683FPQ3" // basic test case1
var serial2 = "HN3CLP3" // basic test case2
var serial3 = "J7N6MW3" // the log reset test case
var serial4 = "483FPQ3" // the "OEM software events" with random timestamp test case

BeforeAll(func(ctx SpecContext) {
GinkgoWriter.Println("start BMC stub servers")
Expand All @@ -30,6 +31,8 @@ var _ = Describe("Collecting iDRAC Logs", Ordered, func() {
os.Remove(path.Join(testPointerDir, serial2))
os.Remove(path.Join(testOutputDir, serial3))
os.Remove(path.Join(testPointerDir, serial3))
os.Remove(path.Join(testOutputDir, serial4))
os.Remove(path.Join(testPointerDir, serial4))
err := os.MkdirAll(testPointerDir, 0750)
Expect(err).ToNot(HaveOccurred())
err = os.MkdirAll(testOutputDir, 0750)
Expand Down Expand Up @@ -68,6 +71,17 @@ var _ = Describe("Collecting iDRAC Logs", Ordered, func() {
}
bm3.startMock()

bm4 := bmcMock{
host: "127.0.0.1:7480",
resDir: "testdata/redfish_response",
files: []string{"483FPQ3-1.json", "483FPQ3-2.json", "483FPQ3-3.json", "483FPQ3-4.json", "483FPQ3-5.json"},
accessCounter: make(map[string]int),
responseFiles: make(map[string][]string),
responseDir: make(map[string]string),
isInitmap: true,
}
bm4.startMock()

// Wait starting stub servers
By("Test stub web access" + bm1.host)
Eventually(func(ctx SpecContext) error {
Expand All @@ -92,6 +106,14 @@ var _ = Describe("Collecting iDRAC Logs", Ordered, func() {
_, err := client.Do(req)
return err
}).WithContext(ctx).Should(Succeed())

By("Test stub web access" + bm4.host)
Eventually(func(ctx SpecContext) error {
req, _ := http.NewRequest("GET", "http://"+bm4.host+"/", nil)
client := &http.Client{Timeout: time.Duration(3) * time.Second}
_, err := client.Do(req)
return err
}).WithContext(ctx).Should(Succeed())
}, NodeTimeout(10*time.Second))

Context("stub of main equivalent", func() {
Expand Down Expand Up @@ -130,7 +152,6 @@ var _ = Describe("Collecting iDRAC Logs", Ordered, func() {
It("1st reply", func(ctx SpecContext) {
file, err = OpenTestResultLog(path.Join(testOutputDir, serial))
Expect(err).ToNot(HaveOccurred())

reader = bufio.NewReaderSize(file, 4096)
stringJSON, err := ReadingTestResultLogNext(reader)
Expect(err).ToNot(HaveOccurred())
Expand Down Expand Up @@ -318,4 +339,82 @@ var _ = Describe("Collecting iDRAC Logs", Ordered, func() {
Expect(rslt.Id).To(Equal("3"))
}, SpecTimeout(30*time.Second))
})

Context("verify 483FPQ3", func() {
var serial string = "483FPQ3"
var file *os.File
var reader *bufio.Reader
var err error

It("check 1st log record", func(ctx SpecContext) {
file, err = OpenTestResultLog(path.Join(testOutputDir, serial))
Expect(err).ToNot(HaveOccurred())
reader = bufio.NewReaderSize(file, 4096)

stringJSON, err := ReadingTestResultLogNext(reader)
Expect(err).ToNot(HaveOccurred())
GinkgoWriter.Println("**** Received stringJSON=", stringJSON)

var rslt SystemEventLog
err = json.Unmarshal([]byte(stringJSON), &rslt)
Expect(err).ToNot(HaveOccurred())

GinkgoWriter.Println("------ ", string(rslt.Serial))
GinkgoWriter.Println("------ ", string(rslt.Id))
Expect(rslt.Serial).To(Equal(serial))
Expect(rslt.Id).To(Equal("1"))
}, SpecTimeout(30*time.Second))

It("check 2nd log record", func(ctx SpecContext) {
stringJSON, err := ReadingTestResultLogNext(reader)
Expect(err).ToNot(HaveOccurred())
GinkgoWriter.Println("**** Received stringJSON=", stringJSON)

var rslt SystemEventLog
err = json.Unmarshal([]byte(stringJSON), &rslt)
Expect(err).ToNot(HaveOccurred())

GinkgoWriter.Println("------ ", string(rslt.Serial))
GinkgoWriter.Println("------ ", string(rslt.Id))
Expect(rslt.Serial).To(Equal(serial))
Expect(rslt.Id).To(Equal("2"))
}, SpecTimeout(30*time.Second))

It("check 3rd log record with OEM software event", func(ctx SpecContext) {
stringJSON, err := ReadingTestResultLogNext(reader)
Expect(err).ToNot(HaveOccurred())
GinkgoWriter.Println("**** Received stringJSON=", stringJSON)

var rslt SystemEventLog
err = json.Unmarshal([]byte(stringJSON), &rslt)
Expect(err).ToNot(HaveOccurred())

GinkgoWriter.Println("------ ", string(rslt.Serial))
GinkgoWriter.Println("------ ", string(rslt.Id))
Expect(rslt.Serial).To(Equal(serial))
Expect(rslt.Id).To(Equal("3"))
}, SpecTimeout(30*time.Second))

It("check 4th log record with OEM software event", func(ctx SpecContext) {
stringJSON, err := ReadingTestResultLogNext(reader)
Expect(err).ToNot(HaveOccurred())
GinkgoWriter.Println("**** Received stringJSON=", stringJSON)

var rslt SystemEventLog
err = json.Unmarshal([]byte(stringJSON), &rslt)
Expect(err).ToNot(HaveOccurred())

GinkgoWriter.Println("------ ", string(rslt.Serial))
GinkgoWriter.Println("------ ", string(rslt.Id))
Expect(rslt.Serial).To(Equal(serial))
Expect(rslt.Id).To(Equal("4"))
}, SpecTimeout(30*time.Second))

It("check 5th log record that is null", func(ctx SpecContext) {
stringJSON, err := ReadingTestResultLogNext(reader)
Expect(err).To(HaveOccurred())
GinkgoWriter.Println("**** Received stringJSON=", stringJSON)
Expect(len(stringJSON)).To(Equal(0))
}, SpecTimeout(30*time.Second))
})
})
2 changes: 1 addition & 1 deletion bmc-log-collector/test_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ func OpenTestResultLog(fn string) (*os.File, error) {
func ReadingTestResultLogNext(b *bufio.Reader) (string, error) {
var stringJSON string
var err error
for {
for i := 0; i < 20; i++ {
stringJSON, err = b.ReadString('\n')
if err == io.EOF {
time.Sleep(1 * time.Second)
Expand Down
2 changes: 1 addition & 1 deletion bmc-log-collector/testdata/configmap/serverlist2.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[{"serial":"683FPQ3","bmc_ipv4":"127.0.0.1:7180","node_ipv4":"10.69.0.6"},{"serial":"HN3CLP3","bmc_ipv4":"127.0.0.1:7280","node_ipv4":"10.69.0.7"},{"serial":"J7N6MW3","bmc_ipv4":"127.0.0.1:7380","node_ipv4":"10.69.0.8"}]
[{"serial":"683FPQ3","bmc_ipv4":"127.0.0.1:7180","node_ipv4":"10.69.0.6"},{"serial":"HN3CLP3","bmc_ipv4":"127.0.0.1:7280","node_ipv4":"10.69.0.7"},{"serial":"J7N6MW3","bmc_ipv4":"127.0.0.1:7380","node_ipv4":"10.69.0.8"},{"serial":"483FPQ3","bmc_ipv4":"127.0.0.1:7480","node_ipv4":"10.69.0.9"}]
1 change: 1 addition & 0 deletions bmc-log-collector/testdata/redfish_response/483FPQ3-1.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"@odata.context":"/redfish/v1/$metadata#LogEntryCollection.LogEntryCollection","@odata.id":"/redfish/v1/Managers/iDRAC.Embedded.1/LogServices/Sel/Entries","@odata.type":"#LogEntryCollection.LogEntryCollection","Description":"System Event Logs for this manager","Members":[{"@odata.id":"/redfish/v1/Managers/iDRAC.Embedded.1/LogServices/Sel/Entries/4","@odata.type":"#LogEntry.v1_15_0.LogEntry","Created":"2030-10-26T05:14:08-05:00","Description":"Log Entry 4","EntryCode":"Assert","EntryType":"SEL","GeneratorId":"0x0065","Id":"4","Message":"OEM software event.","MessageArgs":[],"[email protected]":0,"MessageId":"61736800","Name":"Log Entry 4","SensorNumber":114,"SensorType":null,"Severity":"OK"},{"@odata.id":"/redfish/v1/Managers/iDRAC.Embedded.1/LogServices/Sel/Entries/3","@odata.type":"#LogEntry.v1_15_0.LogEntry","Created":"2034-07-27T02:23:28-05:00","Description":"Log Entry 3","EntryCode":"Assert","EntryType":"SEL","GeneratorId":"0x0073","Id":"3","Message":"OEM software event.","MessageArgs":[],"[email protected]":0,"MessageId":"72696767","Name":"Log Entry 3","SensorNumber":116,"SensorType":"OS Stop/Shutdown","Severity":"OK"},{"@odata.id":"/redfish/v1/Managers/iDRAC.Embedded.1/LogServices/Sel/Entries/2","@odata.type":"#LogEntry.v1_15_0.LogEntry","Created":"2024-01-11T23:11:56-06:00","Description":"Log Entry 2","EntryCode":"Assert","EntryType":"SEL","GeneratorId":"0x0041","Id":"2","Message":"The system stopped responding because of an exception while running the operating system.","MessageArgs":[],"[email protected]":0,"MessageId":"6fa17973","Name":"Log Entry 2","SensorNumber":115,"SensorType":"OS Stop/Shutdown","Severity":"OK"},{"@odata.id":"/redfish/v1/Managers/iDRAC.Embedded.1/LogServices/Sel/Entries/1","@odata.type":"#LogEntry.v1_15_0.LogEntry","Created":"2022-08-31T10:06:09-05:00","Description":"Log Entry 1","EntryCode":"Assert","EntryType":"SEL","GeneratorId":"0x0020","Id":"1","Message":"Log cleared.","MessageArgs":[],"[email protected]":0,"MessageId":"6f02ffff","Name":"Log Entry 1","SensorNumber":114,"SensorType":"Event Logging Disabled","Severity":"OK"}],"[email protected]":4,"Name":"Log Entry Collection"}
1 change: 1 addition & 0 deletions bmc-log-collector/testdata/redfish_response/483FPQ3-2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"@odata.context":"/redfish/v1/$metadata#LogEntryCollection.LogEntryCollection","@odata.id":"/redfish/v1/Managers/iDRAC.Embedded.1/LogServices/Sel/Entries","@odata.type":"#LogEntryCollection.LogEntryCollection","Description":"System Event Logs for this manager","Members":[{"@odata.id":"/redfish/v1/Managers/iDRAC.Embedded.1/LogServices/Sel/Entries/4","@odata.type":"#LogEntry.v1_15_0.LogEntry","Created":"2030-10-26T05:14:08-05:00","Description":"Log Entry 4","EntryCode":"Assert","EntryType":"SEL","GeneratorId":"0x0065","Id":"4","Message":"OEM software event.","MessageArgs":[],"[email protected]":0,"MessageId":"61736800","Name":"Log Entry 4","SensorNumber":114,"SensorType":null,"Severity":"OK"},{"@odata.id":"/redfish/v1/Managers/iDRAC.Embedded.1/LogServices/Sel/Entries/3","@odata.type":"#LogEntry.v1_15_0.LogEntry","Created":"2034-07-27T02:23:28-05:00","Description":"Log Entry 3","EntryCode":"Assert","EntryType":"SEL","GeneratorId":"0x0073","Id":"3","Message":"OEM software event.","MessageArgs":[],"[email protected]":0,"MessageId":"72696767","Name":"Log Entry 3","SensorNumber":116,"SensorType":"OS Stop/Shutdown","Severity":"OK"},{"@odata.id":"/redfish/v1/Managers/iDRAC.Embedded.1/LogServices/Sel/Entries/2","@odata.type":"#LogEntry.v1_15_0.LogEntry","Created":"2024-01-11T23:11:56-06:00","Description":"Log Entry 2","EntryCode":"Assert","EntryType":"SEL","GeneratorId":"0x0041","Id":"2","Message":"The system stopped responding because of an exception while running the operating system.","MessageArgs":[],"[email protected]":0,"MessageId":"6fa17973","Name":"Log Entry 2","SensorNumber":115,"SensorType":"OS Stop/Shutdown","Severity":"OK"},{"@odata.id":"/redfish/v1/Managers/iDRAC.Embedded.1/LogServices/Sel/Entries/1","@odata.type":"#LogEntry.v1_15_0.LogEntry","Created":"2022-08-31T10:06:09-05:00","Description":"Log Entry 1","EntryCode":"Assert","EntryType":"SEL","GeneratorId":"0x0020","Id":"1","Message":"Log cleared.","MessageArgs":[],"[email protected]":0,"MessageId":"6f02ffff","Name":"Log Entry 1","SensorNumber":114,"SensorType":"Event Logging Disabled","Severity":"OK"}],"[email protected]":4,"Name":"Log Entry Collection"}
Loading

0 comments on commit 604d668

Please sign in to comment.