From 82b68bc395fb248d375c85518dc1e9c6ae3cbf7e Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 30 Mar 2016 07:19:17 +0000 Subject: [PATCH 01/25] dasm_x86: Added RDTSCP instruction to Lua-mode dynasm --- src/dasm_x86.lua | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dasm_x86.lua b/src/dasm_x86.lua index 0c11f020ec..ef4442c89b 100644 --- a/src/dasm_x86.lua +++ b/src/dasm_x86.lua @@ -1230,6 +1230,7 @@ local map_op = { shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:", rdtsc_0 = "0F31", -- P1+ + rdtscp_0 = "0F01F9",-- P6+ rdpmc_0 = "0F33", -- P6+ cpuid_0 = "0FA2", -- P1+ From 5c4dabd1f087c6d2d293a19258f3e5286dce9dd7 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 30 Mar 2016 07:27:17 +0000 Subject: [PATCH 02/25] core.timeline: High-resolution event log [wip] --- src/core/timeline.dasl | 92 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 src/core/timeline.dasl diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl new file mode 100644 index 0000000000..71c67fc823 --- /dev/null +++ b/src/core/timeline.dasl @@ -0,0 +1,92 @@ +-- Use of this source code is governed by the Apache 2.0 license; see COPYING. + +-- timeline: high-resolution event log. +-- +-- This module implements an event log that is suitable for logging +-- very fine-grained events during traffic processing. +-- +-- Design goals: +-- Fast enough to log 1M msg/sec with <1% CPU overhead. +-- Precise timestamps for cross-referencing processes (and PEBS events). +-- Simple format that is easy to post-process. + +module(..., package.seeall) + +local dasm = require("dasm") +local ffi = require("ffi") +local C = ffi.C + +-- Log message format +ffi.cdef[[ +struct timeline_entry { + // CPU Time Stamp Counter (cycles) + uint64_t tsc; + // Instruction Pointer where log is taken + uint64_t ip; + // Message (could be generalized...) + uint64_t msg; +} +]] + +-- XXX For test purposes just allocate a 1GB log. +local log = ffi.cast("struct timeline_entry *", C.malloc(1*1024*1024*1024)) +local log_ptr = ffi.new("struct timeline_entry *[1]", log) + +|.arch x64 +|.actionlist actions +|.globalnames globalnames + +|.define arg1, rdi + +|.type msg, struct timeline_entry + +-- void log(uint64_t msg) +local function asmlog (Dst) + |->log: + -- (No registers to save. Using rax, rdx, rcx, rdi, r8 which are + -- all caller-save.) + | rdtscp -- read timestamp counter into edx:eax (trash ecx) + | shl rdx, 32 -- reconstruct 64-bit TSC in rax + | or rax, rdx -- ... + | mov r8, [log_ptr] -- Load address of next log entry + | mov msg:r8->tsc, rax -- Store timestamp in log + | mov rax, [rsp] -- Grab caller address from stack + | mov msg:r8->ip, rax -- ... and store in message + | mov msg:r8->msg, arg1 -- Store message in log + | add r8, #msg -- Advance log pointer + | mov [log_ptr], r8 -- ... XXX must check for overflow + -- (No saved registers to restore.) + | ret +end + +local Dst, globals = dasm.new(actions, nil, nil, 1 + #globalnames) +asmlog(Dst) +local mcode, size = Dst:build() +--dasm.dump(mcode, size) +local entry = dasm.globals(globals, globalnames) + +event = ffi.cast("void(*)(uint64_t)", entry.log) +_anchor = mcode + +function selftest () + print("selftest: timeline") + local n = 1e6 + -- Log events + for i = 0, n-1 do + event(i) + end + -- Check log + for i = 0, n-1 do + assert(i==0 or log[i].tsc > log[i-1].tsc, "tsc increase") + assert(log[i].msg == i, "message value") + end + -- Print statistics + local times = {} + for i = 0, n-1 do + if i > 0 then times[#times+1] = tonumber(log[i].tsc - log[i-1].tsc) end + end + table.sort(times) + print("median logging time", times[n/2]) + print("selftest: ok") +end + From 0acf545af1dc889ec67b4d134e0ba2a77a8e67b7 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 6 Apr 2016 04:49:12 +0000 Subject: [PATCH 03/25] core.timeline: Wrote draft documentation in core/timeline.md --- src/core/timeline.md | 90 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 src/core/timeline.md diff --git a/src/core/timeline.md b/src/core/timeline.md new file mode 100644 index 0000000000..bdf96031ff --- /dev/null +++ b/src/core/timeline.md @@ -0,0 +1,90 @@ +### Timeline + +The timeline is a high-resolution event log that stores entries in a +shared-memory ring buffer. Log entries are timestamped with +cycle-precise wall-clock time based on the CPU [Time Stamp Counter +(TSC)](https://en.wikipedia.org/wiki/Time_Stamp_Counter). Each log +message references predefined strings for its category and +format-string and stores up to four 64-bit argument values. + +Timeline messages can be dynamically enabled or disabled based on +their priority. This supports periodically enabling detailed trace +messages for short periods of time (e.g. 100 microseconds) to ensure +that the log always includes a sample of interesting data for +analysis. + +Logging to the timeline is efficient. Logging costs around 50 cycles +when a message is enabled. The cost of skipping disabled messages is +much less. The logging function is written in assembly language and is +called by LuaJIT via the FFI. (Checking whether log messages are +enabled is invisible to the trace compiler because it is done in the +FFI call.) + +#### File format and binary representation + +The binary representation has three components: + +- header: magic number, file format version, flags. +- entires: array of 64-byte log entries. +- stringtable: string constants (to referenced by their byte-index) + + DIAGRAM: Timeline + +-------------------------+ + | header (64B) | + +-------------------------+ + | | + | | + | entries (~10MB) | + | | + | | + +-------------------------+ + | stringtable (~1MB) | + +-------------------------+ + +The timeline can be read by scanning through the entries and detecting +the first and last entries by comparing timestamps. The entires can be +converted from binary data to human-readable strings by using the +format strings that they reference. + +#### Usage + +Create a timeline as a shared memory object with default file size: + +``` +tl = timeline.new("/timeline") +``` + +Define an event that can be logged on this timeline: + +``` +local proc = timeline.define(tl, 'example', 'trace', "processed $tcp, $udp, and $other") +``` + +Log a series of events: + +``` +proc(10, 20, 3) +proc(50, 60, 8) +``` + +The log now contains these entries: + +``` + processed tcp(10), udp(20), and other(3) + processed tcp(50), udp(60), and other(8) +``` + +#### API + +— Function **new** *shmpath* *[entries]* *[stringtablesize]* + +Create a new timeline at the given shared memory path. + +— Function **define** *timeline* *category* *priority* *message* + +Defines a message that can be logged to this timeline. Returns a function that is called to log the event. + +- *category* is a short descriptive string like "luajit", "engine", "pci01:00.0". +- *priority* is one of the strings `fatal`, `error`, `warning`, `info`, `debug`, `trace`, `trace+`, `trace++`, `trace+++`. +- *message* is text describing the event. This can be a one-liner or a detailed multiline description. Words on the first line starting with `$` define arguments to the logger function which will be stored as 64-bit values (maximum four per message). + From 83d94d8056516950a3c5ee84935d4b3c8390298f Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 6 Apr 2016 04:57:36 +0000 Subject: [PATCH 04/25] core/timeline.md: Fixed markdown for diagram --- src/core/timeline.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/core/timeline.md b/src/core/timeline.md index bdf96031ff..06d55a39a2 100644 --- a/src/core/timeline.md +++ b/src/core/timeline.md @@ -28,6 +28,7 @@ The binary representation has three components: - entires: array of 64-byte log entries. - stringtable: string constants (to referenced by their byte-index) +``` DIAGRAM: Timeline +-------------------------+ | header (64B) | @@ -40,6 +41,7 @@ The binary representation has three components: +-------------------------+ | stringtable (~1MB) | +-------------------------+ +``` The timeline can be read by scanning through the entries and detecting the first and last entries by comparing timestamps. The entires can be From 051b23a441cea6216e3a2955294b128f41427018 Mon Sep 17 00:00:00 2001 From: Simon Leinen Date: Wed, 6 Apr 2016 08:53:00 +0200 Subject: [PATCH 05/25] Typo fixes, and a question I spotted two typos and fixed them (entires -> entries). And a question: In the example, shouldn't the string `example` appear somewhere in the resulting log ("The log now contains there entries")? --- src/core/timeline.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/timeline.md b/src/core/timeline.md index 06d55a39a2..c679cea00d 100644 --- a/src/core/timeline.md +++ b/src/core/timeline.md @@ -25,7 +25,7 @@ FFI call.) The binary representation has three components: - header: magic number, file format version, flags. -- entires: array of 64-byte log entries. +- entries: array of 64-byte log entries. - stringtable: string constants (to referenced by their byte-index) ``` @@ -44,7 +44,7 @@ The binary representation has three components: ``` The timeline can be read by scanning through the entries and detecting -the first and last entries by comparing timestamps. The entires can be +the first and last entries by comparing timestamps. The entries can be converted from binary data to human-readable strings by using the format strings that they reference. From 35686832317dc1c221a9303e538cf26010247afb Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 6 Apr 2016 10:22:58 +0000 Subject: [PATCH 06/25] core/timeline.md: Improved timeline readme Added category to example log output (thanks for feedback @sleinen). Added functions: priority(), save(), dump() Fixed formatting (more word wrap) --- src/core/timeline.md | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/src/core/timeline.md b/src/core/timeline.md index c679cea00d..40659fb1e2 100644 --- a/src/core/timeline.md +++ b/src/core/timeline.md @@ -72,8 +72,8 @@ proc(50, 60, 8) The log now contains these entries: ``` - processed tcp(10), udp(20), and other(3) - processed tcp(50), udp(60), and other(8) + example processed tcp(10), udp(20), and other(3) + example processed tcp(50), udp(60), and other(8) ``` #### API @@ -84,9 +84,29 @@ Create a new timeline at the given shared memory path. — Function **define** *timeline* *category* *priority* *message* -Defines a message that can be logged to this timeline. Returns a function that is called to log the event. +Defines a message that can be logged to this timeline. Returns a +function that is called to log the event. - *category* is a short descriptive string like "luajit", "engine", "pci01:00.0". -- *priority* is one of the strings `fatal`, `error`, `warning`, `info`, `debug`, `trace`, `trace+`, `trace++`, `trace+++`. -- *message* is text describing the event. This can be a one-liner or a detailed multiline description. Words on the first line starting with `$` define arguments to the logger function which will be stored as 64-bit values (maximum four per message). +- *priority* is one of the strings `fatal`, `error`, `warning`, + `info`, `debug`, `trace`, `trace+`, `trace++`, `trace+++`. +- *message* is text describing the event. This can be a one-liner or a + detailed multiline description. Words on the first line starting + with `$` define arguments to the logger function which will be + stored as 64-bit values (maximum four per message). + +— Function **save** *timeline* *filename* + +Save a snapshot of the timeline to a file. The file format is the raw binary timeline format. + +— Function **priority** *timeline* *level* + +Set the minimum priority that is required for a message to be logged +on the timeline. This can be used to control the rate at which +messages are logged to the timeline to manage processing overhead and +the rate of churn in the ring buffer. + +— Function **dump** *filename* *[maxentries]* + +Print the contents of a timeline, ordered from newest to oldest. From bc7f1b09f3e4ef1e38ea058af2a7e260d9ac11e6 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 6 Apr 2016 10:24:06 +0000 Subject: [PATCH 07/25] core/timeline.dasl: New implementation matching API [wip] Updated the timeline implementation to match the documented API. This is still a work in progress: The messages printed by the selftest all have zeros for arguments for unknown reasons; The overhead of logging has doubled for unknown reasons, possibly due to allocating with core.shm and putting a performance-critical cacheline on a 4096-aligned address that may trigger associativity problems (wild speculation, have to dig in). --- src/core/timeline.dasl | 303 ++++++++++++++++++++++++++++++++++------- 1 file changed, 252 insertions(+), 51 deletions(-) diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl index 71c67fc823..87a8f92369 100644 --- a/src/core/timeline.dasl +++ b/src/core/timeline.dasl @@ -1,92 +1,293 @@ +-- timeline: high-resolution event log using in-memory ring buffer -- Use of this source code is governed by the Apache 2.0 license; see COPYING. --- timeline: high-resolution event log. --- --- This module implements an event log that is suitable for logging --- very fine-grained events during traffic processing. --- --- Design goals: --- Fast enough to log 1M msg/sec with <1% CPU overhead. --- Precise timestamps for cross-referencing processes (and PEBS events). --- Simple format that is easy to post-process. - module(..., package.seeall) local dasm = require("dasm") local ffi = require("ffi") local C = ffi.C +local S = require("syscall") +local shm = require("core.shm") + +------------------------------------------------------------ +-- Binary data structures + +-- Log entry format (ring item) --- Log message format ffi.cdef[[ -struct timeline_entry { - // CPU Time Stamp Counter (cycles) - uint64_t tsc; - // Instruction Pointer where log is taken - uint64_t ip; - // Message (could be generalized...) - uint64_t msg; -} + struct timeline_entry { + uint64_t tsc; // CPU timestamp + uint64_t ip; // Instruction pointer of logger + uint64_t msg_id; // category:24, message:24, priority:16 + uint64_t msg0, msg1, msg2, msg3; // message arguments + uint64_t reserved; // pad to 64 bytes + }; ]] --- XXX For test purposes just allocate a 1GB log. -local log = ffi.cast("struct timeline_entry *", C.malloc(1*1024*1024*1024)) -local log_ptr = ffi.new("struct timeline_entry *[1]", log) +-- Header of the log file + +local magic = 0xa3ff7223441d0001 +local version = 0x00010001 + +ffi.cdef[[ + struct timeline_header { + // File format magic number. + uint32_t magic; + uint8_t major_version, minor_version; + uint16_t flags; + // XXX: does not really need to be stored in the file? + uint64_t priority_mask; + uint64_t next; + uint64_t last; + char pad[32]; + struct timeline_entry entries[0]; + }; +]] + +------------------------------------------------------------ +-- API + +-- Create a new timeline under the given shared memory path. +function new (shmpath, entries, size_stringtable) + entries = entries or 1024*1024 + size_stringtable = size_stringtable or 1e6 + -- Calculate size based on number of log entries + local size_header = ffi.sizeof("struct timeline_header") + local size_entries = entries * ffi.sizeof("struct timeline_entry") + local size = size_header + size_entries + size_stringtable + -- Allocate one shm object with memory for all data structures + local memory = shm.map(shmpath, ffi.typeof("char["..size.."]")) + local header = ffi.cast("struct timeline_header *", memory) + local ring = ffi.cast("struct timeline_entry *", memory + size_header) + local stringtable = ffi.cast("char*", memory + size_header + size_entries) + -- Return an object + return { memory = memory, + entries = entries, + header = init_header(ffi.cast("struct timeline_header *", memory), entries), + ring = ring, + stringtable = init_stringtable(stringtable, size_stringtable) } +end + +function init_header (header, entries) + header.magic = magic + header.major_version, header.minor_version = 0, 0 + header.flags = 0 + header.priority_mask = 0xffff + header.next = 0 + header.last = entries-1 -- XXX entries must be a power of 2 + return header +end + +local priorities = {['fatal']=1, ['error']=2, ['warning']=3, ['info']=4, ['debug']=5, + ['trace']=6, ['trace+']=7, ['trace++']=8, ['trace+++']=9} + +-- Define an event and return the logger function. +function define (timeline, category, priority, message) + local icategory = intern(timeline.stringtable, category) + local imessage = intern(timeline.stringtable, message) + local prio = priorities[priority] or error("unrecognized priority: "..priority) + local id = bit.bor(bit.lshift(icategory, 40), + bit.lshift(imessage, 16), + prio) + local n = #(message_args(message)) + local event = event -- move asm function into local scope + local log = timeline.header + if n==0 then return function () event(log,id,0,0,0,0) end end + if n==1 then return function (a) event(log,id,a,0,0,0) end end + if n==2 then return function (a,b) event(log,id,a,b,0,0) end end + if n==3 then return function (a,b,c) event(log,id,a,b,c,0) end end + if n==4 then return function (a,b,c,d) event(log,id,a,b,c,d) end end + error("illegal number of arguments: "..n) +end + +function message_args (string) + local args = {} + string:gsub("[$](%w)+", function (arg) table.insert(args, arg) end) + return args +end + +-- Write a copy of the timeline to a file +function save (tl, filename) + local fd, err = S.open(filename, "rdwr,creat", "0600") + if not fd then return fd, err end + S.write(fd, tl.memory, ffi.sizeof(tl.memory)) + S.close(fd) + return true +end + +-- Choose the minimum priority for a message to be logged +function priority (level) + error("NYI") +end + +-- Print log entries from the file +function dump (filename, maxentries) + local stat, err = S.stat(filename) + if not stat then return stat, err end + local fd, err = S.open(filename, "rdonly") + if not fd then return fd, err end + local ptr = S.mmap(nil, stat.size, "read", "private", fd, 0) + local header = ffi.cast("struct timeline_header *", ptr) + local ring = header.entries + local strings = ffi.cast("char*", header.entries + (header.last + 1)) + local last = tonumber(header.last) + local size = last + 1 + print(("%8s %-16s %s"):format("cycles", "category", "message")) + -- "Fast-forward" to the end (when following entry has earlier timestamp) + local index = 0 + while ring[index].tsc <= ring[(index+1) % size].tsc do + index = (index+1) % size + end + -- "Rewind" printing messages + local ref_tsc = nil + for i = 0, size-1 do + if maxentries and i > maxentries then break end + local timedelta = ref_tsc and (ref_tsc - ring[index].tsc) or 0 + local msgid = ring[index].msg_id + local icategory = bit.band(0xffff, bit.rshift(msgid, 40)) + local imessage = bit.band(0xffff, bit.rshift(msgid, 16)) + -- Lookup the messages and perform arg substitution + local category = ffi.string(strings + icategory) + local format = ffi.string(strings + imessage) + local args = {tonumber(ring[index].msg3), + tonumber(ring[index].msg2), + tonumber(ring[index].msg1), + tonumber(ring[index].msg0)} + local subst = function (match) + -- convert "$foo" to "foo(10)" + return match:gsub("^[$]", "", 1).."("..table.remove(args)..")" + end + local message = format:gsub("($%w+)", subst) + print(("%8d %-16s %s"):format(tonumber(timedelta), category, message)) + ref_tsc = ring[index].tsc + index = (index == 0) and last or index-1 + end +end + +------------------------------------------------------------ +-- Defining log message formats + +function init_stringtable (pointer, size) + return { base = pointer, position = 0, space = size, known = {} } +end + +-- Interns a string in the string table and returns the string's index. +function intern (tab, str) + if tab.known[str] then + return tab.known[str] + else + local bytes = #str + 1 -- include null terminator byte + tab.space = tab.space - bytes + assert(tab.space >= 0, "timeline string table overflow") + local pos = tab.position + ffi.copy(tab.base + tab.position, str, bytes) + tab.position = tab.position + bytes + tab.known[str] = pos + return pos + end +end + +-- Lookup an interned string by index and return as a Lua string. +function lookup (tab, index) + return ffi.string(tab.base + index) +end + +------------------------------------------------------------ +-- Logging messages |.arch x64 |.actionlist actions |.globalnames globalnames -|.define arg1, rdi -|.type msg, struct timeline_entry +|.define arg0, rdi +|.define arg1, rsi +|.define arg2, rdx +|.define arg3, rcx +|.define arg4, r8 +|.define arg5, r9 --- void log(uint64_t msg) +|.type log, struct timeline_header +|.type msg, struct timeline_entry local function asmlog (Dst) |->log: - -- (No registers to save. Using rax, rdx, rcx, rdi, r8 which are - -- all caller-save.) - | rdtscp -- read timestamp counter into edx:eax (trash ecx) - | shl rdx, 32 -- reconstruct 64-bit TSC in rax - | or rax, rdx -- ... - | mov r8, [log_ptr] -- Load address of next log entry - | mov msg:r8->tsc, rax -- Store timestamp in log - | mov rax, [rsp] -- Grab caller address from stack - | mov msg:r8->ip, rax -- ... and store in message - | mov msg:r8->msg, arg1 -- Store message in log - | add r8, #msg -- Advance log pointer - | mov [log_ptr], r8 -- ... XXX must check for overflow - -- (No saved registers to restore.) + -- Exit if this log message is disabled + | mov rax, log:arg0->priority_mask + | and rax, arg1 + | jnz >1 + | ret + |1: + + -- Load and advance next log entry index + | mov r11, log:arg0->next + | mov rax, r11 + | add rax, 1 + | and rax, log:arg0->last + | mov log:arg0->next, rax + + -- Convert log entry number to pointer + | shl r11, 6 + | lea r10, log:arg0->entries + | add r10, r11 + + -- Log the arguments + | mov msg:r10->msg_id, arg1 + | mov msg:r10->msg0, arg2 + | mov msg:r10->msg1, arg3 + | mov msg:r10->msg2, arg4 + | mov msg:r10->msg3, arg5 + + -- Log the timestamp + | rdtscp + | mov msg:r10->tsc, eax + | mov [r10+4], edx + + -- Log the instruction pointer + | mov rax, [rsp] + | mov msg:r10->ip, rax + | ret end local Dst, globals = dasm.new(actions, nil, nil, 1 + #globalnames) asmlog(Dst) local mcode, size = Dst:build() ---dasm.dump(mcode, size) local entry = dasm.globals(globals, globalnames) -event = ffi.cast("void(*)(uint64_t)", entry.log) +event = ffi.cast("void(*)(struct timeline_header *, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t)", entry.log) + _anchor = mcode +--dasm.dump(mcode, size) + function selftest () print("selftest: timeline") - local n = 1e6 + local tl = new("selftest/timeline") + local event1 = define(tl, 'selftest', 'trace', 'event1 $a $b $c $d') + local event2 = define(tl, 'selftest', 'trace', 'event2 $a') + local event3 = define(tl, 'selftest', 'trace', 'event3') -- Log events - for i = 0, n-1 do - event(i) - end - -- Check log - for i = 0, n-1 do - assert(i==0 or log[i].tsc > log[i-1].tsc, "tsc increase") - assert(log[i].msg == i, "message value") + for i = 0, tl.entries-1 do + if i % 3 == 0 then event1(i, i*2, i*3, i*4) end + if i % 3 == 1 then event2(i) end + if i % 3 == 2 then event3() end end -- Print statistics local times = {} - for i = 0, n-1 do - if i > 0 then times[#times+1] = tonumber(log[i].tsc - log[i-1].tsc) end + for i = 0, tl.entries-1 do + if i > 0 then times[#times+1] = tonumber(tl.ring[i].tsc - tl.ring[i-1].tsc) end end table.sort(times) - print("median logging time", times[n/2]) + local n = tl.entries + --print("times:", times[1], times[n/2], times[n/4], times[n*3/4]) + print("median logging time: "..times[tl.entries/2].." cycles") + -- Check log + for i = 0, tl.entries-1 do + assert(i==0 or tl.ring[i].tsc > tl.ring[i-1].tsc, "tsc increase") + end + print("selftest: save and dump") + save(tl, "selftest-timeline.dat") + dump("selftest-timeline.dat", 10) print("selftest: ok") end From 0629dcf9ee8252ade82634d7ac35396f55393274 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 6 Apr 2016 13:07:53 +0000 Subject: [PATCH 08/25] timeline.md: Documented defaults for new() --- src/core/timeline.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/core/timeline.md b/src/core/timeline.md index 40659fb1e2..ab64a59407 100644 --- a/src/core/timeline.md +++ b/src/core/timeline.md @@ -82,6 +82,9 @@ The log now contains these entries: Create a new timeline at the given shared memory path. +- *entries* defaults to 1024*1024 entries (64 megabytes). This could be increased if more events are needed. The value must be a power of 2. +- *stringtablesize* defaults to one megabyte. This is expected to be more than sufficient. + — Function **define** *timeline* *category* *priority* *message* Defines a message that can be logged to this timeline. Returns a From c7f4f25a111df449ee4855de20b7a68c0f6cdb20 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Thu, 7 Apr 2016 03:35:48 +0000 Subject: [PATCH 09/25] core.timeline: Log CPU core ID + numa node I realized that when we take the timestamp with RDTSCP this is also loading the current CPU core number and NUMA node into ECX. This patch includes that register in the log dump. This seems useful so that we can account for scheduling issues from the log. Strictly speaking the value in ECX is the contents of the TSC_AUX register but it seems that we can rely on Linux to load the core ID + NUMA node there. Here is where I got the idea: http://stackoverflow.com/questions/22310028/is-there-an-x86-instruction-to-tell-which-core-the-instruction-is-being-run-on --- src/core/timeline.dasl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl index 87a8f92369..d2b855be2f 100644 --- a/src/core/timeline.dasl +++ b/src/core/timeline.dasl @@ -18,9 +18,10 @@ ffi.cdef[[ struct timeline_entry { uint64_t tsc; // CPU timestamp uint64_t ip; // Instruction pointer of logger + uint32_t aux; // TSC_AUX: core (bits 0-7) + numa (12-15) + uint32_t reserved; // (pad to 64 bytes) uint64_t msg_id; // category:24, message:24, priority:16 uint64_t msg0, msg1, msg2, msg3; // message arguments - uint64_t reserved; // pad to 64 bytes }; ]] @@ -241,6 +242,7 @@ local function asmlog (Dst) | rdtscp | mov msg:r10->tsc, eax | mov [r10+4], edx + | mov msg:r10->aux, ecx -- Log the instruction pointer | mov rax, [rsp] From 29288188e82aa3b2abdb1963007e6dffb2680395 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Thu, 7 Apr 2016 03:43:33 +0000 Subject: [PATCH 10/25] core.timeline: Fix bug in arg parsing The string pattern matching for detecting arguments ($arg) was wrong and this caused all args to be logged as 0. --- src/core/timeline.dasl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl index d2b855be2f..9aef859c6d 100644 --- a/src/core/timeline.dasl +++ b/src/core/timeline.dasl @@ -103,7 +103,7 @@ end function message_args (string) local args = {} - string:gsub("[$](%w)+", function (arg) table.insert(args, arg) end) + string:gsub("[$](%w+)", function (arg) table.insert(args, arg) end) return args end From 4182df239bc6f944eaad781aaa523e2b7c3e783b Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Thu, 7 Apr 2016 03:47:35 +0000 Subject: [PATCH 11/25] core.timeline: Improve log dumps and selftest Show core number and NUMA node. Write slightly better log messages for selftest. Simplify the logging loop in the selftest to simply log three messages per iteration. This way the timestamps will indicate the logging overhead. (Previous version had some funny branches that could add a few dozen cycles to the timestamps which is a bit distracting.) --- src/core/timeline.dasl | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl index 9aef859c6d..8ef0e6566a 100644 --- a/src/core/timeline.dasl +++ b/src/core/timeline.dasl @@ -133,7 +133,9 @@ function dump (filename, maxentries) local strings = ffi.cast("char*", header.entries + (header.last + 1)) local last = tonumber(header.last) local size = last + 1 - print(("%8s %-16s %s"):format("cycles", "category", "message")) + -- Print column headers. + -- Note: "-cycles" column is time delta between entries (moving backwards in time) + print(("%4s %4s %8s %-16s %s"):format("numa", "core", "-cycles", "category", "message")) -- "Fast-forward" to the end (when following entry has earlier timestamp) local index = 0 while ring[index].tsc <= ring[(index+1) % size].tsc do @@ -143,10 +145,17 @@ function dump (filename, maxentries) local ref_tsc = nil for i = 0, size-1 do if maxentries and i > maxentries then break end - local timedelta = ref_tsc and (ref_tsc - ring[index].tsc) or 0 + local timedelta + if ref_tsc == nil then + timedelta = '' + else + timedelta = tonumber(ref_tsc - ring[index].tsc) + end local msgid = ring[index].msg_id local icategory = bit.band(0xffff, bit.rshift(msgid, 40)) local imessage = bit.band(0xffff, bit.rshift(msgid, 16)) + local core = bit.band(0xff, ring[index].aux) + local numa = bit.band(0x0f, bit.rshift(ring[index].aux, 12)) -- Lookup the messages and perform arg substitution local category = ffi.string(strings + icategory) local format = ffi.string(strings + imessage) @@ -159,7 +168,8 @@ function dump (filename, maxentries) return match:gsub("^[$]", "", 1).."("..table.remove(args)..")" end local message = format:gsub("($%w+)", subst) - print(("%8d %-16s %s"):format(tonumber(timedelta), category, message)) + print(("%-4d %-4d %8s %-16s %s"):format( + numa, core, timedelta, category, message)) ref_tsc = ring[index].tsc index = (index == 0) and last or index-1 end @@ -265,14 +275,14 @@ _anchor = mcode function selftest () print("selftest: timeline") local tl = new("selftest/timeline") - local event1 = define(tl, 'selftest', 'trace', 'event1 $a $b $c $d') - local event2 = define(tl, 'selftest', 'trace', 'event2 $a') - local event3 = define(tl, 'selftest', 'trace', 'event3') + local event1 = define(tl, 'selftest', 'trace', 'event with four args: $i, $2i, $3i, and $4i') + local event2 = define(tl, 'selftest', 'trace', 'event with one arg: $i') + local event3 = define(tl, 'selftest', 'trace', 'event with no args') -- Log events for i = 0, tl.entries-1 do - if i % 3 == 0 then event1(i, i*2, i*3, i*4) end - if i % 3 == 1 then event2(i) end - if i % 3 == 2 then event3() end + event1(i, i*2, i*3, i*4) + event2(i) + event3() end -- Print statistics local times = {} @@ -289,7 +299,7 @@ function selftest () end print("selftest: save and dump") save(tl, "selftest-timeline.dat") - dump("selftest-timeline.dat", 10) + dump("selftest-timeline.dat", 15) print("selftest: ok") end From 3047bd69e555d7e7aed6ead0b6b82b7a8843bb8b Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Thu, 7 Apr 2016 04:52:21 +0000 Subject: [PATCH 12/25] core.timeline: Implemented updated priority() API --- src/core/timeline.dasl | 47 +++++++++++++++++++++++++++++++++++------- src/core/timeline.md | 17 +++++++++++++-- 2 files changed, 54 insertions(+), 10 deletions(-) diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl index 8ef0e6566a..048e7e2c26 100644 --- a/src/core/timeline.dasl +++ b/src/core/timeline.dasl @@ -79,8 +79,8 @@ function init_header (header, entries) return header end -local priorities = {['fatal']=1, ['error']=2, ['warning']=3, ['info']=4, ['debug']=5, - ['trace']=6, ['trace+']=7, ['trace++']=8, ['trace+++']=9} +local priorities = {['error']=1, ['warning']=2, ['info']=3, + ['trace']=4, ['app']=5, ['packet']=6, ['library']=7} -- Define an event and return the logger function. function define (timeline, category, priority, message) @@ -89,8 +89,11 @@ function define (timeline, category, priority, message) local prio = priorities[priority] or error("unrecognized priority: "..priority) local id = bit.bor(bit.lshift(icategory, 40), bit.lshift(imessage, 16), - prio) - local n = #(message_args(message)) + bit.lshift(1, prio)) + -- Parse message arguments on first line + local args = {} + message:match("^[^\n]*"):gsub("[$](%w+)", function (arg) table.insert(args, arg) end) + local n = #(args) local event = event -- move asm function into local scope local log = timeline.header if n==0 then return function () event(log,id,0,0,0,0) end end @@ -102,8 +105,6 @@ function define (timeline, category, priority, message) end function message_args (string) - local args = {} - string:gsub("[$](%w+)", function (arg) table.insert(args, arg) end) return args end @@ -117,8 +118,14 @@ function save (tl, filename) end -- Choose the minimum priority for a message to be logged -function priority (level) - error("NYI") +function priority (timeline, priority) + local n + if priority == 'all' then n = 15 + elseif priority == 'none' then n = 0 + else n = priorities[priority] or error("unrecognized priority: "..priority) + end + -- Enable all priorities up to this level + timeline.header.priority_mask = bit.lshift(1, n+1) - 1 end -- Print log entries from the file @@ -297,9 +304,33 @@ function selftest () for i = 0, tl.entries-1 do assert(i==0 or tl.ring[i].tsc > tl.ring[i-1].tsc, "tsc increase") end + print("selftest: save and dump") save(tl, "selftest-timeline.dat") dump("selftest-timeline.dat", 15) + + print("selftest: priorities") + local p_info = define(tl, 'selftest', 'info', 'info event') + local p_trace = define(tl, 'selftest', 'trace', 'trace event') + local p_packet = define(tl, 'selftest', 'packet', 'packet event') + local ref_next = tl.header.next + -- no: assert that no message was logged + local no = function () + assert(ref_next == tl.header.next, "no log message expected") + end + -- yes: assert that a message was logged + local yes = function () + assert(ref_next ~= tl.header.next, "log message expected") + ref_next = tl.header.next + end + -- (default priority: all) + p_info() yes() p_trace() yes() p_packet() yes() + priority(tl, 'all') p_info() yes() p_trace() yes() p_packet() yes() + priority(tl, 'library') p_info() yes() p_trace() yes() p_packet() yes() + priority(tl, 'app') p_info() yes() p_trace() yes() p_packet() no() + priority(tl, 'info') p_info() yes() p_trace() no() p_packet() no() + priority(tl, 'none') p_info() no() p_trace() no() p_packet() no() + print("selftest: ok") end diff --git a/src/core/timeline.md b/src/core/timeline.md index ab64a59407..384749968b 100644 --- a/src/core/timeline.md +++ b/src/core/timeline.md @@ -91,13 +91,23 @@ Defines a message that can be logged to this timeline. Returns a function that is called to log the event. - *category* is a short descriptive string like "luajit", "engine", "pci01:00.0". -- *priority* is one of the strings `fatal`, `error`, `warning`, - `info`, `debug`, `trace`, `trace+`, `trace++`, `trace+++`. +- *priority* is one of the strings `error`, `warning`, `info`, + `trace`, `app`, `packet`, `library`. - *message* is text describing the event. This can be a one-liner or a detailed multiline description. Words on the first line starting with `$` define arguments to the logger function which will be stored as 64-bit values (maximum four per message). +The priority should be chosen according to how frequently events will +occur. This will make it possible for users to control how much detail +is included in the log, and how quickly it wraps around, by choosing a +suitable minimum event priority. Specifically choose `trace` for +occasional events, `app` for per-breath events, `packet` for +per-packet events, and `library` for events in library functions that +could potentially be called in a tight loop. (If none of these +priority names perfectly describes your use case then pick the one you +think best matches the frequency of your events.) + — Function **save** *timeline* *filename* Save a snapshot of the timeline to a file. The file format is the raw binary timeline format. @@ -109,6 +119,9 @@ on the timeline. This can be used to control the rate at which messages are logged to the timeline to manage processing overhead and the rate of churn in the ring buffer. +The level can be the strings accepted by `define()` or one of the +strings `all` or `none`. + — Function **dump** *filename* *[maxentries]* Print the contents of a timeline, ordered from newest to oldest. From 003f1cb00ddcf60e5aa0aafd3a908e8d2023edaa Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Thu, 7 Apr 2016 09:03:35 +0000 Subject: [PATCH 13/25] core.timeline: Fixed and improved dump() Fixed an off-by-one error (braino) in choosing which timestamps to calculate the delta from. Make the cycle column wider to better accommodate large values. --- src/core/timeline.dasl | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl index 048e7e2c26..d358213507 100644 --- a/src/core/timeline.dasl +++ b/src/core/timeline.dasl @@ -142,7 +142,7 @@ function dump (filename, maxentries) local size = last + 1 -- Print column headers. -- Note: "-cycles" column is time delta between entries (moving backwards in time) - print(("%4s %4s %8s %-16s %s"):format("numa", "core", "-cycles", "category", "message")) + print(("%4s %4s %12s %-16s %s"):format("numa", "core", "-cycles", "category", "message")) -- "Fast-forward" to the end (when following entry has earlier timestamp) local index = 0 while ring[index].tsc <= ring[(index+1) % size].tsc do @@ -151,13 +151,9 @@ function dump (filename, maxentries) -- "Rewind" printing messages local ref_tsc = nil for i = 0, size-1 do + local previndex = (index == 0) and last or index-1 if maxentries and i > maxentries then break end - local timedelta - if ref_tsc == nil then - timedelta = '' - else - timedelta = tonumber(ref_tsc - ring[index].tsc) - end + local timedelta = tonumber(ring[index].tsc - ring[previndex].tsc) local msgid = ring[index].msg_id local icategory = bit.band(0xffff, bit.rshift(msgid, 40)) local imessage = bit.band(0xffff, bit.rshift(msgid, 16)) @@ -175,7 +171,7 @@ function dump (filename, maxentries) return match:gsub("^[$]", "", 1).."("..table.remove(args)..")" end local message = format:gsub("($%w+)", subst) - print(("%-4d %-4d %8s %-16s %s"):format( + print(("%-4d %-4d %12s %-16s %s"):format( numa, core, timedelta, category, message)) ref_tsc = ring[index].tsc index = (index == 0) and last or index-1 From 9b659f9da8ace07177bd13ca38d1c6b58bd182a9 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Thu, 7 Apr 2016 09:07:38 +0000 Subject: [PATCH 14/25] core.timeline: selftest with disabled events loop The selftest function now executes a few loops to see how long it takes to log a million events that are disabled by priority. Log excerpt: numa core -cycles category message 0 3 5014856 selftest invoked many events(1000000) 0 3 5025592 selftest invoked many events(1000000) 0 3 5176004 selftest invoked many events(1000000) So looks like five cycles per event is the ballpark cost (including the loop that makes the individual FFI calls). --- src/core/timeline.dasl | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl index d358213507..a7805886b4 100644 --- a/src/core/timeline.dasl +++ b/src/core/timeline.dasl @@ -301,6 +301,24 @@ function selftest () assert(i==0 or tl.ring[i].tsc > tl.ring[i-1].tsc, "tsc increase") end + -- Create some log entries showing the logging throughput for + -- messages that are disabled by priority. + -- (These will be printed below.) + local timing_start = define(tl, 'timing', 'trace', 'timing tight loop of disabled events') + local timing_loop = define(tl, 'timing', 'trace', 'invoked many $events') + local timing_dis = define(tl, 'timing', 'library', 'this message will not be logged') + priority(tl, 'trace') + timing_start() + for i = 1, 3 do + -- Loop running many disabled events + local n = 1e6 + for j = 1, n do + -- Call an event that is disabled + timing_dis() + end + timing_loop(n) + end + print("selftest: save and dump") save(tl, "selftest-timeline.dat") dump("selftest-timeline.dat", 15) @@ -319,8 +337,6 @@ function selftest () assert(ref_next ~= tl.header.next, "log message expected") ref_next = tl.header.next end - -- (default priority: all) - p_info() yes() p_trace() yes() p_packet() yes() priority(tl, 'all') p_info() yes() p_trace() yes() p_packet() yes() priority(tl, 'library') p_info() yes() p_trace() yes() p_packet() yes() priority(tl, 'app') p_info() yes() p_trace() yes() p_packet() no() From 32143397e1bbcfb22d253ab23dcd43ac26e65f75 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Thu, 7 Apr 2016 09:22:00 +0000 Subject: [PATCH 15/25] core/timeline.md: Added example of dump() output --- src/core/timeline.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/core/timeline.md b/src/core/timeline.md index 384749968b..f687de48e4 100644 --- a/src/core/timeline.md +++ b/src/core/timeline.md @@ -126,3 +126,27 @@ strings `all` or `none`. Print the contents of a timeline, ordered from newest to oldest. +This dump function is intended as a simple example. More sophisticated +log viewing and analysis tools are eagerly anticipated. + +Here is the output: + +``` +numa core -cycles category message +0 4 5033652 selftest invoked many events(1000000) +0 4 5013972 selftest invoked many events(1000000) +0 4 5178340 selftest invoked many events(1000000) +0 4 1753835508 selftest timing tight loop of disabled events +0 4 60 selftest event with no args +0 4 40 selftest event with one arg: i(1048575) +0 4 40 selftest event with four args: i(1048575), 2i(2097150), 3i(3145725), and 4i(4194300) +0 4 36 selftest event with no args +0 4 40 selftest event with one arg: i(1048574) +0 4 40 selftest event with four args: i(1048574), 2i(2097148), 3i(3145722), and 4i(4194296) +0 4 36 selftest event with no args +0 4 40 selftest event with one arg: i(1048573) +0 4 40 selftest event with four args: i(1048573), 2i(2097146), 3i(3145719), and 4i(4194292) +0 4 36 selftest event with no args +0 4 40 selftest event with one arg: i(1048572) +0 4 40 selftest event with four args: i(1048572), 2i(2097144), 3i(3145716), and 4i(4194288) +``` From 111b1bff031f66c6f56cdc8a105ddc46c7d3ff9b Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Thu, 7 Apr 2016 09:27:32 +0000 Subject: [PATCH 16/25] core/timeline.md: Add 5 cycle estimate w/ disabled --- src/core/timeline.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/core/timeline.md b/src/core/timeline.md index f687de48e4..c2afb86666 100644 --- a/src/core/timeline.md +++ b/src/core/timeline.md @@ -14,11 +14,10 @@ that the log always includes a sample of interesting data for analysis. Logging to the timeline is efficient. Logging costs around 50 cycles -when a message is enabled. The cost of skipping disabled messages is -much less. The logging function is written in assembly language and is -called by LuaJIT via the FFI. (Checking whether log messages are -enabled is invisible to the trace compiler because it is done in the -FFI call.) +when a message is enabled and around 5 cycles when disabled. The +logging function is written in assembly language and is called by +LuaJIT via the FFI. (Checking whether log messages are enabled is +invisible to the trace compiler because it is done in the FFI call.) #### File format and binary representation From b81cbb57e2c6832497f4d21e2a8e683579f10d10 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Mon, 18 Apr 2016 08:57:31 +0000 Subject: [PATCH 17/25] engine: Add timeline events for the breathe cycle The events are currently always enabled. This needs to be optimized. Goal for now is simply to have interesting information in the log. --- src/core/app.lua | 70 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) diff --git a/src/core/app.lua b/src/core/app.lua index 77e225e572..0102bed67e 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -33,6 +33,57 @@ freebits = counter.open("engine/freebits") -- Total packet bits freed (for 10G freebytes = counter.open("engine/freebytes") -- Total packet bytes freed configs = counter.open("engine/configs") -- Total configurations loaded +-- Events + +local tl = require("core.timeline") +timeline = tl.new("engine/timeline") + +local function define_event(level, message) + return tl.define(timeline, 'engine', level, message) +end + +-- Sleeping + +local event_sleep_Hz = define_event('trace', [[sleeping $usec for $Hz +The engine requests that the kernel suspend this process for a period of +microseconds in order to reduce CPU utilization and achieve a fixed +frequency of breaths per second (Hz).]]) + +local event_sleep_idle = define_event('trace', [[sleeping $usec on idle +The engine requests that the kernel suspend this process for a period +of microseconds in order to reduce CPU utilization because idleness +has been detected (a breath in which no packets were processed.)]]) + +local event_wakeup = define_event('trace', [[wakeup from sleep +The engine resumes operation after sleeping voluntarily.]]) + +-- Traffic processing start/stop + +local event_traffic_start = define_event('info', [[start traffic processing +The engine starts the traffic processing loop.]]) + +local event_traffic_stop = define_event('info', [[stop traffic processing +The engine stops the traffic processing loop.]]) + +-- Breath steps + +local event_breath_start = define_event('trace', [[start $breath +The engine starts an iteration of the packet-processing event loop (a +"breath".)]]) + +local event_breath_pulled = define_event('trace', [[pulled input packets +The engine has "pulled" new packets into the event loop for processing.]]) + +local event_breath_pushed = define_event('trace', [[pushed output packets +The engine has "pushed" packets one step through the processing network.]]) + +local event_breath_end = define_event('trace', [[end $breath +The engine completes an iteration of the event loop (a "breath.")]]) + +local event_commit_counters = define_event('trace', [[commit counters +The engine commits the latest counter values to externally visible +shared memory.]]) + -- Breathing regluation to reduce CPU usage when idle by calling usleep(3). -- -- There are two modes available: @@ -77,6 +128,7 @@ local function with_restart (app, method) else method(app) end + app.event() end -- Restart dead apps. @@ -170,6 +222,8 @@ function apply_config_actions (actions, conf) name, tostring(app))) end local zone = app.zone or getfenv(class.new)._NAME or name + local event = define_event('trace', ("called app %s (type: %s)"):format( + name, zone)) app.appname = name app.output = {} app.input = {} @@ -177,6 +231,7 @@ function apply_config_actions (actions, conf) table.insert(new_app_array, app) app_name_to_index[name] = #new_app_array app.zone = zone + app.event = event end function ops.restart (name) ops.stop(name) @@ -231,6 +286,7 @@ end -- Call this to "run snabb switch". function main (options) + event_traffic_start() options = options or {} local done = options.done local no_timers = options.no_timers @@ -246,6 +302,7 @@ function main (options) until done and done() counter.commit() if not options.no_report then report(options.report) end + event_traffic_stop() end local nextbreath @@ -258,14 +315,18 @@ function pace_breathing () nextbreath = nextbreath or monotonic_now local sleep = tonumber(nextbreath - monotonic_now) if sleep > 1e-6 then + event_sleep_Hz(Hz, math.round(sleep*1e6)) C.usleep(sleep * 1e6) monotonic_now = C.get_monotonic_time() + event_wakeup() end nextbreath = math.max(nextbreath + 1/Hz, monotonic_now) else if lastfrees == counter.read(frees) then sleep = math.min(sleep + 1, maxsleep) + event_sleep_idle(sleep) C.usleep(sleep) + event_wakeup() else sleep = math.floor(sleep/2) end @@ -277,6 +338,7 @@ end function breathe () monotonic_now = C.get_monotonic_time() + event_breath_start(counter.read(breaths)) -- Restart: restart dead apps restart_dead_apps() -- Inhale: pull work into the app network @@ -290,6 +352,7 @@ function breathe () zone() end end + event_breath_pulled() -- Exhale: push work out through the app network local firstloop = true repeat @@ -307,12 +370,17 @@ function breathe () progress = true end end + event_breath_pushed() end firstloop = false until not progress -- Stop after no link had new data + event_breath_end(counter.read(breaths)) counter.add(breaths) -- Commit counters at a reasonable frequency - if counter.read(breaths) % 100 == 0 then counter.commit() end + if counter.read(breaths) % 100 == 0 then + counter.commit() + event_commit_counters() + end end function report (options) From 6898d6a144595250671b5ea064601a97eb20cd8e Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 20 Apr 2016 03:24:26 +0000 Subject: [PATCH 18/25] engine: More timeline events for breathe cycle --- src/core/app.lua | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index 0102bed67e..c5ce9bd751 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -80,6 +80,13 @@ The engine has "pushed" packets one step through the processing network.]]) local event_breath_end = define_event('trace', [[end $breath The engine completes an iteration of the event loop (a "breath.")]]) +local event_breath_initialized = define_event('trace', [[initialized breath +The engine has completed initialization for the breath: synchronized +the current time and handled any pending error recovery.]]) + +local event_ran_timers = define_event('trace', [[ran timers +The engine polled its timers and executed any that were expired.]]) + local event_commit_counters = define_event('trace', [[commit counters The engine commits the latest counter values to externally visible shared memory.]]) @@ -297,7 +304,7 @@ function main (options) monotonic_now = C.get_monotonic_time() repeat breathe() - if not no_timers then timer.run() end + if not no_timers then timer.run() event_ran_timers() end if not busywait then pace_breathing() end until done and done() counter.commit() @@ -337,11 +344,12 @@ function pace_breathing () end function breathe () - monotonic_now = C.get_monotonic_time() event_breath_start(counter.read(breaths)) + monotonic_now = C.get_monotonic_time() -- Restart: restart dead apps restart_dead_apps() -- Inhale: pull work into the app network + event_breath_initialized() for i = 1, #app_array do local app = app_array[i] -- if app.pull then @@ -370,8 +378,8 @@ function breathe () progress = true end end - event_breath_pushed() end + event_breath_pushed() firstloop = false until not progress -- Stop after no link had new data event_breath_end(counter.read(breaths)) From ed63a790af34958e500786bcc568af502ec45df7 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Wed, 20 Apr 2016 03:26:08 +0000 Subject: [PATCH 19/25] nfv: Save timeline at end of benchmark-mode --- src/program/snabbnfv/traffic/traffic.lua | 1 + 1 file changed, 1 insertion(+) diff --git a/src/program/snabbnfv/traffic/traffic.lua b/src/program/snabbnfv/traffic/traffic.lua index b944673150..054a6cd790 100644 --- a/src/program/snabbnfv/traffic/traffic.lua +++ b/src/program/snabbnfv/traffic/traffic.lua @@ -132,6 +132,7 @@ function bench (pciaddr, confpath, sockpath, npackets) end engine.main({done = done, no_report = true}) + require("core.timeline").save(engine.timeline, "nfv-timeline") local finish = C.get_monotonic_time() local runtime = finish - start From 7629b0f6b58acbbd6c2d8026c5a4b40c69efee5c Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Tue, 10 May 2016 06:31:02 +0000 Subject: [PATCH 20/25] core.timeline: Update for new core.shm API Use shm.create() instead of shm.map(). Required after update in 9fe397ef5cb77cf7a4b3070678a377eedec4449f. --- src/core/timeline.dasl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl index a7805886b4..09b9478b6b 100644 --- a/src/core/timeline.dasl +++ b/src/core/timeline.dasl @@ -57,7 +57,7 @@ function new (shmpath, entries, size_stringtable) local size_entries = entries * ffi.sizeof("struct timeline_entry") local size = size_header + size_entries + size_stringtable -- Allocate one shm object with memory for all data structures - local memory = shm.map(shmpath, ffi.typeof("char["..size.."]")) + local memory = shm.create(shmpath, ffi.typeof("char["..size.."]")) local header = ffi.cast("struct timeline_header *", memory) local ring = ffi.cast("struct timeline_entry *", memory + size_header) local stringtable = ffi.cast("char*", memory + size_header + size_entries) From 9746f245817e90198d9dc854e6b88709d392d8de Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Tue, 10 May 2016 08:49:11 +0000 Subject: [PATCH 21/25] core.shm: $SNABB_SHM_ROOT overrides /var/run/snabb --- src/core/shm.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/shm.lua b/src/core/shm.lua index 77b5a93917..64f34cb5eb 100644 --- a/src/core/shm.lua +++ b/src/core/shm.lua @@ -70,7 +70,7 @@ local S = require("syscall") local const = require("syscall.linux.constants") -- Root directory where the object tree is created. -root = "/var/run/snabb" +root = os.getenv("SNABB_SHM_ROOT") or "/var/run/snabb" path = "" -- Table (address->size) of all currently mapped objects. From cb66e3469e3e8c25873b0d0f933433edb46155a1 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Tue, 10 May 2016 12:06:26 +0000 Subject: [PATCH 22/25] engine: Randomize timeline priority Sample different priorities (log levels) on different breaths. Try to always include interesting details in the log but without wrapping too quickly. --- src/core/app.lua | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index c66482ff7d..44e92cffa0 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -68,9 +68,14 @@ The engine stops the traffic processing loop.]]) -- Breath steps -local event_breath_start = define_event('trace', [[start $breath +local event_breath_start = define_event('info', [[start $breath ($freedpackets $freedbits) The engine starts an iteration of the packet-processing event loop (a -"breath".)]]) +"breath".) + +The 'freedpackets' and 'freedbits' arguments give the total number of +packets (and bits of packet data) that have been freed during +processing. This is an approximation of the total amount of data that +has been processed.]]) local event_breath_pulled = define_event('trace', [[pulled input packets The engine has "pulled" new packets into the event loop for processing.]]) @@ -352,7 +357,7 @@ function pace_breathing () end function breathe () - event_breath_start(counter.read(breaths)) + event_breath_start(counter.read(breaths), counter.read(frees), counter.read(freebits)) monotonic_now = C.get_monotonic_time() -- Restart: restart dead apps restart_dead_apps() @@ -397,6 +402,14 @@ function breathe () counter.commit() event_commit_counters() end + -- Sample events with dynamic priorities. + -- Lower priorities are enabled 1/10th as often as the one above. + local r = math.random() + if r < 0.001 then tl.priority(timeline, 'packet') + elseif r < 0.010 then tl.priority(timeline, 'app') + elseif r < 0.100 then tl.priority(timeline, 'trace') + else tl.priority(timeline, 'info') + end end function report (options) From 56d0d087bba5a9e4b35ec13e805016998fb38e1d Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Tue, 10 May 2016 12:18:38 +0000 Subject: [PATCH 23/25] engine: Revise start_breath / end_breath events end breath now reports number of packets and bits freed. --- src/core/app.lua | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/src/core/app.lua b/src/core/app.lua index 44e92cffa0..ed3e07e468 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -68,14 +68,14 @@ The engine stops the traffic processing loop.]]) -- Breath steps -local event_breath_start = define_event('info', [[start $breath ($freedpackets $freedbits) +local event_breath_start = define_event('info', [[start $breath: total $packets $bytes $etherbits The engine starts an iteration of the packet-processing event loop (a "breath".) -The 'freedpackets' and 'freedbits' arguments give the total number of -packets (and bits of packet data) that have been freed during -processing. This is an approximation of the total amount of data that -has been processed.]]) +The total count of packets, bytes, and bits (including layer-1 +ethernet overhead) that the engine has processed are included. These +can be used to track the rate of traffic processing. +]]) local event_breath_pulled = define_event('trace', [[pulled input packets The engine has "pulled" new packets into the event loop for processing.]]) @@ -83,8 +83,16 @@ The engine has "pulled" new packets into the event loop for processing.]]) local event_breath_pushed = define_event('trace', [[pushed output packets The engine has "pushed" packets one step through the processing network.]]) -local event_breath_end = define_event('trace', [[end $breath -The engine completes an iteration of the event loop (a "breath.")]]) +local event_breath_end = define_event('info', [[end $breath: processed $packets $bpp +The engine completes an iteration of the event loop (a "breath.") + +'packets' gives the number of packets that were processed during the breath. +'bpp' gives the average number of bytes per packet. + +Note: 'packets' is an internal measure of how many packets were +deallocated (freed) during processing. This does not necessarily +correspond directly to ingress or egress packets on a given interface. +]]) local event_breath_initialized = define_event('trace', [[initialized breath The engine has completed initialization for the breath: synchronized @@ -357,7 +365,10 @@ function pace_breathing () end function breathe () - event_breath_start(counter.read(breaths), counter.read(frees), counter.read(freebits)) + local freed_packets0 = counter.read(frees) + local freed_bytes0 = counter.read(freebytes) + event_breath_start(counter.read(breaths), freed_packets0, freed_bytes0, + counter.read(freebits)) monotonic_now = C.get_monotonic_time() -- Restart: restart dead apps restart_dead_apps() @@ -395,7 +406,11 @@ function breathe () event_breath_pushed() firstloop = false until not progress -- Stop after no link had new data - event_breath_end(counter.read(breaths)) + local freed + local freed_packets = counter.read(frees) - freed_packets0 + local freed_bytes = (counter.read(freebytes) - freed_bytes0) + local freed_bytes_per_packet = freed_bytes / math.max(freed_packets, 1) + event_breath_end(counter.read(breaths), freed_packets, freed_bytes_per_packet) counter.add(breaths) -- Commit counters at a reasonable frequency if counter.read(breaths) % 100 == 0 then @@ -405,10 +420,11 @@ function breathe () -- Sample events with dynamic priorities. -- Lower priorities are enabled 1/10th as often as the one above. local r = math.random() - if r < 0.001 then tl.priority(timeline, 'packet') - elseif r < 0.010 then tl.priority(timeline, 'app') - elseif r < 0.100 then tl.priority(timeline, 'trace') - else tl.priority(timeline, 'info') + if r < 0.00001 then tl.priority(timeline, 'packet') + elseif r < 0.00010 then tl.priority(timeline, 'app') + elseif r < 0.00100 then tl.priority(timeline, 'trace') + elseif r < 0.01000 then tl.priority(timeline, 'info') + else tl.priority(timeline, 'warning') end end From 9501c605069af916e3a40d459b35df3ce43c29e5 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Fri, 22 Jul 2016 08:24:28 +0000 Subject: [PATCH 24/25] core/timeline: dump() now prints first-to-last The timeline text dump now formats the log starting from the first message and continuing towards the last. The previous behavior was to print the log backwards but this was confusing and ill-considered. --- src/core/timeline.dasl | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/core/timeline.dasl b/src/core/timeline.dasl index 09b9478b6b..be0caf8a6e 100644 --- a/src/core/timeline.dasl +++ b/src/core/timeline.dasl @@ -142,18 +142,22 @@ function dump (filename, maxentries) local size = last + 1 -- Print column headers. -- Note: "-cycles" column is time delta between entries (moving backwards in time) - print(("%4s %4s %12s %-16s %s"):format("numa", "core", "-cycles", "category", "message")) - -- "Fast-forward" to the end (when following entry has earlier timestamp) - local index = 0 - while ring[index].tsc <= ring[(index+1) % size].tsc do - index = (index+1) % size + print(("%4s %4s %12s %-16s %s"):format("numa", "core", "cycles", "category", "message")) + -- Search for the first entry i.e. with lowest timestamp + local index, stamp = 0, ring[0].tsc + for i = 1, last do + if ring[i].tsc ~= 0 and ring[i].tsc < stamp then + stamp = ring[i].tsc + index = i + end end - -- "Rewind" printing messages + -- Print messages from first to last local ref_tsc = nil for i = 0, size-1 do local previndex = (index == 0) and last or index-1 if maxentries and i > maxentries then break end - local timedelta = tonumber(ring[index].tsc - ring[previndex].tsc) + if ring[index].tsc == 0 then break end -- end of log + local timedelta = (i == 0) and '' or tonumber(ring[index].tsc - ring[previndex].tsc) local msgid = ring[index].msg_id local icategory = bit.band(0xffff, bit.rshift(msgid, 40)) local imessage = bit.band(0xffff, bit.rshift(msgid, 16)) @@ -174,7 +178,7 @@ function dump (filename, maxentries) print(("%-4d %-4d %12s %-16s %s"):format( numa, core, timedelta, category, message)) ref_tsc = ring[index].tsc - index = (index == 0) and last or index-1 + index = (index == last) and 0 or index+1 end end From 56eda3d8af9317005a92a8d138356eb2a6d970f1 Mon Sep 17 00:00:00 2001 From: Luke Gorrie Date: Fri, 22 Jul 2016 08:36:36 +0000 Subject: [PATCH 25/25] core/app.lua: Fix end_breath timeline event arg Requires a math.max() on an argument that is otherwise FFI cdata. --- src/core/app.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/app.lua b/src/core/app.lua index cb71975784..18ee647495 100644 --- a/src/core/app.lua +++ b/src/core/app.lua @@ -435,7 +435,7 @@ function breathe () local freed local freed_packets = counter.read(frees) - freed_packets0 local freed_bytes = (counter.read(freebytes) - freed_bytes0) - local freed_bytes_per_packet = freed_bytes / math.max(freed_packets, 1) + local freed_bytes_per_packet = freed_bytes / math.max(tonumber(freed_packets), 1) event_breath_end(counter.read(breaths), freed_packets, freed_bytes_per_packet) counter.add(breaths) -- Commit counters at a reasonable frequency