Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Timeline with engine instrumentation #973

Open
wants to merge 29 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
82b68bc
dasm_x86: Added RDTSCP instruction to Lua-mode dynasm
lukego Mar 30, 2016
5c4dabd
core.timeline: High-resolution event log [wip]
lukego Mar 30, 2016
0acf545
core.timeline: Wrote draft documentation in core/timeline.md
lukego Apr 6, 2016
83d94d8
core/timeline.md: Fixed markdown for diagram
lukego Apr 6, 2016
051b23a
Typo fixes, and a question
sleinen Apr 6, 2016
8fb1020
Merge pull request #7 from sleinen/patch-1
lukego Apr 6, 2016
3568683
core/timeline.md: Improved timeline readme
lukego Apr 6, 2016
bc7f1b0
core/timeline.dasl: New implementation matching API [wip]
lukego Apr 6, 2016
0629dcf
timeline.md: Documented defaults for new()
lukego Apr 6, 2016
c7f4f25
core.timeline: Log CPU core ID + numa node
lukego Apr 7, 2016
2928818
core.timeline: Fix bug in arg parsing
lukego Apr 7, 2016
4182df2
core.timeline: Improve log dumps and selftest
lukego Apr 7, 2016
3047bd6
core.timeline: Implemented updated priority() API
lukego Apr 7, 2016
003f1cb
core.timeline: Fixed and improved dump()
lukego Apr 7, 2016
9b659f9
core.timeline: selftest with disabled events loop
lukego Apr 7, 2016
3214339
core/timeline.md: Added example of dump() output
lukego Apr 7, 2016
111b1bf
core/timeline.md: Add 5 cycle estimate w/ disabled
lukego Apr 7, 2016
b81cbb5
engine: Add timeline events for the breathe cycle
lukego Apr 18, 2016
6898d6a
engine: More timeline events for breathe cycle
lukego Apr 20, 2016
ed63a79
nfv: Save timeline at end of benchmark-mode
lukego Apr 20, 2016
388bbfe
Merge branch 'master' into timeline-engine
lukego May 10, 2016
7629b0f
core.timeline: Update for new core.shm API
lukego May 10, 2016
9697486
Merge branch 'lukego/timeline-engine' into optimize
lukego May 10, 2016
9746f24
core.shm: $SNABB_SHM_ROOT overrides /var/run/snabb
lukego May 10, 2016
cb66e34
engine: Randomize timeline priority
lukego May 10, 2016
56d0d08
engine: Revise start_breath / end_breath events
lukego May 10, 2016
9501c60
core/timeline: dump() now prints first-to-last
lukego Jul 22, 2016
8c461c8
Merge tag 'v2016.07' into 'timeline-detail'
lukego Jul 22, 2016
56eda3d
core/app.lua: Fix end_breath timeline event arg
lukego Jul 22, 2016
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 107 additions & 2 deletions src/core/app.lua
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,77 @@ freebits = counter.open("engine/freebits") -- Total packet bits freed (for 10G
freebytes = counter.open("engine/freebytes") -- Total packet bytes freed
configs = counter.open("engine/configs") -- Total configurations loaded

-- Events

local tl = require("core.timeline")
timeline = tl.new("engine/timeline")

local function define_event(level, message)
return tl.define(timeline, 'engine', level, message)
end

-- Sleeping

local event_sleep_Hz = define_event('trace', [[sleeping $usec for $Hz
The engine requests that the kernel suspend this process for a period of
microseconds in order to reduce CPU utilization and achieve a fixed
frequency of breaths per second (Hz).]])

local event_sleep_idle = define_event('trace', [[sleeping $usec on idle
The engine requests that the kernel suspend this process for a period
of microseconds in order to reduce CPU utilization because idleness
has been detected (a breath in which no packets were processed.)]])

local event_wakeup = define_event('trace', [[wakeup from sleep
The engine resumes operation after sleeping voluntarily.]])

-- Traffic processing start/stop

local event_traffic_start = define_event('info', [[start traffic processing
The engine starts the traffic processing loop.]])

local event_traffic_stop = define_event('info', [[stop traffic processing
The engine stops the traffic processing loop.]])

-- Breath steps

local event_breath_start = define_event('info', [[start $breath: total $packets $bytes $etherbits
The engine starts an iteration of the packet-processing event loop (a
"breath".)

The total count of packets, bytes, and bits (including layer-1
ethernet overhead) that the engine has processed are included. These
can be used to track the rate of traffic processing.
]])

local event_breath_pulled = define_event('trace', [[pulled input packets
The engine has "pulled" new packets into the event loop for processing.]])

local event_breath_pushed = define_event('trace', [[pushed output packets
The engine has "pushed" packets one step through the processing network.]])

local event_breath_end = define_event('info', [[end $breath: processed $packets $bpp
The engine completes an iteration of the event loop (a "breath.")

'packets' gives the number of packets that were processed during the breath.
'bpp' gives the average number of bytes per packet.

Note: 'packets' is an internal measure of how many packets were
deallocated (freed) during processing. This does not necessarily
correspond directly to ingress or egress packets on a given interface.
]])

local event_breath_initialized = define_event('trace', [[initialized breath
The engine has completed initialization for the breath: synchronized
the current time and handled any pending error recovery.]])

local event_ran_timers = define_event('trace', [[ran timers
The engine polled its timers and executed any that were expired.]])

local event_commit_counters = define_event('trace', [[commit counters
The engine commits the latest counter values to externally visible
shared memory.]])

-- Breathing regluation to reduce CPU usage when idle by calling usleep(3).
--
-- There are two modes available:
Expand Down Expand Up @@ -85,6 +156,7 @@ function with_restart (app, method)
else
status, result = true, method(app)
end
app.event()
shm.path = oldshm
return status, result
end
Expand Down Expand Up @@ -189,6 +261,8 @@ function apply_config_actions (actions, conf)
name, tostring(app)))
end
local zone = app.zone or getfenv(class.new)._NAME or name
local event = define_event('trace', ("called app %s (type: %s)"):format(
name, zone))
app.appname = name
app.output = {}
app.input = {}
Expand All @@ -197,6 +271,7 @@ function apply_config_actions (actions, conf)
table.insert(new_app_array, app)
app_name_to_index[name] = #new_app_array
app.zone = zone
app.event = event
end
function ops.restart (name)
ops.stop(name)
Expand Down Expand Up @@ -258,6 +333,7 @@ end

-- Call this to "run snabb switch".
function main (options)
event_traffic_start()
options = options or {}
local done = options.done
local no_timers = options.no_timers
Expand All @@ -275,11 +351,12 @@ function main (options)
monotonic_now = C.get_monotonic_time()
repeat
breathe()
if not no_timers then timer.run() end
if not no_timers then timer.run() event_ran_timers() end
if not busywait then pace_breathing() end
until done and done()
counter.commit()
if not options.no_report then report(options.report) end
event_traffic_stop()
end

local nextbreath
Expand All @@ -292,14 +369,18 @@ function pace_breathing ()
nextbreath = nextbreath or monotonic_now
local sleep = tonumber(nextbreath - monotonic_now)
if sleep > 1e-6 then
event_sleep_Hz(Hz, math.round(sleep*1e6))
C.usleep(sleep * 1e6)
monotonic_now = C.get_monotonic_time()
event_wakeup()
end
nextbreath = math.max(nextbreath + 1/Hz, monotonic_now)
else
if lastfrees == counter.read(frees) then
sleep = math.min(sleep + 1, maxsleep)
event_sleep_idle(sleep)
C.usleep(sleep)
event_wakeup()
else
sleep = math.floor(sleep/2)
end
Expand All @@ -310,10 +391,15 @@ function pace_breathing ()
end

function breathe ()
local freed_packets0 = counter.read(frees)
local freed_bytes0 = counter.read(freebytes)
event_breath_start(counter.read(breaths), freed_packets0, freed_bytes0,
counter.read(freebits))
monotonic_now = C.get_monotonic_time()
-- Restart: restart dead apps
restart_dead_apps()
-- Inhale: pull work into the app network
event_breath_initialized()
for i = 1, #app_array do
local app = app_array[i]
-- if app.pull then
Expand All @@ -324,6 +410,7 @@ function breathe ()
zone()
end
end
event_breath_pulled()
-- Exhale: push work out through the app network
local firstloop = true
repeat
Expand All @@ -342,11 +429,29 @@ function breathe ()
end
end
end
event_breath_pushed()
firstloop = false
until not progress -- Stop after no link had new data
local freed
local freed_packets = counter.read(frees) - freed_packets0
local freed_bytes = (counter.read(freebytes) - freed_bytes0)
local freed_bytes_per_packet = freed_bytes / math.max(tonumber(freed_packets), 1)
event_breath_end(counter.read(breaths), freed_packets, freed_bytes_per_packet)
counter.add(breaths)
-- Commit counters at a reasonable frequency
if counter.read(breaths) % 100 == 0 then counter.commit() end
if counter.read(breaths) % 100 == 0 then
counter.commit()
event_commit_counters()
end
-- Sample events with dynamic priorities.
-- Lower priorities are enabled 1/10th as often as the one above.
local r = math.random()
if r < 0.00001 then tl.priority(timeline, 'packet')
elseif r < 0.00010 then tl.priority(timeline, 'app')
elseif r < 0.00100 then tl.priority(timeline, 'trace')
elseif r < 0.01000 then tl.priority(timeline, 'info')
else tl.priority(timeline, 'warning')
end
end

function report (options)
Expand Down
2 changes: 1 addition & 1 deletion src/core/shm.lua
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ local S = require("syscall")
local const = require("syscall.linux.constants")

-- Root directory where the object tree is created.
root = "/var/run/snabb"
root = os.getenv("SNABB_SHM_ROOT") or "/var/run/snabb"
path = ""

-- Table (address->size) of all currently mapped objects.
Expand Down
Loading