-
Notifications
You must be signed in to change notification settings - Fork 1
/
dstat_plot.rb
executable file
·290 lines (241 loc) · 9.63 KB
/
dstat_plot.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
#! /usr/bin/ruby
require 'gnuplot'
require 'csv'
require 'optparse'
'''
dstat_plot
plots csv data generated by dstat
'''
$verbose = false
Y_DEFAULT = 105.0
def plot(dataset_container, category, field, dry, filename)
Gnuplot.open do |gp|
Gnuplot::Plot.new(gp) do |plot|
plot.title dataset_container[:plot_title].gsub('_', '\\\\\\\\_')
plot.xlabel 'Time in seconds'
plot.ylabel "#{category}: #{field}"
plot.yrange "[0:#{dataset_container[:y_max] * 1.05}]"
plot.set 'autoscale' if dataset_container[:autoscale]
plot.key 'out vert right top'
unless dry
format = filename.split('.')[-1]
plot.terminal format + ' size 1600,800 enhanced font "Helvetica,11"'
plot.output filename
puts "Saving plot to '#{filename}'"
end
plot.data = dataset_container[:datasets]
end
end
end
def generate_filename(output, column, category, field, target_dir)
generated_filename = column ? "dstat-column#{column}.png" : "#{category}-#{field}.png".sub("/", "_")
if output
File.directory?(output) ? File.join(output, generated_filename) : output
else
File.join(target_dir, generated_filename)
end
end
# Calculate the average of groups of values from data
# Params:
# +data:: Array containing the data
# +slice_size:: number of values each group of data should contain
def average(data, slice_size)
reduced_data = []
data.each_slice(slice_size) do |slice|
reduced_data.push(slice.reduce(:+) / slice.size)
end
reduced_data
end
# Preprocesses the data contained in all datasets in the dataset_container
# Groups of values are averaged with respect to timecode and actual data
# Params:
# +dataset_container:: Hash that holds the datasets and further information
# +slice_size:: size of the group that averages are calculated from
def data_preprocessing(dataset_container, slice_size)
dataset_container[:datasets].each do |dataset|
timecode = dataset.data[0]
reduced_timecode = average(timecode, slice_size)
values = dataset.data[1].map { |value| value.to_f }
reduced_values = average(values, slice_size)
dataset.data = [reduced_timecode, reduced_values]
end
end
# Create the GnuplotDataSet that is going to be printed.
# Params:
# +timecode:: Array containing the timestamps
# +values:: Array containing the actual values
# +no_plot_key:: boolean to de-/activate plotkey
# +smooth:: nil or smoothing algorithm
# +file:: file
def create_gnuplot_dataset(timecode, values, no_plot_key, smooth, file)
Gnuplot::DataSet.new([timecode, values]) do |gp_dataset|
gp_dataset.with = "lines"
gp_dataset.title = (File.basename file).gsub('_', '\\_')
gp_dataset.notitle if no_plot_key
gp_dataset.smooth = smooth unless smooth.nil?
end
end
def create_plot_title(prefix, smooth, inversion, csv_header)
plot_title = "#{prefix} over time"
plot_title << " (smoothing: #{smooth})" if smooth
if csv_header[2].index("Host:")
plot_title << '\n' + "(Host: #{csv_header[2][1]} User: #{csv_header[2][6]} Date: #{csv_header[3].last})"
end
plot_title << '\n(inverted)' if inversion
plot_title
end
def index_valid?(parameter, name, allowed)
if parameter.nil?
puts "'#{parameter}' is not a valid parameter for '#{name}'."
puts "Allowed #{name}s: #{allowed.inspect}"
exit 0
else
true
end
end
def translate_to_column(category, field, csv)
category_index = csv[5].index category
index_valid?(category_index, "category", csv[5].compact)
field_index = csv[6].drop(category_index).index field
index_valid?(field_index, "field", csv[6].compact)
if $verbose then puts "'#{category}-#{field}' was translated to #{category_index + field_index}." end
column = category_index + field_index
end
# returns the values from a csv file
def read_data_from_csv(files, category, field, column, no_plot_key, y_max, inversion, title, smooth)
plot_title = nil
datasets = []
autoscale = false
overall_max = y_max.nil? ? Y_DEFAULT : y_max
files.each do |file|
csv = CSV.read(file)
prefix = column ? "dstat-column #{column}" : "#{category}-#{field}"
if $verbose then puts "Reading from csv to get #{prefix}." end
if plot_title.nil? # this only needs to be done for the first file
plot_title = title ? title : create_plot_title(prefix, smooth, inversion != 0.0, csv[0..6])
end
if csv[2].index 'Host:'
csv = csv.drop(7)
end
begin
csv = csv.transpose
rescue IndexError => e
puts 'ERROR: It appears that your csv file is malformed. Check for incomplete lines, empty lines etc.'
puts e.backtrace[0] + e.message
exit
end
timecode = csv[0].map { |timestamp| timestamp.to_f - csv[0].first.to_f }
values = csv[column]
if inversion != 0.0
values.map! { |value| (value.to_f - inversion).abs }
overall_max = inversion
end
if y_max.nil?
local_maximum = values.max { |a, b| a.to_f <=> b.to_f }.to_f
if local_maximum > overall_max then overall_max = local_maximum end
end
dataset = create_gnuplot_dataset(timecode, values, no_plot_key, smooth, file)
datasets.push dataset
end
if $verbose then puts "datasets: #{datasets.count} \nplot_title: #{plot_title} \ny_max: #{y_max} \nautoscale: #{autoscale}" end
{ datasets: datasets, plot_title: plot_title, y_max: overall_max, autoscale: autoscale }
end
def read_options_and_arguments
opts = {} # Hash that holds all the options
optparse = OptionParser.new do |parser|
# banner that is displayed at the top
parser.banner = "Usage: \b
dstat_plot.rb [options] -c CATEGORY -f FIELD [directory | file1 file2 ...] or \b
dstat_plot.rb [options] -l COLUMN [directory | file1 file2 ...]\n\n"
### options and what they do
parser.on('-v', '--verbose', 'Output more information') do
$verbose = true
end
opts[:inversion] = 0.0
parser.on('-i', '--invert [VALUE]', Float, 'Invert the graph such that inverted(x) = VALUE - f(x),', 'default is 100.') do |value|
opts[:inversion] = value.nil? ? 100.0 : value
end
parser.on('-n', '--no-key', 'No plot key is printed.') do
opts[:no_plot_key] = true
end
parser.on('-d', '--dry', 'Dry run. Plot is not saved to file but instead displayed with gnuplot.') do
opts[:dry] = true
end
parser.on('-o', '--output FILE|DIR', 'File or Directory that plot should be saved to. ' \
'If a directory is given', 'the filename will be generated. Default is csv file directory.') do |path|
opts[:output] = path
end
parser.on('-y', '--y-range RANGE', Float, 'Sets the y-axis range. Default is 105. ' \
'If a value exceeds this range,', '"autoscale" is enabled.') do |range|
opts[:y_max] = range
end
parser.on('-t', '--title TITLE', 'Override the default title of the plot.') do |title|
opts[:title] = title
end
parser.on('-s', '--smoothing ALGORITHM', 'Smoothes the graph using the given algorithm.') do |algorithm|
algorithms = %w(unique frequency cumulative cnormal kdensity unwrap csplines acsplines mcsplines bezier sbezier)
if algorithms.index(algorithm)
opts[:smooth] = algorithm
else
puts "#{algorithm} is not a valid option as an algorithm."
exit
end
end
parser.on('-a', '--average-over SLICE_SIZE', Integer, 'Calculates the everage for slice_size large groups of values.', "\n") do |slice_size|
opts[:slice_size] = slice_size
end
parser.on('-c', '--category CATEGORY', 'Select the category.') do |category|
opts[:category] = category
end
parser.on('-f', '--field FIELD' , 'Select the field.') do |field|
opts[:field] = field
end
parser.on('-l', '--column COLUMN', 'Select the desired column directly.', "\n") do |column|
unless opts[:category] && opts[:field] # -c and -f override -l
opts[:column] = column.to_i
end
end
# This displays the help screen
parser.on_tail('-h', '--help', 'Display this screen.' ) do
puts parser
exit
end
end
# there are two forms of the parse method. 'parse'
# simply parses ARGV, while 'parse!' parses ARGV
# and removes all options and parameters found. What's
# left is the list of files
optparse.parse!
if $verbose then puts "opts: #{opts.inspect}" end
if opts[:category].nil? || opts[:field].nil?
if opts[:column].nil?
puts "[Error] (-c CATEGORY and -f FIELD) or (-l COLUMN) are mandatory parameters.\n\n #{optparse}"
exit
end
end
# if ARGV is empty at this point no directory or file(s) is specified
# and the current working directory is used
if ARGV.empty? then ARGV.push '.' end
files = []
if File.directory?(ARGV.last)
opts[:target_dir] = ARGV.last.chomp('/') # cuts of "/" from the end if present
files = Dir.glob "#{opts[:target_dir]}/*.csv"
files = files.sort
else
opts[:target_dir] = File.dirname ARGV.first
ARGV.each { |filename| files.push filename }
end
puts "Plotting data from #{files.count} file(s)."
opts[:files] = files
if $verbose then puts "files: #{files.count} #{files.inspect}" end
# opts = { :inversion, :no_plot_key, :dry, :output, :y_max, :title, :category, :field, :column, :target_dir, :files }
opts
end
if __FILE__ == $0
opts = read_options_and_arguments
dataset_container = read_data_from_csv(opts[:files],opts[:category], opts[:field], opts[:column],
opts[:no_plot_key], opts[:y_max], opts[:inversion], opts[:title], opts[:smooth])
data_preprocessing(dataset_container, opts[:slice_size]) unless opts[:slice_size].nil?
filename = generate_filename(opts[:output], opts[:column], opts[:category], opts[:field], opts[:target_dir])
plot(dataset_container, opts[:category], opts[:field], opts[:dry], filename)
end