Add hotspot analysis using perf #8

Open · wants to merge 1 commit into base: develop
harness.py: 86 changes (54 additions & 32 deletions)
@@ -17,6 +17,7 @@
from yaml import CLoader
import argparse
import subprocess
+from subprocess import PIPE
import os
import shutil
import time
@@ -33,13 +34,16 @@
import optviewer
import optdiff

-def invoke_optviewer(filelist, output_html_dir, jobs, print_progress):
+from hotness import *
+
+
+def invoke_optviewer(filelist, output_html_dir, jobs, print_progress, builds=[]):
all_remarks, file_remarks, should_display_hotness = \
optrecord.gather_results(
filelist, # filelist
1, # num jobs
print_progress) # print progress

+    print('generating opt viewers for builds', builds)
optviewer.map_remarks(all_remarks)

optviewer.generate_report(all_remarks,
@@ -50,7 +54,9 @@ def invoke_optviewer(filelist, output_html_dir, jobs, print_progress):
should_display_hotness,
100, # max hottest remarks in index
1, # number of jobs
-                              print_progress) # print progress
+                              print_progress, # print progress
+                              builds)


def invoke_optdiff(yaml_file_1, yaml_file_2, filter_only, out_yaml):
optdiff.generate_diff(
@@ -62,11 +68,12 @@ def invoke_optdiff(yaml_file_1, yaml_file_2, filter_only, out_yaml):
100000, # max remarks
out_yaml) # output yaml

-def run(config, program, reps, dry):
+def run(config, program, reps, dry, with_perf):
print('Launching program', program, 'with modes', config[program]['build'])
-    exe = config[program]['run'] + ' ' + config[program]['input']
-    os.makedirs( './results', exist_ok=True)
-    results = { program: {} }
+    perf_command = 'perf record --freq=100000 -o perf.data' if with_perf else ''
+    exe = config[program]['env'] + ' ' + perf_command + ' ' + config[program]['run'] + ' ' + config[program]['input']
+    os.makedirs('./results', exist_ok=True)
+    results = {program: {}}
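    # Illustrative composed command under assumed config values
    # (env = 'OMP_NUM_THREADS=8', run = './app', input = 'in.dat'):
    #   OMP_NUM_THREADS=8 perf record --freq=100000 -o perf.data ./app in.dat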
try:
with open('./results/results-%s.yaml'%(program), 'r') as f:
results = yaml.load(f, Loader=CLoader)
@@ -97,18 +104,21 @@ def run(config, program, reps, dry):
for i in range(start, reps):
print('path', bin_dir, 'exe',exe)
t1 = time.perf_counter()
-        p = subprocess.run( exe, capture_output=True, cwd=bin_dir, shell=True )
-        out = str(p.stdout.decode('utf-8'))
-        err = str(p.stderr.decode('utf-8'))
+        # p = subprocess.run( exe, capture_output=True, cwd=bin_dir, shell=True )
+        p = subprocess.run(exe, cwd=bin_dir, shell=True, stdout=PIPE, stderr=PIPE)
+        out = str(p.stdout.decode('utf-8', errors='ignore'))
+        err = str(p.stderr.decode('utf-8', errors='ignore'))
# out=str(p.stdout)
# err=str(p.stderr)
output = out + err
print(output)
-        #print('Out', p.stdout.decode('utf-8') )
-        #print('Err', p.stderr.decode('utf-8') )
-        with open('%s/stdout-%d.txt'%(bin_dir, i), 'w') as f:
-            f.write(p.stdout.decode('utf-8'))
-        with open('%s/stderr-%d.txt'%(bin_dir, i), 'w') as f:
-            f.write(p.stderr.decode('utf-8'))
+
+        # print('Out', p.stdout.decode('utf-8') )
+        # print('Err', p.stderr.decode('utf-8') )
+        with open('%s/stdout-%d.txt' % (bin_dir, i), 'w') as f:
+            f.write(p.stdout.decode('latin-1', errors='replace'))
+        with open('%s/stderr-%d.txt' % (bin_dir, i), 'w') as f:
+            f.write(p.stderr.decode('latin-1', errors='replace'))
+        output = ''
if p.returncode != 0:
print('ERROR running', program, 'in', mode)
sys.exit(p.returncode)
@@ -132,6 +142,15 @@ def run(config, program, reps, dry):

with open('./results/results-%s.yaml'%(program), 'w') as f:
yaml.dump( results, f )
+    # if we ran with perf, generate the hotness report
+    if with_perf:
+        hotlines = get_hot_lines_percentage(config[program]['bin'], bin_dir)
+        reports_dir = './reports/' + program
+        lines_hotness_path = os.path.join(reports_dir, '{}.lines_hotness.yaml'.format(mode))
+        print('WRITING HOTNESS OF SRC CODE LINES TO:', lines_hotness_path)
+        with open(lines_hotness_path, 'w') as f:
+            yaml.dump(hotlines, f)
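    # For illustration only (hypothetical file names and numbers), the dumped
    # YAML maps "srcfile:line" keys to hotness percentages:
    #   app.cc:1284: 12.493
    #   app.cc:990: 7.021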


def show_stats(config, program):
try:
@@ -183,10 +202,6 @@ def merge_stats_reports( program, build_dir, mode ):

with open(reports_dir + mode + '.opt.yaml', 'r') as f:
data = yaml.load_all(f, Loader=CLoader)
-    print('==== data')
-    #for d in data:
-    #    print(d)
-    input('==== end of data')

# merge stats
filenames = Path(build_dir).rglob('*.stats')
@@ -218,7 +233,6 @@ def compile_and_install(config, program, repo_dir, mode):
subprocess.run( config[program]['build'][mode], cwd=build_dir, shell=True )
except Exception as e:
print('building %s mode %s failed'%(program, mode), e)
-        input('key...')
sys.exit(1)

print('Merge stats and reports...')
@@ -232,9 +246,10 @@ def compile_and_install(config, program, repo_dir, mode):
shutil.copy( build_dir + '/' + copy, bin_dir)


-def generate_diff_reports( report_dir, builds, mode ):
+def generate_diff_reports(report_dir, builds, mode, with_perf):
out_yaml = report_dir + '%s-%s-%s.opt.yaml'%( builds[0], builds[1], mode )
output_html_dir = report_dir + 'html-%s-%s-%s'%( builds[0], builds[1], mode )
+    build_for_hotness = builds if with_perf else []

def generate_diff_yaml():
print('Creating diff remark YAML files...')
@@ -260,7 +275,8 @@ def generate_diff_html():
[out_yaml],
output_html_dir,
1,
-            True)
+            True,
+            build_for_hotness)
print('Done generating compilation report for builds %s|%s mode %s'%( builds[0], builds[1], mode ))
except:
print('Failed generating compilation report for builds %s|%s mode %s'%( builds[0], builds[1], mode ))
@@ -279,17 +295,20 @@ def generate_diff_html():
else:
generate_diff_html()

-def generate_remark_reports( config, program ):
+
+def generate_remark_reports(config, program, with_perf):
report_dir = './reports/' + program + '/'

def generate_html():
print('Creating HTML report output for build %s ...' % ( build ) )
+        build_for_hotness = [build] if with_perf else []
try:
invoke_optviewer(
[in_yaml],
output_html_dir,
1,
-            True)
+            True,
+            build_for_hotness)
print('Done generating compilation reports!')
except:
print('Failed generating compilation reports (expects build was '\
@@ -309,10 +328,11 @@ def generate_html():
    # Create reports for 2-combinations of build options.
combos = itertools.combinations( config[program]['build'], 2 )
for builds in combos:
-        generate_diff_reports( report_dir, builds, 'all' )
-        generate_diff_reports( report_dir, builds, 'analysis' )
-        generate_diff_reports( report_dir, builds, 'missed' )
-        generate_diff_reports( report_dir, builds, 'passed' )
+        generate_diff_reports( report_dir, builds, 'all', with_perf )
+        generate_diff_reports( report_dir, builds, 'analysis', with_perf )
+        generate_diff_reports( report_dir, builds, 'missed', with_perf )
+        generate_diff_reports( report_dir, builds, 'passed', with_perf )


def fetch(config, program):
# directories
@@ -345,6 +365,7 @@ def main():
parser.add_argument('-s', '--stats', dest='stats', action='store_true', help='show run statistics')
parser.add_argument('-d', '--dry-run', dest='dry', action='store_true', help='enable dry run')
parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', help='verbose printing')
+    parser.add_argument('-pc', '--perf', dest='perf', action='store_true', help='use perf')
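    # Hypothetical invocation sketch (spellings of flags not shown in this
    # hunk are assumed):
    #   python harness.py --input config.yaml --run 3 --perf --generate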
args = parser.parse_args()

with open(args.input, 'r') as f:
@@ -358,6 +379,7 @@ def main():
print('args.build', args.build)
print('args.run', args.run)
print('args.generate', args.generate)
+    print('args.perf', args.perf)

programs = []
if args.programs:
@@ -374,9 +396,9 @@ def main():
if args.build:
build( config, p )
if args.run:
-        run( config, p, args.run, args.dry )
+        run( config, p, args.run, args.dry, args.perf )
if args.generate:
-        generate_remark_reports( config, p )
+        generate_remark_reports( config, p, args.perf )
if args.stats:
show_stats( config, p)

hotness.py: 118 changes (118 additions & 0 deletions)
@@ -0,0 +1,118 @@
import subprocess
from subprocess import PIPE
import os


# Reads a perf report produced by a command such as:
#   perf report -b --sort symbol
# The report lists symbols sorted by their percentage of execution time.
# Returns a dict with symbols as keys and usage percentages as values.
def get_hot_symbols(report_path):
symbols_usage = {}
with open(report_path) as report_file:
for line in report_file:
if line[0] == '#': # skip the header
continue

if line.strip() == '': # skip empty lines
continue
# print(line)
words = line.strip().split()
percentage = words[0]
symbol = ' '.join(words[3:])

percentage = float(percentage[:-1]) # remove % and convert to float
symbols_usage[symbol] = percentage

return symbols_usage
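# Illustrative parse of one report line (column layout assumed, name hypothetical):
#   '12.34%  app  app  [.] hot_function'
# -> words[0] = '12.34%' -> 12.34; symbol = ' '.join(words[3:]) = '[.] hot_function'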


# Reads `perf annotate -P -l <symbol>` output and collects the hot lines in the
# source file: lines that account for more than 0.5% (0.005) of the function's
# execution time. Returns a dict with "srcfile:line" keys and percentage-of-time values.
def get_hotness_from_anno_file(anno_path, hotlines=None, symbol_percentage=100):
    if hotlines is None:  # avoid sharing a mutable default argument across calls
        hotlines = {}
    skip_keywords = ['Sorted summary for file', '----------------------------']
with open(anno_path) as anno_file:
for line in anno_file:
if line[0] == '#': # skip the header
continue

if line.strip() == '': # skip empty lines
continue

            if 'Percent | Source code & Disassembly of' in line:  # capture only source lines; stop before the disassembly section
break

            # skip known summary/separator lines; a plain `continue` inside a
            # keyword loop would only apply to that inner loop, hence any()
            if any(skip in line for skip in skip_keywords):
                continue

# print(line)
words = line.strip().split()
percentage = float(words[0])
srccode = ' '.join(words[1:])
line_hotness = round(percentage * symbol_percentage / 100, 3)
if srccode in hotlines:
hotlines[srccode] += line_hotness
else:
hotlines[srccode] = line_hotness

return hotlines
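# Worked example of the scaling above: a line reported at 10.0 in the annotate
# summary of a symbol that uses 20.0% of total runtime contributes
# round(10.0 * 20.0 / 100, 3) = 2.0 to that line's accumulated hotness.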


# @TODO: add cwd as a parameter; also add relative and absolute percentages
# Returns the hot lines in the source files of a symbol; keeps only lines using 0.5% or more of execution time.
def get_symbol_hotness_in_srcfiles(symbol, symbol_percentage, hotlines=None, cwd=''):
    if hotlines is None:  # avoid sharing a mutable default argument across calls
        hotlines = {}
    # create the annotation file of the symbol
    annotation_file_name = "perf-annotate.tmp"
exe = "perf annotate {} -P -l > {}".format(symbol, annotation_file_name)
print("executing command: {}".format(exe))
p = subprocess.run(exe, cwd=cwd, shell=True, stdout=PIPE, stderr=PIPE)
out = str(p.stdout.decode('utf-8', errors='ignore'))
err = str(p.stderr.decode('utf-8', errors='ignore'))
print(out, '\n\n', err)
annotation_file_name = os.path.join(cwd, annotation_file_name)
hotlines = get_hotness_from_anno_file(annotation_file_name, hotlines=hotlines, symbol_percentage=symbol_percentage)
return hotlines
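# For a hot symbol named, say, hot_function (hypothetical), the command above
# expands to:
#   perf annotate hot_function -P -l > perf-annotate.tmp
# whose summary is then folded into hotlines by get_hotness_from_anno_file.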


# generate report from perf data and return the hot symbols with their percentages
def get_hot_symbols_from_perf_data(binfile, perf_data_file='perf.data', cwd=''):
report_file_name = "perf-report.tmp"
    exe = 'perf report --no-child -d {} -i {} --percentage "relative" > {}'.format(binfile, perf_data_file, report_file_name)
print("executing command: {}".format(exe))
p = subprocess.run(exe, cwd=cwd, shell=True, stdout=PIPE, stderr=PIPE)
out = str(p.stdout.decode('utf-8', errors='ignore'))
err = str(p.stderr.decode('utf-8', errors='ignore'))
print(out, '\n\n', err)
report_file_name = os.path.join(cwd, report_file_name)
hot_symbols = get_hot_symbols(report_file_name)
return hot_symbols


def get_hot_lines_percentage(binfile, cwd):
symbols = get_hot_symbols_from_perf_data(binfile, cwd=cwd)
print(symbols)
print('\n\n\n\n\n\n\n')
hotlines = {}
for symbol in symbols:
# hotlines=get_hotness_from_anno_file('trial')
# skip symbols that are not in the main app
if '@' in symbol:
continue
symbol_percentage = symbols[symbol]
hotlines = get_symbol_hotness_in_srcfiles(symbol, symbol_percentage, hotlines=hotlines, cwd=cwd)

return hotlines
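# End-to-end sketch of the flow above: run() in harness.py leaves perf.data in
# the binary directory; get_hot_lines_percentage() converts it into a
# {'srcfile:line': percentage} dict, which harness.py dumps as
# <mode>.lines_hotness.yaml for the opt-viewer reports.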


if __name__ == "__main__":
    # example usage, commented out:
    # hotlines = get_hot_lines_percentage('lulesh2.0')
    # for key in hotlines:
    #     print("FILE:LINE {}\tPERCENTAGE:{}%".format(key, round(hotlines[key], 3)))
    pass