Add hotspot analysis using perf #8

Open · wants to merge 1 commit into base: develop
harness.py: 86 changes (54 additions & 32 deletions)
@@ -17,6 +17,7 @@
from yaml import CLoader
import argparse
import subprocess
+from subprocess import PIPE
import os
import shutil
import time
@@ -33,13 +34,16 @@
import optviewer
import optdiff

-def invoke_optviewer(filelist, output_html_dir, jobs, print_progress):
+from hotness import *
+
+
+def invoke_optviewer(filelist, output_html_dir, jobs, print_progress, builds=[]):
all_remarks, file_remarks, should_display_hotness = \
optrecord.gather_results(
filelist, # filelist
1, # num jobs
print_progress) # print progress

+    print('generating opt viewers for builds', builds)
optviewer.map_remarks(all_remarks)

optviewer.generate_report(all_remarks,
@@ -50,7 +54,9 @@ def invoke_optviewer(filelist, output_html_dir, jobs, print_progress):
should_display_hotness,
100, # max hottest remarks in index
1, # number of jobs
-                              print_progress) # print progress
+                              print_progress, # print progress
+                              builds)


def invoke_optdiff(yaml_file_1, yaml_file_2, filter_only, out_yaml):
optdiff.generate_diff(
@@ -62,11 +68,12 @@ def invoke_optdiff(yaml_file_1, yaml_file_2, filter_only, out_yaml):
100000, # max remarks
out_yaml) # output yaml

-def run(config, program, reps, dry):
+def run(config, program, reps, dry, with_perf):
print('Launching program', program, 'with modes', config[program]['build'])
-    exe = config[program]['run'] + ' ' + config[program]['input']
-    os.makedirs( './results', exist_ok=True)
-    results = { program: {} }
+    perf_command = 'perf record --freq=100000 -o perf.data' if with_perf else ''
+    exe = config[program]['env'] + ' ' + perf_command + ' ' + config[program]['run'] + ' ' + config[program]['input']
+    os.makedirs('./results', exist_ok=True)
+    results = {program: {}}
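    # Illustrative composed command under assumed config values
    # (env = 'OMP_NUM_THREADS=8', run = './app', input = 'in.dat'):
    #   OMP_NUM_THREADS=8 perf record --freq=100000 -o perf.data ./app in.dat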
try:
with open('./results/results-%s.yaml'%(program), 'r') as f:
results = yaml.load(f, Loader=CLoader)
@@ -97,18 +104,21 @@ def run(config, program, reps, dry):
for i in range(start, reps):
print('path', bin_dir, 'exe',exe)
t1 = time.perf_counter()
-        p = subprocess.run( exe, capture_output=True, cwd=bin_dir, shell=True )
-        out = str(p.stdout.decode('utf-8'))
-        err = str(p.stderr.decode('utf-8'))
+        # p = subprocess.run( exe, capture_output=True, cwd=bin_dir, shell=True )
+        p = subprocess.run(exe, cwd=bin_dir, shell=True, stdout=PIPE, stderr=PIPE)
+        out = str(p.stdout.decode('utf-8', errors='ignore'))
+        err = str(p.stderr.decode('utf-8', errors='ignore'))
# out=str(p.stdout)
# err=str(p.stderr)
output = out + err
print(output)
-        #print('Out', p.stdout.decode('utf-8') )
-        #print('Err', p.stderr.decode('utf-8') )
-        with open('%s/stdout-%d.txt'%(bin_dir, i), 'w') as f:
-            f.write(p.stdout.decode('utf-8'))
-        with open('%s/stderr-%d.txt'%(bin_dir, i), 'w') as f:
-            f.write(p.stderr.decode('utf-8'))
+
+        # print('Out', p.stdout.decode('utf-8') )
+        # print('Err', p.stderr.decode('utf-8') )
+        with open('%s/stdout-%d.txt' % (bin_dir, i), 'w') as f:
+            f.write(p.stdout.decode('latin-1', errors='replace'))
+        with open('%s/stderr-%d.txt' % (bin_dir, i), 'w') as f:
+            f.write(p.stderr.decode('latin-1', errors='replace'))
+        output = ''
if p.returncode != 0:
print('ERROR running', program, 'in', mode)
sys.exit(p.returncode)
@@ -132,6 +142,15 @@ def run(config, program, reps, dry):

with open('./results/results-%s.yaml'%(program), 'w') as f:
yaml.dump( results, f )
+    # if we ran with perf, generate the hotness report
+    if with_perf:
+        hotlines = get_hot_lines_percentage(config[program]['bin'], bin_dir)
+        reports_dir = './reports/' + program
+        lines_hotness_path = os.path.join(reports_dir, '{}.lines_hotness.yaml'.format(mode))
+        print('WRITING HOTNESS OF SRC CODE LINES TO:', lines_hotness_path)
+        with open(lines_hotness_path, 'w') as f:
+            yaml.dump(hotlines, f)
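    # For illustration only (hypothetical file names and numbers), the dumped
    # YAML maps "srcfile:line" keys to hotness percentages:
    #   app.cc:1284: 12.493
    #   app.cc:990: 7.021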


def show_stats(config, program):
try:
@@ -183,10 +202,6 @@ def merge_stats_reports( program, build_dir, mode ):

with open(reports_dir + mode + '.opt.yaml', 'r') as f:
data = yaml.load_all(f, Loader=CLoader)
-    print('==== data')
-    #for d in data:
-    #    print(d)
-    input('==== end of data')

# merge stats
filenames = Path(build_dir).rglob('*.stats')
@@ -218,7 +233,6 @@ def compile_and_install(config, program, repo_dir, mode):
subprocess.run( config[program]['build'][mode], cwd=build_dir, shell=True )
except Exception as e:
print('building %s mode %s failed'%(program, mode), e)
-        input('key...')
sys.exit(1)

print('Merge stats and reports...')
@@ -232,9 +246,10 @@ def compile_and_install(config, program, repo_dir, mode):
shutil.copy( build_dir + '/' + copy, bin_dir)


-def generate_diff_reports( report_dir, builds, mode ):
+def generate_diff_reports(report_dir, builds, mode, with_perf):
out_yaml = report_dir + '%s-%s-%s.opt.yaml'%( builds[0], builds[1], mode )
output_html_dir = report_dir + 'html-%s-%s-%s'%( builds[0], builds[1], mode )
+    build_for_hotness = builds if with_perf else []

def generate_diff_yaml():
print('Creating diff remark YAML files...')
@@ -260,7 +275,8 @@ def generate_diff_html():
[out_yaml],
output_html_dir,
1,
-            True)
+            True,
+            build_for_hotness)
print('Done generating compilation report for builds %s|%s mode %s'%( builds[0], builds[1], mode ))
except:
print('Failed generating compilation report for builds %s|%s mode %s'%( builds[0], builds[1], mode ))
@@ -279,17 +295,20 @@ def generate_diff_html():
else:
generate_diff_html()

-def generate_remark_reports( config, program ):
+
+def generate_remark_reports(config, program, with_perf):
report_dir = './reports/' + program + '/'

def generate_html():
print('Creating HTML report output for build %s ...' % ( build ) )
+        build_for_hotness = [build] if with_perf else []
try:
invoke_optviewer(
[in_yaml],
output_html_dir,
1,
-            True)
+            True,
+            build_for_hotness)
print('Done generating compilation reports!')
except:
print('Failed generating compilation reports (expects build was '\
@@ -309,10 +328,11 @@ def generate_html():
    # Create reports for 2-combinations of build options.
combos = itertools.combinations( config[program]['build'], 2 )
for builds in combos:
-        generate_diff_reports( report_dir, builds, 'all' )
-        generate_diff_reports( report_dir, builds, 'analysis' )
-        generate_diff_reports( report_dir, builds, 'missed' )
-        generate_diff_reports( report_dir, builds, 'passed' )
+        generate_diff_reports( report_dir, builds, 'all', with_perf )
+        generate_diff_reports( report_dir, builds, 'analysis', with_perf )
+        generate_diff_reports( report_dir, builds, 'missed', with_perf )
+        generate_diff_reports( report_dir, builds, 'passed', with_perf )


def fetch(config, program):
# directories
@@ -345,6 +365,7 @@ def main():
parser.add_argument('-s', '--stats', dest='stats', action='store_true', help='show run statistics')
parser.add_argument('-d', '--dry-run', dest='dry', action='store_true', help='enable dry run')
parser.add_argument('-v', '--verbose', dest='verbose', action='store_true', help='verbose printing')
+    parser.add_argument('-pc', '--perf', dest='perf', action='store_true', help='use perf')
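    # Hypothetical invocation sketch (spellings of flags not shown in this
    # hunk are assumed):
    #   python harness.py --input config.yaml --run 3 --perf --generate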
args = parser.parse_args()

with open(args.input, 'r') as f:
@@ -358,6 +379,7 @@ def main():
print('args.build', args.build)
print('args.run', args.run)
print('args.generate', args.generate)
+    print('args.perf', args.perf)

programs = []
if args.programs:
@@ -374,9 +396,9 @@ def main():
if args.build:
build( config, p )
if args.run:
-        run( config, p, args.run, args.dry )
+        run( config, p, args.run, args.dry, args.perf )
if args.generate:
-        generate_remark_reports( config, p )
+        generate_remark_reports( config, p, args.perf )
if args.stats:
show_stats( config, p)

hotness.py: 118 changes (118 additions & 0 deletions)
@@ -0,0 +1,118 @@
import subprocess
from subprocess import PIPE
import os


# Reads a perf report produced by a command such as:
#   perf report -b --sort symbol
# The report lists symbols sorted by their percentage of execution time.
# Returns a dict with symbols as keys and usage percentages as values.
def get_hot_symbols(report_path):
symbols_usage = {}
with open(report_path) as report_file:
for line in report_file:
if line[0] == '#': # skip the header
continue

if line.strip() == '': # skip empty lines
continue
# print(line)
words = line.strip().split()
percentage = words[0]
symbol = ' '.join(words[3:])

percentage = float(percentage[:-1]) # remove % and convert to float
symbols_usage[symbol] = percentage

return symbols_usage
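# Illustrative parse of one report line (column layout assumed, name hypothetical):
#   '12.34%  app  app  [.] hot_function'
# -> words[0] = '12.34%' -> 12.34; symbol = ' '.join(words[3:]) = '[.] hot_function'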


# Reads `perf annotate -P -l <symbol>` output and collects the hot lines in the
# source file: lines that account for more than 0.5% (0.005) of the function's
# execution time. Returns a dict with "srcfile:line" keys and percentage-of-time values.
def get_hotness_from_anno_file(anno_path, hotlines=None, symbol_percentage=100):
    if hotlines is None:  # avoid sharing a mutable default argument across calls
        hotlines = {}
    skip_keywords = ['Sorted summary for file', '----------------------------']
with open(anno_path) as anno_file:
for line in anno_file:
if line[0] == '#': # skip the header
continue

if line.strip() == '': # skip empty lines
continue

            if 'Percent | Source code & Disassembly of' in line:  # capture only source lines; stop before the disassembly section
break

            # skip known summary/separator lines; a plain `continue` inside a
            # keyword loop would only apply to that inner loop, hence any()
            if any(skip in line for skip in skip_keywords):
                continue

# print(line)
words = line.strip().split()
percentage = float(words[0])
srccode = ' '.join(words[1:])
line_hotness = round(percentage * symbol_percentage / 100, 3)
if srccode in hotlines:
hotlines[srccode] += line_hotness
else:
hotlines[srccode] = line_hotness

return hotlines
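# Worked example of the scaling above: a line reported at 10.0 in the annotate
# summary of a symbol that uses 20.0% of total runtime contributes
# round(10.0 * 20.0 / 100, 3) = 2.0 to that line's accumulated hotness.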


# @TODO: add cwd as a parameter; also add relative and absolute percentages
# Returns the hot lines in the source files of a symbol; keeps only lines using 0.5% or more of execution time.
def get_symbol_hotness_in_srcfiles(symbol, symbol_percentage, hotlines=None, cwd=''):
    if hotlines is None:  # avoid sharing a mutable default argument across calls
        hotlines = {}
    # create the annotation file of the symbol
    annotation_file_name = "perf-annotate.tmp"
exe = "perf annotate {} -P -l > {}".format(symbol, annotation_file_name)
print("executing command: {}".format(exe))
p = subprocess.run(exe, cwd=cwd, shell=True, stdout=PIPE, stderr=PIPE)
out = str(p.stdout.decode('utf-8', errors='ignore'))
err = str(p.stderr.decode('utf-8', errors='ignore'))
print(out, '\n\n', err)
annotation_file_name = os.path.join(cwd, annotation_file_name)
hotlines = get_hotness_from_anno_file(annotation_file_name, hotlines=hotlines, symbol_percentage=symbol_percentage)
return hotlines
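# For a hot symbol named, say, hot_function (hypothetical), the command above
# expands to:
#   perf annotate hot_function -P -l > perf-annotate.tmp
# whose summary is then folded into hotlines by get_hotness_from_anno_file.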


# generate report from perf data and return the hot symbols with their percentages
def get_hot_symbols_from_perf_data(binfile, perf_data_file='perf.data', cwd=''):
report_file_name = "perf-report.tmp"
    exe = 'perf report --no-child -d {} -i {} --percentage "relative" > {}'.format(binfile, perf_data_file, report_file_name)
print("executing command: {}".format(exe))
p = subprocess.run(exe, cwd=cwd, shell=True, stdout=PIPE, stderr=PIPE)
out = str(p.stdout.decode('utf-8', errors='ignore'))
err = str(p.stderr.decode('utf-8', errors='ignore'))
print(out, '\n\n', err)
report_file_name = os.path.join(cwd, report_file_name)
hot_symbols = get_hot_symbols(report_file_name)
return hot_symbols


def get_hot_lines_percentage(binfile, cwd):
symbols = get_hot_symbols_from_perf_data(binfile, cwd=cwd)
print(symbols)
print('\n\n\n\n\n\n\n')
hotlines = {}
for symbol in symbols:
# hotlines=get_hotness_from_anno_file('trial')
# skip symbols that are not in the main app
if '@' in symbol:
continue
symbol_percentage = symbols[symbol]
hotlines = get_symbol_hotness_in_srcfiles(symbol, symbol_percentage, hotlines=hotlines, cwd=cwd)

return hotlines
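# End-to-end sketch of the flow above: run() in harness.py leaves perf.data in
# the binary directory; get_hot_lines_percentage() converts it into a
# {'srcfile:line': percentage} dict, which harness.py dumps as
# <mode>.lines_hotness.yaml for the opt-viewer reports.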


if __name__ == "__main__":
    # example usage, commented out:
    # hotlines = get_hot_lines_percentage('lulesh2.0')
    # for key in hotlines:
    #     print("FILE:LINE {}\tPERCENTAGE:{}%".format(key, round(hotlines[key], 3)))
    pass