#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
"""
This script can be used to make the output from
apache::thrift::profile_print_info() more human-readable.

It translates each executable file name and address into the corresponding
source file name, line number, and function name. By default, it also
demangles C++ symbol names.
"""

import optparse
import os
import re
import subprocess
import sys


class AddressInfo(object):
    """
    A class to store information about a particular address in an object file.

    sourceFile, sourceLine and function start out as None; they are filled in
    by translate_file_addresses() when the lookup succeeds.
    """
    def __init__(self, obj_file, address):
        self.objectFile = obj_file
        self.address = address
        self.sourceFile = None
        self.sourceLine = None
        self.function = None


# Maps object file name -> {address string -> AddressInfo}
g_addrs_by_filename = {}


def get_address(filename, address):
    """
    Retrieve an AddressInfo object for the specified object file and address.

    Keeps a global list of AddressInfo objects. Two calls to get_address()
    with the same filename and address will always return the same AddressInfo
    object.
    """
    by_address = g_addrs_by_filename.setdefault(filename, {})
    try:
        return by_address[address]
    except KeyError:
        addr_info = AddressInfo(filename, address)
        by_address[address] = addr_info
        return addr_info


def _read_addr2line_reply(proc):
    """
    Read one stripped, non-empty line of output from the addr2line process.

    Raises Exception if the line is empty, which is how EOF shows up here.
    """
    reply = proc.stdout.readline().strip()
    if not reply:
        raise Exception('unexpected EOF from addr2line')
    return reply


def translate_file_addresses(filename, addresses, options):
    """
    Use addr2line to look up information for the specified addresses.
    All of the addresses must belong to the same object file.

    filename is the object file, addresses is an iterable of AddressInfo
    objects, and options must provide a printFunctions attribute. Each
    AddressInfo has its sourceFile, sourceLine and (when
    options.printFunctions is set) function attributes filled in.

    Raises Exception if addr2line produces unexpected output, and
    subprocess.CalledProcessError if it exits with a non-zero status.
    """
    # Do nothing if we can't find the file
    if not os.path.isfile(filename):
        return

    args = ['addr2line']
    if options.printFunctions:
        args.append('-f')
    args.extend(['-e', filename])

    # universal_newlines=True gives text-mode pipes, so the same str-based
    # code works whether the interpreter's pipes are byte- or text-oriented.
    proc = subprocess.Popen(args, stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        for address in addresses:
            assert address.objectFile == filename
            proc.stdin.write(address.address + '\n')
            # We read the reply before sending the next address, so the
            # request must not sit in a write buffer.
            proc.stdin.flush()

            if options.printFunctions:
                address.function = _read_addr2line_reply(proc)

            file_and_line = _read_addr2line_reply(proc)
            idx = file_and_line.rfind(':')
            if idx < 0:
                msg = 'expected file and line number from addr2line; got %r' % \
                    (file_and_line,)
                msg += '\nfile=%r, address=%r' % (filename, address.address)
                raise Exception(msg)

            address.sourceFile = file_and_line[:idx]
            address.sourceLine = file_and_line[idx + 1:]

        # Closes stdin, drains any remaining output, and waits for exit.
        proc.communicate()
    finally:
        # If we bailed out early, don't leave addr2line running.
        if proc.poll() is None:
            proc.kill()
            proc.wait()

    retcode = proc.returncode
    if retcode != 0:
        raise subprocess.CalledProcessError(retcode, args)


def lookup_addresses(options):
    """
    Look up source file information for all of the addresses currently stored
    in the global list of AddressInfo objects.
    """
    for (obj_file, by_address) in g_addrs_by_filename.items():
        translate_file_addresses(obj_file, by_address.values(), options)


class Entry(object):
    """
    An entry in the thrift profile output.
    Contains a header line, and a backtrace.
    """
    def __init__(self, header):
        self.header = header
        self.bt = []

    def addFrame(self, filename, address):
        """
        Append a backtrace frame for the given object file and address.
        """
        # If libc was able to determine the symbols names, the filename
        # argument will be of the form <filename>(<function>+<offset>)
        # So, strip off anything after the last '('
        idx = filename.rfind('(')
        if idx >= 0:
            filename = filename[:idx]

        self.bt.append(get_address(filename, address))

    def write(self, f, options):
        """
        Write the header and the numbered backtrace frames to file object f.

        When options.printFunctions is set, each frame is followed by its
        function name, or '??' if none is known.
        """
        f.write(self.header)
        f.write('\n')
        for (n, address) in enumerate(self.bt):
            f.write(' #%-2d %s:%s\n' % (n, address.sourceFile,
                                        address.sourceLine))
            if options.printFunctions:
                if address.function:
                    f.write(' %s\n' % (address.function,))
                else:
                    f.write(' ??\n')


_VIRT_CALL_REGEX = re.compile(r'^\s*T_VIRTUAL_CALL: (\d+) calls on (.*):$')
_GEN_PROT_REGEX = re.compile(
    r'^\s*T_GENERIC_PROTOCOL: (\d+) calls to (.*) with a (.*):$')
_BT_REGEX = re.compile(r'^\s*#(\d+)\s*(.*) \[(0x[0-9A-Za-z]+)\]$')


def _parse_header(line, options):
    """
    Return the formatted entry header for an entry-header line, else None.
    """
    # Type names reported by typeid() are internal names.
    # By default, c++filt doesn't demangle internal type names.
    # (Some versions of c++filt have a "-t" option to enable this.
    # Other versions don't have this argument, but demangle type
    # names passed as an argument, but not on stdin.)
    #
    # If the output is being filtered through c++filt, prepend
    # "_Z" to the type name to make it look like an external name.
    prefix = '_Z' if options.cxxfilt else ''

    virt_call_match = _VIRT_CALL_REGEX.match(line)
    if virt_call_match:
        num_calls = int(virt_call_match.group(1))
        type_name = prefix + virt_call_match.group(2)
        return 'T_VIRTUAL_CALL: %d calls on "%s"' % (num_calls, type_name)

    gen_prot_match = _GEN_PROT_REGEX.match(line)
    if gen_prot_match:
        num_calls = int(gen_prot_match.group(1))
        type_name1 = prefix + gen_prot_match.group(2)
        type_name2 = prefix + gen_prot_match.group(3)
        return 'T_GENERIC_PROTOCOL: %d calls to "%s" with a "%s"' % \
            (num_calls, type_name1, type_name2)

    return None


def _parse_entries(in_file, options):
    """
    Parse all of the input, and return it as a list of Entry objects.

    Raises Exception on a backtrace frame that precedes any entry header,
    and on any line that is not recognized.
    """
    entries = []
    current_entry = None
    for line in in_file:
        if line == '\n' or line.startswith('Thrift virtual call info:'):
            continue

        header = _parse_header(line, options)
        if header is not None:
            current_entry = Entry(header)
            entries.append(current_entry)
            continue

        bt_match = _BT_REGEX.match(line)
        if bt_match is None:
            raise Exception('unexpected line in input: %r' % (line,))
        if current_entry is None:
            raise Exception('found backtrace frame before entry header')
        current_entry.addFrame(bt_match.group(2), bt_match.group(3))

    return entries


def process_file(in_file, out_file, options):
    """
    Read thrift profile output from the specified input file, and print
    prettier information on the output file.

    options must provide the printFunctions and cxxfilt attributes.
    """
    #
    # A naive approach would be to read the input line by line,
    # and each time we come to a filename and address, pass it to addr2line
    # and print the resulting information. Unfortunately, addr2line can be
    # quite slow, especially with large executables.
    #
    # This approach is much faster. We read in all of the input, storing
    # the addresses in each file that need to be resolved. We then call
    # addr2line just once for each file. This is much faster than calling
    # addr2line once per address.
    #
    entries = _parse_entries(in_file, options)

    # Look up all of the addresses
    lookup_addresses(options)

    # Print out the entries, now that the information has been translated
    for entry in entries:
        entry.write(out_file, options)
        out_file.write('\n')


def start_cppfilt():
    """
    Arrange for everything written to stdout to be piped through c++filt.

    Forks: the child returns from this function with its stdout connected
    to a pipe, while the parent replaces itself with c++filt reading from
    that pipe, and therefore never returns.
    """
    (read_pipe, write_pipe) = os.pipe()

    # Fork. Run c++filt in the parent process,
    # and then continue normal processing in the child.
    pid = os.fork()
    if pid == 0:
        # child
        os.dup2(write_pipe, sys.stdout.fileno())
        os.close(read_pipe)
        os.close(write_pipe)
        return
    else:
        # parent
        os.dup2(read_pipe, sys.stdin.fileno())
        os.close(read_pipe)
        os.close(write_pipe)

        cmd = ['c++filt']
        os.execvp(cmd[0], cmd)


def main(argv):
    """
    Command-line entry point; argv is the full argument vector (sys.argv).

    Reads from the single optional <file> argument, or from stdin when no
    file is given. Returns 0 on success and 1 on a usage error.
    """
    parser = optparse.OptionParser(usage='%prog [options] [<file>]')
    parser.add_option('--no-functions', help='Don\'t print function names',
                      dest='printFunctions', action='store_false',
                      default=True)
    parser.add_option('--no-demangle',
                      help='Don\'t demangle C++ symbol names',
                      dest='cxxfilt', action='store_false',
                      default=True)

    (options, args) = parser.parse_args(argv[1:])
    num_args = len(args)
    if num_args == 0:
        in_file = sys.stdin
    elif num_args == 1:
        # Use the parsed positional argument: argv[1] may be an option.
        in_file = open(args[0], 'r')
    else:
        parser.print_usage(sys.stderr)
        sys.stderr.write('trailing arguments: %s\n' % (' '.join(args[1:]),))
        return 1

    try:
        if options.cxxfilt:
            start_cppfilt()

        process_file(in_file, sys.stdout, options)
    finally:
        if in_file is not sys.stdin:
            in_file.close()
    return 0


if __name__ == '__main__':
    rc = main(sys.argv)
    sys.exit(rc)