#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Copyright Grégory Soutadé

# This file is part of SOAdvancedDissector

# SOAdvancedDissector is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# SOAdvancedDissector is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SOAdvancedDissector.  If not, see <http://www.gnu.org/licenses/>.
#

import re
import os
import shutil
import struct
import sys
import subprocess
import argparse

from objects import *
from cppprototypeparser import CPPPrototypeParser
from display import ProgressionDisplay

# NOTE(review): this file was recovered from a copy whose line breaks and
# every "<...>" span had been stripped.  Regex group names were restored from
# the match.group(...) calls below; 'num' and 'ndx' are never read, so their
# names are a guess.

#
# Regexp for readelf -sW|c++filt
#
#      6: 006f25d0    20 OBJECT  WEAK   DEFAULT   20 vtable for dpdoc::Annot
num_re = r'(?P<num>[0-9]+:)'
address_re = r'(?P<address>[0-9a-f]+)'
size_re = r'(?P<size>[0-9]+)'
ustr_re = r'(?P<{}>[A-Z]+)'
type_re = ustr_re.format('type')
link_re = ustr_re.format('link')
visibility_re = ustr_re.format('visibility')
ndx_re = r'(?P<ndx>[0-9]+)'
name_re = r'(?P<name>.*)'
readelf = r'[ ]+{}[ ]+{}[ ]+{}[ ]+{}[ ]+{}[ ]+{}[ ]+{}[ ]+{}'
readelf = readelf.format(num_re, address_re, size_re, type_re, link_re,
                         visibility_re, ndx_re, name_re)
readelf_re = re.compile(readelf)

#
# Regexp for readelf --sections|c++filt
#
#  [ 0]                   NULL            00000000 000000 000000 00      0   0  0
#  [ 1] .interp           PROGBITS        00000154 000154 000019 00   A  0   0  1
num_re = r'(?P<num>\[[ ]*[0-9]+\])'
name_re = r'(?P<name>.+)'
type_re = ustr_re.format('type')
address_re = r'(?P<address>[0-9a-f]+)'
offset_re = r'(?P<offset>[0-9a-f]+)'
size_re = r'(?P<size>[0-9a-f]+)'
sections = r'[ ]+{}[ ]+{}[ ]+{}[ ]+{}[ ]+{}[ ]+{}[ ]+.*'
sections = sections.format(num_re, name_re, type_re, address_re, offset_re,
                           size_re)
sections_re = re.compile(sections)

#
# Regexp for vtable-dumper --demangle|c++filt
#
# 0     0000000000000000
# 4     006e9fd0 (& typeinfo for zip::EditableStream)
# Inherit from dputils::GuardedStream
# Inherit from dpio::StreamClient
# 8     002a793d zip::EditableStream::~EditableStream()
dynvtable_idx_re = re.compile(
    r'(?P<index>[0-9]+)[ ]+(?P<vtable_index>[-]?0[x]?[0-9a-f]+)')
dynvtable_entry_re = re.compile(
    r'(?P<index>[0-9]+)[ ]+(?P<address>[0-9a-f]+) (?P<name>.*)')
dynvtable_inherit_re = re.compile(r'Inherit from (?P<inherit>.*)')

#
# Global variables
#
global_namespace = Namespace('global')
# Root namespaces, plus 'global' for symbols without namespace
namespaces = {'global': global_namespace}
matched_lines = []    # Matched lines from readelf symbols output
sections_lines = []   # Matched lines from readelf sections output
# Contains tuple (section_addr_start, section_addr_end, section_offset)
sections_addr = []
address_size = 4      # Target address size in bytes (32 bits by default)
classes_with_inheritance = []
namespace_dependencies = {}
display = ProgressionDisplay()

# struct formats used to read one vtable slot, keyed by address size.
# NOTE(review): the '<' (little-endian) prefix survived in the damaged copy,
# the type letters did not; unsigned is implied by the hex() based
# "negative address" test the original used.
_VTABLE_ENTRY_FORMATS = {4: '<I', 8: '<Q'}

# Lazy address -> symbol index used by findObjectFromAddr()
_address_index = {}
_address_index_size = 0


def line_count(filename):
    """Do 'wc -l <filename>'

    Only used to size progress bars, so any failure degrades to 0.

    Returns
    -------
    int
        Line count of filename, 0 if it cannot be computed
    """
    try:
        return int(subprocess.check_output(['wc', '-l', filename]).split()[0])
    except (OSError, subprocess.SubprocessError, ValueError, IndexError):
        return 0


def findBinOffset(addr):
    """Find offset into binary file from target address

    Sections must have been extracted (see parseSectionFile)

    Parameters
    ----------
    addr : int
        Target address

    Returns
    -------
    int
        Offset or None if not found
    """
    for (start, end, offset) in sections_addr:
        # end is start + size : it is the first address *after* the section
        if start <= addr < end:
            return (addr - start) + offset
    return None


def funcnameFromProtype(fullname):
    """Return function name (name + parameters) from prototype (includes
    namespaces + base class)

    Parameters
    ----------
    fullname : str
        Full function name
        (package::NCXStreamReceiver::totalLengthReady(unsigned int))

    Returns
    -------
    tuple
        (function name + parameters, namespaces joined with '::')
    """
    if fullname.startswith('typeinfo'):
        return ('typeinfo()', '')

    parser = CPPPrototypeParser(fullname)
    return (parser.funcname + parser.fullparameters,
            '::'.join(parser.namespaces))


def findObjectInCache(name):
    """Find class object in namespaces

    Parameters
    ----------
    name : str
        Full class name (package::NCXStreamReceiver)

    Returns
    -------
    obj
        Found class or None
    """
    parser = CPPPrototypeParser(name)
    if parser.is_function:
        return global_namespace.child(parser.funcname)
    if not parser.namespaces:
        return global_namespace.child(parser.classname)
    if parser.namespaces[0] not in namespaces:
        return None
    namespace = namespaces[parser.namespaces[0]]
    # Don't directly use find on root to avoid duplicate name in sub namespaces
    # eg : ns0::ns1::ns2::ns0::ns3::func
    for targetNamespace in parser.namespaces[1:]:
        namespace = namespace.find(targetNamespace)
        if not namespace:
            return None
    return namespace.find(parser.classname)


def findObjectFromAddr(addr):
    """Find object from address (in readelf output)

    The first symbol seen for an address wins, as with a linear scan of
    matched_lines.  The index is rebuilt whenever matched_lines has changed
    size since the previous call.

    Parameters
    ----------
    addr : int
        Object address

    Returns
    -------
    obj
        Matched readelf line (re match object) or None
    """
    global _address_index_size

    if _address_index_size != len(matched_lines):
        _address_index.clear()
        for match in matched_lines:
            _address_index.setdefault(int(match.group('address'), 16), match)
        _address_index_size = len(matched_lines)
    return _address_index.get(addr)


def createClass(fullname):
    """Create class object and register it into its namespace

    Intermediate namespaces are created when they don't exist yet.

    Parameters
    ----------
    fullname : str
        Full class name (package::NCXStreamReceiver)

    Returns
    -------
    obj
        Created class
    """
    parser = CPPPrototypeParser(fullname)
    class_ = Class(parser.classname, '::'.join(parser.namespaces))
    if not parser.namespaces:
        global_namespace.addChild(class_)
        return class_

    rootName = parser.namespaces[0]
    if rootName not in namespaces:
        namespaces[rootName] = Namespace(rootName)
    lastNamespace = namespaces[rootName]
    for name in parser.namespaces[1:]:
        newNamespace = lastNamespace.child(name)
        if not newNamespace:
            newNamespace = Namespace(name)
            lastNamespace.addChild(newNamespace)
        lastNamespace = newNamespace
    lastNamespace.addChild(class_)
    return class_


def parseDynamicVtable(filename):
    """Parse dynamic vtable (vtable-dumper) file and fix static vtable

    Parameters
    ----------
    filename : str
        Filename to parse
    """
    display.setTarget(' * Parse dynamic vtable', line_count(filename), 10)
    curObj = None
    with open(filename, 'r') as fd:
        for line in fd:
            display.progress(1)
            # Empty line -> end of current class
            line = line.strip()
            if not line:
                curObj = None
                continue
            if curObj is None:
                # New vtable.  Unknown classes leave curObj to None, so their
                # entries are skipped until the next 'Vtable for' line.
                if not line.startswith('Vtable for '):
                    continue
                objname = line[len('Vtable for '):]
                curObj = findObjectInCache(objname)
                continue

            # First, try object vtable entry
            entry = dynvtable_entry_re.match(line)
            if entry:
                idx = int(entry.group('index'))
                if entry.group('name') == '__cxa_pure_virtual':
                    func = Function('virtfunc{}()'.format(idx), 0, True, True)
                else:
                    funcaddr = int(entry.group('address'), 16)
                    symbol = findObjectFromAddr(funcaddr)
                    funcname = 'unknown_virtfunc{}()'.format(idx)
                    funcnamespaces = ''
                    if symbol:
                        (funcname, funcnamespaces) = \
                            funcnameFromProtype(symbol.group('name'))
                    else:
                        sys.stderr.write(
                            'Error dynvtable0, no match for {}\n'.format(
                                hex(funcaddr)))
                    func = Function(funcname, funcaddr, virtual=True,
                                    namespace=funcnamespaces)
                # vtable-dumper indexes are byte offsets
                curObj.updateVirtualFunction(idx // address_size, func)
                continue

            # Index vtable entry
            entry = dynvtable_idx_re.match(line)
            if entry:
                funcaddr = int(entry.group('vtable_index'), 16)
                funcname = 'vtable_index{}'.format(-funcaddr)
                func = Function(funcname, funcaddr, True)
                curObj.updateVirtualFunction(
                    int(entry.group('index')) // address_size, func)
                continue

            # Inherit entry
            entry = dynvtable_inherit_re.match(line)
            if entry:
                basename = entry.group('inherit')
                base = findObjectInCache(basename)
                if not base:
                    base = createClass(basename)
                curObj.addBaseClass(base)
                classes_with_inheritance.append(curObj)
                continue

            sys.stderr.write('Error dynvtable, no match for {}\n'.format(line))
            sys.stderr.flush()
    display.finish()


def fixupInheritance():
    """Fix inheritance : each class reports implemented pure virtual
    function into its base(s)
    """
    global classes_with_inheritance

    display.setTarget(' * Fixup inheritance', len(classes_with_inheritance))
    for class_ in classes_with_inheritance:
        display.progress(1)
        class_.fixupInheritance()
    display.finish()
    # Release memory.  An empty list (instead of None) keeps a later call to
    # parseDynamicVtable()/fixupInheritance() valid.
    classes_with_inheritance = []


def _isNegativeAddress(value):
    """Tell if an unsigned vtable slot holds a (small) negative offset,
    ie its top nibble is 0xf for the current address size."""
    return (value >> (address_size * 8 - 4)) == 0xf


def parseStaticVtable(binfile):
    """Parse vtable objects into binary file and create static vtable entries

    Parameters
    ----------
    binfile : str
        Filename of binary file to inspect
    """
    display.setTarget(' * Parse static vtable', len(matched_lines), 10)
    entry_format = _VTABLE_ENTRY_FORMATS.get(address_size)
    with open(binfile, 'rb') as fd:
        for match in matched_lines:
            display.progress(1)
            name = match.group('name')
            if not name.startswith('vtable for'):
                continue
            address = int(match.group('address'), 16)
            # vtable for mtext::cts::ListOfGlyphRunsCTS
            classname = name[len('vtable for '):]
            class_ = findObjectInCache(classname)
            if class_ is None:
                class_ = createClass(classname)

            binaddr = findBinOffset(address)
            if binaddr is None:
                # Address is outside every known section : nothing to read
                sys.stderr.write('No section found for {} at {}\n'.format(
                    name, hex(address)))
                continue
            nb_funcs = int(match.group('size')) // address_size
            fd.seek(binaddr)
            for i in range(nb_funcs):
                funcaddr = 0
                if entry_format:
                    raw = fd.read(address_size)
                    if len(raw) != address_size:
                        sys.stderr.write(
                            'Truncated vtable for {}\n'.format(classname))
                        break
                    funcaddr, = struct.unpack(entry_format, raw)

                # NOTE(review): the original condition was lost (only the
                # '-> pure virtual' comment tail survived).  Reconstructed as
                # "null slot after the first entry"; check against upstream.
                if funcaddr == 0 and i != 0:
                    # NULL function -> pure virtual
                    func = Function('virtfunc{}()'.format(i), 0, True, True)
                else:
                    funcname = ''
                    funcnamespaces = ''
                    if funcaddr == 0 or _isNegativeAddress(funcaddr):
                        # Negative address
                        funcname = 'vtable_index{}'.format(-funcaddr)
                    else:
                        symbol = findObjectFromAddr(funcaddr)
                        if not symbol:
                            sys.stderr.write('No func found at : {}\n'.format(
                                hex(funcaddr)))
                            funcname = 'unknown_virtfunc{}()'.format(i)
                        else:
                            try:
                                (funcname, funcnamespaces) = \
                                    funcnameFromProtype(symbol.group('name'))
                            except Exception:
                                # Best effort : keep a placeholder entry so
                                # following slots stay at the right index
                                sys.stderr.write('FFP except : {}\n'.format(
                                    symbol.group('name')))
                                funcname = 'unknown_virtfunc{}()'.format(i)
                    func = Function(funcname, funcaddr, virtual=True,
                                    namespace=funcnamespaces)
                try:
                    class_.addVirtualFunction(func)
                except Exception:
                    print(match.group('name'))
                    print(class_)
                    raise
    display.finish()


def parseTypeinfo():
    """Parse typeinfo objects in matched_lines and create classes
    """
    prefix = 'typeinfo for '
    typeinfos = []
    for match in matched_lines:
        name = match.group('name')
        # "typeinfo for css::MediaParser"
        if not name.startswith('typeinfo for'):
            continue
        typeinfos.append(name[len(prefix):])

    # Sort array before creating class in order to have the right
    # class hierarchy
    for classname in sorted(typeinfos):
        createClass(classname)


def parseSymbolFile(filename):
    """Parse readelf symbols output file and fill matched_lines

    Also updates the global address_size from the width of the first
    matched address (2 hex digits per byte).

    Parameters
    ----------
    filename : str
        Filename to parse
    """
    global address_size

    display.setTarget(' * Parse symbols file', line_count(filename), 10)
    with open(filename, 'r') as fd:
        for line in fd:
            display.progress(1)
            line = line.rstrip()
            if not line:
                continue
            match = readelf_re.match(line)
            if not match:
                continue
            if match.group('type') not in ('FUNC', 'OBJECT'):
                continue
            if match.group('link') not in ('GLOBAL', 'WEAK'):
                continue
            if match.group('visibility') != 'DEFAULT':
                continue
            matched_lines.append(match)
    if matched_lines:
        address_size = len(matched_lines[0].group('address')) // 2
    display.finish()


def parseSectionFile(filename):
    """Parse readelf sections output file and fill sections_lines and
    sections_addr

    Parameters
    ----------
    filename : str
        Filename to parse
    """
    # We assume there is about 20 sections
    display.setTarget(' * Parse sections file (1/2)', line_count(filename))
    with open(filename, 'r') as fd:
        for line in fd:
            display.progress(1)
            line = line.rstrip()
            if not line:
                continue
            match = sections_re.match(line)
            if not match:
                continue
            sections_lines.append(match)
    display.finish()

    display.setTarget(' * Parse sections file (2/2)', len(sections_lines))
    for match in sections_lines:
        display.progress(1)
        start = int(match.group('address'), 16)
        size = int(match.group('size'), 16)
        offset = int(match.group('offset'), 16)
        sections_addr.append((start, start + size, offset))
    display.finish()


def addAllMembers():
    """Add all other members that have not been computed

    NOTE(review): block nesting was reconstructed from a copy without
    indentation; the position of the class lookups relative to the
    FUNC/OBJECT branches should be checked against upstream.
    """
    thunk_prefix = 'non-virtual thunk to '

    display.setTarget(' * Add all members', len(matched_lines), 10)
    for match in matched_lines:
        display.progress(1)
        virtual = False
        funcaddr = int(match.group('address'), 16)
        name = match.group('name')
        if name.startswith(('typeinfo', 'vtable')):
            continue
        if name.startswith(thunk_prefix):
            name = name[len(thunk_prefix):]
            virtual = True

        parser = CPPPrototypeParser(name)
        class_ = None
        obj = None
        funcname = ''
        classname = ''
        if match.group('type') == 'FUNC':
            classname = parser.fullclassname
            # C functions doesn't have () in their name
            if '(' not in name:
                obj = Function(name + '()', funcaddr)
                global_namespace.addChild(obj)
                continue
            (funcname, funcnamespaces) = funcnameFromProtype(name)
            obj = Function(funcname, funcaddr, virtual=virtual,
                           namespace=funcnamespaces)
            # No classname : add into global namespace
            if not classname:
                global_namespace.addChild(obj)
                continue
            class_ = findObjectInCache(classname)
        else:
            # Object
            if parser.funcname:
                obj = Attribute(parser.funcname, funcaddr)
                classname = parser.classname
            elif parser.classname:
                obj = Attribute(parser.classname, funcaddr)
                # No namespaces : add into global namespace
                if not parser.namespaces:
                    global_namespace.addChild(obj)
                    continue
                classname = '::'.join(parser.namespaces)
            class_ = findObjectInCache(parser.fullclassname)

        # Try to search in namespaces from C++ method
        if not class_ and parser.namespaces:
            class_ = findObjectInCache('::'.join(parser.namespaces))
        # Try to search in namespaces from C function
        if not class_ and classname in namespaces:
            class_ = namespaces[classname]
        # If still not, it's a new class/function
        if not class_:
            if not classname:
                sys.stderr.write(
                    'AAM Err3 "{}" "{}"\n'.format(name, funcname))
                continue
            class_ = createClass(classname)
        try:
            # Could be class or namespace
            class_.addChild(obj)
        except Exception:
            # Deliberate best effort : report and go on with next symbol
            sys.stderr.write('Not class {} {}\n'.format(name, class_.name))
            sys.stderr.flush()
    display.finish()


def fixBadClassAssertion():
    """We could have considered an object as a class and created it, but
    in fact, it is just a namespace

    NOTE(review): nesting reconstructed; fillFrom/toAdd/toDelete are assumed
    to apply whether the namespace already existed or not.
    """
    toDelete = []
    toAdd = []

    display.setTarget(' * Fix bad class assertion (1/2)',
                      len(global_namespace.childs))
    for obj in global_namespace.childs:
        display.progress(1)
        # Exact type test on purpose : the class hierarchy of 'objects' is
        # not visible here, isinstance() could also match subclasses
        if type(obj) is Class and obj.looksLikeNamespace():
            if obj.name in namespaces:
                newNamespace = namespaces[obj.name]
            else:
                newNamespace = Namespace(obj.name)
            newNamespace.fillFrom(obj)
            toAdd.append(newNamespace)
            toDelete.append(obj)
    display.finish()

    # Childs are updated afterwards to not modify the list while iterating
    display.setTarget(' * Fix bad class assertion (2/2)',
                      len(toDelete) + len(toAdd))
    for obj in toDelete:
        display.progress(1)
        global_namespace.removeChild(obj)
    for obj in toAdd:
        display.progress(1)
        global_namespace.addChild(obj)
    display.finish()


def analyseDependencies():
    """Find classes present in method parameters but not previously found
    """
    display.setTarget(' * Analyse dependencies (1/2)', len(namespaces))
    allParams = []
    for namespace in namespaces.values():
        display.progress(1)
        allParams.extend(namespace.getDependencies())
    display.finish()

    if not allParams:
        return

    allParams = list(set(allParams))
    display.setTarget(' * Analyse dependencies (2/2)', len(allParams))
    for param in allParams:
        display.progress(1)
        if not findObjectInCache(param):
            createClass(param)
    display.finish()


header_re = re.compile('[A-Za-z0-9_-]+')


def headerNameToFilename(name):
    """Transform namespace name into filename

    Keep only characters from header_re and change name in lower case

    Parameters
    ----------
    name : str
        Name to transform

    Returns
    -------
    str
        Computed name, None if a '::' name has no namespace
    """
    if '::' in name:
        parser = CPPPrototypeParser(name)
        if not parser.namespaces:
            return None
        res = parser.namespaces[0].lower()
    else:
        res = name.lower()
    if '.so' in res:
        # Remove .so* extension
        res = os.path.basename(res).split('.so')[0]
    return ''.join([c for c in res if header_re.match(c)])


def outputHeader(namespace, filename):
    """Create header file from namespace description

    Parameters
    ----------
    namespace : obj
        Namespace object to dump (written with its str() form)

    filename : str
        Output filename
    """
    dependecies = namespace.getDependencies()
    # Remove standard dependencies (both of them may be present)
    for standard in ('std', '__gnu_cxx'):
        if standard in dependecies:
            dependecies.remove(standard)

    headers = []
    define = '_{}'.format(
        os.path.basename(filename).upper().replace('.', '_'))
    with open(filename, 'w') as fd:
        fd.write('/*\n')
        fd.write('  File automatically generated by SOAdvancedDissector.py\n')
        fd.write('  More information at http://indefero.soutade.fr/p/soadvanceddissector\n')
        fd.write('*/\n\n')
        fd.write('#ifndef {}\n'.format(define))
        fd.write('#define {}\n\n'.format(define))
        for dep in dependecies:
            headername = headerNameToFilename(dep)
            # Filter multiple headers
            if headername and headername not in headers:
                fd.write('#include <{}.h>\n'.format(headername))
                headers.append(headername)
        if dependecies:
            fd.write('\n\n')
        fd.write('{}'.format(namespace))
        fd.write('#endif // {}\n'.format(define))


def writeResult(target, outputDir, cleanOutputDir):
    """Write result into header files (one per namespace)

    Parameters
    ----------
    target : str
        Analysed file, its name is used for the 'global' namespace header

    outputDir : str
        Output directory (created if needed)

    cleanOutputDir : bool
        Clean output directory before processing
    """
    if cleanOutputDir:
        print('Clean {}'.format(outputDir))
        shutil.rmtree(outputDir, ignore_errors=True)

    os.makedirs(outputDir, exist_ok=True)

    setPrintIndent(True)
    try:
        display.setTarget(' * Write output files', len(namespaces))
        for namespace in namespaces:
            if namespace == 'std':
                continue  # Don't try to write std classes
            if namespace == 'global':
                filename = '{}.h'.format(headerNameToFilename(target))
            else:
                filename = '{}.h'.format(headerNameToFilename(namespace))
            outputHeader(namespaces[namespace],
                         os.path.join(outputDir, filename))
            display.progress(1)
        display.finish()
    finally:
        # Always restore print mode, even if a header cannot be written
        setPrintIndent(False)


def main():
    """Command line entry point : parse arguments and run all passes"""
    parser = argparse.ArgumentParser(description='Extract interfaces (classes, functions, variables) from a GNU/Linux shared library')
    parser.add_argument('-f', '--file', help="Target file",
                        dest="target", required=True)
    parser.add_argument('-s', '--section-file', help="Section file (result from 'readelf --sections|c++filt')",
                        dest="section_file", required=True)
    parser.add_argument('-S', '--symbol-file', help="Symbol file (result from 'readelf -sW|c++filt')",
                        dest="symbol_file", required=True)
    parser.add_argument('-V', '--vtable-file', help="Dynamic vtable file (result from 'vtable-dumper --demangle|c++filt')",
                        dest="vtable_file")
    parser.add_argument('-o', '--output-dir', help="output directory (default ./output)",
                        default="./output", dest="output_dir")
    parser.add_argument('-c', '--clean-output-dir', help="Clean output directory before computing (instead update it)",
                        default=False, action="store_true", dest="clean_output")
    parser.add_argument('-r', '--print-raw-virtual-table', help="Print raw virtual table (debug purpose)",
                        default=False, action="store_true", dest="raw_virtual_table")

    args = parser.parse_args()

    setPrintRawVirtualTable(args.raw_virtual_table)

    print('Analyse {}'.format(args.target))

    # Pass order matters : sections and symbols feed every following pass
    parseSectionFile(args.section_file)
    parseSymbolFile(args.symbol_file)
    parseTypeinfo()
    parseStaticVtable(args.target)
    if args.vtable_file:
        parseDynamicVtable(args.vtable_file)
        fixupInheritance()
    addAllMembers()
    if args.vtable_file:
        fixBadClassAssertion()
    analyseDependencies()

    writeResult(args.target, args.output_dir, args.clean_output)

    print('Result wrote in {}'.format(args.output_dir))


if __name__ == "__main__":
    main()