#! /usr/bin/python
| 2 | |
import os
import re
import string
import subprocess
import sys
| 7 | |
## Symbol table: maps a symbol name to the list of records seen under that
## name (several symbols may share one name).  Each record is itself a list:
##   [0] symbol name          [1] address (hex string)
##   [2] referenced names     [3] source filename (or None)
##   [4] reached flag         [5] names of referencing symbols
##   [6] backlinked flag      [7] approximate size
symbols = dict()
## Names used as starting points when marking reachable symbols; every
## registered name maps to 1.
roots = dict()
| 12 | |
def createBacklinks(name, syms):
    """Register `name` as a referencer of everything `syms` refer to.

    For each record in `syms`, every referenced name (field 2) that exists in
    the global `symbols` table gets `name` appended to the referenced-by list
    (field 5) of all records stored under that name.  A name is added at most
    once per record.

    Args:
        name: name of the referencing symbol.
        syms: list of symbol records sharing that name.
    """
    for sym in syms:
        for ref in sym[2]:
            ## references to names that are not in the table are skipped
            for target in symbols.get(ref, ()):
                if name not in target[5]:
                    target[5].append(name)
| 23 | |
def _unreachedReferences(syms):
    """Lazily mark the unreached records in `syms` and yield their references.

    Each record whose reached flag (field 4) is still unset gets it set to 1,
    after which one (referrer name, referenced name) pair is yielded per entry
    of its reference list (field 2).  The flag of a record is only examined
    when the generator advances to it.
    """
    for sym in syms:
        if not sym[4]:
            sym[4] = 1
            for ref in sym[2]:
                yield (sym[0], ref)


def markSymbol(frm, name):
    """Mark `name`, and everything reachable from it, as reached.

    All records stored under `name` in the global `symbols` table get their
    reached flag (field 4) set, and the names they reference are followed
    depth-first.  An explicit stack is used instead of recursion so that long
    reference chains cannot hit the interpreter's recursion limit; the order
    in which symbols are visited is the same as a recursive walk.

    A name that is missing from `symbols` is reported and skipped.

    Args:
        frm: name of the referencing symbol (only used in messages).
        name: name of the symbol to mark.
    """
    pending = [iter([(frm, name)])]
    while pending:
        edge = next(pending[-1], None)
        if edge is None:
            pending.pop()
            continue
        referrer, target = edge
        if target not in symbols:
            print("%s referenced but was not in the objdump" % target)
            continue
        syms = symbols[target]
        ## print ambiguous references unless they are internal noise like ".L129"
        if len(syms) > 1 and target[0] != '.':
            print("Reference to symbol '%s' from '%s' is ambiguous, marking all '%s'" % (target, referrer, target))
            print(syms)
        pending.append(_unreachedReferences(syms))
| 40 | |
def cmpFilename(a, b):
    """Three-way comparison ordering entries by filename, then by name.

    Args:
        a, b: sequences whose element 0 is a symbol name and element 1 is a
            filename.

    Returns:
        -1, 0 or 1 when `a` sorts before, equal to, or after `b`.
    """
    key_a = (a[1], a[0])
    key_b = (b[1], b[0])
    ## spelled out because the cmp() builtin is not available everywhere
    return (key_a > key_b) - (key_a < key_b)
| 46 | |
def _twoSignificantDigits(value):
    """Format `value` rounded to two significant digits, without an exponent.

    A bare "%.2g" switches to exponent notation once the rounded value reaches
    100 (e.g. "1e+02"), so the rounded number is re-formatted with "%g".
    """
    return "%g" % float("%.2g" % value)


def sizeAsString(bytes):
    """Return a short human-readable rendering of a byte count.

    Args:
        bytes: size in bytes.

    Returns:
        "<n> bytes" below 1024, otherwise the size in units of 1024 bytes
        ("K") or 1024*1024 bytes ("M"), rounded to two significant digits.
    """
    if bytes < 1024:
        return "%d bytes" % bytes
    if bytes < 1024 * 1024:
        return "%sK" % _twoSignificantDigits(bytes / 1024.0)
    return "%sM" % _twoSignificantDigits(bytes / 1024.0 / 1024.0)
| 54 | |
def printLost():
    """Print every symbol that was never marked as reached.

    Reads the global `symbols` table.  Unreached symbols are listed grouped by
    source filename (sorted by filename, then symbol name), each followed by
    the names that reference it.  After the listing, one summary line per file
    with a non-zero total is printed, then the grand total.
    """
    lost = []
    for (name, syms) in symbols.items():
        sym = syms[0]  ## we always mark all or none for now
        if sym[4] or name[0] == '.':  ## skip reached and .L129 type symbols
            continue
        lost.append((name, sym[3] or "unknown file", sym[5], sym[7]))
    lost.sort(key=lambda entry: (entry[1], entry[0]))

    file_summaries = []
    total_unused = 0
    total_this_file = 0
    filename = None
    for (name, next_filename, referencers, size) in lost:
        if next_filename != filename:
            if total_this_file > 0:
                file_summaries.append(" %s may be unused in %s" % (sizeAsString(total_this_file), filename))
            print("%s has these symbols not reachable from exported symbols:" % next_filename)
            filename = next_filename
            total_this_file = 0
        print(" %s %s" % (name, sizeAsString(size)))
        total_unused = total_unused + size
        total_this_file = total_this_file + size
        for trace in referencers:
            print(" referenced from %s" % trace)

    ## the loop only flushes a file's total when the next file starts, so the
    ## last file has to be flushed here
    if total_this_file > 0:
        file_summaries.append(" %s may be unused in %s" % (sizeAsString(total_this_file), filename))

    for fs in file_summaries:
        print(fs)
    print("%s total may be unused" % sizeAsString(total_unused))
| 88 | |
## these functions are used only indirectly, so we don't
## notice they are used. main() adds them as roots by hand.
_VTABLE_ROOTS = ['unix_finalize',
                 'unix_handle_watch',
                 'unix_disconnect',
                 'unix_connection_set',
                 'unix_do_iteration',
                 'unix_live_messages_changed',
                 'unix_get_unix_fd',
                 'handle_client_data_cookie_sha1_mech',
                 'handle_client_data_external_mech',
                 'handle_server_data_cookie_sha1_mech',
                 'handle_server_data_external_mech',
                 'handle_client_initial_response_cookie_sha1_mech',
                 'handle_client_initial_response_external_mech',
                 'handle_client_shutdown_cookie_sha1_mech',
                 'handle_client_shutdown_external_mech',
                 'handle_server_shutdown_cookie_sha1_mech',
                 'handle_server_shutdown_external_mech'
                 ]


def _runCommand(argv):
    """Run the program `argv` and return its standard output as a list of lines.

    The command is passed as an argument list (no shell), so file names with
    spaces or shell metacharacters are handled verbatim.  A non-zero exit
    status is reported on stderr but does not abort the run.
    """
    print("Running: %s" % " ".join(argv))
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        lines = proc.stdout.readlines()
    finally:
        proc.stdout.close()
        status = proc.wait()
    if status != 0:
        sys.stderr.write("warning: '%s' exited with status %d\n" % (argv[0], status))
    return lines


def _parseDisassembly(lines):
    """Fill the global `symbols` table from objdump disassembly lines.

    Three kinds of lines are recognised: a symbol header starts a new record,
    a line ending in "<name>" adds a reference to the current record, and a
    "/path:lineno" line sets the current record's source filename.

    Raises:
        Exception: if one symbol is attributed to two different source files.
    """
    ## 0001aa44 <_dbus_message_get_network_data>:
    sym_re = re.compile(r'([0-9a-f]+) <([^>]+)>:')
    ## 1aa49: e8 00 00 00 00 call 1aa4e <_dbus_message_get_network_data+0xa>
    ref_re = re.compile(r' <([^>]+)> *$')
    ## /home/hp/dbus-cvs/dbus/dbus/dbus-message.c:139
    file_re = re.compile(r'^(\/[^:].*):[0-9]+$')
    ## _dbus_message_get_network_data+0xa
    funcname_re = re.compile(r'([^+]+)\+[0-9a-fx]+')

    current_sym = None
    for line in lines:
        match = sym_re.match(line)
        if match:
            addr = match.group(1)
            name = match.group(2)
            ## 0 symname, 1 address, 2 references, 3 filename, 4 reached, 5 referenced-by 6 backlinked 7 approx size
            item = [name, addr, [], None, 0, [], 0, 0]
            symbols.setdefault(name, []).append(item)

            if current_sym:
                ## the gap up to the start of this symbol approximates the
                ## size of the symbol that precedes it
                size = int(addr, 16) - int(current_sym[1], 16)
                if size < 0:
                    print("Computed negative size %d for %s" % (size, current_sym[0]))
                    size = 0
                current_sym[7] = size

            current_sym = item
            continue

        if not current_sym:
            ## nothing to attach references or filenames to yet
            continue

        match = ref_re.search(line)
        if match:
            target = match.group(1)
            stripped = funcname_re.match(target)
            if stripped:
                ## dump the "+address"
                target = stripped.group(1)
            if target != current_sym[0]:  ## skip self-references
                current_sym[2].append(target)
            continue

        match = file_re.match(line)
        if match:
            source_file = match.group(1)
            if source_file.startswith('/usr/include'):
                ## inlined libc thingy
                pass
            elif current_sym[0].startswith('.debug'):
                ## debug info
                pass
            elif current_sym[3] and current_sym[3] != source_file:
                raise Exception("%s in both %s and %s" % (current_sym[0], current_sym[3], source_file))
            else:
                current_sym[3] = source_file


def _parseExports(lines):
    """Add every 'T' symbol found in `nm -D` output lines to the global `roots`.

    Raises:
        Exception: if the same name is listed twice.
    """
    ## 00005410 T dbus_address_entries_free
    dynsym_re = re.compile('T ([^ \n]+)$')
    for line in lines:
        match = dynsym_re.search(line)
        if match:
            name = match.group(1)
            if name in roots:
                raise Exception("symbol %s exported twice?" % name)
            roots[name] = 1


def main():
    """Report symbols of an object file that look unreachable from its exports.

    The object file is named by the first command-line argument.  It is
    disassembled with objdump to learn which symbols reference which, its
    exported symbols are read with nm, everything reachable from the exports
    (plus the hardcoded vtable roots) is marked, and the rest is printed.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s OBJECT-FILE\n" % sys.argv[0])
        sys.exit(1)
    filename = sys.argv[1]

    ## first we find which functions reference which other functions
    _parseDisassembly(_runCommand(["objdump", "-D", "--demangle", "-l", filename]))

    ## now we need to find the roots (exported symbols)
    _parseExports(_runCommand(["nm", "-D", filename]))

    print("%d symbols exported from this object" % len(roots))

    for vr in _VTABLE_ROOTS:
        if vr in roots:
            raise Exception("%s is already a root" % vr)
        roots[vr] = 1

    for k in list(roots):
        markSymbol("root", k)

    for (k, v) in symbols.items():
        createBacklinks(k, v)

    print("""

The symbols mentioned below don't appear to be reachable starting from
the dynamic exports of the library. However, this program is pretty
dumb; a limitation that creates false positives is that it can only
trace 'reachable' through hardcoded function calls, if a function is
called only through a vtable, it won't be marked reachable (and
neither will its children in the call graph).

Also, the sizes mentioned are more or less completely bogus.

""")

    print("The following are hardcoded in as vtable roots: %s" % _VTABLE_ROOTS)

    printLost()
| 240 | |
## Run the analysis only when this file is executed as a script, not when it
## is imported as a module.
if __name__ == "__main__":
    main()