#! /usr/bin/python
2
import os
import re
import string
import subprocess
import sys
7
## Hash from symbol name to the list of symbols with that name.
## Each symbol is itself a list with these slots:
##   0 symbol name, 1 address (hex string), 2 names it references,
##   3 source filename (or None), 4 reached flag, 5 names of referencers,
##   6 backlinked flag, 7 approximate size
symbols = {}
## Names of the root symbols reachability is traced from; used as a set
## (every value stored is 1).
roots = {}
12
def createBacklinks(name, syms):
    """Record ``name`` as a referencer of everything ``syms`` refer to.

    For every symbol record in ``syms``, each referenced name (slot 2) is
    looked up in the global ``symbols`` table and ``name`` is appended to
    the referenced-by list (slot 5) of every record stored under it.
    Referenced names that are not in the table are skipped, and a
    referencer is never listed twice on the same record.

    name -- the symbol name shared by all records in ``syms``
    syms -- list of symbol records, as stored in ``symbols[name]``
    """
    for sym in syms:
        for ref in sym[2]:
            ## a single lookup; names missing from the table yield nothing
            for target in symbols.get(ref, ()):
                if name not in target[5]:
                    target[5].append(name)
23
def markSymbol(frm, name):
    """Mark ``name`` and everything reachable from it as reached.

    Sets the reached flag (slot 4) on every record stored under ``name``
    in the global ``symbols`` table, then follows each record's reference
    list (slot 2) transitively.  Records that are already marked are not
    revisited, so reference cycles terminate.

    A name that is not in the table is reported and skipped.  A name
    with more than one record is reported as ambiguous (unless it starts
    with '.', e.g. ".L129") and all of its records are marked.

    The traversal uses an explicit work stack instead of recursion so a
    long call chain cannot exceed the interpreter's recursion limit.

    frm  -- name of the referencing symbol (only used in messages)
    name -- name of the symbol to mark
    """
    pending = [(frm, name)]
    while pending:
        referrer, target = pending.pop()
        if target not in symbols:
            print("%s referenced but was not in the objdump" % target)
            continue
        syms = symbols[target]
        ## print ambiguous references unless they are internal noise like ".L129"
        if len(syms) > 1 and target[0] != '.':
            print("Reference to symbol '%s' from '%s' is ambiguous, marking all '%s'" % (target, referrer, target))
            print(syms)
        for sym in syms:
            if sym[4]:
                continue  ## already marked
            sym[4] = 1
            ## pushed in reverse so references are visited in listed order
            pending.extend([(sym[0], ref) for ref in reversed(sym[2])])
40
def cmpFilename(a, b):
    """Three-way compare two report entries by slot 1, then by slot 0.

    Returns -1, 0 or 1, like the classic ``cmp`` builtin.  It does not
    call ``cmp`` itself, so it works on interpreters where that builtin
    no longer exists.

    a, b -- indexable entries; slot 1 is the primary sort key (the
            filename in printLost's entries) and slot 0 the tie-breaker
            (the symbol name)
    """
    key_a = (a[1], a[0])
    key_b = (b[1], b[0])
    return (key_a > key_b) - (key_a < key_b)
46
def sizeAsString(bytes):
    """Format a byte count as a short human-readable string.

    Counts below 1024 are shown as whole bytes, counts below 1024*1024
    in units of 1024 bytes with a "K" suffix, and anything larger in
    units of 1024*1024 bytes with an "M" suffix.

    Scaled values use fixed-point formatting with two decimals.  A
    significant-digit format such as "%.2g" would switch to exponent
    notation for values of 100 or more (e.g. "1.5e+02K").

    bytes -- the size to format, as a number of bytes
    """
    if bytes < 1024:
        return "%d bytes" % bytes
    if bytes < 1024 * 1024:
        return "%.2fK" % (bytes / 1024.0)
    return "%.2fM" % (bytes / 1024.0 / 1024.0)
54
def printLost():
    """Print every symbol that was not marked reached, grouped by file.

    Walks the global ``symbols`` table and reports each name whose first
    record has a clear reached flag (slot 4), skipping names that start
    with '.' (".L129" style labels).  Entries are sorted by filename and
    then by symbol name.  For each symbol the approximate size (slot 7)
    and the names that reference it (slot 5) are printed, followed by a
    per-file total for every file with a non-zero total and a grand total.
    """
    lost = []
    for (name, syms) in symbols.items():
        sym = syms[0]  ## we always mark all or none for now
        if sym[4] or name[0] == '.':  ## skip reached and .L129 type symbols
            continue
        lost.append((name, sym[3] or "unknown file", sym[5], sym[7]))

    ## same order as sorting with cmpFilename: filename first, then name
    lost.sort(key=lambda entry: (entry[1], entry[0]))

    file_summaries = []
    total_unused = 0
    total_this_file = 0
    filename = None
    for (name, next_filename, referencers, size) in lost:
        if next_filename != filename:
            if total_this_file > 0:
                file_summaries.append(" %s may be unused in %s" % (sizeAsString(total_this_file), filename))
            print("%s has these symbols not reachable from exported symbols:" % next_filename)
            filename = next_filename
            total_this_file = 0
        print(" %s %s" % (name, sizeAsString(size)))
        total_unused = total_unused + size
        total_this_file = total_this_file + size
        for trace in referencers:
            print(" referenced from %s" % trace)

    ## the loop only flushes a file's total when the next file starts, so
    ## the final file has to be flushed here
    if total_this_file > 0:
        file_summaries.append(" %s may be unused in %s" % (sizeAsString(total_this_file), filename))

    for summary in file_summaries:
        print(summary)
    print("%s total may be unused" % sizeAsString(total_unused))
88
## 0001aa44 <_dbus_message_get_network_data>:
_SYM_RE = re.compile('([0-9a-f]+) <([^>]+)>:')
## 1aa49: e8 00 00 00 00 call 1aa4e <_dbus_message_get_network_data+0xa>
_REF_RE = re.compile(' <([^>]+)> *$')
## /home/hp/dbus-cvs/dbus/dbus/dbus-message.c:139
_FILE_RE = re.compile(r'^(\/[^:].*):[0-9]+$')
## _dbus_message_get_network_data+0xa
_FUNCNAME_RE = re.compile(r'([^+]+)\+[0-9a-fx]+')
## 00005410 T dbus_address_entries_free
_DYNSYM_RE = re.compile('T ([^ \n]+)$')

## these functions are used only indirectly, so we don't
## notice they are used. Manually add them as roots...
_VTABLE_ROOTS = ['unix_finalize',
                 'unix_handle_watch',
                 'unix_disconnect',
                 'unix_connection_set',
                 'unix_do_iteration',
                 'unix_live_messages_changed',
                 'unix_get_unix_fd',
                 'handle_client_data_cookie_sha1_mech',
                 'handle_client_data_external_mech',
                 'handle_server_data_cookie_sha1_mech',
                 'handle_server_data_external_mech',
                 'handle_client_initial_response_cookie_sha1_mech',
                 'handle_client_initial_response_external_mech',
                 'handle_client_shutdown_cookie_sha1_mech',
                 'handle_client_shutdown_external_mech',
                 'handle_server_shutdown_cookie_sha1_mech',
                 'handle_server_shutdown_external_mech'
                 ]


def _run_tool(argv):
    """Run ``argv`` without a shell and return its stdout as a list of lines.

    The argument list is handed to the program directly, so a filename
    containing spaces or shell metacharacters is passed through intact.
    A non-zero exit status is reported on stderr but is not fatal.
    """
    print("Running: %s" % " ".join(argv))
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        lines = proc.stdout.readlines()
    finally:
        proc.stdout.close()
        status = proc.wait()
    if status != 0:
        sys.stderr.write("warning: %s exited with status %d\n" % (argv[0], status))
    return lines


def _scan_disassembly(lines):
    """Fill the global ``symbols`` table from ``objdump -D -l`` output lines."""
    current_sym = None
    for line in lines:
        match = _SYM_RE.match(line)
        if match:
            addr = match.group(1)
            name = match.group(2)
            ## 0 symname, 1 address, 2 references, 3 filename, 4 reached, 5 referenced-by 6 backlinked 7 approx size
            item = [name, addr, [], None, 0, [], 0, 0]
            symbols.setdefault(name, []).append(item)

            if current_sym:
                ## the distance from the previous symbol's start to this
                ## one is the approximate size of the *previous* symbol
                size = int(addr, 16) - int(current_sym[1], 16)
                if size < 0:
                    print("Computed negative size %d for %s" % (size, current_sym[0]))
                    size = 0
                current_sym[7] = size

            current_sym = item
            continue

        if not current_sym:
            ## references and file markers only mean something inside a symbol
            continue

        match = _REF_RE.search(line)
        if match:
            target = match.group(1)
            offset_match = _FUNCNAME_RE.match(target)
            if offset_match:
                ## dump the "+address"
                target = offset_match.group(1)
            if target != current_sym[0]:  ## skip self-references
                current_sym[2].append(target)
            continue

        match = _FILE_RE.match(line)
        if match:
            path = match.group(1)
            if path.startswith('/usr/include'):
                ## inlined libc thingy
                pass
            elif current_sym[0].startswith('.debug'):
                ## debug info
                pass
            elif current_sym[3] and current_sym[3] != path:
                raise Exception("%s in both %s and %s" % (current_sym[0], current_sym[3], path))
            else:
                current_sym[3] = path


def _load_exports(lines):
    """Add every 'T' symbol found in ``nm -D`` output lines to ``roots``."""
    for line in lines:
        match = _DYNSYM_RE.search(line)
        if match:
            name = match.group(1)
            if name in roots:
                raise Exception("symbol %s exported twice?" % name)
            roots[name] = 1


def main():
    """Report symbols of an object file that no exported symbol reaches.

    The object file is named by ``sys.argv[1]``.  It is disassembled with
    ``objdump -D --demangle -l`` to learn which symbols reference which,
    and ``nm -D`` supplies the exported symbols used as roots, together
    with a hard-coded list of functions that are only called indirectly.
    Everything reachable from a root is marked, back-references are
    recorded, and the unmarked symbols are printed by ``printLost``.

    Exits with status 1 after printing a usage line when no filename is
    given.  Raises Exception when a symbol is attributed to two source
    files, when a symbol is exported twice, or when a hard-coded root is
    already exported.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s OBJECT-FILE\n" % os.path.basename(sys.argv[0]))
        sys.exit(1)
    filename = sys.argv[1]

    ## first we find which functions reference which other functions
    _scan_disassembly(_run_tool(["objdump", "-D", "--demangle", "-l", filename]))

    ## now we need to find the roots (exported symbols)
    _load_exports(_run_tool(["nm", "-D", filename]))

    print("%d symbols exported from this object" % len(roots))

    vtable_roots = _VTABLE_ROOTS
    for vr in vtable_roots:
        if vr in roots:
            raise Exception("%s is already a root" % vr)
        roots[vr] = 1

    for k in list(roots):
        markSymbol("root", k)

    for (k, v) in symbols.items():
        createBacklinks(k, v)

    print("""

The symbols mentioned below don't appear to be reachable starting from
the dynamic exports of the library. However, this program is pretty
dumb; a limitation that creates false positives is that it can only
trace 'reachable' through hardcoded function calls, if a function is
called only through a vtable, it won't be marked reachable (and
neither will its children in the call graph).

Also, the sizes mentioned are more or less completely bogus.

""")

    print("The following are hardcoded in as vtable roots: %s" % vtable_roots)

    printLost()
240
## Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()