3 # mklibs.py: An automated way to create a minimal /lib/ directory.
5 # Copyright 2001 by Falk Hueffner <falk@debian.org>
6 # & Goswin Brederlow <goswin.brederlow@student.uni-tuebingen.de>
8 # mklibs.sh by Marcus Brinkmann <Marcus.Brinkmann@ruhr-uni-bochum.de>
11 # This program is free software; you can redistribute it and/or modify
12 # it under the terms of the GNU General Public License as published by
13 # the Free Software Foundation; either version 2 of the License, or
14 # (at your option) any later version.
16 # This program is distributed in the hope that it will be useful,
17 # but WITHOUT ANY WARRANTY; without even the implied warranty of
18 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 # GNU General Public License for more details.
21 # You should have received a copy of the GNU General Public License
22 # along with this program; if not, write to the Free Software
23 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 # - Gather all unresolved symbols and libraries needed by the programs
28 # and reduced libraries
29 # - Gather all symbols provided by the already reduced libraries
30 # (none on the first pass)
31 # - If all symbols are provided we are done
32 # - go through all libraries and remember what symbols they provide
33 # - go through all unresolved/needed symbols and mark them as used
35 # - find pic file (if not present copy and strip the so)
36 # - compile in only used symbols
41 # * complete argument parsing as given as comment in main
# Current verbosity.  debug() prints a message only when this value is at
# least the level passed with the message.
56 debuglevel = DEBUG_NORMAL
def debug(level, *msg):
    """Print a diagnostic message if the current verbosity allows it.

    level -- threshold of this message; it is compared with the
             module-level ``debuglevel``.
    msg   -- message fragments, printed on one line separated by single
             spaces.

    Nothing is printed when ``debuglevel`` is lower than ``level``.
    """
    if debuglevel >= level:
        # str() every fragment so that a caller may pass a number directly;
        # joining raw fragments raised TypeError on anything but strings.
        print(" ".join([str(part) for part in msg]))
# A simple set class.  The rest of this file relies on its add(), merge(),
# elems() and size() methods.
class Set:
    """Minimal collection of unique, hashable objects.

    The members are kept as the keys of a private dictionary; the values
    stored under those keys carry no meaning.
    """

    def __init__(self):
        self.__dict = {}

    def add(self, obj):
        """Insert obj; adding the same object twice changes nothing."""
        self.__dict[obj] = 1

    def contains(self, obj):
        """Return a true value if obj is a member."""
        return obj in self.__dict

    def merge(self, s):
        """Add every element of the Set s to this set."""
        for e in s.elems():
            self.add(e)

    def elems(self):
        """Return the members as a new list, in no particular order."""
        # list() keeps the result indexable whatever keys() returns.
        return list(self.__dict.keys())

    def size(self):
        """Return the number of members."""
        return len(self.__dict)

    def __eq__(self, other):
        # Comparing with something that is not a Set is "not equal", not
        # an AttributeError.
        if not isinstance(other, Set):
            return NotImplemented
        return self.__dict == other.__dict

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so spell it out.
        if not isinstance(other, Set):
            return NotImplemented
        return self.__dict != other.__dict

    def __str__(self):
        return repr(self.elems())

    def __repr__(self):
        return repr(self.elems())
# return a list of lines of output of the command
def command(command, *args):
    """Run an external program through the shell and return its output.

    command -- the program to run.
    args    -- further words of the command line; they are appended to
               command separated by single spaces, without any quoting.

    Returns the text produced by the command, split at newlines into a
    list of strings.  When the command does not terminate successfully a
    report (status and captured output) is printed before returning.
    """
    arguments = " ".join(args)
    debug(DEBUG_SPAM, "calling", command, arguments)
    (status, output) = commands.getstatusoutput(command + ' ' + arguments)
    # Test the whole wait status.  Looking only at os.WEXITSTATUS() treated
    # a child that was killed by a signal (exit-status byte 0) as a success.
    if status != 0:
        if os.WIFEXITED(status):
            reason = os.WEXITSTATUS(status)
        elif os.WIFSIGNALED(status):
            reason = "signal %d" % os.WTERMSIG(status)
        else:
            reason = status
        print("Command failed with status %s : %s %s"
              % (reason, command, arguments))
        print("With output: " + output)
        # NOTE(review): the embedded line numbering skips one line at this
        # point; if an abort was intended here it is not visible -- confirm.
    return output.split('\n')
# Filter a list according to a regexp containing a () group. Return
# a Set holding the contents of that group for every matching entry.
def regexpfilter(list, regexp, groupnr = 1):
    """Collect one regexp group from all entries of list that match.

    list    -- sequence of strings to examine.
    regexp  -- pattern, applied with match(), i.e. anchored at the start
               of each entry.
    groupnr -- number of the group whose text is collected (default 1).

    Returns a Set; entries that do not match contribute nothing.
    """
    pattern = re.compile(regexp)
    result = Set()
    for x in list:
        match = pattern.match(x)
        if match:
            result.add(match.group(groupnr))
    return result
# Return a list of rpath directories for the passed object
def rpath(obj):
    """Return the RPATH directories recorded in obj, relative to root.

    obj -- path of the object file to inspect.

    The private headers are read with "<target>objdump --private-headers".
    Every directory found is prefixed with the module-level ``root`` and a
    "/".  Returns a (possibly empty) list of strings.

    Raises IOError if obj does not exist.
    """
    if not os.access(obj, os.F_OK):
        # String exceptions are rejected by Python 2.6 and later.
        raise IOError("Cannot find lib: " + obj)
    output = command(target + "objdump", "--private-headers", obj)
    result = []
    for entry in regexpfilter(output, r".*RPATH\s*(\S+)$").elems():
        # A single RPATH record may name several directories separated by
        # ':'; each of them is a search directory of its own.
        for directory in entry.split(":"):
            if directory:
                result.append(root + "/" + directory)
    return result
# Return a Set of libraries the passed objects depend on.
def library_depends(obj):
    """Return the names from the NEEDED entries of obj as a Set.

    obj -- path of the object file to inspect.

    The entries are taken from the output of
    "<target>objdump --private-headers".

    Raises IOError if obj does not exist.
    """
    if not os.access(obj, os.F_OK):
        # String exceptions are rejected by Python 2.6 and later.
        raise IOError("Cannot find lib: " + obj)
    output = command(target + "objdump", "--private-headers", obj)
    return regexpfilter(output, r".*NEEDED\s*(\S+)$")
# Return the libraries the passed object depends on as one string in
# "-lfoo -lbar" format, suitable for passing to gcc.
def library_depends_gcc_libnames(obj):
    """Return the NEEDED libraries of obj as gcc "-l" options.

    obj -- path of the object file to inspect.

    For every NEEDED entry of the form "lib<name>.so..." the option
    "-l<name>" is produced; the options are joined with single spaces.
    Returns the empty string when there is no such entry.

    Raises IOError if obj does not exist.
    """
    if not os.access(obj, os.F_OK):
        # String exceptions are rejected by Python 2.6 and later.
        raise IOError("Cannot find lib: " + obj)
    output = command(target + "objdump", "--private-headers", obj)
    names = regexpfilter(output, r".*NEEDED\s*lib(\S+)\.so.*$").elems()
    if not names:
        return ""
    return "-l" + " -l".join(names)
# Scan readelf output. Example:
#   Num:    Value          Size Type    Bind   Vis      Ndx Name
#     1: 000000012002ab48   168 FUNC    GLOBAL DEFAULT  UND strchr@GLIBC_2.0 (2)
# The three groups are (Bind, Ndx, Name); the name stops before any '@'.
# The Bind group is a plain (\w+): nesting it in another '+' matched the
# same text but invited needless backtracking on lines that do not match.
symline_regexp = \
    re.compile(r"\s*\d+: .+\s+\d+\s+\w+\s+(\w+)\s+\w+\s+(\w+)\s+([^\s@]+)")
# Return undefined symbols in an object as a Set of tuples (name, weakness)
def undefined_symbols(obj):
    """Return the symbols obj refers to but does not define.

    obj -- path of the object file to inspect.

    The symbol table is read with "<target>readelf -s -W".  Every line
    whose Ndx column is "UND" yields a tuple (name, is_weak), where
    is_weak is true when the Bind column is "WEAK".  Returns a Set of
    these tuples.

    Raises IOError if obj does not exist.
    """
    if not os.access(obj, os.F_OK):
        # String exceptions are rejected by Python 2.6 and later.
        raise IOError("Cannot find lib: " + obj)

    result = Set()
    output = command(target + "readelf", "-s", "-W", obj)
    for line in output:
        match = symline_regexp.match(line)
        if match:
            bind, ndx, name = match.groups()
            if ndx == "UND":
                result.add((name, bind == "WEAK"))
    return result
# Return a Set of symbols provided by a library
def provided_symbols(obj):
    """Return the names of the symbols that obj defines for others.

    obj -- path of the object file to inspect.

    The symbol table is read with "<target>readelf -s -W".  A symbol is
    counted as provided when its Bind column is not "LOCAL" and its Ndx
    column is neither "UND" nor "ABS".  Returns a Set of names.

    Raises IOError if obj does not exist.
    """
    if not os.access(obj, os.F_OK):
        # String exceptions are rejected by Python 2.6 and later.
        raise IOError("Cannot find lib: " + obj)

    result = Set()
    debug(DEBUG_SPAM, "provided_symbols result = ", repr(result))
    output = command(target + "readelf", "-s", "-W", obj)
    for line in output:
        match = symline_regexp.match(line)
        if match:
            bind, ndx, name = match.groups()
            if bind != "LOCAL" and ndx not in ("UND", "ABS"):
                debug(DEBUG_SPAM, "provided_symbols adding ", repr(name))
                result.add(name)
    return result
# Return real target of a symlink
def resolve_link(file):
    """Follow a chain of symbolic links and return the final path.

    file -- path that may be a symbolic link.

    A relative link target is interpreted relative to the directory of
    the link itself.  A path that is not a symbolic link is returned
    unchanged.

    Raises IOError when more than 40 links have been followed, which
    indicates a cycle; os.lstat() raises OSError for a missing path.
    """
    debug(DEBUG_SPAM, "resolving", file)
    hops = 0
    while S_ISLNK(os.lstat(file)[ST_MODE]):
        hops = hops + 1
        if hops > 40:
            # A cycle of links would otherwise keep this loop running
            # forever, because lstat() succeeds on every member.
            raise IOError("Too many levels of symbolic links: " + file)
        # os.path.join() drops the directory part by itself when the link
        # target is an absolute path.
        file = os.path.join(os.path.dirname(file), os.readlink(file))
    debug(DEBUG_SPAM, "resolved to", file)
    return file
# Find complete path of a library, by searching in lib_path
def find_lib(lib):
    """Return the path of lib in the first lib_path directory holding it.

    lib -- file name of the library.

    The directories of the module-level ``lib_path`` are tried in order.
    Returns "" when none of them contains lib, so that callers can both
    test the result for truth and use it in string operations.
    """
    for path in lib_path:
        candidate = path + "/" + lib
        if os.access(candidate, os.F_OK):
            return candidate
    return ""
# Find a PIC archive for the library
def find_pic(lib):
    """Return the path of the "<base>_pic.a" archive belonging to lib.

    lib -- file name of the shared library; <base> is the part that
           so_pattern captures in front of ".so".

    The directories of the module-level ``lib_path`` are searched in
    order and symbolic links are resolved with resolve_link().  Returns
    "" when lib does not look like a shared library name or when no
    archive is found.
    """
    match = so_pattern.match(lib)
    if not match:
        # Not a "lib*.so*" / "ld*.so*" name: there is nothing to look for.
        return ""
    base_name = match.group(1)
    for path in lib_path:
        for file in glob.glob(path + "/" + base_name + "_pic.a"):
            if os.access(file, os.F_OK):
                return resolve_link(file)
    return ""
# Find a PIC .map file for the library
def find_pic_map(lib):
    """Return the path of the "<base>_pic.map" file belonging to lib.

    lib -- file name of the shared library; <base> is the part that
           so_pattern captures in front of ".so".

    The directories of the module-level ``lib_path`` are searched in
    order and symbolic links are resolved with resolve_link().  Returns
    "" when lib does not look like a shared library name or when no map
    file is found.
    """
    match = so_pattern.match(lib)
    if not match:
        # Not a "lib*.so*" / "ld*.so*" name: there is nothing to look for.
        return ""
    base_name = match.group(1)
    for path in lib_path:
        for file in glob.glob(path + "/" + base_name + "_pic.map"):
            if os.access(file, os.F_OK):
                return resolve_link(file)
    return ""
def extract_soname(so_file):
    """Return the SONAME recorded in so_file, or "" if it has none.

    so_file -- path of the shared object to inspect.

    The name is the text between square brackets on a line containing
    "SONAME" in the output of "<target>readelf --all -W".
    """
    soname_data = regexpfilter(command(target + "readelf", "--all", "-W", so_file),
                               r".*SONAME.*\[(.*)\].*")
    names = soname_data.elems()
    if names:
        return names[0]
    # Always hand back a string so that callers can test or concatenate it.
    return ""
# Help text, written line by line to the stream bound to outfd.
# NOTE(review): the enclosing function header and the code that binds outfd
# are not part of this excerpt -- confirm against the complete file.
# NOTE(review): longopts also accepts --dry-run and --version, and the option
# loop tests for "-r" and "-V"; none of these is mentioned in the text below.
230 print >> outfd, "Usage: mklibs [OPTION]... -d DEST FILE ..."
231 print >> outfd, "Make a set of minimal libraries for FILE(s) in DEST."
233 print >> outfd, " -d, --dest-dir DIRECTORY create libraries in DIRECTORY"
# The comma-separated print arguments are joined with spaces, so the default
# path appears between "( " and " )".
234 print >> outfd, " -D, --no-default-lib omit default libpath (", string.join(default_lib_path, " : "), ")"
235 print >> outfd, " -L DIRECTORY[:DIRECTORY]... add DIRECTORY(s) to the library search path"
236 print >> outfd, " --ldlib LDLIB use LDLIB for the dynamic linker"
237 print >> outfd, " --libc-extras-dir DIRECTORY look for libc extra files in DIRECTORY"
238 # Ugh... Adding the trailing '-' breaks common practice.
239 #print >> outfd, " --target TARGET prepend TARGET- to the gcc and binutils calls"
240 print >> outfd, " --target TARGET prepend TARGET to the gcc and binutils calls"
241 print >> outfd, " --root ROOT search in ROOT for library rpaths"
242 print >> outfd, " -v, --verbose explain what is being done"
243 print >> outfd, " -h, --help display this help and exit"
# Print the version string held in vers.  The literal ends with a space and
# the print statement adds another one before vers.
# NOTE(review): the enclosing function header is not part of this excerpt.
247 print "mklibs: version ",vers
250 #################### main ####################
251 ## Usage: ./mklibs.py [OPTION]... -d DEST FILE ...
252 ## Make a set of minimal libraries for FILE ... in directory DEST.
255 ## -L DIRECTORY Add DIRECTORY to library search path.
256 ## -D, --no-default-lib Do not use default lib directories of /lib:/usr/lib
257 ## -n, --dry-run Don't actually run any commands; just print them.
258 ## -v, --verbose Print additional progress information.
259 ## -V, --version Print the version number and exit.
260 ## -h, --help Print this help and exit.
261 ## --ldlib Name of dynamic linker (overwrites environment variable ldlib)
262 ## --libc-extras-dir Directory for libc extra files
263 ## --target Use as prefix for gcc or binutils calls
265 ## -d, --dest-dir DIRECTORY Create libraries in DIRECTORY.
267 ## Required arguments for long options are also mandatory for the short options.
# Start of the main script: version string, environment and the tables
# used for option parsing and file name classification.
269 # Clean the environment
270 vers="0.12 with uClibc fixes"
# presumably forces untranslated tool output, which the regular expressions
# in this file are written against -- confirm
271 os.environ['LC_ALL'] = "C"
# Long option names for getopt; a trailing "=" marks an option that takes an
# argument.
275 longopts = ["no-default-lib", "dry-run", "verbose", "version", "help",
276 "dest-dir=", "ldlib=", "libc-extras-dir=", "target=", "root="]
278 # some global variables
# "yes"/"no" flag; it is compared with the string "yes" after option parsing.
283 include_default_lib_path = "yes"
284 default_lib_path = ["/lib/", "/usr/lib/", "/usr/X11R6/lib/"]
285 libc_extras_dir = "/usr/lib/libc_pic"
# Group 1 of so_pattern is the library name in front of ".so".
# NOTE(review): these patterns are ordinary (non-raw) string literals and rely
# on Python leaving the unknown escapes "\." and "\s" untouched.
288 so_pattern = re.compile("((lib|ld).*)\.so(\..+)*")
# Matches text that starts with "#!", optional whitespace and a "/".
289 script_pattern = re.compile("^#!\s*/")
# Command line handling: getopt separates the recognised options (optlist)
# from the remaining arguments (proglist); the loop below applies each option.
# NOTE(review): several statements of this section (e.g. the bodies of some
# branches) are not part of this excerpt.
292 optlist, proglist = getopt.getopt(sys.argv[1:], opts, longopts)
293 except getopt.GetoptError, msg:
294 print >> sys.stderr, msg
297 for opt, arg in optlist:
298 if opt in ("-v", "--verbose"):
# every occurrence raises the verbosity by one step, up to DEBUG_SPAM
299 if debuglevel < DEBUG_SPAM:
300 debuglevel = debuglevel + 1
# arg is a colon-separated list of directories
302 lib_path.extend(string.split(arg, ":"))
303 elif opt in ("-d", "--dest-dir"):
305 elif opt in ("-D", "--no-default-lib"):
306 include_default_lib_path = "no"
307 elif opt == "--ldlib":
309 elif opt == "--libc-extras-dir":
310 libc_extras_dir = arg
311 elif opt == "--target":
314 elif opt in ("-r", "--root"):
316 elif opt in ("--help", "-h"):
319 elif opt in ("--version", "-V"):
323 print "WARNING: unknown option: " + opt + "\targ: " + arg
# The default directories are appended after the loop, i.e. behind every
# directory that was added to lib_path while parsing the options.
325 if include_default_lib_path == "yes":
326 lib_path.extend(default_lib_path)
# os.getenv returns None when the environment variable "ldlib" is not set.
# NOTE(review): whether this assignment is conditional is not visible here.
329 ldlib = os.getenv("ldlib")
# Classify the files named on the command line, determine the dynamic linker
# and collect rpath directories.
# NOTE(review): objects is keyed by st_ino only; files on different file
# systems can share an inode number.
331 objects = {} # map from inode to filename
332 for prog in proglist:
333 inode = os.stat(prog)[ST_INO]
334 if objects.has_key(inode):
335 debug(DEBUG_SPAM, prog, "is a hardlink to", objects[inode])
336 elif so_pattern.match(prog):
337 debug(DEBUG_SPAM, prog, "is a library")
# Only the first 256 bytes are read to look for a "#!" line.
# NOTE(review): the file object is never closed explicitly.
338 elif script_pattern.match(open(prog).read(256)):
339 debug(DEBUG_SPAM, prog, "is a script")
341 objects[inode] = prog
# Group 1 is the last path component of the interpreter named in the program
# headers, i.e. the file name of the dynamic linker.
344 pattern = re.compile(".*Requesting program interpreter:.*/([^\]/]+).*")
345 for obj in objects.values():
346 output = command(target + "readelf", "--program-headers", obj)
348 match = pattern.match(x)
350 ldlib = match.group(1)
356 sys.exit("E: Dynamic linker not found, aborting.")
358 debug(DEBUG_NORMAL, "I: Using", ldlib, "as dynamic linker.")
# A dynamic linker whose name contains "ld-uClibc" is tested for here.
360 pattern = re.compile(".*ld-uClibc.*");
361 if pattern.match(ldlib):
367 for obj in objects.values():
368 rpath_val = rpath(obj)
371 if debuglevel >= DEBUG_VERBOSE:
372 print "Adding rpath " + string.join(rpath_val, ":") + " for " + obj
373 lib_rpath.extend(rpath_val)
375 print "warning: " + obj + " may need rpath, but --root not specified"
# rpath directories are searched after everything already in lib_path
377 lib_path.extend(lib_rpath)
# One reduction pass, first half: work out which symbols the objects need and
# which of them the already reduced libraries provide.
# NOTE(review): the loop header and several initialisations (passnr,
# small_libs, libraries, unresolved, num_unresolved) are not part of this
# excerpt.
380 previous_pass_unresolved = Set()
382 debug(DEBUG_NORMAL, "I: library reduction pass", `passnr`)
383 if debuglevel >= DEBUG_VERBOSE:
385 for obj in objects.values():
# prints the part of obj after the last "/"; the trailing comma keeps the
# output on one line
386 print obj[string.rfind(obj, '/') + 1:],
390 # Gather all already reduced libraries and treat them as objects as well
392 for lib in regexpfilter(os.listdir(dest_path), "(.*-so-stripped)$").elems():
393 obj = dest_path + "/" + lib
394 small_libs.append(obj)
395 inode = os.stat(obj)[ST_INO]
396 if objects.has_key(inode):
397 debug(DEBUG_SPAM, obj, "is hardlink to", objects[inode])
402 for obj in objects.values():
403 small_libs.append(obj)
404 debug(DEBUG_VERBOSE, "Object:", obj)
406 # calculate what symbols and libraries are needed
407 needed_symbols = Set() # Set of (name, weakness-flag)
409 for obj in objects.values():
410 needed_symbols.merge(undefined_symbols(obj))
411 libraries.merge(library_depends(obj))
413 # FIXME: on i386 this is undefined but not marked UND
414 # I don't know how to detect those symbols but this seems
415 # to be the only one and including it on alpha as well
416 # doesn't hurt. I guess all archs can live with this.
# the second tuple member is the weakness flag: sys_siglist is added as weak
417 needed_symbols.add(("sys_siglist", 1))
419 # calculate what symbols are present in small_libs
420 present_symbols = Set()
421 for lib in small_libs:
422 present_symbols.merge(provided_symbols(lib))
# NOTE(review): elems() is a list, so the membership test below is a linear
# search for every needed symbol; present_symbols.contains(symbol) would be a
# dictionary lookup.
426 present_symbols_elems = present_symbols.elems()
428 for (symbol, is_weak) in needed_symbols.elems():
429 if not symbol in present_symbols_elems:
430 debug(DEBUG_SPAM, "Still need:", symbol, `is_weak`)
431 unresolved.add((symbol, is_weak))
432 num_unresolved = num_unresolved + 1
434 debug (DEBUG_NORMAL, `needed_symbols.size()`, "symbols,",
435 `num_unresolved`, "unresolved")
437 if num_unresolved == 0:
# Set equality compares the members, so an unchanged set of unresolved
# symbols means this pass made no progress.
440 if unresolved == previous_pass_unresolved:
441 # No progress in last pass. Verify all remaining symbols are weak.
442 for (symbol, is_weak) in unresolved.elems():
# NOTE(review): raising a plain string is rejected with a TypeError by
# Python 2.6 and later.
444 raise "Unresolvable symbol " + symbol
447 previous_pass_unresolved = unresolved
# One reduction pass, second half: record which library provides each symbol,
# mark the symbols that are needed and rebuild every library from its PIC
# archive with only those symbols (or copy a stripped .so when no archive
# exists).
# NOTE(review): several statements (initialisations, else branches, parts of
# the gcc command line) are not part of this excerpt.
450 library_symbols_used = {}
453 # Calculate all symbols each library provides
454 for library in libraries.elems():
455 path = find_lib(library)
457 sys.exit("Library not found: " + library + " in path: "
458 + string.join(lib_path, " : "))
459 symbols = provided_symbols(path)
460 library_symbols[library] = Set()
461 library_symbols_used[library] = Set()
462 for symbol in symbols.elems():
463 if symbol_provider.has_key(symbol):
464 # in doubt, prefer symbols from libc
# matches library names that start with "libc." or "libc-"
465 if re.match("^libc[\.-]", library):
466 library_symbols[library].add(symbol)
467 symbol_provider[symbol] = library
469 debug(DEBUG_SPAM, "duplicate symbol", symbol, "in",
470 symbol_provider[symbol], "and", library)
472 library_symbols[library].add(symbol)
473 symbol_provider[symbol] = library
475 # which symbols are actually used from each lib
476 for (symbol, is_weak) in needed_symbols.elems():
477 if not symbol_provider.has_key(symbol):
# with uclibc set, a missing "main" is not treated as an error
479 if not uclibc or (symbol != "main"):
# NOTE(review): raising a plain string is rejected with a TypeError by
# Python 2.6 and later.
480 raise "No library provides non-weak " + symbol
482 lib = symbol_provider[symbol]
483 library_symbols_used[lib].add(symbol)
486 for library in libraries.elems():
487 debug(DEBUG_VERBOSE, "reducing", library)
488 debug(DEBUG_SPAM, "using: " + string.join(library_symbols_used[library].elems()))
489 so_file = find_lib(library)
# NOTE(review): root is inserted into the pattern unescaped, so regexp
# metacharacters in the path change the meaning of this prefix test.
490 if root and (re.compile("^" + root).search(so_file)):
491 debug(DEBUG_VERBOSE, "no action required for " + so_file)
493 so_file_name = os.path.basename(so_file)
495 sys.exit("File not found:" + library)
496 pic_file = find_pic(library)
498 # No pic file, so we have to use the .so file, no reduction
499 debug(DEBUG_VERBOSE, "No pic file found for", so_file, "; copying")
500 command(target + "objcopy", "--strip-unneeded -R .note -R .comment",
501 so_file, dest_path + "/" + so_file_name + "-so-stripped")
503 # we have a pic file, recompile
504 debug(DEBUG_SPAM, "extracting from:", pic_file, "so_file:", so_file)
505 soname = extract_soname(so_file)
507 debug(DEBUG_VERBOSE, so_file, " has no soname, copying")
509 debug(DEBUG_SPAM, "soname:", soname)
510 base_name = so_pattern.match(library).group(1)
511 # libc needs its soinit.o and sofini.o as well as the pic
512 if (base_name == "libc") and not uclibc:
513 # force dso_handle.os to be included, otherwise reduced libc
514 # may segfault in ptmalloc_init due to undefined weak reference
515 extra_flags = find_lib(ldlib) + " -u __dso_handle"
516 extra_pre_obj = libc_extras_dir + "/soinit.o"
517 extra_post_obj = libc_extras_dir + "/sofini.o"
522 map_file = find_pic_map(library)
524 extra_flags = extra_flags + " -Wl,--version-script=" + map_file
# every used symbol becomes a "-u<symbol>" option for the link command
525 if library_symbols_used[library].elems():
526 joined_symbols = "-u" + string.join(library_symbols_used[library].elems(), " -u")
529 # compile in only used symbols
530 command(target + "gcc",
531 "-nostdlib -nostartfiles -shared -Wl,-soname=" + soname,\
533 "-o", dest_path + "/" + so_file_name + "-so", \
538 "-lgcc -L", dest_path, \
539 "-L" + string.join(lib_path, " -L"), \
540 library_depends_gcc_libnames(so_file))
# the freshly linked "-so" file is stripped into the "-so-stripped" file
542 command(target + "objcopy", "--strip-unneeded -R .note -R .comment",
543 dest_path + "/" + so_file_name + "-so",
544 dest_path + "/" + so_file_name + "-so-stripped")
# size report: original library, relinked library, stripped library
546 debug(DEBUG_VERBOSE, so_file, "\t", `os.stat(so_file)[ST_SIZE]`)
547 debug(DEBUG_VERBOSE, dest_path + "/" + so_file_name + "-so", "\t",
548 `os.stat(dest_path + "/" + so_file_name + "-so")[ST_SIZE]`)
549 debug(DEBUG_VERBOSE, dest_path + "/" + so_file_name + "-so-stripped",
550 "\t", `os.stat(dest_path + "/" + so_file_name + "-so-stripped")[ST_SIZE]`)
# Final steps in dest_path: give the stripped libraries their real names,
# delete the intermediate "-so" files, rename libraries to their SONAME and
# install the dynamic linker.
552 # Finalising libs and cleaning up
# "<name>-so-stripped" is renamed to "<name>"
553 for lib in regexpfilter(os.listdir(dest_path), "(.*)-so-stripped$").elems():
554 os.rename(dest_path + "/" + lib + "-so-stripped", dest_path + "/" + lib)
# every remaining file whose name ends in "-so" is removed
555 for lib in regexpfilter(os.listdir(dest_path), "(.*-so)$").elems():
556 os.remove(dest_path + "/" + lib)
558 # Canonicalize library names.
559 for lib in regexpfilter(os.listdir(dest_path), "(.*so[.\d]*)$").elems():
560 this_lib_path = dest_path + "/" + lib
561 if os.path.islink(this_lib_path):
562 debug(DEBUG_VERBOSE, "Unlinking %s." % lib)
563 os.remove(this_lib_path)
565 soname = extract_soname(this_lib_path)
567 debug(DEBUG_VERBOSE, "Moving %s to %s." % (lib, soname))
568 os.rename(dest_path + "/" + lib, dest_path + "/" + soname)
570 # Make sure the dynamic linker is present and is executable
571 ld_file = find_lib(ldlib)
572 ld_file_name = os.path.basename(ld_file)
# the linker is only copied when dest_path does not already contain it
574 if not os.access(dest_path + "/" + ld_file_name, os.F_OK):
575 debug(DEBUG_NORMAL, "I: stripping and copying dynamic linker.")
576 command(target + "objcopy", "--strip-unneeded -R .note -R .comment",
577 ld_file, dest_path + "/" + ld_file_name)
# 0755 is a Python 2 octal literal: rwxr-xr-x
579 os.chmod(dest_path + "/" + ld_file_name, 0755)