#!/usr/pkg/bin/python """routines for creating and dumping ELF files this is my plan for the future direction of postForth, to ensure unique hashes for every newly defined word in a dictionary, no matter how many there may be""" Copyright = """ elf -- routines for creating and dumping ELF files Copyright (C) 2005 John Comeau This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. """ errormessage = "Not all needed libraries found, upgrade or check path: " try: True # not defined in older Python releases except: True, False = 1, 0 try: import sys, os, types, re, pwd sys.path.append(os.path.join(pwd.getpwuid(os.geteuid())[5], 'lib', 'python')) errormessage = errormessage + repr(sys.path) from com.jcomeau import gpl, jclicense except: try: sys.stderr.write("%s\n" % errormessage) except: print errormessage raise # get name this program was called as myself = os.path.split(sys.argv[0])[1] command = os.path.splitext(myself)[0] # chop any suffix (extension) # now get name we gave it when we wrote it originalself = re.compile('[0-9A-Za-z]+').search(Copyright).group() # globals and routines that should be in every program # (yes, you could import them, but there are problems in that approach too) def DebugPrint(*whatever): return False # defined instead by pytest module, use that for debugging def join(*args): "for pythons without str.join" string, array = args if type(array) == types.StringType: array = eval(array) if hasattr(str, 'join'): return string.join(array) else: joined = '' for index in range(0, len(array)): joined = joined + array[index] if index != (len(array) - 1): joined = joined + string return joined def split(*args): "for pythons without str.split" string, string_to_split = args if not len(string): string = None if hasattr('str', 'split'): return string_to_split.split(string) else: return re.compile(re.escape(string)).split(string_to_split) # other globals, specific to this program import struct # 32-bit data types Elf32_Addr = ' 0: padding = '\0' * (length - (len(string) % length)) return string + padding def padlen(strlen, length): return len(pad('\0' * strlen, length)) def nopad(string, length): "nop version of pad, for 'commenting out' padding during testing" return string def nopadlen(strlen, length): "nop version of nopadlen, see nopad" return strlen def symtab_entry(*args): entry = elf_structure(symtab_entry_struct) return entry def elf_hash(symbol): """hash a symbol according to this implementation of the algorithm from the spec in pmft11.pdf: unsigned long elf_hash (const unsigned char *name) { unsigned long h = 0, g; while (*name) { h=(h << 4) + *name++; if (g = h & 0xf0000000) h ^= g >> 24; h &= ~g; } return h; } this will probably work with unicode characters above chr(127) also, but the standard doesn't allow that as far as I can see. """ DebugPrint('hashing', symbol) hash = 0 for byte in symbol: hash = ((hash << 4) & 0xffffffff) + ord(byte) high_nybble = hash & 0xf0000000L if high_nybble: hash ^= high_nybble >> 24 #DebugPrint('hash before ANDing with ~0x%x: 0x%x' % (high_nybble, hash)) hash &= ~high_nybble DebugPrint('hash value of "%s": 0x%x' % (symbol, hash)) return hash def section_header(*init): header = elf_structure(section_header_struct) if len(init): for key in init[0].keys(): header.field[key].default = [init[0][key]] return header def program_header(*init): header = elf_structure(program_header_struct) if len(init): for key in init[0].keys(): header.field[key].default = [init[0][key]] return header def dump_section_header(section_header): header = elf_structure(section_header_struct, section_header) return header def elf_header(): header = elf_structure(elf_header_struct) header.field['e_ehsize'].default = [struct.calcsize(header.packinfo)] header.field['e_phentsize'].default = [struct.calcsize( program_header().packinfo)] header.field['e_shentsize'].default = [struct.calcsize( section_header().packinfo)] return header def dump_elf_header(filename): file = open(filename) data = file.read() file.close() header = elf_structure(elf_header_struct, data) return header.getvalue() def dump_name(data, offset): return data[offset:data.index('\0', offset)] def dump_elf_file(filename): file = open(filename) data = file.read() file.close() file = elf_structure(elf_header_struct, data) file.data = data file.name_section_index = file.field['e_shstrndx'].rawvalue() if file.field['e_shoff']: file.section_headers = section_headers(file) for section_header in file.section_headers: section_type = section_header.field['sh_type'].rawvalue() print dump_name(file.data[file.names_offset:], section_header.field['sh_name'].rawvalue()), 'type: %d' % section_type if section_type == SHT_HASH: dump_hash_table(file, section_header) elif section_type == SHT_DYNSYM or section_type == SHT_SYMTAB: dump_symbol_table(file, section_header) else: dump_raw( file.data[section_header.field['sh_offset'].rawvalue():], section_header.field['sh_size'].rawvalue()) def unpack_integers(packed): return struct.unpack('<%dI' % (len(packed) / 4), packed) def section_headers(file): entries = [] headers = file.data[int(file.field['e_shoff'].rawvalue()):] sections = file.field['e_shnum'].rawvalue() section_header_entry_size = file.field['e_shentsize'].rawvalue() section_names = dump_section_header( headers[file.name_section_index * section_header_entry_size:]) file.names_offset = section_names.field['sh_offset'].rawvalue() for index in range(sections): section_header = dump_section_header( headers[index * section_header_entry_size:]) entries.append(section_header) return entries def dump_hash_table(file, section_header): print 'dumping hash table' table = file.data[section_header.field['sh_offset'].rawvalue():] linked_symbols = file.section_headers[ section_header.field['sh_link'].rawvalue()] dump_symbol_table(file, linked_symbols) size = section_header.field['sh_size'].rawvalue() file.hash_table = unpack_integers(table[0:size]) nbucket, nchain = file.hash_table[0], file.hash_table[1] buckets = file.hash_table[2:2 + nbucket] chains = file.hash_table[2 + nbucket:] print buckets, chains for entry in range(len(file.symbol_table)): string = dump_string(file.linked_strings, file.symbol_table[entry].field['st_name'].rawvalue()) print 'symbol table entry %d: "%s", 0x%x, 0x%x, %s' % (entry, string, file.symbol_table[entry].field['st_value'].rawvalue(), file.symbol_table[entry].field['st_size'].rawvalue(), dump_hash(file, string, buckets, chains)) def dump_hash(file, string, buckets, chains): hash = elf_hash(string) bucket = hash % len(buckets) chain = buckets[bucket] chain_list = '%d' % chain while True: chain_list += '->%d' % chains[chain] chain = chains[chain] if chain == 0: break return '0x%x -> %d: %s' % (hash, bucket, chain_list) def dump_symbol_table(file, section_header): symtab_entry_size = elf_structure(symtab_entry_struct).getsize() print 'symtab entry size: %d' % symtab_entry_size symbol_table = file.data[section_header.field['sh_offset'].rawvalue():] local_symbols = section_header.field['sh_info'].rawvalue() print 'local symbols: %d' % local_symbols size = section_header.field['sh_size'].rawvalue() file.symbol_table = [] for offset in range(0, len(symbol_table[0:size]), symtab_entry_size): file.symbol_table.append(elf_structure(symtab_entry_struct, symbol_table[offset:offset + symtab_entry_size])) linked_strings = file.section_headers[ section_header.field['sh_link'].rawvalue()] strings_size = linked_strings.field['sh_size'].rawvalue() file.linked_strings = file.data[linked_strings.field['sh_offset'].rawvalue():] file.linked_strings = file.linked_strings[0:strings_size] print 'linked strings: "%s"' % repr(file.linked_strings) for entry in range(len(file.symbol_table)): if entry < local_symbols: continue print 'symbol table entry %d: "%s", 0x%x, 0x%x' % (entry, dump_string(file.linked_strings, file.symbol_table[entry].field['st_name'].rawvalue()), file.symbol_table[entry].field['st_value'].rawvalue(), file.symbol_table[entry].field['st_size'].rawvalue()) def dump_string(string_table, offset): return string_table[offset:string_table.index('\0', offset)] def dump_raw(data, size): print 'dumping raw data' print repr(data[0:size]) def symtab(): return [symtab_entry()] def dump_symtab(*args): print 'dumping symbol table:' def create_elf_file(objectfile, elffile): elf = elf_file() elf.text = open(objectfile).read().rstrip(chr(0x90)) # strip trailing NOPs elffile = open(elffile, 'wb') elffile.write(elf.getvalue()) elffile.close() return True def elf(): """default entry point, make symlinks to other entry points as needed""" print __doc__ if __name__ == '__main__': # if this program was imported by another, the above test will fail, # and this following code won't be used... function = command; args = sys.argv[1:] # default case if command == originalself: try: if len(args) and eval('type(%s) == types.FunctionType' % args[0]): function = sys.argv[1]; args = sys.argv[2:] except: pass print eval('%s%s' % (function, repr(tuple(args)))) or '' else: # if you want something to be done on import, do it here; otherwise pass pass