elf.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
  1. # SPDX-License-Identifier: GPL-2.0+
  2. # Copyright (c) 2016 Google, Inc
  3. # Written by Simon Glass <sjg@chromium.org>
  4. #
  5. # Handle various things related to ELF images
  6. #
  7. from collections import namedtuple, OrderedDict
  8. import io
  9. import os
  10. import re
  11. import shutil
  12. import struct
  13. import tempfile
  14. from u_boot_pylib import command
  15. from u_boot_pylib import tools
  16. from u_boot_pylib import tout
  17. ELF_TOOLS = True
  18. try:
  19. from elftools.elf.elffile import ELFFile
  20. from elftools.elf.elffile import ELFError
  21. from elftools.elf.sections import SymbolTableSection
  22. except: # pragma: no cover
  23. ELF_TOOLS = False
  24. # BSYM in little endian, keep in sync with include/binman_sym.h
  25. BINMAN_SYM_MAGIC_VALUE = 0x4d595342
  26. # Information about an EFL symbol:
  27. # section (str): Name of the section containing this symbol
  28. # address (int): Address of the symbol (its value)
  29. # size (int): Size of the symbol in bytes
  30. # weak (bool): True if the symbol is weak
  31. # offset (int or None): Offset of the symbol's data in the ELF file, or None if
  32. # not known
  33. Symbol = namedtuple('Symbol', ['section', 'address', 'size', 'weak', 'offset'])
  34. # Information about an ELF file:
  35. # data: Extracted program contents of ELF file (this would be loaded by an
  36. # ELF loader when reading this file
  37. # load: Load address of code
  38. # entry: Entry address of code
  39. # memsize: Number of bytes in memory occupied by loading this ELF file
  40. ElfInfo = namedtuple('ElfInfo', ['data', 'load', 'entry', 'memsize'])
  41. def GetSymbols(fname, patterns):
  42. """Get the symbols from an ELF file
  43. Args:
  44. fname: Filename of the ELF file to read
  45. patterns: List of regex patterns to search for, each a string
  46. Returns:
  47. None, if the file does not exist, or Dict:
  48. key: Name of symbol
  49. value: Hex value of symbol
  50. """
  51. stdout = tools.run('objdump', '-t', fname)
  52. lines = stdout.splitlines()
  53. if patterns:
  54. re_syms = re.compile('|'.join(patterns))
  55. else:
  56. re_syms = None
  57. syms = {}
  58. syms_started = False
  59. for line in lines:
  60. if not line or not syms_started:
  61. if 'SYMBOL TABLE' in line:
  62. syms_started = True
  63. line = None # Otherwise code coverage complains about 'continue'
  64. continue
  65. if re_syms and not re_syms.search(line):
  66. continue
  67. space_pos = line.find(' ')
  68. value, rest = line[:space_pos], line[space_pos + 1:]
  69. flags = rest[:7]
  70. parts = rest[7:].split()
  71. section, size = parts[:2]
  72. if len(parts) > 2:
  73. name = parts[2] if parts[2] != '.hidden' else parts[3]
  74. syms[name] = Symbol(section, int(value, 16), int(size, 16),
  75. flags[1] == 'w', None)
  76. # Sort dict by address
  77. return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))
  78. def _GetFileOffset(elf, addr):
  79. """Get the file offset for an address
  80. Args:
  81. elf (ELFFile): ELF file to check
  82. addr (int): Address to search for
  83. Returns
  84. int: Offset of that address in the ELF file, or None if not valid
  85. """
  86. for seg in elf.iter_segments():
  87. seg_end = seg['p_vaddr'] + seg['p_filesz']
  88. if seg.header['p_type'] == 'PT_LOAD':
  89. if addr >= seg['p_vaddr'] and addr < seg_end:
  90. return addr - seg['p_vaddr'] + seg['p_offset']
  91. def GetFileOffset(fname, addr):
  92. """Get the file offset for an address
  93. Args:
  94. fname (str): Filename of ELF file to check
  95. addr (int): Address to search for
  96. Returns
  97. int: Offset of that address in the ELF file, or None if not valid
  98. """
  99. if not ELF_TOOLS:
  100. raise ValueError("Python: No module named 'elftools'")
  101. with open(fname, 'rb') as fd:
  102. elf = ELFFile(fd)
  103. return _GetFileOffset(elf, addr)
  104. def GetSymbolFromAddress(fname, addr):
  105. """Get the symbol at a particular address
  106. Args:
  107. fname (str): Filename of ELF file to check
  108. addr (int): Address to search for
  109. Returns:
  110. str: Symbol name, or None if no symbol at that address
  111. """
  112. if not ELF_TOOLS:
  113. raise ValueError("Python: No module named 'elftools'")
  114. with open(fname, 'rb') as fd:
  115. elf = ELFFile(fd)
  116. syms = GetSymbols(fname, None)
  117. for name, sym in syms.items():
  118. if sym.address == addr:
  119. return name
  120. def GetSymbolFileOffset(fname, patterns):
  121. """Get the symbols from an ELF file
  122. Args:
  123. fname: Filename of the ELF file to read
  124. patterns: List of regex patterns to search for, each a string
  125. Returns:
  126. None, if the file does not exist, or Dict:
  127. key: Name of symbol
  128. value: Hex value of symbol
  129. """
  130. if not ELF_TOOLS:
  131. raise ValueError("Python: No module named 'elftools'")
  132. syms = {}
  133. with open(fname, 'rb') as fd:
  134. elf = ELFFile(fd)
  135. re_syms = re.compile('|'.join(patterns))
  136. for section in elf.iter_sections():
  137. if isinstance(section, SymbolTableSection):
  138. for symbol in section.iter_symbols():
  139. if not re_syms or re_syms.search(symbol.name):
  140. addr = symbol.entry['st_value']
  141. syms[symbol.name] = Symbol(
  142. section.name, addr, symbol.entry['st_size'],
  143. symbol.entry['st_info']['bind'] == 'STB_WEAK',
  144. _GetFileOffset(elf, addr))
  145. # Sort dict by address
  146. return OrderedDict(sorted(syms.items(), key=lambda x: x[1].address))
  147. def GetSymbolAddress(fname, sym_name):
  148. """Get a value of a symbol from an ELF file
  149. Args:
  150. fname: Filename of the ELF file to read
  151. patterns: List of regex patterns to search for, each a string
  152. Returns:
  153. Symbol value (as an integer) or None if not found
  154. """
  155. syms = GetSymbols(fname, [sym_name])
  156. sym = syms.get(sym_name)
  157. if not sym:
  158. return None
  159. return sym.address
  160. def GetPackString(sym, msg):
  161. """Get the struct.pack/unpack string to use with a given symbol
  162. Args:
  163. sym (Symbol): Symbol to check. Only the size member is checked
  164. @msg (str): String which indicates the entry being processed, used for
  165. errors
  166. Returns:
  167. str: struct string to use, .e.g. '<I'
  168. Raises:
  169. ValueError: Symbol has an unexpected size
  170. """
  171. if sym.size == 4:
  172. return '<I'
  173. elif sym.size == 8:
  174. return '<Q'
  175. else:
  176. raise ValueError('%s has size %d: only 4 and 8 are supported' %
  177. (msg, sym.size))
  178. def GetSymbolOffset(elf_fname, sym_name, base_sym=None):
  179. """Read the offset of a symbol compared to base symbol
  180. This is useful for obtaining the value of a single symbol relative to the
  181. base of a binary blob.
  182. Args:
  183. elf_fname: Filename of the ELF file to read
  184. sym_name (str): Name of symbol to read
  185. base_sym (str): Base symbol to sue to calculate the offset (or None to
  186. use '__image_copy_start'
  187. Returns:
  188. int: Offset of the symbol relative to the base symbol
  189. """
  190. if not base_sym:
  191. base_sym = '__image_copy_start'
  192. fname = tools.get_input_filename(elf_fname)
  193. syms = GetSymbols(fname, [base_sym, sym_name])
  194. base = syms[base_sym].address
  195. val = syms[sym_name].address
  196. return val - base
  197. def LookupAndWriteSymbols(elf_fname, entry, section, is_elf=False,
  198. base_sym=None):
  199. """Replace all symbols in an entry with their correct values
  200. The entry contents is updated so that values for referenced symbols will be
  201. visible at run time. This is done by finding out the symbols offsets in the
  202. entry (using the ELF file) and replacing them with values from binman's data
  203. structures.
  204. Args:
  205. elf_fname: Filename of ELF image containing the symbol information for
  206. entry
  207. entry: Entry to process
  208. section: Section which can be used to lookup symbol values
  209. base_sym: Base symbol marking the start of the image
  210. Returns:
  211. int: Number of symbols written
  212. """
  213. if not base_sym:
  214. base_sym = '__image_copy_start'
  215. fname = tools.get_input_filename(elf_fname)
  216. syms = GetSymbols(fname, ['image', 'binman'])
  217. if is_elf:
  218. if not ELF_TOOLS:
  219. msg = ("Section '%s': entry '%s'" %
  220. (section.GetPath(), entry.GetPath()))
  221. raise ValueError(f'{msg}: Cannot write symbols to an ELF file without Python elftools')
  222. new_syms = {}
  223. with open(fname, 'rb') as fd:
  224. elf = ELFFile(fd)
  225. for name, sym in syms.items():
  226. offset = _GetFileOffset(elf, sym.address)
  227. new_syms[name] = Symbol(sym.section, sym.address, sym.size,
  228. sym.weak, offset)
  229. syms = new_syms
  230. if not syms:
  231. tout.debug('LookupAndWriteSymbols: no syms')
  232. return 0
  233. base = syms.get(base_sym)
  234. if not base and not is_elf:
  235. tout.debug('LookupAndWriteSymbols: no base')
  236. return 0
  237. base_addr = 0 if is_elf else base.address
  238. count = 0
  239. for name, sym in syms.items():
  240. if name.startswith('_binman'):
  241. msg = ("Section '%s': Symbol '%s'\n in entry '%s'" %
  242. (section.GetPath(), name, entry.GetPath()))
  243. if is_elf:
  244. # For ELF files, use the file offset
  245. offset = sym.offset
  246. else:
  247. # For blobs use the offset of the symbol, calculated by
  248. # subtracting the base address which by definition is at the
  249. # start
  250. offset = sym.address - base.address
  251. if offset < 0 or offset + sym.size > entry.contents_size:
  252. raise ValueError('%s has offset %x (size %x) but the contents '
  253. 'size is %x' % (entry.GetPath(), offset,
  254. sym.size,
  255. entry.contents_size))
  256. pack_string = GetPackString(sym, msg)
  257. if name == '_binman_sym_magic':
  258. value = BINMAN_SYM_MAGIC_VALUE
  259. else:
  260. # Look up the symbol in our entry tables.
  261. value = section.GetImage().LookupImageSymbol(name, sym.weak,
  262. msg, base_addr)
  263. if value is None:
  264. value = -1
  265. pack_string = pack_string.lower()
  266. value_bytes = struct.pack(pack_string, value)
  267. tout.debug('%s:\n insert %s, offset %x, value %x, length %d' %
  268. (msg, name, offset, value, len(value_bytes)))
  269. entry.data = (entry.data[:offset] + value_bytes +
  270. entry.data[offset + sym.size:])
  271. count += 1
  272. if count:
  273. tout.detail(
  274. f"Section '{section.GetPath()}': entry '{entry.GetPath()}' : {count} symbols")
  275. return count
  276. def GetSymbolValue(sym, data, msg):
  277. """Get the value of a symbol
  278. This can only be used on symbols with an integer value.
  279. Args:
  280. sym (Symbol): Symbol to check
  281. data (butes): Data for the ELF file - the symbol data appears at offset
  282. sym.offset
  283. @msg (str): String which indicates the entry being processed, used for
  284. errors
  285. Returns:
  286. int: Value of the symbol
  287. Raises:
  288. ValueError: Symbol has an unexpected size
  289. """
  290. pack_string = GetPackString(sym, msg)
  291. value = struct.unpack(pack_string, data[sym.offset:sym.offset + sym.size])
  292. return value[0]
  293. def MakeElf(elf_fname, text, data):
  294. """Make an elf file with the given data in a single section
  295. The output file has a several section including '.text' and '.data',
  296. containing the info provided in arguments.
  297. Args:
  298. elf_fname: Output filename
  299. text: Text (code) to put in the file's .text section
  300. data: Data to put in the file's .data section
  301. """
  302. outdir = tempfile.mkdtemp(prefix='binman.elf.')
  303. s_file = os.path.join(outdir, 'elf.S')
  304. # Spilt the text into two parts so that we can make the entry point two
  305. # bytes after the start of the text section
  306. text_bytes1 = ['\t.byte\t%#x' % byte for byte in text[:2]]
  307. text_bytes2 = ['\t.byte\t%#x' % byte for byte in text[2:]]
  308. data_bytes = ['\t.byte\t%#x' % byte for byte in data]
  309. with open(s_file, 'w') as fd:
  310. print('''/* Auto-generated C program to produce an ELF file for testing */
  311. .section .text
  312. .code32
  313. .globl _start
  314. .type _start, @function
  315. %s
  316. _start:
  317. %s
  318. .ident "comment"
  319. .comm fred,8,4
  320. .section .empty
  321. .globl _empty
  322. _empty:
  323. .byte 1
  324. .globl ernie
  325. .data
  326. .type ernie, @object
  327. .size ernie, 4
  328. ernie:
  329. %s
  330. ''' % ('\n'.join(text_bytes1), '\n'.join(text_bytes2), '\n'.join(data_bytes)),
  331. file=fd)
  332. lds_file = os.path.join(outdir, 'elf.lds')
  333. # Use a linker script to set the alignment and text address.
  334. with open(lds_file, 'w') as fd:
  335. print('''/* Auto-generated linker script to produce an ELF file for testing */
  336. PHDRS
  337. {
  338. text PT_LOAD ;
  339. data PT_LOAD ;
  340. empty PT_LOAD FLAGS ( 6 ) ;
  341. note PT_NOTE ;
  342. }
  343. SECTIONS
  344. {
  345. . = 0xfef20000;
  346. ENTRY(_start)
  347. .text . : SUBALIGN(0)
  348. {
  349. *(.text)
  350. } :text
  351. .data : {
  352. *(.data)
  353. } :data
  354. _bss_start = .;
  355. .empty : {
  356. *(.empty)
  357. } :empty
  358. /DISCARD/ : {
  359. *(.note.gnu.property)
  360. }
  361. .note : {
  362. *(.comment)
  363. } :note
  364. .bss _bss_start (OVERLAY) : {
  365. *(.bss)
  366. }
  367. }
  368. ''', file=fd)
  369. # -static: Avoid requiring any shared libraries
  370. # -nostdlib: Don't link with C library
  371. # -Wl,--build-id=none: Don't generate a build ID, so that we just get the
  372. # text section at the start
  373. # -m32: Build for 32-bit x86
  374. # -T...: Specifies the link script, which sets the start address
  375. cc, args = tools.get_target_compile_tool('cc')
  376. args += ['-static', '-nostdlib', '-Wl,--build-id=none', '-m32', '-T',
  377. lds_file, '-o', elf_fname, s_file]
  378. stdout = command.output(cc, *args)
  379. shutil.rmtree(outdir)
  380. def DecodeElf(data, location):
  381. """Decode an ELF file and return information about it
  382. Args:
  383. data: Data from ELF file
  384. location: Start address of data to return
  385. Returns:
  386. ElfInfo object containing information about the decoded ELF file
  387. """
  388. if not ELF_TOOLS:
  389. raise ValueError("Python: No module named 'elftools'")
  390. file_size = len(data)
  391. with io.BytesIO(data) as fd:
  392. elf = ELFFile(fd)
  393. data_start = 0xffffffff
  394. data_end = 0
  395. mem_end = 0
  396. virt_to_phys = 0
  397. for i in range(elf.num_segments()):
  398. segment = elf.get_segment(i)
  399. if segment['p_type'] != 'PT_LOAD' or not segment['p_memsz']:
  400. skipped = 1 # To make code-coverage see this line
  401. continue
  402. start = segment['p_paddr']
  403. mend = start + segment['p_memsz']
  404. rend = start + segment['p_filesz']
  405. data_start = min(data_start, start)
  406. data_end = max(data_end, rend)
  407. mem_end = max(mem_end, mend)
  408. if not virt_to_phys:
  409. virt_to_phys = segment['p_paddr'] - segment['p_vaddr']
  410. output = bytearray(data_end - data_start)
  411. for i in range(elf.num_segments()):
  412. segment = elf.get_segment(i)
  413. if segment['p_type'] != 'PT_LOAD' or not segment['p_memsz']:
  414. skipped = 1 # To make code-coverage see this line
  415. continue
  416. start = segment['p_paddr']
  417. offset = 0
  418. if start < location:
  419. offset = location - start
  420. start = location
  421. # A legal ELF file can have a program header with non-zero length
  422. # but zero-length file size and a non-zero offset which, added
  423. # together, are greater than input->size (i.e. the total file size).
  424. # So we need to not even test in the case that p_filesz is zero.
  425. # Note: All of this code is commented out since we don't have a test
  426. # case for it.
  427. size = segment['p_filesz']
  428. #if not size:
  429. #continue
  430. #end = segment['p_offset'] + segment['p_filesz']
  431. #if end > file_size:
  432. #raise ValueError('Underflow copying out the segment. File has %#x bytes left, segment end is %#x\n',
  433. #file_size, end)
  434. output[start - data_start:start - data_start + size] = (
  435. segment.data()[offset:])
  436. return ElfInfo(output, data_start, elf.header['e_entry'] + virt_to_phys,
  437. mem_end - data_start)
  438. def UpdateFile(infile, outfile, start_sym, end_sym, insert):
  439. tout.notice("Creating file '%s' with data length %#x (%d) between symbols '%s' and '%s'" %
  440. (outfile, len(insert), len(insert), start_sym, end_sym))
  441. syms = GetSymbolFileOffset(infile, [start_sym, end_sym])
  442. if len(syms) != 2:
  443. raise ValueError("Expected two symbols '%s' and '%s': got %d: %s" %
  444. (start_sym, end_sym, len(syms),
  445. ','.join(syms.keys())))
  446. size = syms[end_sym].offset - syms[start_sym].offset
  447. if len(insert) > size:
  448. raise ValueError("Not enough space in '%s' for data length %#x (%d); size is %#x (%d)" %
  449. (infile, len(insert), len(insert), size, size))
  450. data = tools.read_file(infile)
  451. newdata = data[:syms[start_sym].offset]
  452. newdata += insert + tools.get_bytes(0, size - len(insert))
  453. newdata += data[syms[end_sym].offset:]
  454. tools.write_file(outfile, newdata)
  455. tout.info('Written to offset %#x' % syms[start_sym].offset)
  456. def read_loadable_segments(data):
  457. """Read segments from an ELF file
  458. Args:
  459. data (bytes): Contents of file
  460. Returns:
  461. tuple:
  462. list of segments, each:
  463. int: Segment number (0 = first)
  464. int: Start address of segment in memory
  465. bytes: Contents of segment
  466. int: entry address for image
  467. Raises:
  468. ValueError: elftools is not available
  469. """
  470. if not ELF_TOOLS:
  471. raise ValueError("Python: No module named 'elftools'")
  472. with io.BytesIO(data) as inf:
  473. try:
  474. elf = ELFFile(inf)
  475. except ELFError as err:
  476. raise ValueError(err)
  477. entry = elf.header['e_entry']
  478. segments = []
  479. for i in range(elf.num_segments()):
  480. segment = elf.get_segment(i)
  481. if segment['p_type'] != 'PT_LOAD' or not segment['p_memsz']:
  482. skipped = 1 # To make code-coverage see this line
  483. continue
  484. start = segment['p_offset']
  485. rend = start + segment['p_filesz']
  486. segments.append((i, segment['p_paddr'], data[start:rend]))
  487. return segments, entry
  488. def is_valid(data):
  489. """Check if some binary data is a valid ELF file
  490. Args:
  491. data (bytes): Bytes to check
  492. Returns:
  493. bool: True if a valid Elf file, False if not
  494. """
  495. try:
  496. DecodeElf(data, 0)
  497. return True
  498. except ELFError:
  499. return False