llvm-mirror/utils/extract-section.py

#!/usr/bin/env python
from __future__ import print_function
'''
Helper script to print out the raw content of an ELF section.
Example usages:
```
# print out as bits by default
extract-section.py .text --input-file=foo.o
```
```
# read from stdin and print out in hex
cat foo.o | extract-section.py -h .text
```
This is merely a wrapper around `llvm-readobj` that focuses on the binary
content as well as providing more formatting options.
'''

# Unfortunately reading binary from stdin is not so trivial in Python...
def read_raw_stdin():
    '''
    Read everything available on standard input and return it unmodified.

    On Python 3 the data is read from the binary layer underneath
    `sys.stdin` (`sys.stdin.buffer`), so the result is a `bytes` object.
    On Python 2 `sys.stdin` itself is read; on Windows the descriptor is
    switched to binary mode first so the C runtime does not translate
    newlines in the object file.
    '''
    import sys
    if sys.version_info >= (3, 0):
        return sys.stdin.buffer.read()

    # Python 2: Windows opens stdin in text mode by default, so the
    # descriptor has to be put into binary mode explicitly.
    if sys.platform == 'win32':
        import os
        import msvcrt
        # The function is `setmode`; `msvcrt` has no `setformat`.
        msvcrt.setmode(sys.stdin.fileno(), os.O_BINARY)
    return sys.stdin.read()

def get_raw_section_dump(readobj_path, section_name, input_file):
    '''
    Run `llvm-readobj` to hex-dump one section and return its stdout as text.

    readobj_path -- executable to launch (normally `llvm-readobj`).
    section_name -- section passed to `--hex-dump=`.
    input_file   -- object file path, or '-' to forward this process's
                    standard input to the tool.

    Returns whatever the tool wrote to stdout, decoded as UTF-8 when the
    pipe yields bytes. The tool's exit status is not inspected, so a failed
    run simply produces whatever (possibly empty) output it printed.
    '''
    import subprocess
    # Long options are spelled with two dashes: newer llvm-readobj builds no
    # longer accept the single-dash form, while older ones accept both.
    cmd = [readobj_path, '--elf-output-style=GNU',
            '--hex-dump={}'.format(section_name), input_file]
    proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

    # With input=None, communicate() just closes the child's stdin.
    payload = read_raw_stdin() if input_file == '-' else None
    out, _ = proc.communicate(input=payload)

    return out if isinstance(out, str) else out.decode('utf-8')

def render_section_dump(raw_section, output_format='bits', byte_indicator=False,
        hex_width=4):
    '''
    Convert the text of a GNU-style hex dump into a list of output tokens.

    raw_section    -- text containing lines of the form
                      `<address> <word> <word> <word> <word> <ascii>`;
                      lines starting with 'Hex dump' are skipped.
    output_format  -- 'bits' (one token per bit, most significant first) or
                      'hex' (one token per `hex_width`-byte element).
    byte_indicator -- in 'bits' mode, append a '.' token after every 8 bits.
    hex_width      -- element width in bytes for 'hex' mode (1 to 4).

    Every dump word is treated as a 32-bit value. At most four words are
    taken from each line, and scanning of a line stops at the first field
    that is not a hexadecimal number (the padding / ASCII column).

    Raises ValueError for an unknown format or an out-of-range hex width.
    '''
    if output_format not in ('bits', 'hex'):
        raise ValueError('unknown output format: {!r}'.format(output_format))

    hex_mode = output_format == 'hex'
    if hex_mode:
        if not 0 < hex_width <= 4:
            raise ValueError('hex width must be between 1 and 4 bytes')
        # These only depend on the width, so compute them once.
        # NOTE: a width of 3 does not divide a 32-bit word evenly; it yields
        # a single 24-bit element per word and drops the low byte.
        width_bits = hex_width * 8
        offsets = range(32 - width_bits, -1, -width_bits)
        mask = (1 << width_bits) - 1
        format_str = '{:0' + str(hex_width * 2) + 'x}'

    results = []
    for line in raw_section.splitlines(False):
        if line.startswith('Hex dump'):
            continue
        # Drop the leading address column, keep up to four data words.
        for part in line.strip().split(' ')[1:][:4]:
            try:
                val = int(part, 16)
            except ValueError:
                # Reached the padding or the ASCII rendering of the line.
                break
            if hex_mode:
                results.extend(format_str.format((val >> off) & mask)
                        for off in offsets)
                continue
            # Bits mode: split into bytes first, most significant byte first.
            for byte_off in (24, 16, 8, 0):
                byte = (val >> byte_off) & 0xFF
                results.extend(str((byte >> bit_off) & 1)
                        for bit_off in range(7, -1, -1))
                if byte_indicator:
                    results.append('.')
    return results

if __name__ == '__main__':
    import argparse
    # The default '-h' (--help) will conflict with our '-h' (hex) format
    arg_parser = argparse.ArgumentParser(add_help=False)
    arg_parser.add_argument('--readobj-path', metavar='<executable path>', type=str,
            help='Path to llvm-readobj')
    arg_parser.add_argument('--input-file', metavar='<file>', type=str,
            help='Input object file, or \'-\' to read from stdin')
    arg_parser.add_argument('section', metavar='<name>', type=str,
            help='Name of the section to extract')
    # Output format
    format_group = arg_parser.add_mutually_exclusive_group()
    format_group.add_argument('-b', dest='format', action='store_const', const='bits',
            help='Print out in bits')
    arg_parser.add_argument('--byte-indicator', action='store_true',
            help='Whether to print a \'.\' every 8 bits in bits printing mode')
    format_group.add_argument('-h', dest='format', action='store_const', const='hex',
            help='Print out in hexadecimal')
    arg_parser.add_argument('--hex-width', metavar='<# of bytes>', type=int,
            help='The width (in byte) of every element in hex printing mode')

    arg_parser.add_argument('--help', action='help')
    # The default for the tool path must use the same destination as
    # '--readobj-path' (readobj_path); otherwise the option is parsed but
    # never consulted.
    arg_parser.set_defaults(format='bits', readobj_path='llvm-readobj', input_file='-',
            byte_indicator=False, hex_width=4)
    args = arg_parser.parse_args()

    # Reject a bad width up front instead of silently printing nothing.
    if args.format == 'hex' and not 0 < args.hex_width <= 4:
        arg_parser.error('--hex-width must be between 1 and 4')

    raw_section = get_raw_section_dump(args.readobj_path, args.section, args.input_file)

    results = render_section_dump(raw_section, args.format, args.byte_indicator,
            args.hex_width)
    print(' '.join(results), end='')
[M68k][test](6/8) Add all of the tests, and a small utility -- extract-section.py -- that helps extract a specific object file section and print it in textual format. This utility is just a workaround for tests inside `Encoding`. Hopefully in the future we can replace dependencies in those tests with existing tools (e.g. llvm-readobj). Please refer to this bug for more context: https://bugs.llvm.org/show_bug.cgi?id=49245 Note that since we don't have AsmParser for now, we are testing the MC part using MIR as input and putting those tests under the `Encoding` folder. In the future when AsmParser (and disassembler) is finished, those tests will be moved to `test/MC/M68k`. Authors: myhsu, m4yers, glaubitz Differential Revision: https://reviews.llvm.org/D88392 2021-03-08 01:32:37 +01:00			`#!/usr/bin/env python`
			`from __future__ import print_function`
			`'''`
			`Helper script to print out the raw content of an ELF section.`
			`Example usages:`
			```
			`# print out as bits by default`
			`extract-section.py .text --input-file=foo.o`
			```
			```
			`# read from stdin and print out in hex`
			`cat foo.o \| extract-section.py -h .text`
			```
			This is merely a wrapper around `llvm-readobj` that focuses on the binary
			`content as well as providing more formatting options.`
			`'''`

			`# Unfortunately reading binary from stdin is not so trivial in Python...`
			`def read_raw_stdin():`
			`import sys`
			`if sys.version_info >= (3, 0):`
			`reading_source = sys.stdin.buffer`
			`else:`
			`# Windows will always read as string so we need some`
			`# special handling`
			`if sys.platform == 'win32':`
			`import os, msvcrt`
			`msvcrt.setformat(sys.stdin.fileno(), os.O_BINARY)`
			`reading_source = sys.stdin`
			`return reading_source.read()`

			`def get_raw_section_dump(readobj_path, section_name, input_file):`
			`import subprocess`
			`cmd = [readobj_path, '-elf-output-style=GNU', '--hex-dump={}'.format(section_name),`
			`input_file]`
[M68k] Fix extract-section.py under Python 3 read_raw_stdin() was opening a file in binary mode, but Popen was being told to use text mode (universal_newlines). This is benign on Python 2 but an error on Python 3. Differential Revision: https://reviews.llvm.org/D98428 2021-03-14 19:36:57 +01:00			`proc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)`
[M68k][test](6/8) Add all of the tests, and a small utility -- extract-section.py -- that helps extract a specific object file section and print it in textual format. This utility is just a workaround for tests inside `Encoding`. Hopefully in the future we can replace dependencies in those tests with existing tools (e.g. llvm-readobj). Please refer to this bug for more context: https://bugs.llvm.org/show_bug.cgi?id=49245 Note that since we don't have AsmParser for now, we are testing the MC part using MIR as input and putting those tests under the `Encoding` folder. In the future when AsmParser (and disassembler) is finished, those tests will be moved to `test/MC/M68k`. Authors: myhsu, m4yers, glaubitz Differential Revision: https://reviews.llvm.org/D88392 2021-03-08 01:32:37 +01:00
			`if input_file == '-':`
			`# From stdin`
			`out,_ = proc.communicate(input=read_raw_stdin())`
			`else:`
			`out,_ = proc.communicate()`

			`return out.decode('utf-8') if type(out) is not str else out`

			`if __name__ == '__main__':`
			`import argparse`
			`# The default '-h' (--help) will conflict with our '-h' (hex) format`
			`arg_parser = argparse.ArgumentParser(add_help=False)`
			`arg_parser.add_argument('--readobj-path', metavar='<executable path>', type=str,`
			`help='Path to llvm-readobj')`
			`arg_parser.add_argument('--input-file', metavar='<file>', type=str,`
			`help='Input object file, or \'-\' to read from stdin')`
			`arg_parser.add_argument('section', metavar='<name>', type=str,`
			`help='Name of the section to extract')`
			`# Output format`
			`format_group = arg_parser.add_mutually_exclusive_group()`
			`format_group.add_argument('-b', dest='format', action='store_const', const='bits',`
			`help='Print out in bits')`
			`arg_parser.add_argument('--byte-indicator', action='store_true',`
			`help='Whether to print a \'.\' every 8 bits in bits printing mode')`
			`format_group.add_argument('-h', dest='format', action='store_const', const='hex',`
			`help='Print out in hexadecimal')`
			`arg_parser.add_argument('--hex-width', metavar='<# of bytes>', type=int,`
			`help='The width (in byte) of every element in hex printing mode')`

			`arg_parser.add_argument('--help', action='help')`
			`arg_parser.set_defaults(format='bits', tool_path='llvm-readobj', input_file='-',`
			`byte_indicator=False, hex_width=4)`
			`args = arg_parser.parse_args()`

			`raw_section = get_raw_section_dump(args.tool_path, args.section, args.input_file)`

			`results = []`
			`for line in raw_section.splitlines(False):`
			`if line.startswith('Hex dump'):`
			`continue`
			`parts = line.strip().split(' ')[1:]`
			`for part in parts[:4]:`
			`# exclude any non-hex dump string`
			`try:`
			`val = int(part, 16)`
			`if args.format == 'bits':`
			`# divided into bytes first`
			`for byte in [(val >> off) & 0xFF for off in (24,16,8,0)]:`
			`for bit in [(byte >> off) & 1 for off in range(7, -1, -1)]:`
			`results.append(str(bit))`
			`if args.byte_indicator:`
			`results.append('.')`
			`elif args.format == 'hex':`
			`assert args.hex_width <= 4 and args.hex_width > 0`
			`width_bits = args.hex_width * 8`
			`offsets = [off for off in range(32 - width_bits, -1, -width_bits)]`
			`mask = (1 << width_bits) - 1`
			`format_str = "{:0" + str(args.hex_width * 2) + "x}"`
			`for word in [(val >> i) & mask for i in offsets]:`
			`results.append(format_str.format(word))`
			`except:`
			`break`
			`print(' '.join(results), end='')`