2017-06-07 05:48:56 +02:00
|
|
|
//===- llvm/BinaryFormat/Magic.cpp - File magic identification --*- C++ -*-===//
|
|
|
|
//
|
2019-01-19 09:50:56 +01:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2017-06-07 05:48:56 +02:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "llvm/BinaryFormat/Magic.h"
|
2020-06-08 17:37:42 +02:00
|
|
|
#include "llvm/ADT/StringRef.h"
|
|
|
|
#include "llvm/ADT/Twine.h"
|
2017-06-07 05:48:56 +02:00
|
|
|
#include "llvm/BinaryFormat/COFF.h"
|
|
|
|
#include "llvm/BinaryFormat/ELF.h"
|
|
|
|
#include "llvm/BinaryFormat/MachO.h"
|
|
|
|
#include "llvm/Support/Endian.h"
|
|
|
|
#include "llvm/Support/FileSystem.h"
|
2018-03-08 20:45:20 +01:00
|
|
|
#include "llvm/Support/MemoryBuffer.h"
|
2017-06-07 05:48:56 +02:00
|
|
|
|
|
|
|
#if !defined(_MSC_VER) && !defined(__MINGW32__)
|
|
|
|
#include <unistd.h>
|
|
|
|
#else
|
|
|
|
#include <io.h>
|
|
|
|
#endif
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
using namespace llvm::support::endian;
|
|
|
|
using namespace llvm::sys::fs;
|
|
|
|
|
|
|
|
template <size_t N>
|
|
|
|
static bool startswith(StringRef Magic, const char (&S)[N]) {
|
|
|
|
return Magic.startswith(StringRef(S, N - 1));
|
|
|
|
}
|
|
|
|
|
2018-05-01 18:10:38 +02:00
|
|
|
/// Identify the magic in magic.
|
2017-06-07 05:48:56 +02:00
|
|
|
file_magic llvm::identify_magic(StringRef Magic) {
|
|
|
|
if (Magic.size() < 4)
|
|
|
|
return file_magic::unknown;
|
|
|
|
switch ((unsigned char)Magic[0]) {
|
|
|
|
case 0x00: {
|
|
|
|
// COFF bigobj, CL.exe's LTO object file, or short import library file
|
|
|
|
if (startswith(Magic, "\0\0\xFF\xFF")) {
|
|
|
|
size_t MinSize =
|
|
|
|
offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic);
|
|
|
|
if (Magic.size() < MinSize)
|
|
|
|
return file_magic::coff_import_library;
|
|
|
|
|
|
|
|
const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
|
|
|
|
if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0)
|
|
|
|
return file_magic::coff_object;
|
|
|
|
if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0)
|
|
|
|
return file_magic::coff_cl_gl_object;
|
|
|
|
return file_magic::coff_import_library;
|
|
|
|
}
|
|
|
|
// Windows resource file
|
2017-07-08 05:06:10 +02:00
|
|
|
if (Magic.size() >= sizeof(COFF::WinResMagic) &&
|
|
|
|
memcmp(Magic.data(), COFF::WinResMagic, sizeof(COFF::WinResMagic)) == 0)
|
2017-06-07 05:48:56 +02:00
|
|
|
return file_magic::windows_resource;
|
|
|
|
// 0x0000 = COFF unknown machine type
|
|
|
|
if (Magic[1] == 0)
|
|
|
|
return file_magic::coff_object;
|
|
|
|
if (startswith(Magic, "\0asm"))
|
|
|
|
return file_magic::wasm_object;
|
|
|
|
break;
|
|
|
|
}
|
[XCOFF] Add functionality for parsing AIX XCOFF object file headers
Summary:
1. Add functionality for parsing AIX XCOFF object files headers.
2. Only support 32-bit AIX XCOFF object files in this patch.
3. Print out the AIX XCOFF object file header in YAML format.
Reviewers: sfertile, hubert.reinterpretcast, jasonliu, mstorsjo, zturner, rnk
Reviewed By: sfertile, hubert.reinterpretcast
Subscribers: jsji, mgorny, hiraditya, jdoerfert, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59419
Patch by Digger Lin
llvm-svn: 357663
2019-04-04 02:53:21 +02:00
|
|
|
|
|
|
|
case 0x01:
|
|
|
|
// XCOFF format
|
|
|
|
if (startswith(Magic, "\x01\xDF"))
|
|
|
|
return file_magic::xcoff_object_32;
|
2019-07-09 20:09:11 +02:00
|
|
|
if (startswith(Magic, "\x01\xF7"))
|
|
|
|
return file_magic::xcoff_object_64;
|
[XCOFF] Add functionality for parsing AIX XCOFF object file headers
Summary:
1. Add functionality for parsing AIX XCOFF object files headers.
2. Only support 32-bit AIX XCOFF object files in this patch.
3. Print out the AIX XCOFF object file header in YAML format.
Reviewers: sfertile, hubert.reinterpretcast, jasonliu, mstorsjo, zturner, rnk
Reviewed By: sfertile, hubert.reinterpretcast
Subscribers: jsji, mgorny, hiraditya, jdoerfert, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59419
Patch by Digger Lin
llvm-svn: 357663
2019-04-04 02:53:21 +02:00
|
|
|
break;
|
|
|
|
|
2021-07-20 16:50:18 +02:00
|
|
|
case 0x03:
|
|
|
|
if (startswith(Magic, "\x03\xF0\x00"))
|
|
|
|
return file_magic::goff_object;
|
|
|
|
break;
|
|
|
|
|
2017-06-07 05:48:56 +02:00
|
|
|
case 0xDE: // 0x0B17C0DE = BC wraper
|
|
|
|
if (startswith(Magic, "\xDE\xC0\x17\x0B"))
|
|
|
|
return file_magic::bitcode;
|
|
|
|
break;
|
|
|
|
case 'B':
|
|
|
|
if (startswith(Magic, "BC\xC0\xDE"))
|
|
|
|
return file_magic::bitcode;
|
|
|
|
break;
|
|
|
|
case '!':
|
|
|
|
if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
|
|
|
|
return file_magic::archive;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\177':
|
|
|
|
if (startswith(Magic, "\177ELF") && Magic.size() >= 18) {
|
|
|
|
bool Data2MSB = Magic[5] == 2;
|
|
|
|
unsigned high = Data2MSB ? 16 : 17;
|
|
|
|
unsigned low = Data2MSB ? 17 : 16;
|
|
|
|
if (Magic[high] == 0) {
|
|
|
|
switch (Magic[low]) {
|
|
|
|
default:
|
|
|
|
return file_magic::elf;
|
|
|
|
case 1:
|
|
|
|
return file_magic::elf_relocatable;
|
|
|
|
case 2:
|
|
|
|
return file_magic::elf_executable;
|
|
|
|
case 3:
|
|
|
|
return file_magic::elf_shared_object;
|
|
|
|
case 4:
|
|
|
|
return file_magic::elf_core;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// It's still some type of ELF file.
|
|
|
|
return file_magic::elf;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 0xCA:
|
|
|
|
if (startswith(Magic, "\xCA\xFE\xBA\xBE") ||
|
|
|
|
startswith(Magic, "\xCA\xFE\xBA\xBF")) {
|
|
|
|
// This is complicated by an overlap with Java class files.
|
|
|
|
// See the Mach-O section in /usr/share/file/magic for details.
|
|
|
|
if (Magic.size() >= 8 && Magic[7] < 43)
|
|
|
|
return file_magic::macho_universal_binary;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
// The two magic numbers for mach-o are:
|
|
|
|
// 0xfeedface - 32-bit mach-o
|
|
|
|
// 0xfeedfacf - 64-bit mach-o
|
|
|
|
case 0xFE:
|
|
|
|
case 0xCE:
|
|
|
|
case 0xCF: {
|
|
|
|
uint16_t type = 0;
|
|
|
|
if (startswith(Magic, "\xFE\xED\xFA\xCE") ||
|
|
|
|
startswith(Magic, "\xFE\xED\xFA\xCF")) {
|
|
|
|
/* Native endian */
|
|
|
|
size_t MinSize;
|
|
|
|
if (Magic[3] == char(0xCE))
|
|
|
|
MinSize = sizeof(MachO::mach_header);
|
|
|
|
else
|
|
|
|
MinSize = sizeof(MachO::mach_header_64);
|
|
|
|
if (Magic.size() >= MinSize)
|
|
|
|
type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15];
|
|
|
|
} else if (startswith(Magic, "\xCE\xFA\xED\xFE") ||
|
|
|
|
startswith(Magic, "\xCF\xFA\xED\xFE")) {
|
|
|
|
/* Reverse endian */
|
|
|
|
size_t MinSize;
|
|
|
|
if (Magic[0] == char(0xCE))
|
|
|
|
MinSize = sizeof(MachO::mach_header);
|
|
|
|
else
|
|
|
|
MinSize = sizeof(MachO::mach_header_64);
|
|
|
|
if (Magic.size() >= MinSize)
|
|
|
|
type = Magic[15] << 24 | Magic[14] << 12 | Magic[13] << 8 | Magic[12];
|
|
|
|
}
|
|
|
|
switch (type) {
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
case 1:
|
|
|
|
return file_magic::macho_object;
|
|
|
|
case 2:
|
|
|
|
return file_magic::macho_executable;
|
|
|
|
case 3:
|
|
|
|
return file_magic::macho_fixed_virtual_memory_shared_lib;
|
|
|
|
case 4:
|
|
|
|
return file_magic::macho_core;
|
|
|
|
case 5:
|
|
|
|
return file_magic::macho_preload_executable;
|
|
|
|
case 6:
|
|
|
|
return file_magic::macho_dynamically_linked_shared_lib;
|
|
|
|
case 7:
|
|
|
|
return file_magic::macho_dynamic_linker;
|
|
|
|
case 8:
|
|
|
|
return file_magic::macho_bundle;
|
|
|
|
case 9:
|
|
|
|
return file_magic::macho_dynamically_linked_shared_lib_stub;
|
|
|
|
case 10:
|
|
|
|
return file_magic::macho_dsym_companion;
|
|
|
|
case 11:
|
|
|
|
return file_magic::macho_kext_bundle;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case 0xF0: // PowerPC Windows
|
|
|
|
case 0x83: // Alpha 32-bit
|
|
|
|
case 0x84: // Alpha 64-bit
|
|
|
|
case 0x66: // MPS R4000 Windows
|
|
|
|
case 0x50: // mc68K
|
|
|
|
case 0x4c: // 80386 Windows
|
|
|
|
case 0xc4: // ARMNT Windows
|
|
|
|
if (Magic[1] == 0x01)
|
|
|
|
return file_magic::coff_object;
|
|
|
|
LLVM_FALLTHROUGH;
|
|
|
|
|
|
|
|
case 0x90: // PA-RISC Windows
|
|
|
|
case 0x68: // mc68K Windows
|
|
|
|
if (Magic[1] == 0x02)
|
|
|
|
return file_magic::coff_object;
|
|
|
|
break;
|
|
|
|
|
[Object] Add basic minidump support
Summary:
This patch adds basic support for reading minidump files. It contains
the definitions of various important minidump data structures (header,
stream directory), and of one minidump stream (SystemInfo). The ability
to read other streams will be added in follow-up patches. However, all
streams can be read even now as raw data, which means lldb's minidump
support (where this code is taken from) can be immediately rebased on
top of this patch as soon as it lands.
As we don't have any support for generating minidump files (yet), this
tests the code via unit tests with some small handcrafted binaries in
the form of c char arrays.
Reviewers: Bigcheese, jhenderson, zturner
Subscribers: srhines, dschuff, mgorny, fedor.sergeev, lemo, clayborg, JDevlieghere, aprantl, lldb-commits, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59291
llvm-svn: 356652
2019-03-21 10:18:59 +01:00
|
|
|
case 'M': // Possible MS-DOS stub on Windows PE file, MSF/PDB file or a
|
|
|
|
// Minidump file.
|
2017-08-31 14:50:42 +02:00
|
|
|
if (startswith(Magic, "MZ") && Magic.size() >= 0x3c + 4) {
|
2017-06-07 05:48:56 +02:00
|
|
|
uint32_t off = read32le(Magic.data() + 0x3c);
|
|
|
|
// PE/COFF file, either EXE or DLL.
|
2017-10-19 03:32:18 +02:00
|
|
|
if (Magic.substr(off).startswith(
|
|
|
|
StringRef(COFF::PEMagic, sizeof(COFF::PEMagic))))
|
2017-06-07 05:48:56 +02:00
|
|
|
return file_magic::pecoff_executable;
|
|
|
|
}
|
2018-03-07 19:40:41 +01:00
|
|
|
if (Magic.startswith("Microsoft C/C++ MSF 7.00\r\n"))
|
|
|
|
return file_magic::pdb;
|
[Object] Add basic minidump support
Summary:
This patch adds basic support for reading minidump files. It contains
the definitions of various important minidump data structures (header,
stream directory), and of one minidump stream (SystemInfo). The ability
to read other streams will be added in follow-up patches. However, all
streams can be read even now as raw data, which means lldb's minidump
support (where this code is taken from) can be immediately rebased on
top of this patch as soon as it lands.
As we don't have any support for generating minidump files (yet), this
tests the code via unit tests with some small handcrafted binaries in
the form of c char arrays.
Reviewers: Bigcheese, jhenderson, zturner
Subscribers: srhines, dschuff, mgorny, fedor.sergeev, lemo, clayborg, JDevlieghere, aprantl, lldb-commits, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59291
llvm-svn: 356652
2019-03-21 10:18:59 +01:00
|
|
|
if (startswith(Magic, "MDMP"))
|
|
|
|
return file_magic::minidump;
|
2017-06-07 05:48:56 +02:00
|
|
|
break;
|
|
|
|
|
2017-06-29 08:30:56 +02:00
|
|
|
case 0x64: // x86-64 or ARM64 Windows.
|
|
|
|
if (Magic[1] == char(0x86) || Magic[1] == char(0xaa))
|
2017-06-07 05:48:56 +02:00
|
|
|
return file_magic::coff_object;
|
|
|
|
break;
|
|
|
|
|
[BinaryFormat] Teach identify_magic about Tapi files.
Summary:
Tapi files are YAML files that start with the !tapi tag. The only execption are
TBD v1 files, which don't have a tag. In that case we have to scan a little
further and check if the first key "archs" exists.
This is the first patch in a series of patches to add libObject support for
text-based dynamic library (.tbd) files.
This patch is practically exactly the same as D37820, that was never pushed to master,
and is needed for future commits related to reading tbd files for llvm-nm
Reviewers: ributzka, steven_wu, bollu, espindola, jfb, shafik, jdoerfert
Reviewed By: steven_wu
Subscribers: dexonsmith, llvm-commits
Tags: #llvm, #clang, #sanitizers, #lldb, #libc, #openmp
Differential Revision: https://reviews.llvm.org/D66149
llvm-svn: 369579
2019-08-21 23:00:16 +02:00
|
|
|
case 0x2d: // YAML '-'
|
|
|
|
if (startswith(Magic, "--- !tapi") || startswith(Magic, "---\narchs:"))
|
|
|
|
return file_magic::tapi_file;
|
|
|
|
break;
|
|
|
|
|
2017-06-07 05:48:56 +02:00
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return file_magic::unknown;
|
|
|
|
}
|
|
|
|
|
|
|
|
std::error_code llvm::identify_magic(const Twine &Path, file_magic &Result) {
|
[NFC] Reordering parameters in getFile and getFileOrSTDIN
In future patches I will be setting the IsText parameter frequently so I will refactor the args to be in the following order. I have removed the FileSize parameter because it is never used.
```
static ErrorOr<std::unique_ptr<MemoryBuffer>>
getFile(const Twine &Filename, bool IsText = false,
bool RequiresNullTerminator = true, bool IsVolatile = false);
static ErrorOr<std::unique_ptr<MemoryBuffer>>
getFileOrSTDIN(const Twine &Filename, bool IsText = false,
bool RequiresNullTerminator = true);
static ErrorOr<std::unique_ptr<MB>>
getFileAux(const Twine &Filename, uint64_t MapSize, uint64_t Offset,
bool IsText, bool RequiresNullTerminator, bool IsVolatile);
static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
getFile(const Twine &Filename, bool IsVolatile = false);
```
Reviewed By: jhenderson
Differential Revision: https://reviews.llvm.org/D99182
2021-03-25 14:47:25 +01:00
|
|
|
auto FileOrError = MemoryBuffer::getFile(Path, /*IsText=*/false,
|
|
|
|
/*RequiresNullTerminator=*/false);
|
2018-03-08 20:45:20 +01:00
|
|
|
if (!FileOrError)
|
|
|
|
return FileOrError.getError();
|
2017-06-07 05:48:56 +02:00
|
|
|
|
2018-03-08 20:45:20 +01:00
|
|
|
std::unique_ptr<MemoryBuffer> FileBuffer = std::move(*FileOrError);
|
|
|
|
Result = identify_magic(FileBuffer->getBuffer());
|
2017-06-07 05:48:56 +02:00
|
|
|
|
|
|
|
return std::error_code();
|
|
|
|
}
|