LLVM API Documentation

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
MCObjectDisassembler.h
Go to the documentation of this file.
1 //===-- llvm/MC/MCObjectDisassembler.h --------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the declaration of the MCObjectDisassembler class, which
11 // can be used to construct an MCModule and an MC CFG from an ObjectFile.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_MC_MCOBJECTDISASSEMBLER_H
16 #define LLVM_MC_MCOBJECTDISASSEMBLER_H
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/OwningPtr.h"
20 #include "llvm/ADT/StringRef.h"
21 #include "llvm/Support/DataTypes.h"
22 #include "llvm/Support/MemoryObject.h"
23 #include <vector>
24 
25 namespace llvm {
26 
27 namespace object {
28  class ObjectFile;
29  class MachOObjectFile;
30 }
31 
32 class MCBasicBlock;
33 class MCDisassembler;
34 class MCFunction;
35 class MCInstrAnalysis;
36 class MCModule;
37 class MCObjectSymbolizer;
38 
39 /// \brief Disassemble an ObjectFile to an MCModule and MCFunctions.
40 /// This class builds on MCDisassembler to disassemble whole sections, creating
41 /// MCAtom (MCTextAtom for disassembled sections and MCDataAtom for raw data).
42 /// It can also be used to create a control flow graph consisting of MCFunctions
43 /// and MCBasicBlocks.
44 class MCObjectDisassembler {
45 public:
46  MCObjectDisassembler(const object::ObjectFile &Obj,
47  const MCDisassembler &Dis,
48  const MCInstrAnalysis &MIA);
49  virtual ~MCObjectDisassembler() {}
50 
51  /// \brief Build an MCModule, creating atoms and optionally functions.
52  /// \param withCFG Also build a CFG by adding MCFunctions to the Module.
53  /// If withCFG is false, the MCModule built only contains atoms, representing
54  /// what was found in the object file. If withCFG is true, MCFunctions are
55  /// created, containing MCBasicBlocks. All text atoms are split to form basic
56  /// block atoms, which then each back an MCBasicBlock.
57  MCModule *buildModule(bool withCFG = false);
58 
60 
61  typedef std::vector<uint64_t> AddressSetTy;
62  /// \brief Create a new MCFunction.
63  MCFunction *createFunction(MCModule *Module, uint64_t BeginAddr,
64  AddressSetTy &CallTargets,
65  AddressSetTy &TailCallTargets);
66 
67  /// \brief Set the region on which to fallback if disassembly was requested
68  /// somewhere not accessible in the object file.
69  /// This is used for dynamic disassembly (see RawMemoryObject).
70  void setFallbackRegion(OwningPtr<MemoryObject> &Region) {
71  FallbackRegion.reset(Region.take());
72  }
73 
74  /// \brief Set the symbolizer to use to get information on external functions.
75  /// Note that this isn't used to do instruction-level symbolization (that is,
76  /// plugged into MCDisassembler), but to symbolize function call targets.
77  void setSymbolizer(MCObjectSymbolizer *ObjectSymbolizer) {
78  MOS = ObjectSymbolizer;
79  }
80 
81  /// \brief Get the effective address of the entrypoint, or 0 if there is none.
82  virtual uint64_t getEntrypoint();
83 
84  /// \name Get the addresses of static constructors/destructors in the object.
85  /// The caller is expected to know how to interpret the addresses;
86  /// for example, Mach-O init functions expect 5 arguments, whereas ELF ones do not.
87  /// The addresses are original object file load addresses, not effective.
88  /// @{
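89  virtual ArrayRef<uint64_t> getStaticInitFunctions();
90  virtual ArrayRef<uint64_t> getStaticExitFunctions();
91  /// @}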
92 
93  /// \name Translation between effective and objectfile load address.
94  /// @{
95  /// \brief Compute the effective load address, from an objectfile virtual
96  /// address. This is implemented in a format-specific way, to take into
97  /// account things like PIE/ASLR when doing dynamic disassembly.
98  /// For example, on Mach-O this would be done by adding the VM addr slide,
99  /// on glibc ELF by keeping a map between segment load addresses, filled
100  /// using dl_iterate_phdr, etc.
101  /// In most static situations and in the default impl., this returns \p Addr.
102  virtual uint64_t getEffectiveLoadAddr(uint64_t Addr);
103 
104  /// \brief Compute the original load address, as specified in the objectfile.
105  /// This is the inverse of getEffectiveLoadAddr.
106  virtual uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr);
107  /// @}
108 
109 protected:
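110  const object::ObjectFile &Obj;
111  const MCDisassembler &Dis;
112  const MCInstrAnalysis &MIA;
113  MCObjectSymbolizer *MOS;
114 
115  /// \brief The fallback memory region, outside the object file.
116  OwningPtr<MemoryObject> FallbackRegion;
117 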
118  /// \brief Return a memory region suitable for reading starting at \p Addr.
119  /// In most cases, this returns a StringRefMemoryObject backed by the
120  /// containing section. When no section was found, this returns the
121  /// FallbackRegion, if it is suitable.
122  /// If it is not, or if there is no fallback region, this returns 0.
123  MemoryObject *getRegionFor(uint64_t Addr);
124 
125 private:
126  /// \brief Fill \p Module by creating an atom for each section.
127  /// This could be made much smarter, using information like symbols, but also
128  /// format-specific features, like mach-o function_start or data_in_code LCs.
129  void buildSectionAtoms(MCModule *Module);
130 
131  /// \brief Enrich \p Module with a CFG consisting of MCFunctions.
132  /// \param Module An MCModule returned by buildModule, with no CFG.
133  /// NOTE: Each MCBasicBlock in a MCFunction is backed by a single MCTextAtom.
134  /// When the CFG is built, contiguous instructions that were previously in a
135  /// single MCTextAtom will be split in multiple basic block atoms.
136  void buildCFG(MCModule *Module);
137 
138  MCBasicBlock *getBBAt(MCModule *Module, MCFunction *MCFN, uint64_t BeginAddr,
139  AddressSetTy &CallTargets,
140  AddressSetTy &TailCallTargets);
141 };
142 
143 class MCMachOObjectDisassembler : public MCObjectDisassembler {
144  const object::MachOObjectFile &MOOF;
145 
146  uint64_t VMAddrSlide;
147  uint64_t HeaderLoadAddress;
148 
149  // __DATA;__mod_init_func support.
150  llvm::StringRef ModInitContents;
151  // __DATA;__mod_exit_func support.
152  llvm::StringRef ModExitContents;
153 
154 public:
155  /// \brief Construct a Mach-O specific object disassembler.
156  /// \param VMAddrSlide The virtual address slide applied by dyld.
157  /// \param HeaderLoadAddress The load address of the mach_header for this
158  /// object.
159  MCMachOObjectDisassembler(const object::MachOObjectFile &MOOF,
160  const MCDisassembler &Dis,
161  const MCInstrAnalysis &MIA, uint64_t VMAddrSlide,
162  uint64_t HeaderLoadAddress);
163 
164 protected:
165  uint64_t getEffectiveLoadAddr(uint64_t Addr) LLVM_OVERRIDE;
166  uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr) LLVM_OVERRIDE;
167  uint64_t getEntrypoint() LLVM_OVERRIDE;
168 
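169  ArrayRef<uint64_t> getStaticInitFunctions() LLVM_OVERRIDE;
170  ArrayRef<uint64_t> getStaticExitFunctions() LLVM_OVERRIDE;
171 };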
172 
173 }
174 
175 #endif
const object::ObjectFile & Obj
The main container class for the LLVM Intermediate Representation.
Definition: Module.h:112
MCModule * buildModule(bool withCFG=false)
Build an MCModule, creating atoms and optionally functions.
MCMachOObjectDisassembler(const object::MachOObjectFile &MOOF, const MCDisassembler &Dis, const MCInstrAnalysis &MIA, uint64_t VMAddrSlide, uint64_t HeaderLoadAddress)
Construct a Mach-O specific object disassembler.
virtual uint64_t getEffectiveLoadAddr(uint64_t Addr)
const MCInstrAnalysis & MIA
void setSymbolizer(MCObjectSymbolizer *ObjectSymbolizer)
Set the symbolizer to use to get information on external functions. Note that this isn't used to do i...
Represents a function in machine code, containing MCBasicBlocks. MCFunctions are created by MCModule...
Definition: MCFunction.h:85
MCFunction * createFunction(MCModule *Module, uint64_t BeginAddr, AddressSetTy &CallTargets, AddressSetTy &TailCallTargets)
virtual ArrayRef< uint64_t > getStaticInitFunctions()
uint64_t getEffectiveLoadAddr(uint64_t Addr) LLVM_OVERRIDE
A single entry single exit Region.
Definition: RegionInfo.h:202
virtual ArrayRef< uint64_t > getStaticExitFunctions()
Disassemble an ObjectFile to an MCModule and MCFunctions. This class builds on MCDisassembler to disa...
uint64_t getEntrypoint() LLVM_OVERRIDE
Get the effective address of the entrypoint, or 0 if there is none.
An ObjectFile-backed symbolizer.
const MCDisassembler & Dis
Basic block containing a sequence of disassembled instructions. The basic block is backed by an MCTex...
Definition: MCFunction.h:33
ArrayRef< uint64_t > getStaticExitFunctions() LLVM_OVERRIDE
virtual uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr)
Compute the original load address, as specified in the objectfile. This is the inverse of getEffectiv...
OwningPtr< MemoryObject > FallbackRegion
The fallback memory region, outside the object file.
ArrayRef< uint64_t > getStaticInitFunctions() LLVM_OVERRIDE
A completely disassembled object file or executable. It comprises a list of MCAtom's, each representing a contiguous range of either instructions or data. An MCModule is created using MCObjectDisassembler::buildModule.
Definition: MCModule.h:36
std::vector< uint64_t > AddressSetTy
MemoryObject * getRegionFor(uint64_t Addr)
Return a memory region suitable for reading starting at Addr. In most cases, this returns a StringRef...
void setFallbackRegion(OwningPtr< MemoryObject > &Region)
Set the region on which to fallback if disassembly was requested somewhere not accessible in the obje...
MCObjectDisassembler(const object::ObjectFile &Obj, const MCDisassembler &Dis, const MCInstrAnalysis &MIA)
virtual uint64_t getEntrypoint()
Get the effective address of the entrypoint, or 0 if there is none.
uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr) LLVM_OVERRIDE
Compute the original load address, as specified in the objectfile. This is the inverse of getEffectiv...
#define LLVM_OVERRIDE
Definition: Compiler.h:155