32 using namespace object;
37 : Obj(Obj), Dis(Dis), MIA(MIA), MOS(0) {}
47 if (Name ==
"main" || Name ==
"_main") {
49 SI->getAddress(Entrypoint);
86 buildSectionAtoms(Module);
100 bool isText; SI->isText(isText);
101 bool isData; SI->isData(isData);
102 if (!isData && !isText)
105 uint64_t StartAddr; SI->getAddress(StartAddr);
106 uint64_t SecSize; SI->getSize(SecSize);
111 StringRef Contents; SI->getContents(Contents);
115 if (Contents.
size() != SecSize || !SecSize)
117 uint64_t EndAddr = StartAddr + SecSize - 1;
126 for (uint64_t Index = 0; Index < SecSize; Index += InstSize) {
127 const uint64_t CurAddr = StartAddr + Index;
138 assert(InstSize &&
"getInstruction() consumed no bytes");
141 InvalidData = Module->
createDataAtom(CurAddr, CurAddr+InstSize - 1);
143 for (uint64_t
I = 0;
I < InstSize; ++
I)
144 InvalidData->
addData(Contents[Index+
I]);
150 for (uint64_t Index = 0; Index < SecSize; ++Index)
151 Data->
addData(Contents[Index]);
167 BBInfo() : Atom(0), BB(0) {}
169 void addSucc(BBInfo &Succ) {
171 Succ.Preds.insert(
this);
177 std::sort(V.begin(), V.end());
178 V.erase(std::unique(V.begin(), V.end()), V.end());
181 void MCObjectDisassembler::buildCFG(
MCModule *Module) {
182 typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
183 BBInfoByAddrTy BBInfos;
193 SI->getType(SymType);
196 SI->getAddress(SymAddr);
198 Calls.push_back(SymAddr);
199 Splits.push_back(SymAddr);
204 &&
"Module already has a CFG!");
217 Splits.push_back(II->Address + II->Size);
221 Calls.push_back(Target);
222 Splits.push_back(Target);
231 for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end();
256 CurBB.addSucc(BBInfos[Target]);
265 for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end();
267 BBInfo &BBI = BBInfos[*CI];
268 if (!BBI.Atom)
continue;
275 for (
size_t wi = 0; wi < Worklist.
size(); ++wi) {
276 BBInfo *BBI = Worklist[wi];
281 for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
284 for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
290 for (
size_t wi = 0; wi < Worklist.
size(); ++wi) {
291 BBInfo *BBI = Worklist[wi];
295 for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end();
299 for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end();
323 uint64_t BBBeginAddr,
324 AddressSetTy &CallTargets,
325 AddressSetTy &TailCallTargets) {
326 typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy;
328 BBInfoByAddrTy BBInfos;
329 AddrWorklistTy Worklist;
331 Worklist.
insert(BBBeginAddr);
332 for (
size_t wi = 0; wi < Worklist.size(); ++wi) {
333 const uint64_t BeginAddr = Worklist[wi];
334 BBInfo *BBI = &BBInfos[BeginAddr];
337 assert(!TA &&
"Discovered basic block already has an associated atom!");
342 TA = cast<MCTextAtom>(
A);
350 BBInfoByAddrTy::iterator It = BBInfos.find(TA->
getBeginAddr());
351 if (It != BBInfos.end() && It->second.Atom) {
352 BBI->SuccAddrs = It->second.SuccAddrs;
353 It->second.SuccAddrs.clear();
354 It->second.SuccAddrs.push_back(BeginAddr);
373 EndAddr = std::min(EndAddr, NextAtom->getBeginAddr());
375 for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) {
387 uint64_t BranchTarget;
390 CallTargets.push_back(BranchTarget);
399 assert(TA &&
"Couldn't disassemble atom, none was created!");
400 assert(TA->
begin() != TA->
end() &&
"Empty atom!");
403 assert(Region &&
"Couldn't find region for already disassembled code!");
404 uint64_t EndRegion = Region->getBase() + Region->getExtent();
411 BBI->SuccAddrs.push_back(TA->
getEndAddr() + 1);
417 uint64_t BranchTarget;
424 if (!ExtFnName.
empty()) {
425 TailCallTargets.push_back(BranchTarget);
426 CallTargets.push_back(BranchTarget);
428 BBI->SuccAddrs.push_back(BranchTarget);
429 Worklist.insert(BranchTarget);
435 for (
size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
436 const uint64_t BeginAddr = Worklist[wi];
437 BBInfo *BBI = &BBInfos[BeginAddr];
439 assert(BBI->Atom &&
"Found a basic block without an associated atom!");
442 BBI->BB = MCFN->
find(BeginAddr);
451 for (
size_t wi = 0, we = Worklist.size(); wi != we; ++wi) {
452 const uint64_t BeginAddr = Worklist[wi];
453 BBInfo *BBI = &BBInfos[BeginAddr];
457 for (AddressSetTy::const_iterator SI = BBI->SuccAddrs.begin(),
458 SE = BBI->SuccAddrs.end();
466 assert(BBInfos[Worklist[0]].BB &&
467 "No basic block created at requested address?");
469 return BBInfos[Worklist[0]].BB;
480 if (!ExtFnName.
empty())
490 if ((*FI)->getEntryBlock()->getInsts()->getBeginAddr() == BeginAddr)
496 getBBAt(Module, MCFN, BeginAddr, CallTargets, TailCallTargets);
505 uint64_t HeaderLoadAddress)
507 VMAddrSlide(VMAddrSlide), HeaderLoadAddress(HeaderLoadAddress) {
517 if (Name ==
"__mod_init_func") {
518 DEBUG(
dbgs() <<
"Found __mod_init_func section!\n");
519 SI->getContents(ModInitContents);
520 }
else if (Name ==
"__mod_exit_func") {
521 DEBUG(
dbgs() <<
"Found __mod_exit_func section!\n");
522 SI->getContents(ModExitContents);
529 return Addr + VMAddrSlide;
534 return EffectiveAddr - VMAddrSlide;
538 uint64_t EntryFileOffset = 0;
544 for (
unsigned I = 0;; ++
I) {
545 if (Load.
C.
cmd == MachO::LC_MAIN) {
551 if (
I == LoadCommandCount - 1)
563 return EntryFileOffset + HeaderLoadAddress;
570 size_t EntrySize = 8;
571 size_t EntryCount = ModInitContents.
size() / EntrySize;
573 reinterpret_cast<const uint64_t *
>(ModInitContents.
data()), EntryCount);
580 size_t EntrySize = 8;
581 size_t EntryCount = ModExitContents.
size() / EntrySize;
583 reinterpret_cast<const uint64_t *
>(ModExitContents.
data()), EntryCount);
MCModule * buildEmptyModule()
FunctionListTy::iterator func_iterator
AtomListTy::iterator atom_iterator
virtual uint64_t getExtent() const =0
size_t size() const
size - Get the string size.
const object::ObjectFile & Obj
const MCAtom * findFirstAtomAfter(uint64_t Addr) const
The main container class for the LLVM Intermediate Representation.
MCBasicBlock & createBlock(const MCTextAtom &Insts)
Create an MCBasicBlock backed by Insts and add it to this function.
An atom consisting of disassembled instructions.
enable_if_c<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
StringRef getName() const
MCModule * buildModule(bool withCFG=false)
Build an MCModule, creating atoms and optionally functions.
MCMachOObjectDisassembler(const object::MachOObjectFile &MOOF, const MCDisassembler &Dis, const MCInstrAnalysis &MIA, uint64_t VMAddrSlide, uint64_t HeaderLoadAddress)
Construct a Mach-O specific object disassembler.
StringRef substr(size_t Start, size_t N=npos) const
virtual bool isBranch(const MCInst &Inst) const
virtual uint64_t getEffectiveLoadAddr(uint64_t Addr)
size_type size() const
Determine the number of elements in the SetVector.
void addPredecessor(const MCBasicBlock *MCBB)
LoopInfoBase< BlockT, LoopT > * LI
const_iterator end() const
virtual section_iterator end_sections() const
void addInst(const MCInst &Inst, uint64_t Size)
Append an instruction, expanding the atom if necessary.
static std::string utohexstr(uint64_t X)
void addSuccessor(const MCBasicBlock *MCBB)
LoadCommandInfo getNextLoadCommandInfo(const LoadCommandInfo &L) const
const MCAtom * findAtomContaining(uint64_t Addr) const
An atom consisting of a sequence of bytes.
An entry in an MCTextAtom: a disassembled instruction. NOTE: Both the Address and Size field are actu...
#define llvm_unreachable(msg)
virtual section_iterator end_sections() const =0
virtual section_iterator begin_sections() const
const MCInstrAnalysis & MIA
MachO::mach_header getHeader() const
bool insert(const value_type &X)
Insert a new element into the SetVector.
const MCDecodedInst & back() const
static void RemoveDupsFromAddressVector(MCObjectDisassembler::AddressSetTy &V)
const char * data() const
static std::string utostr(uint64_t X, bool isNeg=false)
MCFunction * createFunction(StringRef Name)
Create a new MCFunction.
MCDataAtom * createDataAtom(uint64_t Begin, uint64_t End)
virtual section_iterator begin_sections() const =0
Represents a function in machine code, containing MCBasicBlocks. MCFunctions are created by MCModule...
uint64_t getBeginAddr() const
Get the start address of the atom.
virtual uint64_t getBase() const =0
MCFunction * createFunction(MCModule *Module, uint64_t BeginAddr, AddressSetTy &CallTargets, AddressSetTy &TailCallTargets)
virtual ArrayRef< uint64_t > getStaticInitFunctions()
MCTextAtom * createTextAtom(uint64_t Begin, uint64_t End)
uint64_t getEffectiveLoadAddr(uint64_t Addr) LLVM_OVERRIDE
A single entry single exit Region.
const_atom_iterator atom_end() const
virtual ArrayRef< uint64_t > getStaticExitFunctions()
Disassemble an ObjectFile to an MCModule and MCFunctions. This class builds on MCDisassembler to disa...
virtual bool isCall(const MCInst &Inst) const
virtual symbol_iterator end_symbols() const =0
virtual DecodeStatus getInstruction(MCInst &instr, uint64_t &size, const MemoryObject &region, uint64_t address, raw_ostream &vStream, raw_ostream &cStream) const =0
virtual bool evaluateBranch(const MCInst &Inst, uint64_t Addr, uint64_t Size, uint64_t &Target) const
const_func_iterator func_begin() const
virtual bool isTerminator(const MCInst &Inst) const
const_func_iterator func_end() const
content_iterator & increment(error_code &err)
size_t find_last_of(char C, size_t From=npos) const
A SetVector that performs no allocations if smaller than a certain size.
uint64_t getEntrypoint() LLVM_OVERRIDE
Get the effective address of the entrypoint, or 0 if there is none.
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
virtual bool isConditionalBranch(const MCInst &Inst) const
void setName(StringRef NewName)
const MCDisassembler & Dis
Basic block containing a sequence of disassembled instructions. The basic block is backed by an MCTex...
const uint64_t UnknownAddressOrSize
raw_ostream & dbgs()
dbgs - Return a circular-buffered debug stream.
LoadCommandInfo getFirstLoadCommandInfo() const
ArrayRef< uint64_t > getStaticExitFunctions() LLVM_OVERRIDE
Represents a contiguous range of either instructions (a TextAtom) or data (a DataAtom). Address ranges are expressed as closed intervals.
InstListTy::const_iterator const_iterator
StringRefMemoryObject - Simple StringRef-backed MemoryObject.
virtual uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr)
Compute the original load address, as specified in the objectfile. This is the inverse of getEffectiv...
OwningPtr< MemoryObject > FallbackRegion
The fallback memory region, outside the object file.
ArrayRef< uint64_t > getStaticInitFunctions() LLVM_OVERRIDE
uint64_t getEndAddr() const
Get the end address, i.e. the last one inside the atom.
MCTextAtom * split(uint64_t SplitPt) LLVM_OVERRIDE
Splits the atom in two at a given address.
A completely disassembled object file or executable. It comprises a list of MCAtoms, each representing a contiguous range of either instructions or data. An MCModule is created using MCObjectDisassembler::buildModule.
std::vector< uint64_t > AddressSetTy
virtual symbol_iterator begin_symbols() const =0
MemoryObject * getRegionFor(uint64_t Addr)
Return a memory region suitable for reading starting at Addr. In most cases, this returns a StringRef...
raw_ostream & nulls()
nulls() - This returns a reference to a raw_ostream which discards output.
const_iterator begin() const
const MCBasicBlock * find(uint64_t StartAddr) const
Find the basic block, if any, that starts at StartAddr.
MCObjectDisassembler(const object::ObjectFile &Obj, const MCDisassembler &Dis, const MCInstrAnalysis &MIA)
void addData(const MCData &D)
Append a data entry, expanding the atom if necessary.
virtual uint64_t getEntrypoint()
Get the effective address of the entrypoint, or 0 if there is none.
const_atom_iterator atom_begin() const
virtual StringRef findExternalFunctionAt(uint64_t Addr)
Look for an external function symbol at Addr. (References through the ELF PLT, Mach-O stubs...
uint64_t getOriginalLoadAddr(uint64_t EffectiveAddr) LLVM_OVERRIDE
Compute the original load address, as specified in the objectfile. This is the inverse of getEffectiv...
bool empty() const
empty - Check if the string is empty.