LLVM API Documentation

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
BitstreamReader.h
Go to the documentation of this file.
1 //===- BitstreamReader.h - Low-level bitstream reader interface -*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This header defines the BitstreamReader class. This class can be used to
11 // read an arbitrary bitstream, regardless of its contents.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_BITCODE_BITSTREAMREADER_H
16 #define LLVM_BITCODE_BITSTREAMREADER_H
17 
18 #include "llvm/ADT/OwningPtr.h"
19 #include "llvm/Bitcode/BitCodes.h"
20 #include "llvm/Support/Endian.h"
22 #include <climits>
23 #include <string>
24 #include <vector>
25 
26 namespace llvm {
27 
28  class Deserializer;
29 
30 /// BitstreamReader - This class is used to read from an LLVM bitcode stream,
31 /// maintaining information that is global to decoding the entire file. While
32 /// a file is being read, multiple cursors can be independently advanced or
33 /// skipped around within the file. These are represented by the
34 /// BitstreamCursor class.
36 public:
37  /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks.
38  /// These describe abbreviations that all blocks of the specified ID inherit.
39  struct BlockInfo {
40  unsigned BlockID;
41  std::vector<BitCodeAbbrev*> Abbrevs;
42  std::string Name;
43 
44  std::vector<std::pair<unsigned, std::string> > RecordNames;
45  };
46 private:
48 
49  std::vector<BlockInfo> BlockInfoRecords;
50 
51  /// IgnoreBlockInfoNames - This is set to true if we don't care about the
52  /// block/record name information in the BlockInfo block. Only llvm-bcanalyzer
53  /// uses this.
54  bool IgnoreBlockInfoNames;
55 
57  void operator=(const BitstreamReader&) LLVM_DELETED_FUNCTION;
58 public:
59  BitstreamReader() : IgnoreBlockInfoNames(true) {
60  }
61 
62  BitstreamReader(const unsigned char *Start, const unsigned char *End) {
63  IgnoreBlockInfoNames = true;
64  init(Start, End);
65  }
66 
68  BitcodeBytes.reset(bytes);
69  }
70 
71  void init(const unsigned char *Start, const unsigned char *End) {
72  assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
73  BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
74  }
75 
76  StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
77 
79  // Free the BlockInfoRecords.
80  while (!BlockInfoRecords.empty()) {
81  BlockInfo &Info = BlockInfoRecords.back();
82  // Free blockinfo abbrev info.
83  for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size());
84  i != e; ++i)
85  Info.Abbrevs[i]->dropRef();
86  BlockInfoRecords.pop_back();
87  }
88  }
89 
90  /// CollectBlockInfoNames - This is called by clients that want block/record
91  /// name information.
92  void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
93  bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
94 
95  //===--------------------------------------------------------------------===//
96  // Block Manipulation
97  //===--------------------------------------------------------------------===//
98 
99  /// hasBlockInfoRecords - Return true if we've already read and processed the
100  /// block info block for this Bitstream. We only process it for the first
101  /// cursor that walks over it.
102  bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); }
103 
104  /// getBlockInfo - If there is block info for the specified ID, return it,
105  /// otherwise return null.
106  const BlockInfo *getBlockInfo(unsigned BlockID) const {
107  // Common case, the most recent entry matches BlockID.
108  if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID)
109  return &BlockInfoRecords.back();
110 
111  for (unsigned i = 0, e = static_cast<unsigned>(BlockInfoRecords.size());
112  i != e; ++i)
113  if (BlockInfoRecords[i].BlockID == BlockID)
114  return &BlockInfoRecords[i];
115  return 0;
116  }
117 
118  BlockInfo &getOrCreateBlockInfo(unsigned BlockID) {
119  if (const BlockInfo *BI = getBlockInfo(BlockID))
120  return *const_cast<BlockInfo*>(BI);
121 
122  // Otherwise, add a new record.
123  BlockInfoRecords.push_back(BlockInfo());
124  BlockInfoRecords.back().BlockID = BlockID;
125  return BlockInfoRecords.back();
126  }
127 };
128 
129 
130 /// BitstreamEntry - When advancing through a bitstream cursor, each advance can
131 /// discover a few different kinds of entries:
132 /// Error - Malformed bitcode was found.
133 /// EndBlock - We've reached the end of the current block (or the end of the
134 /// file, which is treated like a series of EndBlock records).
135 /// SubBlock - This is the start of a new subblock of a specific ID.
136 /// Record - This is a record with a specific AbbrevID.
137 ///
139  enum {
144  } Kind;
145 
146  unsigned ID;
147 
149  BitstreamEntry E; E.Kind = Error; return E;
150  }
152  BitstreamEntry E; E.Kind = EndBlock; return E;
153  }
154  static BitstreamEntry getSubBlock(unsigned ID) {
155  BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
156  }
157  static BitstreamEntry getRecord(unsigned AbbrevID) {
158  BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
159  }
160 };
161 
162 /// BitstreamCursor - This represents a position within a bitcode file. There
163 /// may be multiple independent cursors reading within one bitstream, each
164 /// maintaining their own local state.
165 ///
166 /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not
167 /// be passed by value.
169  friend class Deserializer;
170  BitstreamReader *BitStream;
171  size_t NextChar;
172 
173 
174  /// CurWord/word_t - This is the current data we have pulled from the stream
175  /// but have not returned to the client. word_t is currently fixed at 32 bits
176  /// (see the typedef below). Code that depends on the word width is written
177  /// in terms of word_t and sizeof(word_t) to make it perfectly explicit what
178  /// is going on.
179  typedef uint32_t word_t;
180  word_t CurWord;
181 
182  /// BitsInCurWord - This is the number of bits in CurWord that are valid. This
183  /// is always from [0...31/63] inclusive (depending on word size).
184  unsigned BitsInCurWord;
185 
186  // CurCodeSize - This is the declared size of code values used for the current
187  // block, in bits.
188  unsigned CurCodeSize;
189 
190  /// CurAbbrevs - Abbrevs installed in this block.
191  std::vector<BitCodeAbbrev*> CurAbbrevs;
192 
193  struct Block {
194  unsigned PrevCodeSize;
195  std::vector<BitCodeAbbrev*> PrevAbbrevs;
196  explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
197  };
198 
199  /// BlockScope - This tracks the codesize of parent blocks.
200  SmallVector<Block, 8> BlockScope;
201 
202 
203 public:
204  BitstreamCursor() : BitStream(0), NextChar(0) {
205  }
206  BitstreamCursor(const BitstreamCursor &RHS) : BitStream(0), NextChar(0) {
207  operator=(RHS);
208  }
209 
210  explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) {
211  NextChar = 0;
212  CurWord = 0;
213  BitsInCurWord = 0;
214  CurCodeSize = 2;
215  }
216 
218  freeState();
219 
220  BitStream = &R;
221  NextChar = 0;
222  CurWord = 0;
223  BitsInCurWord = 0;
224  CurCodeSize = 2;
225  }
226 
228  freeState();
229  }
230 
231  void operator=(const BitstreamCursor &RHS);
232 
233  void freeState();
234 
235  bool isEndPos(size_t pos) {
236  return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos));
237  }
238 
239  bool canSkipToPos(size_t pos) const {
240  // pos can be skipped to if it is a valid address or one byte past the end.
241  return pos == 0 || BitStream->getBitcodeBytes().isValidAddress(
242  static_cast<uint64_t>(pos - 1));
243  }
244 
245  uint32_t getWord(size_t pos) {
246  uint8_t buf[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
247  BitStream->getBitcodeBytes().readBytes(pos, sizeof(buf), buf);
248  return *reinterpret_cast<support::ulittle32_t *>(buf);
249  }
250 
251  bool AtEndOfStream() {
252  return BitsInCurWord == 0 && isEndPos(NextChar);
253  }
254 
255  /// getAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
256  unsigned getAbbrevIDWidth() const { return CurCodeSize; }
257 
258  /// GetCurrentBitNo - Return the bit # of the bit we are reading.
259  uint64_t GetCurrentBitNo() const {
260  return NextChar*CHAR_BIT - BitsInCurWord;
261  }
262 
264  return BitStream;
265  }
267  return BitStream;
268  }
269 
270  /// Flags that modify the behavior of advance().
271  enum {
272  /// AF_DontPopBlockAtEnd - If this flag is used, the advance() method does
273  /// not automatically pop the block scope when the end of a block is
274  /// reached.
276 
277  /// AF_DontAutoprocessAbbrevs - If this flag is used, abbrev entries are
278  /// returned just like normal records.
280  };
281 
282  /// advance - Advance the current bitstream, returning the next entry in the
283  /// stream.
284  BitstreamEntry advance(unsigned Flags = 0) {
285  while (1) {
286  unsigned Code = ReadCode();
287  if (Code == bitc::END_BLOCK) {
288  // Pop the end of the block unless Flags tells us not to.
290  return BitstreamEntry::getError();
292  }
293 
294  if (Code == bitc::ENTER_SUBBLOCK)
296 
297  if (Code == bitc::DEFINE_ABBREV &&
299  // We read and accumulate abbrev's, the client can't do anything with
300  // them anyway.
302  continue;
303  }
304 
305  return BitstreamEntry::getRecord(Code);
306  }
307  }
308 
309  /// advanceSkippingSubblocks - This is a convenience function for clients that
310  /// don't expect any subblocks. This just skips over them automatically.
312  while (1) {
313  // If we found a normal entry, return it.
314  BitstreamEntry Entry = advance(Flags);
315  if (Entry.Kind != BitstreamEntry::SubBlock)
316  return Entry;
317 
318  // If we found a sub-block, just skip over it and check the next entry.
319  if (SkipBlock())
320  return BitstreamEntry::getError();
321  }
322  }
323 
324  /// JumpToBit - Reset the stream to the specified bit number.
325  void JumpToBit(uint64_t BitNo) {
326  uintptr_t ByteNo = uintptr_t(BitNo/8) & ~(sizeof(word_t)-1);
327  unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1));
328  assert(canSkipToPos(ByteNo) && "Invalid location");
329 
330  // Move the cursor to the right word.
331  NextChar = ByteNo;
332  BitsInCurWord = 0;
333  CurWord = 0;
334 
335  // Skip over any bits that are already consumed.
336  if (WordBitNo) {
337  if (sizeof(word_t) > 4)
338  Read64(WordBitNo);
339  else
340  Read(WordBitNo);
341  }
342  }
343 
344 
345  uint32_t Read(unsigned NumBits) {
346  assert(NumBits && NumBits <= 32 &&
347  "Cannot return zero or more than 32 bits!");
348 
349  // If the field is fully contained by CurWord, return it quickly.
350  if (BitsInCurWord >= NumBits) {
351  uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits));
352  CurWord >>= NumBits;
353  BitsInCurWord -= NumBits;
354  return R;
355  }
356 
357  // If we run out of data, stop at the end of the stream.
358  if (isEndPos(NextChar)) {
359  CurWord = 0;
360  BitsInCurWord = 0;
361  return 0;
362  }
363 
364  uint32_t R = uint32_t(CurWord);
365 
366  // Read the next word from the stream.
367  uint8_t Array[sizeof(word_t)] = {0};
368 
369  BitStream->getBitcodeBytes().readBytes(NextChar, sizeof(Array), Array);
370 
371  // Handle big-endian byte-swapping if necessary.
373  <word_t, support::little, support::unaligned> EndianValue;
374  memcpy(&EndianValue, Array, sizeof(Array));
375 
376  CurWord = EndianValue;
377 
378  NextChar += sizeof(word_t);
379 
380  // Extract NumBits-BitsInCurWord from what we just read.
381  unsigned BitsLeft = NumBits-BitsInCurWord;
382 
383  // Be careful here, BitsLeft is in the range [1..32]/[1..64] inclusive.
384  R |= uint32_t((CurWord & (word_t(~0ULL) >> (sizeof(word_t)*8-BitsLeft)))
385  << BitsInCurWord);
386 
387  // BitsLeft bits have just been used up from CurWord. BitsLeft is in the
388  // range [1..32]/[1..64] so be careful how we shift.
389  if (BitsLeft != sizeof(word_t)*8)
390  CurWord >>= BitsLeft;
391  else
392  CurWord = 0;
393  BitsInCurWord = sizeof(word_t)*8-BitsLeft;
394  return R;
395  }
396 
397  uint64_t Read64(unsigned NumBits) {
398  if (NumBits <= 32) return Read(NumBits);
399 
400  uint64_t V = Read(32);
401  return V | (uint64_t)Read(NumBits-32) << 32;
402  }
403 
404  uint32_t ReadVBR(unsigned NumBits) {
405  uint32_t Piece = Read(NumBits);
406  if ((Piece & (1U << (NumBits-1))) == 0)
407  return Piece;
408 
409  uint32_t Result = 0;
410  unsigned NextBit = 0;
411  while (1) {
412  Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
413 
414  if ((Piece & (1U << (NumBits-1))) == 0)
415  return Result;
416 
417  NextBit += NumBits-1;
418  Piece = Read(NumBits);
419  }
420  }
421 
422  // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size. The
423  // chunk size of the VBR must still be <= 32 bits though.
424  uint64_t ReadVBR64(unsigned NumBits) {
425  uint32_t Piece = Read(NumBits);
426  if ((Piece & (1U << (NumBits-1))) == 0)
427  return uint64_t(Piece);
428 
429  uint64_t Result = 0;
430  unsigned NextBit = 0;
431  while (1) {
432  Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
433 
434  if ((Piece & (1U << (NumBits-1))) == 0)
435  return Result;
436 
437  NextBit += NumBits-1;
438  Piece = Read(NumBits);
439  }
440  }
441 
442 private:
443  void SkipToFourByteBoundary() {
444  // If word_t is 64-bits and if we've read less than 32 bits, just dump
445  // the bits we have up to the next 32-bit boundary.
446  if (sizeof(word_t) > 4 &&
447  BitsInCurWord >= 32) {
448  CurWord >>= BitsInCurWord-32;
449  BitsInCurWord = 32;
450  return;
451  }
452 
453  BitsInCurWord = 0;
454  CurWord = 0;
455  }
456 public:
457 
458  unsigned ReadCode() {
459  return Read(CurCodeSize);
460  }
461 
462 
463  // Block header:
464  // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
465 
466  /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for
467  /// the block.
468  unsigned ReadSubBlockID() {
469  return ReadVBR(bitc::BlockIDWidth);
470  }
471 
472  /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip
473  /// over the body of this block. If the block record is malformed, return
474  /// true.
475  bool SkipBlock() {
476  // Read and ignore the codelen value. Since we are skipping this block, we
477  // don't care what code widths are used inside of it.
479  SkipToFourByteBoundary();
480  unsigned NumFourBytes = Read(bitc::BlockSizeWidth);
481 
482  // Check that the block wasn't partially defined, and that the offset isn't
483  // bogus.
484  size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8;
485  if (AtEndOfStream() || !canSkipToPos(SkipTo/8))
486  return true;
487 
488  JumpToBit(SkipTo);
489  return false;
490  }
491 
492  /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
493  /// the block, and return true if the block has an error.
494  bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0);
495 
496  bool ReadBlockEnd() {
497  if (BlockScope.empty()) return true;
498 
499  // Block tail:
500  // [END_BLOCK, <align4bytes>]
501  SkipToFourByteBoundary();
502 
503  popBlockScope();
504  return false;
505  }
506 
507 private:
508 
509  void popBlockScope() {
510  CurCodeSize = BlockScope.back().PrevCodeSize;
511 
512  // Delete abbrevs from popped scope.
513  for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size());
514  i != e; ++i)
515  CurAbbrevs[i]->dropRef();
516 
517  BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
518  BlockScope.pop_back();
519  }
520 
521  //===--------------------------------------------------------------------===//
522  // Record Processing
523  //===--------------------------------------------------------------------===//
524 
525 private:
526  void readAbbreviatedLiteral(const BitCodeAbbrevOp &Op,
527  SmallVectorImpl<uint64_t> &Vals);
528  void readAbbreviatedField(const BitCodeAbbrevOp &Op,
529  SmallVectorImpl<uint64_t> &Vals);
530  void skipAbbreviatedField(const BitCodeAbbrevOp &Op);
531 
532 public:
533 
534  /// getAbbrev - Return the abbreviation for the specified AbbrevId.
535  const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) {
536  unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV;
537  assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!");
538  return CurAbbrevs[AbbrevNo];
539  }
540 
541  /// skipRecord - Read the current record and discard it.
542  void skipRecord(unsigned AbbrevID);
543 
544  unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals,
545  StringRef *Blob = 0);
546 
547  //===--------------------------------------------------------------------===//
548  // Abbrev Processing
549  //===--------------------------------------------------------------------===//
550  void ReadAbbrevRecord();
551 
552  bool ReadBlockInfoBlock();
553 };
554 
555 } // End llvm namespace
556 
557 #endif
const BitCodeAbbrev * getAbbrev(unsigned AbbrevID)
getAbbrev - Return the abbreviation for the specified AbbrevId.
bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP=0)
virtual int readBytes(uint64_t address, uint64_t size, uint8_t *buf) const LLVM_OVERRIDE=0
bool hasBlockInfoRecords() const
StreamableMemoryObject & getBitcodeBytes()
BitstreamCursor(BitstreamReader &R)
unsigned readRecord(unsigned AbbrevID, SmallVectorImpl< uint64_t > &Vals, StringRef *Blob=0)
uint64_t GetCurrentBitNo() const
GetCurrentBitNo - Return the bit # of the bit we are reading.
bool canSkipToPos(size_t pos) const
const BlockInfo * getBlockInfo(unsigned BlockID) const
std::vector< std::pair< unsigned, std::string > > RecordNames
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Definition: SmallVector.h:56
BlockInfo & getOrCreateBlockInfo(unsigned BlockID)
static BitstreamEntry getSubBlock(unsigned ID)
void operator=(const BitstreamCursor &RHS)
BitstreamEntry advanceSkippingSubblocks(unsigned Flags=0)
BitstreamReader * getBitStreamReader()
#define true
Definition: ConvertUTF.c:65
uint64_t ReadVBR64(unsigned NumBits)
StreamableMemoryObject * getNonStreamedMemoryObject(const unsigned char *Start, const unsigned char *End)
void init(const unsigned char *Start, const unsigned char *End)
void init(BitstreamReader &R)
std::vector< BitCodeAbbrev * > Abbrevs
virtual bool isObjectEnd(uint64_t address) const =0
uint64_t Read64(unsigned NumBits)
bool isEndPos(size_t pos)
uint32_t ReadVBR(unsigned NumBits)
BitstreamReader(const unsigned char *Start, const unsigned char *End)
void skipRecord(unsigned AbbrevID)
skipRecord - Read the current record and discard it.
#define LLVM_DELETED_FUNCTION
Definition: Compiler.h:137
virtual bool isValidAddress(uint64_t address) const =0
const BitstreamReader * getBitStreamReader() const
uint32_t Read(unsigned NumBits)
BitstreamCursor(const BitstreamCursor &RHS)
static BitstreamEntry getEndBlock()
static BitstreamEntry getRecord(unsigned AbbrevID)
uint32_t getWord(size_t pos)
void JumpToBit(uint64_t BitNo)
JumpToBit - Reset the stream to the specified bit number.
static BitstreamEntry getError()
BitstreamReader(StreamableMemoryObject *bytes)
BitstreamEntry advance(unsigned Flags=0)
unsigned getAbbrevIDWidth() const
getAbbrevIDWidth - Return the number of bits used to encode an abbrev #.
enum llvm::BitstreamEntry::@27 Kind