LLVM API Documentation

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
YAMLParser.h
Go to the documentation of this file.
1 //===--- YAMLParser.h - Simple YAML parser --------------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This is a YAML 1.2 parser.
11 //
12 // See http://www.yaml.org/spec/1.2/spec.html for the full standard.
13 //
14 // This currently does not implement the following:
15 // * Multi-line literal folding.
16 // * Tag resolution.
17 // * UTF-16.
18 // * BOMs anywhere other than the first Unicode scalar value in the file.
19 //
20 // The most important class here is Stream. This represents a YAML stream with
21 // 0, 1, or many documents.
22 //
23 // SourceMgr sm;
24 // StringRef input = getInput();
25 // yaml::Stream stream(input, sm);
26 //
27 // for (yaml::document_iterator di = stream.begin(), de = stream.end();
28 // di != de; ++di) {
29 // yaml::Node *n = di->getRoot();
30 // if (n) {
31 // // Do something with n...
32 // } else
33 // break;
34 // }
35 //
36 //===----------------------------------------------------------------------===//
37 
38 #ifndef LLVM_SUPPORT_YAMLPARSER_H
39 #define LLVM_SUPPORT_YAMLPARSER_H
40 
41 #include "llvm/ADT/OwningPtr.h"
42 #include "llvm/ADT/SmallString.h"
43 #include "llvm/ADT/StringRef.h"
44 #include "llvm/Support/Allocator.h"
45 #include "llvm/Support/SMLoc.h"
46 
47 #include <map>
48 #include <limits>
49 #include <utility>
50 
51 namespace llvm {
52 class MemoryBuffer;
53 class SourceMgr;
54 class raw_ostream;
55 class Twine;
56 
57 namespace yaml {
58 
59 class document_iterator;
60 class Document;
61 class Node;
62 class Scanner;
63 struct Token;
64 
65 /// @brief Dump all the tokens in this stream to OS.
66 /// @returns true if there was an error, false otherwise.
67 bool dumpTokens(StringRef Input, raw_ostream &);
68 
69 /// @brief Scans all tokens in input without outputting anything. This is used
70 /// for benchmarking the tokenizer.
71 /// @returns true if there was an error, false otherwise.
72 bool scanTokens(StringRef Input);
73 
74 /// @brief Escape \a Input for a double quoted scalar.
75 std::string escape(StringRef Input);
76 
77 /// @brief This class represents a YAML stream potentially containing multiple
78 /// documents.
79 class Stream {
80 public:
81  /// @brief This keeps a reference to the string referenced by \p Input.
83 
84  /// @brief This takes ownership of \p InputBuffer.
85  Stream(MemoryBuffer *InputBuffer, SourceMgr &);
86  ~Stream();
87 
90  void skip();
91  bool failed();
92  bool validate() {
93  skip();
94  return !failed();
95  }
96 
97  void printError(Node *N, const Twine &Msg);
98 
99 private:
100  OwningPtr<Scanner> scanner;
101  OwningPtr<Document> CurrentDoc;
102 
103  friend class Document;
104 };
105 
106 /// @brief Abstract base class for all Nodes.
107 class Node {
108  virtual void anchor();
109 public:
110  enum NodeKind {
117  };
118 
119  Node(unsigned int Type, OwningPtr<Document> &, StringRef Anchor,
120  StringRef Tag);
121 
122  /// @brief Get the value of the anchor attached to this node. If it does not
123  /// have one, getAnchor().size() will be 0.
124  StringRef getAnchor() const { return Anchor; }
125 
126  /// \brief Get the tag as it was written in the document. This does not
127  /// perform tag resolution.
128  StringRef getRawTag() const { return Tag; }
129 
130  /// \brief Get the verbatim tag for a given Node. This performs tag resolution
131  /// and substitution.
132  std::string getVerbatimTag() const;
133 
134  SMRange getSourceRange() const { return SourceRange; }
135  void setSourceRange(SMRange SR) { SourceRange = SR; }
136 
137  // These functions forward to Document and Scanner.
138  Token &peekNext();
139  Token getNext();
140  Node *parseBlockNode();
142  void setError(const Twine &Message, Token &Location) const;
143  bool failed() const;
144 
145  virtual void skip() {}
146 
147  unsigned int getType() const { return TypeID; }
148 
149  void *operator new ( size_t Size
150  , BumpPtrAllocator &Alloc
151  , size_t Alignment = 16) throw() {
152  return Alloc.Allocate(Size, Alignment);
153  }
154 
155  void operator delete(void *Ptr, BumpPtrAllocator &Alloc, size_t) throw() {
156  Alloc.Deallocate(Ptr);
157  }
158 
159 protected:
162 
163  void operator delete(void *) throw() {}
164 
165  virtual ~Node() {}
166 
167 private:
168  unsigned int TypeID;
169  StringRef Anchor;
170  /// \brief The tag as typed in the document.
171  StringRef Tag;
172 };
173 
174 /// @brief A null value.
175 ///
176 /// Example:
177 /// !!null null
178 class NullNode : public Node {
179  virtual void anchor();
180 public:
182  : Node(NK_Null, D, StringRef(), StringRef()) {}
183 
184  static inline bool classof(const Node *N) {
185  return N->getType() == NK_Null;
186  }
187 };
188 
189 /// @brief A scalar node is an opaque datum that can be presented as a
190 /// series of zero or more Unicode scalar values.
191 ///
192 /// Example:
193 /// Adena
194 class ScalarNode : public Node {
195  virtual void anchor();
196 public:
198  StringRef Val)
199  : Node(NK_Scalar, D, Anchor, Tag), Value(Val) {
200  SMLoc Start = SMLoc::getFromPointer(Val.begin());
201  SMLoc End = SMLoc::getFromPointer(Val.end());
202  SourceRange = SMRange(Start, End);
203  }
204 
205  // Return Value without any escaping or folding or other fun YAML stuff. These
206  // are the exact bytes that are contained in the file (after conversion to
207  // utf8).
208  StringRef getRawValue() const { return Value; }
209 
210  /// @brief Gets the value of this node as a StringRef.
211  ///
212  /// @param Storage is used to store the content of the returned StringRef iff
213  /// it requires any modification from how it appeared in the source.
214  /// This happens with escaped characters and multi-line literals.
215  StringRef getValue(SmallVectorImpl<char> &Storage) const;
216 
217  static inline bool classof(const Node *N) {
218  return N->getType() == NK_Scalar;
219  }
220 
221 private:
223 
224  StringRef unescapeDoubleQuoted( StringRef UnquotedValue
225  , StringRef::size_type Start
226  , SmallVectorImpl<char> &Storage) const;
227 };
228 
229 /// @brief A key and value pair. While not technically a Node under the YAML
230 /// representation graph, it is easier to treat them this way.
231 ///
232 /// TODO: Consider making this not a child of Node.
233 ///
234 /// Example:
235 /// Section: .text
236 class KeyValueNode : public Node {
237  virtual void anchor();
238 public:
240  : Node(NK_KeyValue, D, StringRef(), StringRef())
241  , Key(0)
242  , Value(0)
243  {}
244 
245  /// @brief Parse and return the key.
246  ///
247  /// This may be called multiple times.
248  ///
249  /// @returns The key, or nullptr if failed() == true.
250  Node *getKey();
251 
252  /// @brief Parse and return the value.
253  ///
254  /// This may be called multiple times.
255  ///
256  /// @returns The value, or nullptr if failed() == true.
257  Node *getValue();
258 
259  virtual void skip() LLVM_OVERRIDE {
260  getKey()->skip();
261  getValue()->skip();
262  }
263 
264  static inline bool classof(const Node *N) {
265  return N->getType() == NK_KeyValue;
266  }
267 
268 private:
269  Node *Key;
270  Node *Value;
271 };
272 
273 /// @brief This is an iterator abstraction over YAML collections shared by both
274 /// sequences and maps.
275 ///
276 /// BaseT must have a ValueT* member named CurrentEntry and a member function
277 /// increment() which must set CurrentEntry to 0 to create an end iterator.
278 template <class BaseT, class ValueT>
280  : public std::iterator<std::forward_iterator_tag, ValueT> {
281 public:
283  basic_collection_iterator(BaseT *B) : Base(B) {}
284 
285  ValueT *operator ->() const {
286  assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
287  return Base->CurrentEntry;
288  }
289 
290  ValueT &operator *() const {
291  assert(Base && Base->CurrentEntry &&
292  "Attempted to dereference end iterator!");
293  return *Base->CurrentEntry;
294  }
295 
296  operator ValueT*() const {
297  assert(Base && Base->CurrentEntry && "Attempted to access end iterator!");
298  return Base->CurrentEntry;
299  }
300 
301  bool operator !=(const basic_collection_iterator &Other) const {
302  if(Base != Other.Base)
303  return true;
304  return (Base && Other.Base) && Base->CurrentEntry
305  != Other.Base->CurrentEntry;
306  }
307 
309  assert(Base && "Attempted to advance iterator past end!");
310  Base->increment();
311  // Create an end iterator.
312  if (Base->CurrentEntry == 0)
313  Base = 0;
314  return *this;
315  }
316 
317 private:
318  BaseT *Base;
319 };
320 
321 // The following two templates are used for both MappingNode and Sequence Node.
/// @brief Start the single permitted traversal of collection \p C.
///
/// Asserts that \p C has not been iterated before, clears its IsAtBeginning
/// flag, then builds an iterator on \p C and advances it once before
/// returning it.
template <class CollectionType>
typename CollectionType::iterator begin(CollectionType &C) {
  assert(C.IsAtBeginning && "You may only iterate over a collection once!");
  C.IsAtBeginning = false;
  typedef typename CollectionType::iterator IterT;
  IterT First(&C);
  ++First;
  return First;
}
330 
/// @brief Call skip() on every entry of \p C when \p C has not been iterated.
///
/// Asserts that \p C is either untouched (IsAtBeginning) or fully consumed
/// (IsAtEnd); in the latter case nothing is done.
template <class CollectionType>
void skip(CollectionType &C) {
  // TODO: support skipping from the middle of a parsed collection ;/
  assert((C.IsAtBeginning || C.IsAtEnd) && "Cannot skip mid parse!");
  if (!C.IsAtBeginning)
    return;
  typedef typename CollectionType::iterator IterT;
  IterT Cur = begin(C);
  IterT Last = C.end();
  while (Cur != Last) {
    Cur->skip();
    ++Cur;
  }
}
340 
341 /// @brief Represents a YAML map created from either a block map or a flow map.
342 ///
343 /// This parses the YAML stream as increment() is called.
344 ///
345 /// Example:
346 /// Name: _main
347 /// Scope: Global
348 class MappingNode : public Node {
349  virtual void anchor();
350 public:
351  enum MappingType {
354  MT_Inline ///< An inline mapping node is used for "[key: value]".
355  };
356 
358  MappingType MT)
359  : Node(NK_Mapping, D, Anchor, Tag), Type(MT), IsAtBeginning(true),
360  IsAtEnd(false), CurrentEntry(0) {}
361 
364  template <class T> friend typename T::iterator yaml::begin(T &);
365  template <class T> friend void yaml::skip(T &);
366 
368  return yaml::begin(*this);
369  }
370 
371  iterator end() { return iterator(); }
372 
373  virtual void skip() LLVM_OVERRIDE {
374  yaml::skip(*this);
375  }
376 
377  static inline bool classof(const Node *N) {
378  return N->getType() == NK_Mapping;
379  }
380 
381 private:
383  bool IsAtBeginning;
384  bool IsAtEnd;
385  KeyValueNode *CurrentEntry;
386 
387  void increment();
388 };
389 
390 /// @brief Represents a YAML sequence created from either a block sequence or a
391 /// flow sequence.
392 ///
393 /// This parses the YAML stream as increment() is called.
394 ///
395 /// Example:
396 /// - Hello
397 /// - World
398 class SequenceNode : public Node {
399  virtual void anchor();
400 public:
404  // Use for:
405  //
406  // key:
407  // - val1
408  // - val2
409  //
410  // As a BlockMappingEntry and BlockEnd are not created in this case.
412  };
413 
416  : Node(NK_Sequence, D, Anchor, Tag), SeqType(ST), IsAtBeginning(true),
417  IsAtEnd(false),
418  WasPreviousTokenFlowEntry(true), // Start with an imaginary ','.
419  CurrentEntry(0) {}
420 
423  template <class T> friend typename T::iterator yaml::begin(T &);
424  template <class T> friend void yaml::skip(T &);
425 
426  void increment();
427 
429  return yaml::begin(*this);
430  }
431 
432  iterator end() { return iterator(); }
433 
434  virtual void skip() LLVM_OVERRIDE {
435  yaml::skip(*this);
436  }
437 
438  static inline bool classof(const Node *N) {
439  return N->getType() == NK_Sequence;
440  }
441 
442 private:
443  SequenceType SeqType;
444  bool IsAtBeginning;
445  bool IsAtEnd;
446  bool WasPreviousTokenFlowEntry;
447  Node *CurrentEntry;
448 };
449 
450 /// @brief Represents an alias to a Node with an anchor.
451 ///
452 /// Example:
453 /// *AnchorName
454 class AliasNode : public Node {
455  virtual void anchor();
456 public:
458  : Node(NK_Alias, D, StringRef(), StringRef()), Name(Val) {}
459 
460  StringRef getName() const { return Name; }
461  Node *getTarget();
462 
463  static inline bool classof(const Node *N) {
464  return N->getType() == NK_Alias;
465  }
466 
467 private:
468  StringRef Name;
469 };
470 
471 /// @brief A YAML Stream is a sequence of Documents. A document contains a root
472 /// node.
473 class Document {
474 public:
475  /// @brief Root for parsing a node. Returns a single node.
476  Node *parseBlockNode();
477 
478  Document(Stream &ParentStream);
479 
480  /// @brief Finish parsing the current document and return true if there are
481  /// more. Return false otherwise.
482  bool skip();
483 
484  /// @brief Parse and return the root level node.
486  if (Root)
487  return Root;
488  return Root = parseBlockNode();
489  }
490 
491  const std::map<StringRef, StringRef> &getTagMap() const {
492  return TagMap;
493  }
494 
495 private:
496  friend class Node;
497  friend class document_iterator;
498 
499  /// @brief Stream to read tokens from.
500  Stream &stream;
501 
502  /// @brief Used to allocate nodes to. All are destroyed without calling their
503  /// destructor when the document is destroyed.
504  BumpPtrAllocator NodeAllocator;
505 
506  /// @brief The root node. Used to support skipping a partially parsed
507  /// document.
508  Node *Root;
509 
510  /// \brief Maps tag prefixes to their expansion.
511  std::map<StringRef, StringRef> TagMap;
512 
513  Token &peekNext();
514  Token getNext();
515  void setError(const Twine &Message, Token &Location) const;
516  bool failed() const;
517 
518  /// @brief Parse %BLAH directives and return true if any were encountered.
519  bool parseDirectives();
520 
521  /// \brief Parse %YAML
522  void parseYAMLDirective();
523 
524  /// \brief Parse %TAG
525  void parseTAGDirective();
526 
527  /// @brief Consume the next token and error if it is not \a TK.
528  bool expectToken(int TK);
529 };
530 
531 /// @brief Iterator abstraction for Documents over a Stream.
533 public:
534  document_iterator() : Doc(0) {}
536 
537  bool operator ==(const document_iterator &Other) {
538  if (isAtEnd() || Other.isAtEnd())
539  return isAtEnd() && Other.isAtEnd();
540 
541  return Doc == Other.Doc;
542  }
543  bool operator !=(const document_iterator &Other) {
544  return !(*this == Other);
545  }
546 
548  assert(Doc != 0 && "incrementing iterator past the end.");
549  if (!(*Doc)->skip()) {
550  Doc->reset(0);
551  } else {
552  Stream &S = (*Doc)->stream;
553  Doc->reset(new Document(S));
554  }
555  return *this;
556  }
557 
559  return *Doc->get();
560  }
561 
563  return *Doc;
564  }
565 
566 private:
567  bool isAtEnd() const {
568  return !Doc || !*Doc;
569  }
570 
571  OwningPtr<Document> *Doc;
572 };
573 
574 }
575 }
576 
577 #endif
static bool classof(const Node *N)
Definition: YAMLParser.h:377
document_iterator operator++()
Definition: YAMLParser.h:547
Node(unsigned int Type, OwningPtr< Document > &, StringRef Anchor, StringRef Tag)
COFF::RelocationTypeX86 Type
Definition: COFFYAML.cpp:227
virtual void skip() LLVM_OVERRIDE
Definition: YAMLParser.h:434
const std::map< StringRef, StringRef > & getTagMap() const
Definition: YAMLParser.h:491
ScalarNode(OwningPtr< Document > &D, StringRef Anchor, StringRef Tag, StringRef Val)
Definition: YAMLParser.h:197
OwningPtr< Document > & Doc
Definition: YAMLParser.h:160
std::string getVerbatimTag() const
Get the verbatim tag for a given Node. This performs tag resolution and substitution.
Type::TypeID TypeID
bool scanTokens(StringRef Input)
Scans all tokens in input without outputting anything. This is used for benchmarking the tokenizer...
Definition: YAMLParser.cpp:629
This is an iterator abstraction over YAML collections shared by both sequences and maps...
Definition: YAMLParser.h:279
StringRef getValue(SmallVectorImpl< char > &Storage) const
Gets the value of this node as a StringRef.
StringRef getRawTag() const
Get the tag as it was written in the document. This does not perform tag resolution.
Definition: YAMLParser.h:128
Represents a YAML sequence created from either a block sequence or a flow sequence.
Definition: YAMLParser.h:398
Node * getKey()
Parse and return the key.
Node * parseBlockNode()
document_iterator begin()
Represents an alias to a Node with an anchor.
Definition: YAMLParser.h:454
void skip(CollectionType &C)
Definition: YAMLParser.h:332
static bool classof(const Node *N)
Definition: YAMLParser.h:438
static bool classof(const Node *N)
Definition: YAMLParser.h:463
document_iterator end()
void setError(const Twine &Message, Token &Location) const
StringRef getRawValue() const
Definition: YAMLParser.h:208
#define false
Definition: ConvertUTF.c:64
virtual void skip() LLVM_OVERRIDE
Definition: YAMLParser.h:373
CollectionType::iterator begin(CollectionType &C)
Definition: YAMLParser.h:323
virtual void skip()
Definition: YAMLParser.h:145
A key and value pair. While not technically a Node under the YAML representation graph, it is easier to treat them this way.
Definition: YAMLParser.h:236
Node * getRoot()
Parse and return the root level node.
Definition: YAMLParser.h:485
basic_collection_iterator< SequenceNode, Node > iterator
Definition: YAMLParser.h:422
static bool classof(const Node *N)
Definition: YAMLParser.h:217
iterator begin() const
Definition: StringRef.h:97
SequenceNode(OwningPtr< Document > &D, StringRef Anchor, StringRef Tag, SequenceType ST)
Definition: YAMLParser.h:414
bool failed() const
bool dumpTokens(StringRef Input, raw_ostream &)
Dump all the tokens in this stream to OS.
Definition: YAMLParser.cpp:548
virtual void skip() LLVM_OVERRIDE
Definition: YAMLParser.h:259
#define true
Definition: ConvertUTF.c:65
void printError(Node *N, const Twine &Msg)
Stream(StringRef Input, SourceMgr &)
This keeps a reference to the string referenced by Input.
KeyValueNode(OwningPtr< Document > &D)
Definition: YAMLParser.h:239
std::string escape(StringRef Input)
Escape Input for a double quoted scalar.
Definition: YAMLParser.cpp:642
Document(Stream &ParentStream)
document_iterator(OwningPtr< Document > &D)
Definition: YAMLParser.h:535
OwningPtr< Document > & operator->()
Definition: YAMLParser.h:562
BumpPtrAllocator & getAllocator()
A scalar node is an opaque datum that can be presented as a series of zero or more Unicode scalar val...
Definition: YAMLParser.h:194
A null value.
Definition: YAMLParser.h:178
static bool classof(const Node *N)
Definition: YAMLParser.h:264
NullNode(OwningPtr< Document > &D)
Definition: YAMLParser.h:181
void setSourceRange(SMRange SR)
Definition: YAMLParser.h:135
AliasNode(OwningPtr< Document > &D, StringRef Val)
Definition: YAMLParser.h:457
Node * getValue()
Parse and return the value.
bool skip()
Finish parsing the current document and return true if there are more. Return false otherwise...
Token & peekNext()
This class represents a YAML stream potentially containing multiple documents.
Definition: YAMLParser.h:79
StringRef getAnchor() const
Get the value of the anchor attached to this node. If it does not have one, getAnchor().size() will be 0.
Definition: YAMLParser.h:124
StringRef getName() const
Definition: YAMLParser.h:460
bool operator!=(const basic_collection_iterator &Other) const
Definition: YAMLParser.h:301
virtual ~Node()
Definition: YAMLParser.h:165
static SMLoc getFromPointer(const char *Ptr)
Definition: SMLoc.h:35
SMRange getSourceRange() const
Definition: YAMLParser.h:134
static bool classof(const Node *N)
Definition: YAMLParser.h:184
size_t size_type
Definition: StringRef.h:46
#define N
Token - A single YAML token.
Definition: YAMLParser.cpp:109
Represents a YAML map created from either a block map or a flow map.
Definition: YAMLParser.h:348
Iterator abstraction for Documents over a Stream.
Definition: YAMLParser.h:532
unsigned int getType() const
Definition: YAMLParser.h:147
basic_collection_iterator< MappingNode, KeyValueNode > iterator
Definition: YAMLParser.h:363
SMRange SourceRange
Definition: YAMLParser.h:161
LLVM Value Representation.
Definition: Value.h:66
MappingNode(OwningPtr< Document > &D, StringRef Anchor, StringRef Tag, MappingType MT)
Definition: YAMLParser.h:357
iterator end() const
Definition: StringRef.h:99
bool operator==(const document_iterator &Other)
Definition: YAMLParser.h:537
basic_collection_iterator & operator++()
Definition: YAMLParser.h:308
Represents a location in source code.
Definition: SMLoc.h:23
bool operator!=(const document_iterator &Other)
Definition: YAMLParser.h:543
An inline mapping node is used for "[key: value]".
Definition: YAMLParser.h:354
Node * parseBlockNode()
Root for parsing a node. Returns a single node.
A YAML Stream is a sequence of Documents. A document contains a root node.
Definition: YAMLParser.h:473
#define LLVM_OVERRIDE
Definition: Compiler.h:155
Abstract base class for all Nodes.
Definition: YAMLParser.h:107