48 if (Input.
size() == 0)
51 switch (uint8_t(Input[0])) {
53 if (Input.
size() >= 4) {
55 && uint8_t(Input[2]) == 0xFE
56 && uint8_t(Input[3]) == 0xFF)
58 if (Input[1] == 0 && Input[2] == 0 && Input[3] != 0)
62 if (Input.
size() >= 2 && Input[1] != 0)
66 if ( Input.
size() >= 4
67 && uint8_t(Input[1]) == 0xFE
72 if (Input.
size() >= 2 && uint8_t(Input[1]) == 0xFE)
76 if (Input.
size() >= 2 && uint8_t(Input[1]) == 0xFF)
80 if ( Input.
size() >= 3
81 && uint8_t(Input[1]) == 0xBB
82 && uint8_t(Input[2]) == 0xBF)
88 if (Input.
size() >= 4 && Input[1] == 0 && Input[2] == 0 && Input[3] == 0)
91 if (Input.
size() >= 2 && Input[1] == 0)
// Out-of-line definitions of anchor() for Node and each of the node kinds
// listed below. Every body is empty.
// NOTE(review): presumably these exist to give each class a single home
// translation unit for its vtable — confirm against the class declarations.
100 void Node::anchor() {}
101 void NullNode::anchor() {}
102 void ScalarNode::anchor() {}
103 void KeyValueNode::anchor() {}
104 void MappingNode::anchor() {}
105 void SequenceNode::anchor() {}
106 void AliasNode::anchor() {}
157 mutable Token Sentinel;
163 return new (Alloc.Allocate<
Token>())
Token(V);
197 return Tok == Other.Tok;
212 if ((*Position & 0x80) == 0) {
213 return std::make_pair(*Position, 1);
217 if (Position + 1 != End &&
218 ((*Position & 0xE0) == 0xC0) &&
219 ((*(Position + 1) & 0xC0) == 0x80)) {
220 uint32_t codepoint = ((*Position & 0x1F) << 6) |
221 (*(Position + 1) & 0x3F);
222 if (codepoint >= 0x80)
223 return std::make_pair(codepoint, 2);
227 if (Position + 2 != End &&
228 ((*Position & 0xF0) == 0xE0) &&
229 ((*(Position + 1) & 0xC0) == 0x80) &&
230 ((*(Position + 2) & 0xC0) == 0x80)) {
231 uint32_t codepoint = ((*Position & 0x0F) << 12) |
232 ((*(Position + 1) & 0x3F) << 6) |
233 (*(Position + 2) & 0x3F);
236 if (codepoint >= 0x800 &&
237 (codepoint < 0xD800 || codepoint > 0xDFFF))
238 return std::make_pair(codepoint, 3);
242 if (Position + 3 != End &&
243 ((*Position & 0xF8) == 0xF0) &&
244 ((*(Position + 1) & 0xC0) == 0x80) &&
245 ((*(Position + 2) & 0xC0) == 0x80) &&
246 ((*(Position + 3) & 0xC0) == 0x80)) {
247 uint32_t codepoint = ((*Position & 0x07) << 18) |
248 ((*(Position + 1) & 0x3F) << 12) |
249 ((*(Position + 2) & 0x3F) << 6) |
250 (*(Position + 3) & 0x3F);
251 if (codepoint >= 0x10000 && codepoint <= 0x10FFFF)
252 return std::make_pair(codepoint, 4);
254 return std::make_pair(0, 0);
288 setError(Message, Current);
298 return StringRef(Current, End - Current);
387 bool consume(uint32_t Expected);
390 void skip(uint32_t Distance);
405 void removeStaleSimpleKeyCandidates();
408 void removeSimpleKeyCandidatesOnFlowLevel(
unsigned Level);
412 bool unrollIndent(
int ToColumn);
416 bool rollIndent(
int ToColumn
421 void scanToNextToken();
424 bool scanStreamStart();
427 bool scanStreamEnd();
430 bool scanDirective();
433 bool scanDocumentIndicator(
bool IsStart);
436 bool scanFlowCollectionStart(
bool IsSequence);
439 bool scanFlowCollectionEnd(
bool IsSequence);
442 bool scanFlowEntry();
445 bool scanBlockEntry();
454 bool scanFlowScalar(
bool IsDoubleQuoted);
457 bool scanPlainScalar();
460 bool scanAliasOrAnchor(
bool IsAlias);
463 bool scanBlockScalar(
bool IsLiteral);
469 bool fetchMoreTokens();
496 bool IsStartOfStream;
499 bool IsSimpleKeyAllowed;
522 if (UnicodeScalarValue <= 0x7F) {
523 Result.
push_back(UnicodeScalarValue & 0x7F);
524 }
else if (UnicodeScalarValue <= 0x7FF) {
525 uint8_t FirstByte = 0xC0 | ((UnicodeScalarValue & 0x7C0) >> 6);
526 uint8_t SecondByte = 0x80 | (UnicodeScalarValue & 0x3F);
529 }
else if (UnicodeScalarValue <= 0xFFFF) {
530 uint8_t FirstByte = 0xE0 | ((UnicodeScalarValue & 0xF000) >> 12);
531 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
532 uint8_t ThirdByte = 0x80 | (UnicodeScalarValue & 0x3F);
536 }
else if (UnicodeScalarValue <= 0x10FFFF) {
537 uint8_t FirstByte = 0xF0 | ((UnicodeScalarValue & 0x1F0000) >> 18);
538 uint8_t SecondByte = 0x80 | ((UnicodeScalarValue & 0x3F000) >> 12);
539 uint8_t ThirdByte = 0x80 | ((UnicodeScalarValue & 0xFC0) >> 6);
540 uint8_t FourthByte = 0x80 | (UnicodeScalarValue & 0x3F);
555 OS <<
"Stream-Start: ";
558 OS <<
"Stream-End: ";
561 OS <<
"Version-Directive: ";
564 OS <<
"Tag-Directive: ";
567 OS <<
"Document-Start: ";
570 OS <<
"Document-End: ";
573 OS <<
"Block-Entry: ";
579 OS <<
"Block-Sequence-Start: ";
582 OS <<
"Block-Mapping-Start: ";
585 OS <<
"Flow-Entry: ";
588 OS <<
"Flow-Sequence-Start: ";
591 OS <<
"Flow-Sequence-End: ";
594 OS <<
"Flow-Mapping-Start: ";
597 OS <<
"Flow-Mapping-End: ";
620 OS << T.
Range <<
"\n";
643 std::string EscapedInput;
646 EscapedInput +=
"\\\\";
648 EscapedInput +=
"\\\"";
650 EscapedInput +=
"\\0";
652 EscapedInput +=
"\\a";
654 EscapedInput +=
"\\b";
656 EscapedInput +=
"\\t";
658 EscapedInput +=
"\\n";
660 EscapedInput +=
"\\v";
662 EscapedInput +=
"\\f";
664 EscapedInput +=
"\\r";
666 EscapedInput +=
"\\e";
667 else if ((
unsigned char)*i < 0x20) {
669 EscapedInput +=
"\\x" + std::string(2 - HexStr.size(),
'0') + HexStr;
670 }
else if (*i & 0x80) {
673 if (UnicodeScalarValue.second == 0) {
677 EscapedInput.insert(EscapedInput.end(), Val.
begin(), Val.
end());
681 if (UnicodeScalarValue.first == 0x85)
682 EscapedInput +=
"\\N";
683 else if (UnicodeScalarValue.first == 0xA0)
684 EscapedInput +=
"\\_";
685 else if (UnicodeScalarValue.first == 0x2028)
686 EscapedInput +=
"\\L";
687 else if (UnicodeScalarValue.first == 0x2029)
688 EscapedInput +=
"\\P";
690 std::string HexStr =
utohexstr(UnicodeScalarValue.first);
691 if (HexStr.size() <= 2)
692 EscapedInput +=
"\\x" + std::string(2 - HexStr.size(),
'0') + HexStr;
693 else if (HexStr.size() <= 4)
694 EscapedInput +=
"\\u" + std::string(4 - HexStr.size(),
'0') + HexStr;
695 else if (HexStr.size() <= 8)
696 EscapedInput +=
"\\U" + std::string(8 - HexStr.size(),
'0') + HexStr;
698 i += UnicodeScalarValue.second - 1;
700 EscapedInput.push_back(*i);
711 , IsStartOfStream(
true)
712 , IsSimpleKeyAllowed(
true)
722 , InputBuffer(Buffer)
723 , Current(InputBuffer->getBufferStart())
724 , End(InputBuffer->getBufferEnd())
729 , IsStartOfStream(
true)
730 , IsSimpleKeyAllowed(
true)
738 bool NeedMore =
false;
740 if (TokenQueue.
empty() || NeedMore) {
741 if (!fetchMoreTokens()) {
744 return TokenQueue.
front();
747 assert(!TokenQueue.
empty() &&
748 "fetchMoreTokens lied about getting tokens!");
750 removeStaleSimpleKeyCandidates();
752 SK.Tok = TokenQueue.
front();
753 if (std::find(SimpleKeys.
begin(), SimpleKeys.
end(), SK)
759 return TokenQueue.
front();
765 if (!TokenQueue.
empty())
770 if (TokenQueue.
empty()) {
771 TokenQueue.Alloc.Reset();
781 if ( *Position == 0x09
782 || (*Position >= 0x20 && *Position <= 0x7E))
786 if (uint8_t(*Position) & 0x80) {
789 && u8d.first != 0xFEFF
790 && ( u8d.first == 0x85
791 || ( u8d.first >= 0xA0
792 && u8d.first <= 0xD7FF)
793 || ( u8d.first >= 0xE000
794 && u8d.first <= 0xFFFD)
795 || ( u8d.first >= 0x10000
796 && u8d.first <= 0x10FFFF)))
797 return Position + u8d.second;
805 if (*Position == 0x0D) {
806 if (Position + 1 != End && *(Position + 1) == 0x0A)
811 if (*Position == 0x0A)
820 if (*Position ==
' ' || *Position ==
'\t')
828 if (*Position ==
' ' || *Position ==
'\t')
830 return skip_nb_char(Position);
845 return (C >=
'0' && C <=
'9')
846 || (C >=
'a' && C <=
'z')
847 || (C >=
'A' && C <=
'Z');
852 || (C >=
'a' && C <=
'z')
853 || (C >=
'A' && C <=
'Z');
861 if (( *Current ==
'%'
866 ||
StringRef(Current, 1).find_first_of(
"#;/?:@&=+$,_.!~*'()[]")
873 return StringRef(Start, Current - Start);
876 StringRef Scanner::scan_ns_plain_one_line() {
881 if (Current == End) {
883 }
else if (*Current ==
':') {
885 if (Current + 1 == End)
888 if (Current + 1 != i) {
893 }
else if (*Current ==
'#') {
896 if (*(Current - 1) & 0x80 || skip_ns_char(Current - 1) == Current) {
909 return StringRef(start, Current - start);
912 bool Scanner::consume(uint32_t Expected) {
913 if (Expected >= 0x80)
917 if (uint8_t(*Current) >= 0x80)
919 if (uint8_t(*Current) == Expected) {
927 void Scanner::skip(uint32_t Distance) {
930 assert(Current <= End &&
"Skipped past the end");
936 if ( *Position ==
' ' || *Position ==
'\t'
937 || *Position ==
'\r' || *Position ==
'\n')
945 if (IsSimpleKeyAllowed) {
949 SK.Column = AtColumn;
950 SK.IsRequired = IsRequired;
951 SK.FlowLevel = FlowLevel;
956 void Scanner::removeStaleSimpleKeyCandidates() {
958 i != SimpleKeys.
end();) {
959 if (i->Line != Line || i->Column + 1024 < Column) {
961 setError(
"Could not find expected : for simple key"
962 , i->Tok->Range.begin());
963 i = SimpleKeys.
erase(i);
969 void Scanner::removeSimpleKeyCandidatesOnFlowLevel(
unsigned Level) {
970 if (!SimpleKeys.
empty() && (SimpleKeys.
end() - 1)->FlowLevel == Level)
974 bool Scanner::unrollIndent(
int ToColumn) {
980 while (Indent > ToColumn) {
990 bool Scanner::rollIndent(
int ToColumn
995 if (Indent < ToColumn) {
1002 TokenQueue.
insert(InsertPoint, T);
1007 void Scanner::scanToNextToken() {
1009 while (*Current ==
' ' || *Current ==
'\t') {
1014 if (*Current ==
'#') {
1035 IsSimpleKeyAllowed =
true;
1039 bool Scanner::scanStreamStart() {
1040 IsStartOfStream =
false;
1048 Current += EI.second;
1052 bool Scanner::scanStreamEnd() {
1061 IsSimpleKeyAllowed =
false;
1070 bool Scanner::scanDirective() {
1074 IsSimpleKeyAllowed =
false;
1079 Current = skip_while(&Scanner::skip_ns_char, Current);
1081 Current = skip_while(&Scanner::skip_s_white, Current);
1084 if (
Name ==
"YAML") {
1085 Current = skip_while(&Scanner::skip_ns_char, Current);
1090 }
else if(
Name ==
"TAG") {
1091 Current = skip_while(&Scanner::skip_ns_char, Current);
1092 Current = skip_while(&Scanner::skip_s_white, Current);
1093 Current = skip_while(&Scanner::skip_ns_char, Current);
1102 bool Scanner::scanDocumentIndicator(
bool IsStart) {
1105 IsSimpleKeyAllowed =
false;
1115 bool Scanner::scanFlowCollectionStart(
bool IsSequence) {
1124 saveSimpleKeyCandidate(TokenQueue.
back(), Column - 1,
false);
1127 IsSimpleKeyAllowed =
true;
1132 bool Scanner::scanFlowCollectionEnd(
bool IsSequence) {
1133 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1134 IsSimpleKeyAllowed =
false;
1146 bool Scanner::scanFlowEntry() {
1147 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1148 IsSimpleKeyAllowed =
true;
1157 bool Scanner::scanBlockEntry() {
1159 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1160 IsSimpleKeyAllowed =
true;
1169 bool Scanner::scanKey() {
1173 removeSimpleKeyCandidatesOnFlowLevel(FlowLevel);
1174 IsSimpleKeyAllowed = !FlowLevel;
1184 bool Scanner::scanValue() {
1187 if (!SimpleKeys.
empty()) {
1191 T.
Range = SK.Tok->Range;
1193 for (i = TokenQueue.
begin(), e = TokenQueue.
end(); i != e; ++i) {
1197 assert(i != e &&
"SimpleKey not in token queue!");
1198 i = TokenQueue.
insert(i, T);
1203 IsSimpleKeyAllowed =
false;
1207 IsSimpleKeyAllowed = !FlowLevel;
1227 assert(Position - 1 >= First);
1231 while (I >= First && *I ==
'\\') --
I;
1234 return (Position - 1 - I) % 2 == 1;
1237 bool Scanner::scanFlowScalar(
bool IsDoubleQuoted) {
1239 unsigned ColStart = Column;
1240 if (IsDoubleQuoted) {
1243 while (Current != End && *Current !=
'"')
1247 }
while ( Current != End
1248 && *(Current - 1) ==
'\\'
1254 if (Current + 1 < End && *Current ==
'\'' && *(Current + 1) ==
'\'') {
1257 }
else if (*Current ==
'\'')
1261 i = skip_b_break(Current);
1276 if (Current == End) {
1277 setError(
"Expected quote at end of scalar", Current);
1287 saveSimpleKeyCandidate(TokenQueue.
back(), ColStart,
false);
1289 IsSimpleKeyAllowed =
false;
1294 bool Scanner::scanPlainScalar() {
1296 unsigned ColStart = Column;
1297 unsigned LeadingBlanks = 0;
1298 assert(Indent >= -1 &&
"Indent must be >= -1 !");
1299 unsigned indent =
static_cast<unsigned>(Indent + 1);
1301 if (*Current ==
'#')
1304 while (!isBlankOrBreak(Current)) {
1305 if ( FlowLevel && *Current ==
':'
1306 && !(isBlankOrBreak(Current + 1) || *(Current + 1) ==
',')) {
1307 setError(
"Found unexpected ':' while scanning a plain scalar", Current);
1312 if ( (*Current ==
':' && isBlankOrBreak(Current + 1))
1314 && (
StringRef(Current, 1).find_first_of(
",:?[]{}")
1326 if (!isBlankOrBreak(Current))
1331 while (isBlankOrBreak(Tmp)) {
1334 if (LeadingBlanks && (Column < indent) && *Tmp ==
'\t') {
1335 setError(
"Found invalid tab character in indentation", Tmp);
1341 i = skip_b_break(Tmp);
1350 if (!FlowLevel && Column < indent)
1355 if (Start == Current) {
1356 setError(
"Got empty plain scalar", Start);
1365 saveSimpleKeyCandidate(TokenQueue.
back(), ColStart,
false);
1367 IsSimpleKeyAllowed =
false;
1372 bool Scanner::scanAliasOrAnchor(
bool IsAlias) {
1374 unsigned ColStart = Column;
1377 if ( *Current ==
'[' || *Current ==
']'
1378 || *Current ==
'{' || *Current ==
'}'
1389 if (Start == Current) {
1390 setError(
"Got empty alias or anchor", Start);
1400 saveSimpleKeyCandidate(TokenQueue.
back(), ColStart,
false);
1402 IsSimpleKeyAllowed =
false;
1407 bool Scanner::scanBlockScalar(
bool IsLiteral) {
1415 i = skip_b_break(Current);
1431 if (Start == Current) {
1432 setError(
"Got empty block scalar", Start);
1443 bool Scanner::scanTag() {
1445 unsigned ColStart = Column;
1447 if (Current == End || isBlankOrBreak(Current));
1448 else if (*Current ==
'<') {
1455 Current = skip_while(&Scanner::skip_ns_char, Current);
1464 saveSimpleKeyCandidate(TokenQueue.
back(), ColStart,
false);
1466 IsSimpleKeyAllowed =
false;
1471 bool Scanner::fetchMoreTokens() {
1472 if (IsStartOfStream)
1473 return scanStreamStart();
1478 return scanStreamEnd();
1480 removeStaleSimpleKeyCandidates();
1482 unrollIndent(Column);
1484 if (Column == 0 && *Current ==
'%')
1485 return scanDirective();
1487 if (Column == 0 && Current + 4 <= End
1489 && *(Current + 1) ==
'-'
1490 && *(Current + 2) ==
'-'
1491 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1492 return scanDocumentIndicator(
true);
1494 if (Column == 0 && Current + 4 <= End
1496 && *(Current + 1) ==
'.'
1497 && *(Current + 2) ==
'.'
1498 && (Current + 3 == End || isBlankOrBreak(Current + 3)))
1499 return scanDocumentIndicator(
false);
1501 if (*Current ==
'[')
1502 return scanFlowCollectionStart(
true);
1504 if (*Current ==
'{')
1505 return scanFlowCollectionStart(
false);
1507 if (*Current ==
']')
1508 return scanFlowCollectionEnd(
true);
1510 if (*Current ==
'}')
1511 return scanFlowCollectionEnd(
false);
1513 if (*Current ==
',')
1514 return scanFlowEntry();
1516 if (*Current ==
'-' && isBlankOrBreak(Current + 1))
1517 return scanBlockEntry();
1519 if (*Current ==
'?' && (FlowLevel || isBlankOrBreak(Current + 1)))
1522 if (*Current ==
':' && (FlowLevel || isBlankOrBreak(Current + 1)))
1525 if (*Current ==
'*')
1526 return scanAliasOrAnchor(
true);
1528 if (*Current ==
'&')
1529 return scanAliasOrAnchor(
false);
1531 if (*Current ==
'!')
1534 if (*Current ==
'|' && !FlowLevel)
1535 return scanBlockScalar(
true);
1537 if (*Current ==
'>' && !FlowLevel)
1538 return scanBlockScalar(
false);
1540 if (*Current ==
'\'')
1541 return scanFlowScalar(
false);
1543 if (*Current ==
'"')
1544 return scanFlowScalar(
true);
1548 if (!(isBlankOrBreak(Current)
1549 || FirstChar.find_first_of(
"-?:,[]{}#&*!|>'\"%@`") !=
StringRef::npos)
1550 || (*Current ==
'-' && !isBlankOrBreak(Current + 1))
1551 || (!FlowLevel && (*Current ==
'?' || *Current ==
':')
1552 && isBlankOrBreak(Current + 1))
1553 || (!FlowLevel && *Current ==
':'
1554 && Current + 2 < End
1555 && *(Current + 1) ==
':'
1556 && !isBlankOrBreak(Current + 2)))
1557 return scanPlainScalar();
1559 setError(
"Unrecognized character while tokenizing.");
1564 : scanner(new
Scanner(Input, SM))
1568 : scanner(new
Scanner(InputBuffer, SM))
1591 CurrentDoc.reset(
new Document(*
this));
1615 if (!Raw.
empty() && Raw !=
"!") {
1618 Ret =
Doc->getTagMap().find(
"!")->second;
1622 Ret =
Doc->getTagMap().find(
"!!")->second;
1627 std::map<StringRef, StringRef>::const_iterator It =
1628 Doc->getTagMap().find(TagHandle);
1629 if (It !=
Doc->getTagMap().end())
1634 T.
Range = TagHandle;
1644 return "tag:yaml.org,2002:null";
1647 return "tag:yaml.org,2002:str";
1649 return "tag:yaml.org,2002:map";
1651 return "tag:yaml.org,2002:seq";
1658 return Doc->peekNext();
1662 return Doc->getNext();
1670 return Doc->NodeAllocator;
1674 Doc->setError(Msg, Tok);
1677 bool Node::failed()
const {
1678 return Doc->failed();
1685 if (
Value[0] ==
'"') {
1691 return unescapeDoubleQuoted(UnquotedValue, i, Storage);
1692 return UnquotedValue;
1693 }
else if (
Value[0] ==
'\'') {
1703 Storage.
insert(Storage.
end(), Valid.begin(), Valid.end());
1705 UnquotedValue = UnquotedValue.
substr(i + 2);
1710 return UnquotedValue;
1713 return Value.rtrim(
" ");
1726 Storage.
insert(Storage.
end(), Valid.begin(), Valid.end());
1728 UnquotedValue = UnquotedValue.
substr(i);
1730 assert(!UnquotedValue.
empty() &&
"Can't be empty!");
1733 switch (UnquotedValue[0]) {
1737 if ( UnquotedValue.
size() > 1
1738 && (UnquotedValue[1] ==
'\r' || UnquotedValue[1] ==
'\n'))
1739 UnquotedValue = UnquotedValue.
substr(1);
1740 UnquotedValue = UnquotedValue.
substr(1);
1743 if (UnquotedValue.
size() == 1)
1746 UnquotedValue = UnquotedValue.
substr(1);
1747 switch (UnquotedValue[0]) {
1751 setError(
"Unrecognized escape code!", T);
1757 if ( UnquotedValue.
size() > 1
1758 && (UnquotedValue[1] ==
'\r' || UnquotedValue[1] ==
'\n'))
1759 UnquotedValue = UnquotedValue.
substr(1);
1816 if (UnquotedValue.
size() < 3)
1819 unsigned int UnicodeScalarValue;
1822 UnicodeScalarValue = 0xFFFD;
1824 UnquotedValue = UnquotedValue.
substr(2);
1828 if (UnquotedValue.
size() < 5)
1831 unsigned int UnicodeScalarValue;
1834 UnicodeScalarValue = 0xFFFD;
1836 UnquotedValue = UnquotedValue.
substr(4);
1840 if (UnquotedValue.
size() < 9)
1843 unsigned int UnicodeScalarValue;
1846 UnicodeScalarValue = 0xFFFD;
1848 UnquotedValue = UnquotedValue.
substr(8);
1852 UnquotedValue = UnquotedValue.
substr(1);
1864 Token &t = peekNext();
1865 if ( t.
Kind == Token::TK_BlockEnd
1866 || t.
Kind == Token::TK_Value
1867 || t.
Kind == Token::TK_Error) {
1868 return Key =
new (getAllocator())
NullNode(Doc);
1870 if (t.
Kind == Token::TK_Key)
1875 Token &t = peekNext();
1876 if (t.
Kind == Token::TK_BlockEnd || t.
Kind == Token::TK_Value) {
1877 return Key =
new (getAllocator())
NullNode(Doc);
1881 return Key = parseBlockNode();
1893 Token &t = peekNext();
1894 if ( t.
Kind == Token::TK_BlockEnd
1895 || t.
Kind == Token::TK_FlowMappingEnd
1896 || t.
Kind == Token::TK_Key
1897 || t.
Kind == Token::TK_FlowEntry
1898 || t.
Kind == Token::TK_Error) {
1902 if (t.
Kind != Token::TK_Value) {
1903 setError(
"Unexpected token in Key Value.", t);
1910 Token &t = peekNext();
1911 if (t.
Kind == Token::TK_BlockEnd || t.
Kind == Token::TK_Key) {
1916 return Value = parseBlockNode();
1919 void MappingNode::increment() {
1926 CurrentEntry->skip();
1927 if (
Type == MT_Inline) {
1933 Token T = peekNext();
1934 if (T.
Kind == Token::TK_Key || T.
Kind == Token::TK_Scalar) {
1937 }
else if (
Type == MT_Block) {
1939 case Token::TK_BlockEnd:
1945 setError(
"Unexpected token. Expected Key or Block End", T);
1946 case Token::TK_Error:
1952 case Token::TK_FlowEntry:
1956 case Token::TK_FlowMappingEnd:
1958 case Token::TK_Error:
1964 setError(
"Unexpected token. Expected Key, Flow Entry, or Flow "
1973 void SequenceNode::increment() {
1980 CurrentEntry->skip();
1981 Token T = peekNext();
1982 if (SeqType == ST_Block) {
1984 case Token::TK_BlockEntry:
1986 CurrentEntry = parseBlockNode();
1987 if (CurrentEntry == 0) {
1992 case Token::TK_BlockEnd:
1998 setError(
"Unexpected token. Expected Block Entry or Block End."
2000 case Token::TK_Error:
2004 }
else if (SeqType == ST_Indentless) {
2006 case Token::TK_BlockEntry:
2008 CurrentEntry = parseBlockNode();
2009 if (CurrentEntry == 0) {
2015 case Token::TK_Error:
2019 }
else if (SeqType == ST_Flow) {
2021 case Token::TK_FlowEntry:
2024 WasPreviousTokenFlowEntry =
true;
2026 case Token::TK_FlowSequenceEnd:
2028 case Token::TK_Error:
2033 case Token::TK_StreamEnd:
2034 case Token::TK_DocumentEnd:
2035 case Token::TK_DocumentStart:
2036 setError(
"Could not find closing ]!", T);
2042 if (!WasPreviousTokenFlowEntry) {
2043 setError(
"Expected , between entries!", T);
2049 CurrentEntry = parseBlockNode();
2050 if (!CurrentEntry) {
2053 WasPreviousTokenFlowEntry =
false;
2059 Document::Document(
Stream &S) : stream(S), Root(0) {
2062 TagMap[
"!!"] =
"tag:yaml.org,2002:";
2064 if (parseDirectives())
2066 Token &T = peekNext();
2072 if (stream.scanner->failed())
2077 Token &T = peekNext();
2087 Token &Document::peekNext() {
2088 return stream.scanner->peekNext();
2091 Token Document::getNext() {
2092 return stream.scanner->getNext();
2095 void Document::setError(
const Twine &Message,
Token &Location)
const {
2096 stream.scanner->setError(Message, Location.
Range.
begin());
2099 bool Document::failed()
const {
2100 return stream.scanner->failed();
2104 Token T = peekNext();
2115 setError(
"Already encountered an anchor for this node!", T);
2118 AnchorInfo = getNext();
2120 goto parse_property;
2123 setError(
"Already encountered a tag for this node!", T);
2126 TagInfo = getNext();
2128 goto parse_property;
2138 return new (NodeAllocator)
SequenceNode( stream.CurrentDoc
2144 return new (NodeAllocator)
2151 return new (NodeAllocator)
2158 return new (NodeAllocator)
2165 return new (NodeAllocator)
2172 return new (NodeAllocator)
2179 return new (NodeAllocator)
2190 return new (NodeAllocator)
NullNode(stream.CurrentDoc);
2198 bool Document::parseDirectives() {
2199 bool isDirective =
false;
2201 Token T = peekNext();
2203 parseTAGDirective();
2206 parseYAMLDirective();
2214 void Document::parseYAMLDirective() {
2218 void Document::parseTAGDirective() {
2219 Token Tag = getNext();
2226 TagMap[TagHandle] = TagPrefix;
2229 bool Document::expectToken(
int TK) {
2230 Token T = getNext();
2232 setError(
"Unexpected token", T);
static MemoryBuffer * getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
static void destroySentinel(Token *)
Node(unsigned int Type, OwningPtr< Document > &, StringRef Anchor, StringRef Tag)
void push_back(const T &Elt)
OwningPtr< Document > & Doc
std::string getVerbatimTag() const
Get the verbatim tag for a given Node. This performs tag resolution and substitution.
iplist< Token >::iterator iterator
size_t size() const
size - Get the string size.
const char * getBufferStart() const
Not a valid Unicode encoding.
bool scanTokens(StringRef Input)
Scans all tokens in input without outputting anything. This is used for benchmarking the tokenizer...
size_t find(char C, size_t From=0) const
StringRef getRawTag() const
Get the tag as it was written in the document. This does not perform tag resolution.
Represents a YAML sequence created from either a block sequence or a flow sequence.
static LLVM_ATTRIBUTE_NOINLINE bool wasEscaped(StringRef::iterator First, StringRef::iterator Position)
StringRef substr(size_t Start, size_t N=npos) const
iterator insert(iterator I, const T &Elt)
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
std::pair< uint32_t, unsigned > UTF8Decoded
The Unicode scalar value of a UTF-8 minimal well-formed code unit subsequence and the subsequence's l...
document_iterator begin()
Represents an alias to a Node with an anchor.
void skip(CollectionType &C)
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(const char *reason, bool gen_crash_diag=true)
void setError(const Twine &Message, Token &Location) const
static std::string utohexstr(uint64_t X)
static bool is_ns_hex_digit(const char C)
T LLVM_ATTRIBUTE_UNUSED_RESULT pop_back_val()
bool failed()
Returns true if an error occurred while parsing.
#define llvm_unreachable(msg)
Token * ensureHead(Token *) const
static EncodingInfo getUnicodeEncoding(StringRef Input)
void addNodeToList(Token *)
static void noteHead(Token *, Token *)
A key and value pair. While not technically a Node under the YAML representation graph, it is easier to treat them this way.
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
Node * getRoot()
Parse and return the root level node.
bool dumpTokens(StringRef Input, raw_ostream &)
Dump all the tokens in this stream to OS.
Token * createSentinel() const
void printError(Node *N, const Twine &Msg)
Stream(StringRef Input, SourceMgr &)
This keeps a reference to the string referenced by Input.
iterator insert(iterator where, const NodeTy &val)
Token * createNode(const Token &V)
std::string escape(StringRef Input)
Escape Input for a double quoted scalar.
void printError(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Message, ArrayRef< SMRange > Ranges=None)
Token getNext()
Parse the next token and pop it from the queue.
enable_if_c< std::numeric_limits< T >::is_signed, bool >::type getAsInteger(unsigned Radix, T &Result) const
iterator erase(iterator I)
Scanner(const StringRef Input, SourceMgr &SM)
A scalar node is an opaque datum that can be presented as a series of zero or more Unicode scalar val...
void setError(const Twine &Message, StringRef::iterator Position)
Token * provideInitialHead() const
void removeNodeFromList(Token *)
size_t find_last_of(char C, size_t From=npos) const
void transferNodesFromList(ilist_node_traits &, ilist_iterator< Token >, ilist_iterator< Token >)
bool startswith(StringRef Prefix) const
Check if this string starts with the given Prefix.
bool skip()
Finish parsing the current document and return true if there are more. Return false otherwise...
enum llvm::yaml::Token::TokenKind Kind
Token & peekNext()
Parse the next token and return it without popping it.
This class represents a YAML stream potentially containing multiple documents.
#define LLVM_ATTRIBUTE_NOINLINE
bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const
ilist< Token > TokenQueueT
static void encodeUTF8(uint32_t UnicodeScalarValue, SmallVectorImpl< char > &Result)
encodeUTF8 - Encode UnicodeScalarValue in UTF-8 and append it to result.
void setError(const Twine &Message)
static UTF8Decoded decodeUTF8(StringRef Range)
static SMLoc getFromPointer(const char *Ptr)
SMRange getSourceRange() const
Token - A single YAML token.
Represents a YAML map created from either a block map or a flow map.
static void deleteNode(Token *V)
size_t find_first_of(char C, size_t From=0) const
const char * getBufferEnd() const
Scans YAML tokens from a MemoryBuffer.
std::pair< UnicodeEncodingForm, unsigned > EncodingInfo
Iterator abstraction for Documents over a Stream.
LLVM Value Representation.
bool operator==(uint64_t V1, const APInt &V2)
void PrintMessage(raw_ostream &OS, SMLoc Loc, DiagKind Kind, const Twine &Msg, ArrayRef< SMRange > Ranges=None, ArrayRef< SMFixIt > FixIts=None, bool ShowColors=true) const
Represents a location in source code.
size_t AddNewSourceBuffer(MemoryBuffer *F, SMLoc IncludeLoc)
An inline mapping node is used for "[key: value]".
Node * parseBlockNode()
Root for parsing a node. Returns a single node.
StringRef ltrim(StringRef Chars=" \t\n\v\f\r") const
void push_back(const NodeTy &val)
bool empty() const
empty - Check if the string is empty.
Abstract base class for all Nodes.
static bool is_ns_word_char(const char C)