LLVM API Documentation

 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringMatcher.cpp
Go to the documentation of this file.
1 //===- StringMatcher.cpp - Generate a matcher for input strings -----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the StringMatcher class.
11 //
12 //===----------------------------------------------------------------------===//
13 
16 #include <map>
17 using namespace llvm;
18 
19 /// FindFirstNonCommonLetter - Find the first character in the keys of the
20 /// string pairs that is not shared across the whole set of strings. All
21 /// strings are assumed to have the same length.
22 static unsigned
23 FindFirstNonCommonLetter(const std::vector<const
24  StringMatcher::StringPair*> &Matches) {
25  assert(!Matches.empty());
26  for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) {
27  // Check to see if letter i is the same across the set.
28  char Letter = Matches[0]->first[i];
29 
30  for (unsigned str = 0, e = Matches.size(); str != e; ++str)
31  if (Matches[str]->first[i] != Letter)
32  return i;
33  }
34 
35  return Matches[0]->first.size();
36 }
37 
38 /// EmitStringMatcherForChar - Given a set of strings that are known to be the
39 /// same length and whose characters leading up to CharNo are the same, emit
40 /// code to verify that CharNo and later are the same.
41 ///
42 /// \return - True if control can leave the emitted code fragment.
43 bool StringMatcher::
44 EmitStringMatcherForChar(const std::vector<const StringPair*> &Matches,
45  unsigned CharNo, unsigned IndentCount) const {
46  assert(!Matches.empty() && "Must have at least one string to match!");
47  std::string Indent(IndentCount*2+4, ' ');
48 
49  // If we have verified that the entire string matches, we're done: output the
50  // matching code.
51  if (CharNo == Matches[0]->first.size()) {
52  assert(Matches.size() == 1 && "Had duplicate keys to match on");
53 
54  // If the to-execute code has \n's in it, indent each subsequent line.
55  StringRef Code = Matches[0]->second;
56 
57  std::pair<StringRef, StringRef> Split = Code.split('\n');
58  OS << Indent << Split.first << "\t // \"" << Matches[0]->first << "\"\n";
59 
60  Code = Split.second;
61  while (!Code.empty()) {
62  Split = Code.split('\n');
63  OS << Indent << Split.first << "\n";
64  Code = Split.second;
65  }
66  return false;
67  }
68 
69  // Bucket the matches by the character we are comparing.
70  std::map<char, std::vector<const StringPair*> > MatchesByLetter;
71 
72  for (unsigned i = 0, e = Matches.size(); i != e; ++i)
73  MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]);
74 
75 
76  // If we have exactly one bucket to match, see how many characters are common
77  // across the whole set and match all of them at once.
78  if (MatchesByLetter.size() == 1) {
79  unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches);
80  unsigned NumChars = FirstNonCommonLetter-CharNo;
81 
82  // Emit code to break out if the prefix doesn't match.
83  if (NumChars == 1) {
84  // Do the comparison with if (Str[1] != 'f')
85  // FIXME: Need to escape general characters.
86  OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] != '"
87  << Matches[0]->first[CharNo] << "')\n";
88  OS << Indent << " break;\n";
89  } else {
90  // Do the comparison with if memcmp(Str.data()+1, "foo", 3).
91  // FIXME: Need to escape general strings.
92  OS << Indent << "if (memcmp(" << StrVariableName << ".data()+" << CharNo
93  << ", \"" << Matches[0]->first.substr(CharNo, NumChars) << "\", "
94  << NumChars << "))\n";
95  OS << Indent << " break;\n";
96  }
97 
98  return EmitStringMatcherForChar(Matches, FirstNonCommonLetter, IndentCount);
99  }
100 
101  // Otherwise, we have multiple possible things, emit a switch on the
102  // character.
103  OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n";
104  OS << Indent << "default: break;\n";
105 
106  for (std::map<char, std::vector<const StringPair*> >::iterator LI =
107  MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) {
108  // TODO: escape hard stuff (like \n) if we ever care about it.
109  OS << Indent << "case '" << LI->first << "':\t // "
110  << LI->second.size() << " string";
111  if (LI->second.size() != 1) OS << 's';
112  OS << " to match.\n";
113  if (EmitStringMatcherForChar(LI->second, CharNo+1, IndentCount+1))
114  OS << Indent << " break;\n";
115  }
116 
117  OS << Indent << "}\n";
118  return true;
119 }
120 
121 
122 /// Emit - Top level entry point.
123 ///
124 void StringMatcher::Emit(unsigned Indent) const {
125  // If nothing to match, just fall through.
126  if (Matches.empty()) return;
127 
128  // First level categorization: group strings by length.
129  std::map<unsigned, std::vector<const StringPair*> > MatchesByLength;
130 
131  for (unsigned i = 0, e = Matches.size(); i != e; ++i)
132  MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]);
133 
134  // Output a switch statement on length and categorize the elements within each
135  // bin.
136  OS.indent(Indent*2+2) << "switch (" << StrVariableName << ".size()) {\n";
137  OS.indent(Indent*2+2) << "default: break;\n";
138 
139  for (std::map<unsigned, std::vector<const StringPair*> >::iterator LI =
140  MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) {
141  OS.indent(Indent*2+2) << "case " << LI->first << ":\t // "
142  << LI->second.size()
143  << " string" << (LI->second.size() == 1 ? "" : "s") << " to match.\n";
144  if (EmitStringMatcherForChar(LI->second, 0, Indent))
145  OS.indent(Indent*2+4) << "break;\n";
146  }
147 
148  OS.indent(Indent*2+2) << "}\n";
149 }
std::pair< std::string, std::string > StringPair
Definition: StringMatcher.h:33
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
StringRef substr(size_t Start, size_t N=npos) const
Definition: StringRef.h:392
LoopInfoBase< BlockT, LoopT > * LI
Definition: LoopInfoImpl.h:411
static void Split(std::vector< std::string > &V, const StringRef S)
static unsigned FindFirstNonCommonLetter(const std::vector< const StringMatcher::StringPair * > &Matches)
void Emit(unsigned Indent=0) const