topical media & game development
mobile-query-three-plugins-ogsworkshop-slides-prettify.js / js
// Copyright (C) 2006 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/**
 * @fileoverview
 * some functions for browser-side pretty printing of code contained in html.
 *
 * <p>
 * For a fairly comprehensive set of languages see the
 * <a href="http://google-code-prettify.googlecode.com/svn/trunk/README.html#langs">README</a>
 * file that came with this source.  At a minimum, the lexer should work on a
 * number of languages including C and friends, Java, Python, Bash, SQL, HTML,
 * XML, CSS, Javascript, and Makefiles.  It works passably on Ruby, PHP and Awk
 * and a subset of Perl, but, because of commenting conventions, doesn't work on
 * Smalltalk, Lisp-like, or CAML-like languages without an explicit lang class.
 * <p>
 * Usage: <ol>
 * <li> include this source file in an html page via
 *   {@code <script type="text/javascript" src="/path/to/prettify.js"></script>}
 * <li> define style rules.  See the example page for examples.
 * <li> mark the {@code <pre>} and {@code <code>} tags in your source with
 *    {@code class=prettyprint.}
 *    You can also use the (html deprecated) {@code <xmp>} tag, but the pretty
 *    printer needs to do more substantial DOM manipulations to support that, so
 *    some css styles may not be preserved.
 * </ol>
 * That's it.  I wanted to keep the API as simple as possible, so there's no
 * need to specify which language the code is in, but if you wish, you can add
 * another class to the {@code <pre>} or {@code <code>} element to specify the
 * language, as in {@code <pre class="prettyprint lang-java">}.  Any class that
 * starts with "lang-" followed by a file extension, specifies the file type.
 * See the "lang-*.js" files in this directory for code that implements
 * per-language file handlers.
 * <p>
 * Change log:<br>
 * cbeust, 2006/08/22
 * <blockquote>
 *   Java annotations (start with "@") are now captured as literals ("lit")
 * </blockquote>
 * @requires console
 */
// JSLint declarations
/*global console, document, navigator, setTimeout, window */
/**
 * Split {@code prettyPrint} into multiple timeouts so as not to interfere with
 * UI events.
 * If set to {@code false}, {@code prettyPrint()} is synchronous.
 */
window['PR_SHOULD_USE_CONTINUATION'] = true;

/** the number of characters between tab columns */
window['PR_TAB_WIDTH'] = 8;

// The three public entry points are declared here as one chained assignment
// that initialises each of them to undefined.
/**
 * Contains functions for creating and registering new language handlers.
 * @type {Object}
 */
window['PR']

/**
 * Pretty print a chunk of code.
 *
 * @param {string} sourceCodeHtml code as html
 * @return {string} code as html, but prettier
 */
  = window['prettyPrintOne']

/**
 * Find all the {@code <pre>} and {@code <code>} tags in the DOM with
 * {@code class=prettyprint} and prettify them.
 * @param {Function?} opt_whenDone if specified, called when the last entry
 *     has been finished.
 */
  = window['prettyPrint'] = void 0;
(function () {
// Keyword lists for various languages.
// Each list is one string of space-separated words that ends with a trailing
// space, so that lists can be glued together by plain concatenation.
var FLOW_CONTROL_KEYWORDS = ["break continue do else for if return while "].join('');
var C_KEYWORDS = [
  FLOW_CONTROL_KEYWORDS,
  "auto case char const default ",
  "double enum extern float goto int long register short signed sizeof ",
  "static struct switch typedef union unsigned void volatile "
].join('');
var COMMON_KEYWORDS = [
  C_KEYWORDS,
  "catch class delete false import ",
  "new operator private protected public this throw true try typeof "
].join('');
var CPP_KEYWORDS = [
  COMMON_KEYWORDS,
  "alignof align_union asm axiom bool ",
  "concept concept_map const_cast constexpr decltype ",
  "dynamic_cast explicit export friend inline late_check ",
  "mutable namespace nullptr reinterpret_cast static_assert static_cast ",
  "template typeid typename using virtual wchar_t where "
].join('');
var JAVA_KEYWORDS = [
  COMMON_KEYWORDS,
  "abstract boolean byte extends final finally implements import ",
  "instanceof null native package strictfp super synchronized throws ",
  "transient "
].join('');
var CSHARP_KEYWORDS = [
  JAVA_KEYWORDS,
  "as base by checked decimal delegate descending dynamic event ",
  "fixed foreach from group implicit in interface internal into is lock ",
  "object out override orderby params partial readonly ref sbyte sealed ",
  "stackalloc string select uint ulong unchecked unsafe ushort var "
].join('');
var COFFEE_KEYWORDS = [
  "all and by catch class else extends false finally ",
  "for if in is isnt loop new no not null of off on or return super then ",
  "true try unless until when while yes "
].join('');
var JSCRIPT_KEYWORDS = [
  COMMON_KEYWORDS,
  "debugger eval export function get null set undefined var with ",
  "Infinity NaN "
].join('');
var PERL_KEYWORDS = [
  "caller delete die do dump elsif eval exit foreach for ",
  "goto if import last local my next no our print package redo require ",
  "sub undef unless until use wantarray while BEGIN END "
].join('');
var PYTHON_KEYWORDS = [
  FLOW_CONTROL_KEYWORDS,
  "and as assert class def del ",
  "elif except exec finally from global import in is lambda ",
  "nonlocal not or pass print raise try with yield ",
  "False True None "
].join('');
var RUBY_KEYWORDS = [
  FLOW_CONTROL_KEYWORDS,
  "alias and begin case class def",
  " defined elsif end ensure false in module next nil not or redo rescue ",
  "retry self super then true undef unless until when yield BEGIN END "
].join('');
var SH_KEYWORDS = [
  FLOW_CONTROL_KEYWORDS,
  "case done elif esac eval fi ",
  "function in local set then until "
].join('');
// Union used by the language-agnostic default handler.
var ALL_KEYWORDS = [
  CPP_KEYWORDS, CSHARP_KEYWORDS, JSCRIPT_KEYWORDS, PERL_KEYWORDS,
  PYTHON_KEYWORDS, RUBY_KEYWORDS, SH_KEYWORDS
].join('');
// token style names.  correspond to css classes
/** token style for a string literal */
var PR_STRING = 'str';
/** token style for a keyword */
var PR_KEYWORD = 'kwd';
/** token style for a comment */
var PR_COMMENT = 'com';
/** token style for a type */
var PR_TYPE = 'typ';
/** token style for a literal value.  e.g. 1, null, true. */
var PR_LITERAL = 'lit';
/** token style for a punctuation string. */
var PR_PUNCTUATION = 'pun';
/** token style for plain text. */
var PR_PLAIN = 'pln';

/** token style for an sgml tag. */
var PR_TAG = 'tag';
/** token style for a markup declaration such as a DOCTYPE. */
var PR_DECLARATION = 'dec';
/** token style for embedded source. */
var PR_SOURCE = 'src';
/** token style for an sgml attribute name. */
var PR_ATTRIB_NAME = 'atn';
/** token style for an sgml attribute value. */
var PR_ATTRIB_VALUE = 'atv';

/**
 * A class that indicates a section of markup that is not code, e.g. to allow
 * embedding of line numbers within code listings.
 */
var PR_NOCODE = 'nocode';
/**
 * A set of tokens that can precede a regular expression literal in
 * javascript.
 * http://www.mozilla.org/js/language/js20/rationale/syntax.html has the full
 * list, but I've removed ones that might be problematic when seen in
 * languages that don't support regular expression literals.
 *
 * <p>Specifically, I've removed any keywords that can't precede a regexp
 * literal in a syntactically legal javascript program, and I've removed the
 * "in" keyword since it's not a keyword in many languages, and might be used
 * as a count of inches.
 *
 * <p>The link above does not accurately describe EcmaScript rules since
 * it fails to distinguish between (a=++/b/i) and (a++/b/i) but it works
 * very well in practice.
 *
 * The value is the source text of a regular expression (a string), not a
 * RegExp object, so that callers can embed it in a larger pattern.
 *
 * @private
 */
var REGEXP_PRECEDER_PATTERN = function () {
  var preceders = [
    "!", "!=", "!==", "#", "%", "%=", "&", "&&", "&&=",
    "&=", "(", "*", "*=", /* "+", */ "+=", ",", /* "-", */ "-=",
    "->", /*".", "..", "...", handled below */ "/", "/=", ":", "::", ";",
    "<", "<<", "<<=", "<=", "=", "==", "===", ">",
    ">=", ">>", ">>=", ">>>", ">>>=", "?", "@", "[",
    "^", "^=", "^^", "^^=", "{", "|", "|=", "||",
    "||=", "~" /* handles =~ and !~ */,
    "break", "case", "continue", "delete",
    "do", "else", "finally", "instanceof",
    "return", "throw", "try", "typeof"
  ];
  var pattern = '(?:^^|[+-]';
  for (var i = 0; i < preceders.length; ++i) {
    // Backslash-escape every character that is not one of = < > : & or a
    // lowercase letter.  The replacement must be '\\$1' (a literal backslash
    // followed by the captured character); a bare '$1' would copy "(", "*",
    // "|" and friends into the pattern unescaped and yield an invalid regex.
    pattern += '|' + preceders[i].replace(/([^=<>:&a-z])/g, '\\$1');
  }
  pattern += ')\\s*';  // matches at end, and matches empty string
  return pattern;
  // CAVEAT: this does not properly handle the case where a regular
  // expression immediately follows another since a regular expression may
  // have flags for case-sensitivity and the like.  Having regexp tokens
  // adjacent is not valid in any language I'm aware of, so I'm punting.
  // TODO: maybe style special characters inside a regexp as punctuation.
}();
/**
 * Given a group of {@code RegExp}s, returns a {@code RegExp} that globally
 * matches the union of the sets of strings matched by the input RegExp.
 * Since it matches globally, if the input strings have a start-of-input
 * anchor (/^.../), it is ignored for the purposes of unioning.
 * @param {Array.<RegExp>} regexs non multiline, non-global regexs.
 * @return {RegExp} a global regex.
 * @throws {Error} if any of the inputs is global or multiline.
 */
function combinePrefixPatterns(regexs) {
  // Number of capturing groups emitted so far across all rewritten inputs.
  var capturedGroupIndex = 0;

  // If every input is case-insensitive (or has no letters) the output can
  // simply carry the 'i' flag.  As soon as a case-sensitive input with letters
  // is seen, the case-insensitive inputs must instead be rewritten so that
  // each letter matches both cases explicitly.
  var needToFoldCase = false;
  var ignoreCase = false;
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.ignoreCase) {
      ignoreCase = true;
    } else if (/[a-z]/i.test(regex.source.replace(
                   /\\u[0-9a-f]{4}|\\x[0-9a-f]{2}|\\[^ux]/gi, ''))) {
      needToFoldCase = true;
      ignoreCase = false;
      break;
    }
  }

  // Maps one element of a character set (a raw character or an escape
  // sequence) to its character code.
  function decodeEscape(charsetPart) {
    if (charsetPart.charAt(0) !== '\\') { return charsetPart.charCodeAt(0); }
    switch (charsetPart.charAt(1)) {
      case 'b': return 8;
      case 't': return 9;
      case 'n': return 0xa;
      case 'v': return 0xb;
      case 'f': return 0xc;
      case 'r': return 0xd;
      case 'u': case 'x':
        return parseInt(charsetPart.substring(2), 16)
            || charsetPart.charCodeAt(1);
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7':
        return parseInt(charsetPart.substring(1), 8);
      default: return charsetPart.charCodeAt(1);
    }
  }

  // Renders a character code so that it is safe anywhere inside [...].
  function encodeEscape(charCode) {
    if (charCode < 0x20) {
      return (charCode < 0x10 ? '\\x0' : '\\x') + charCode.toString(16);
    }
    var ch = String.fromCharCode(charCode);
    // '^' has to be escaped as well: ranges are emitted in sorted order, so a
    // caret can end up first in the set, where it would negate the whole set.
    if (ch === '\\' || ch === '-' || ch === '[' || ch === ']' || ch === '^') {
      ch = '\\' + ch;
    }
    return ch;
  }

  // Rewrites a character set such as [a-f] so that it matches both cases
  // without relying on the 'i' flag.
  function caseFoldCharset(charSet) {
    var charsetParts = charSet.substring(1, charSet.length - 1).match(
        new RegExp(
            '\\\\u[0-9A-Fa-f]{4}'
            + '|\\\\x[0-9A-Fa-f]{2}'
            + '|\\\\[0-3][0-7]{0,2}'
            + '|\\\\[0-7]{1,2}'
            + '|\\\\[\\s\\S]'
            + '|-'
            + '|[^-\\\\]',
            'g'));
    var groups = [];
    var ranges = [];
    var inverse = charsetParts[0] === '^';
    for (var i = inverse ? 1 : 0, n = charsetParts.length; i < n; ++i) {
      var p = charsetParts[i];
      switch (p) {
        case '\\B': case '\\b':
        case '\\D': case '\\d':
        case '\\S': case '\\s':
        case '\\W': case '\\w':
          groups.push(p);
          continue;
      }
      var start = decodeEscape(p);
      var end;
      if (i + 2 < n && '-' === charsetParts[i + 1]) {
        end = decodeEscape(charsetParts[i + 2]);
        i += 2;
      } else {
        end = start;
      }
      ranges.push([start, end]);
      // If the range might intersect letters, then expand it.
      if (!(end < 65 || start > 122)) {
        if (!(end < 65 || start > 90)) {
          ranges.push([Math.max(65, start) | 32, Math.min(end, 90) | 32]);
        }
        if (!(end < 97 || start > 122)) {
          ranges.push([Math.max(97, start) & ~32, Math.min(end, 122) & ~32]);
        }
      }
    }

    // [[1, 10], [3, 4], [8, 12], [14, 14], [16, 16], [17, 17]]
    // -> [[1, 12], [14, 14], [16, 17]]
    ranges.sort(function (a, b) { return (a[0] - b[0]) || (b[1] - a[1]); });
    var consolidatedRanges = [];
    // NaN bounds make the first comparison below false, so the first real
    // range is always pushed.
    var lastRange = [NaN, NaN];
    for (var i = 0; i < ranges.length; ++i) {
      var range = ranges[i];
      if (range[0] <= lastRange[1] + 1) {
        lastRange[1] = Math.max(lastRange[1], range[1]);
      } else {
        consolidatedRanges.push(lastRange = range);
      }
    }

    var out = ['['];
    if (inverse) { out.push('^'); }
    out.push.apply(out, groups);
    for (var i = 0; i < consolidatedRanges.length; ++i) {
      var range = consolidatedRanges[i];
      out.push(encodeEscape(range[0]));
      if (range[1] > range[0]) {
        // Two adjacent code points need no hyphen: "ab" is the same set as
        // "a-b".
        if (range[1] > range[0] + 1) { out.push('-'); }
        out.push(encodeEscape(range[1]));
      }
    }
    out.push(']');
    return out.join('');
  }

  // Returns the source of one input regex rewritten so that it can be joined
  // with the others: leading anchors removed, capturing groups renumbered or
  // made non-capturing, and case folded explicitly when required.
  function allowAnywhereFoldCaseAndRenumberGroups(regex) {
    // Split into character sets, escape sequences, punctuation strings
    // like ('(', '(?:', ')', '^'), and runs of characters that do not
    // include any of the above.
    var parts = regex.source.match(
        new RegExp(
            '(?:'
            + '\\[(?:[^\\x5C\\x5D]|\\\\[\\s\\S])*\\]'  // a character set
            + '|\\\\u[A-Fa-f0-9]{4}'  // a unicode escape
            + '|\\\\x[A-Fa-f0-9]{2}'  // a hex escape
            + '|\\\\[0-9]+'  // a back-reference or octal escape
            + '|\\\\[^ux0-9]'  // other escape sequence
            + '|\\(\\?[:!=]'  // start of a non-capturing group
            + '|[\\(\\)\\^]'  // start/end of a group, or line start
            + '|[^\\x5B\\x5C\\(\\)\\^]+'  // run of other characters
            + ')',
            'g'));
    var n = parts.length;

    // Maps captured group numbers to the number they will occupy in
    // the output or to -1 if that has not been determined, or to
    // undefined if they need not be capturing in the output.
    var capturedGroups = [];

    // Walk over and identify back references to build the capturedGroups
    // mapping.
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        // groups are 1-indexed, so max group index is count of '('
        ++groupIndex;
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          capturedGroups[decimalValue] = -1;
        }
      }
    }

    // Renumber groups and reduce capturing groups to non-capturing groups
    // where possible.
    for (var i = 1; i < capturedGroups.length; ++i) {
      if (-1 === capturedGroups[i]) {
        capturedGroups[i] = ++capturedGroupIndex;
      }
    }
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      var p = parts[i];
      if (p === '(') {
        ++groupIndex;
        if (capturedGroups[groupIndex] === undefined) {
          parts[i] = '(?:';
        }
      } else if ('\\' === p.charAt(0)) {
        var decimalValue = +p.substring(1);
        if (decimalValue && decimalValue <= groupIndex) {
          // Look the new number up by the group being referenced, not by the
          // number of groups opened so far; otherwise /(a)(b)\1/ would be
          // rewritten with the mapping for group 2.
          parts[i] = '\\' + capturedGroups[decimalValue];
        }
      }
    }

    // Remove any prefix anchors so that the output will match anywhere.
    // ^^ really does mean an anchored match though.
    for (var i = 0, groupIndex = 0; i < n; ++i) {
      if ('^' === parts[i] && '^' !== parts[i + 1]) { parts[i] = ''; }
    }

    // Expand letters to groups to handle mixing of case-sensitive and
    // case-insensitive patterns if necessary.
    if (regex.ignoreCase && needToFoldCase) {
      for (var i = 0; i < n; ++i) {
        var p = parts[i];
        var ch0 = p.charAt(0);
        if (p.length >= 2 && ch0 === '[') {
          parts[i] = caseFoldCharset(p);
        } else if (ch0 !== '\\') {
          // TODO: handle letters in numeric escapes.
          parts[i] = p.replace(
              /[a-zA-Z]/g,
              function (ch) {
                var cc = ch.charCodeAt(0);
                return '[' + String.fromCharCode(cc & ~32, cc | 32) + ']';
              });
        }
      }
    }

    return parts.join('');
  }

  var rewritten = [];
  for (var i = 0, n = regexs.length; i < n; ++i) {
    var regex = regexs[i];
    if (regex.global || regex.multiline) { throw new Error('' + regex); }
    rewritten.push(
        '(?:' + allowAnywhereFoldCaseAndRenumberGroups(regex) + ')');
  }

  return new RegExp(rewritten.join('|'), ignoreCase ? 'gi' : 'g');
}
/**
 * Split markup into a string of source code and an array mapping ranges in
 * that string to the text nodes in which they appear.
 *
 * <p>
 * The HTML DOM structure:</p>
 * <pre>
 * (Element   "p"
 *   (Element "b"
 *     (Text  "print "))      ; #1
 *   (Text    "'Hello '")     ; #2
 *   (Element "br")           ; #3
 *   (Text    " + 'World';")) ; #4
 * </pre>
 * <p>
 * corresponds to the HTML
 * {@code <p><b>print </b>'Hello '<br> + 'World';</p>}.</p>
 *
 * <p>
 * It will produce the output:</p>
 * <pre>
 * {
 *   source: "print 'Hello '\n + 'World';",
 *   //                 1          2
 *   //       012345678901234 5678901234567
 *   spans: [0, #1, 6, #2, 14, #3, 15, #4]
 * }
 * </pre>
 * <p>
 * where #1 is a reference to the {@code "print "} text node above, and so
 * on for the other text nodes.
 * </p>
 *
 * <p>
 * The {@code spans} array is an array of pairs.  Even elements are the start
 * indices of substrings, and odd elements are the text nodes (or BR elements)
 * that contain the text for those substrings.
 * Substrings continue until the next index or the end of the source.
 * </p>
 *
 * @param {Node} node an HTML DOM subtree containing source-code.
 * @return {Object} source code and the text nodes in which they occur.
 */
// NOTE(review): this definition is damaged in this copy of the file.  The
// `nocode` regex literal on the first body line is never terminated, and
// everything between it and the tail of the returned object literal is
// missing, so the function cannot be parsed as written and `spans` is never
// declared here.  Restore the body from a pristine copy of the library before
// relying on it; the surviving fragment only shows that the result object
// carries a `spans` property.
function extractSourceSpans(node) {
var nocode = /(?:^|\s)nocode(?:\s|/, ''),
spans: spans
};
}
/**
 * Apply the given language handler to sourceCode and add the resulting
 * decorations to out.
 * @param {number} basePos the index of sourceCode within the chunk of source
 *    whose decorations are already present on out.
 */
// Runs langHandler over sourceCode and appends whatever decorations it
// produced to out.  Nothing happens (the handler is not even called) when
// sourceCode is empty.
function appendDecorations(basePos, sourceCode, langHandler, out) {
  if (sourceCode) {
    // The handler reports its result by assigning job.decorations.
    var request = {};
    request.source = sourceCode;
    request.basePos = basePos;
    langHandler(request);
    var produced = request.decorations;
    out.push.apply(out, produced);
  }
}
/** Given triples of [style, pattern, context] returns a lexing function,
 * The lexing function interprets the patterns to find token boundaries and
 * returns a decoration list of the form
 * [index_0, style_0, index_1, style_1, ..., index_n, style_n]
 * where index_n is an index into the sourceCode, and style_n is a style
 * constant like PR_PLAIN.  index_n-1 <= index_n, and style_n-1 applies to
 * all characters in sourceCode[index_n-1:index_n].
 *
 * The stylePatterns is a list whose elements have the form
 * [style : string, pattern : RegExp, DEPRECATED, shortcut : string].
 *
 * Style is a style constant like PR_PLAIN, or can be a string of the
 * form 'lang-FOO', where FOO is a language extension describing the
 * language of the portion of the token in $1 after pattern executes.
 * E.g., if style is 'lang-lisp', and group 1 contains the text
 * '(hello (world))', then that portion of the token will be passed to the
 * registered lisp handler for formatting.
 * The text before and after group 1 will be restyled using this decorator
 * so decorators should take care that this doesn't result in infinite
 * recursion.  For example, the HTML lexer rule for SCRIPT elements looks
 * something like ['lang-js', /<[s]cript>(.+?)<\/script>/].  This may match
 * '<script>foo()<\/script>', which would cause the current decorator to
 * be called with '<script>' which would not match the same rule since
 * group 1 must not be empty, so it would be instead styled as PR_TAG by
 * the generic tag rule.  The handler registered for the 'js' extension would
 * then be called with 'foo()', and finally, the current decorator would
 * be called with '<\/script>' which would not match the original rule and
 * so the generic tag rule would identify it as a tag.
 *
 * Pattern must only match prefixes, and if it matches a prefix, then that
 * match is considered a token with the same style.
 *
 * Context is applied to the last non-whitespace, non-comment token
 * recognized.
 *
 * Shortcut is an optional string of characters, any of which, if the first
 * character, guarantee that this pattern and only this pattern matches.
 *
 * @param {Array} shortcutStylePatterns patterns that always start with
 *   a known character.  Must have a shortcut string.
 * @param {Array} fallthroughStylePatterns patterns that will be tried in
 *   order if the shortcut ones fail.  May have shortcuts.
 *
 * @return {function (Object)} a
 *   function that takes source code and returns a list of decorations.
 */
function createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns) {
  // Maps a first character to the single pattern entry that handles it.
  var shortcuts = {};
  // One global regex that splits source text into tokens.
  var tokenizer;
  (function () {
    var allPatterns = shortcutStylePatterns.concat(fallthroughStylePatterns);
    var allRegexs = [];
    // Keyed by the regex's string form so each distinct regex is added once.
    var regexKeys = {};
    for (var i = 0, n = allPatterns.length; i < n; ++i) {
      var patternParts = allPatterns[i];
      var shortcutChars = patternParts[3];
      if (shortcutChars) {
        for (var c = shortcutChars.length; --c >= 0;) {
          shortcuts[shortcutChars.charAt(c)] = patternParts;
        }
      }
      var regex = patternParts[1];
      var k = '' + regex;
      if (!regexKeys.hasOwnProperty(k)) {
        allRegexs.push(regex);
        regexKeys[k] = null;
      }
    }
    // Catch-all so that every character ends up in some token.
    allRegexs.push(/[\0-\uffff]/);
    tokenizer = combinePrefixPatterns(allRegexs);
  })();

  var nPatterns = fallthroughStylePatterns.length;
  var notWs = /\S/;

  /**
   * Lexes job.source and produces an output array job.decorations of style
   * classes preceded by the position at which they start in job.source in
   * order.
   *
   * @param {Object} job an object like {@code
   *    source: {string} sourceText plain text,
   *    basePos: {int} position of job.source in the larger chunk of
   *        sourceCode.
   * }
   */
  var decorate = function (job) {
    var sourceCode = job.source, basePos = job.basePos;
    /** Even entries are positions in source in ascending order.  Odd entries
      * are style markers (e.g., PR_COMMENT) that run from that position until
      * the end.
      * @type {Array.<number|string>}
      */
    var decorations = [basePos, PR_PLAIN];
    var pos = 0;  // index into sourceCode
    var tokens = sourceCode.match(tokenizer) || [];
    // Remembers the style chosen for a token text so repeated tokens skip the
    // pattern search.  Embedded-language tokens are never cached.
    var styleCache = {};

    for (var ti = 0, nTokens = tokens.length; ti < nTokens; ++ti) {
      var token = tokens[ti];
      var style = styleCache[token];
      var match = void 0;

      var isEmbedded;
      if (typeof style === 'string') {
        isEmbedded = false;
      } else {
        var patternParts = shortcuts[token.charAt(0)];
        if (patternParts) {
          match = token.match(patternParts[1]);
          style = patternParts[0];
        } else {
          for (var i = 0; i < nPatterns; ++i) {
            patternParts = fallthroughStylePatterns[i];
            match = token.match(patternParts[1]);
            if (match) {
              style = patternParts[0];
              break;
            }
          }

          if (!match) {  // make sure that we make progress
            style = PR_PLAIN;
          }
        }

        isEmbedded = style.length >= 5 && 'lang-' === style.substring(0, 5);
        if (isEmbedded && !(match && typeof match[1] === 'string')) {
          // A 'lang-' rule without a usable group 1 cannot be delegated.
          isEmbedded = false;
          style = PR_SOURCE;
        }

        if (!isEmbedded) { styleCache[token] = style; }
      }

      var tokenStart = pos;
      pos += token.length;

      if (!isEmbedded) {
        decorations.push(basePos + tokenStart, style);
      } else {  // Treat group 1 as an embedded block of source code.
        var embeddedSource = match[1];
        var embeddedSourceStart = token.indexOf(embeddedSource);
        var embeddedSourceEnd = embeddedSourceStart + embeddedSource.length;
        if (match[2]) {
          // If embeddedSource can be blank, then it would match at the
          // beginning which would cause us to infinitely recurse on the
          // entire token, so we catch the right context in match[2].
          embeddedSourceEnd = token.length - match[2].length;
          embeddedSourceStart = embeddedSourceEnd - embeddedSource.length;
        }
        var lang = style.substring(5);
        // Decorate the left of the embedded source
        appendDecorations(
            basePos + tokenStart,
            token.substring(0, embeddedSourceStart),
            decorate, decorations);
        // Decorate the embedded source
        appendDecorations(
            basePos + tokenStart + embeddedSourceStart,
            embeddedSource,
            langHandlerForExtension(lang, embeddedSource),
            decorations);
        // Decorate the right of the embedded section
        appendDecorations(
            basePos + tokenStart + embeddedSourceEnd,
            token.substring(embeddedSourceEnd),
            decorate, decorations);
      }
    }
    job.decorations = decorations;
  };
  return decorate;
}
/** returns a function that produces a list of decorations from source text.
 *
 * This code treats ", ', and ` as string delimiters, and \ as a string
 * escape.  It does not recognize perl's qq() style strings.
 * It has no special handling for double delimiter escapes as in basic, or
 * the tripled delimiters used in python, but should work on those regardless
 * although in those cases a single string literal may be broken up into
 * multiple adjacent string literals.
 *
 * It recognizes C, C++, and shell style comments.
 *
 * @param {Object} options a set of optional parameters.
 * @return {function (Object)} a function that examines the source code
 *     in the input job and builds the decoration list.
 */
// Builds the shortcut and fall-through pattern lists for a C-like language
// from the `options` flags read below ('tripleQuotedStrings',
// 'multiLineStrings', 'hashComments', 'cStyleComments', 'regexLiterals') and
// hands them to createSimpleLexer.
//
// NOTE(review): this definition is damaged in this copy of the file and does
// not parse as written:
//   - the string pattern in the 'multiLineStrings' branch opens a group that
//     is never closed, and the entry carries no shortcut string;
//   - the multi-line hash comment pattern is cut off in the middle of its
//     regex literal;
//   - the REGEX_LITERAL string concatenation is cut off and runs straight into
//     the tail of a `.replace(..., '')` call, and `keywords` is used
//     afterwards without any visible declaration.
// Several patterns also end in an empty alternative such as `(?:\*\/|)`;
// presumably an end-of-input anchor was lost there -- confirm against a
// pristine copy before relying on any of these patterns.
function sourceDecorator(options) {
var shortcutStylePatterns = [], fallthroughStylePatterns = [];
if (options['tripleQuotedStrings']) {
// '''multi-line-string''', 'single-line-string', and double-quoted
shortcutStylePatterns.push(
[PR_STRING, /^(?:\'\'\'(?:[^\'\\]|\\[\s\S]|\'{1,2}(?=[^\']))*(?:\'\'\'|)|\'(?:[^\\\']|\\[\s\S])*(?:\'|))/,
null, '\'"']);
} else if (options['multiLineStrings']) {
// 'multi-line-string', "multi-line-string"
shortcutStylePatterns.push(
[PR_STRING, /^(?:\'(?:[^\\\']|\\[\s\S])*(?:\'|)|\`(?:[^\\\`]|\\[\s\S])*(?:\`|)|\"(?:[^\\\"\r\n]|\\.)*(?:\"|)/, null]);
}
var hc = options['hashComments'];
if (hc) {
if (options['cStyleComments']) {
if (hc > 1) { // multiline hash comments
shortcutStylePatterns.push(
[PR_COMMENT, /^#(?:##(?:[^#]|#(?!##))*(?: |
} else {
// Stop C preprocessor declarations at an unclosed open comment
shortcutStylePatterns.push(
[PR_COMMENT, /^#(?:(?:define|elif|else|endif|error|ifdef|include|ifndef|line|pragma|undef|warning)\b|[^\r\n]*)/,
null, '#']);
}
fallthroughStylePatterns.push(
[PR_STRING,
/^<(?:(?:(?:\.\.\/)*|\/?)(?:[\w-]+(?:\/[\w-]+)+)?[\w-]+\.h|[a-z]\w*)>/,
null]);
} else {
shortcutStylePatterns.push([PR_COMMENT, /^#[^\r\n]*/, null, '#']);
}
}
if (options['cStyleComments']) {
fallthroughStylePatterns.push([PR_COMMENT, /^\/\/[^\r\n]*/, null]);
fallthroughStylePatterns.push(
[PR_COMMENT, /^\/\*[\s\S]*?(?:\*\/|)/, null]);
}
if (options['regexLiterals']) {
var REGEX_LITERAL = (
// A regular expression literal starts with a slash that is
// not followed by * or / so that it is not confused with
// comments.
'/(?=[^/*])'
// and then contains any number of raw characters,
+ '(?:[^/\\x5B\\x5C]'
// escape sequences (\x5C),
+ '|\\x5C[\\s\\S]'
// or non-nesting character sets (\x5B\x5D);
+ '|\\x5B(?:[^\\x5C\\x5D]|\\x5C[\\s\\S])*(?:\\x5D|/g, '');
if (keywords.length) {
fallthroughStylePatterns.push(
[PR_KEYWORD,
new RegExp('^(?:' + keywords.replace(/\s+/g, '|') + ')\\b'), null]);
}
shortcutStylePatterns.push([PR_PLAIN, /^\s+/, null, ' \r\n\t\xA0']);
fallthroughStylePatterns.push(
// TODO(mikesamuel): recognize non-latin letters and numerals in idents
[PR_LITERAL, /^@[a-z_@0-9]*/i, null],
[PR_TYPE, /^@?[A-Z]+[a-z][A-Za-z_][a-z_@\'\"\`\/#\\]*/, null]);
return createSimpleLexer(shortcutStylePatterns, fallthroughStylePatterns);
}
// Language-agnostic decorator: every keyword list, hash and C style comments,
// multi-line strings and regex literals all switched on.
var decorateSource = sourceDecorator((function () {
  var genericOptions = {};
  genericOptions['keywords'] = ALL_KEYWORDS;
  genericOptions['hashComments'] = true;
  genericOptions['cStyleComments'] = true;
  genericOptions['multiLineStrings'] = true;
  genericOptions['regexLiterals'] = true;
  return genericOptions;
})());
/**
 * Given a DOM subtree, wraps it in a list, and puts each line into its own
 * list item.
 *
 * @param {Node} node modified in place.  Its content is pulled into an
 *     HTMLOListElement, and each line is moved into a separate list item.
 *     This requires cloning elements, so the input might not have unique
 *     IDs after numbering.
 */
// NOTE(review): this definition is cut off in this copy of the file.  Only the
// signature and the start of an unterminated `nocode` regex literal survive;
// the rest of the body and the closing brace are missing.  Restore it from a
// pristine copy of the library.
function numberLines(node, opt_startLineNum) {
var nocode = /(?:^|\s)nocode(?:\s|
/**
 * Breaks {@code job.source} around style boundaries in {@code job.decorations}
 * and modifies {@code job.sourceNode} in place.
 * @param {Object} job like <pre>{
 *    source: {string} source as plain text,
 *    spans: {Array.<number|Node>} alternating span start indices into source
 *       and the text node or element (e.g. {@code <BR>}) corresponding to that
 *       span.
 *    decorations: {Array.<number|string>} an array of style classes preceded
 *       by the position at which they start in job.source in order
 * }</pre>
 * @private
 */
// Walks job.spans and job.decorations in parallel and wraps every stretch of
// text that lies inside one span and one decoration in a SPAN element whose
// className is that decoration's style.  A text node that straddles a style
// boundary is split: the part already styled keeps the original node, and a
// new text node holding the remainder is inserted after the wrapper and takes
// the original's place in job.spans.  job.decorations is compacted in place
// first.  Returns nothing.
function recombineTagsAndDecorations(job) {
// IE needs a different newline character in text nodes; see below.
var isIE = /\bMSIE\b/.test(navigator.userAgent);
var newlineRe = /\n/g;
var source = job.source;
var sourceLength = source.length;
// Index into source after the last code-unit recombined.
var sourceIndex = 0;
var spans = job.spans;
var nSpans = spans.length;
// Index into spans after the last span which ends at or before sourceIndex.
var spanIndex = 0;
var decorations = job.decorations;
var nDecorations = decorations.length;
// Index into decorations after the last decoration which ends at or before sourceIndex.
var decorationIndex = 0;
// Simplify decorations.
// decPos is the write cursor: the surviving (position, style) pairs are
// copied towards the front of the same array.
var decPos = 0;
for (var i = 0; i < nDecorations;) {
// Skip over any zero-length decorations.
var startPos = decorations[i];
var start = i;
while (start + 2 < nDecorations && decorations[start + 2] === startPos) {
start += 2;
}
// Conflate all adjacent decorations that use the same style.
var startDec = decorations[start + 1];
var end = start + 2;
while (end + 2 <= nDecorations
&& (decorations[end + 1] === startDec
|| decorations[end] === decorations[end + 2])) {
end += 2;
}
decorations[decPos++] = startPos;
decorations[decPos++] = startDec;
i = end;
}
// Strip any zero-length decoration at the end.
if (decPos && decorations[decPos - 2] === sourceLength) { decPos -= 2; }
// Truncate the array to the compacted pairs.
nDecorations = decorations.length = decPos;
var decoration = null;
while (spanIndex < nSpans) {
var spanStart = spans[spanIndex];
// The last span / decoration runs to the end of the source.
var spanEnd = spans[spanIndex + 2] || sourceLength;
var decStart = decorations[decorationIndex];
var decEnd = decorations[decorationIndex + 2] || sourceLength;
// Style only up to whichever boundary comes first.
var end = Math.min(spanEnd, decEnd);
var textNode = spans[spanIndex + 1];
if (textNode.nodeType !== 1) { // Don't muck with <BR>s or <LI>s
var styledText = source.substring(sourceIndex, end);
// This may seem bizarre, and it is. Emitting LF on IE causes the
// code to display with spaces instead of line breaks.
// Emitting Windows standard issue linebreaks (CRLF) causes a blank
// space to appear at the beginning of every line but the first.
// Emitting an old Mac OS 9 line separator makes everything spiffy.
if (isIE) { styledText = styledText.replace(newlineRe, '\r'); }
textNode.nodeValue = styledText;
// Local `document` shadows the global so that nodes are created in the
// document that owns the text node.
var document = textNode.ownerDocument;
var span = document.createElement('SPAN');
span.className = decorations[decorationIndex + 1];
var parentNode = textNode.parentNode;
// Put the wrapper where the text node was, then move the text node into it.
parentNode.replaceChild(span, textNode);
span.appendChild(textNode);
if (sourceIndex < spanEnd) { // Split off a text node.
// The new node holds the rest of this span's text and replaces the
// original node in the spans array for the following iterations.
spans[spanIndex + 1] = textNode
// TODO: Possibly optimize by using '' if there's no flicker.
= document.createTextNode(source.substring(end, spanEnd));
parentNode.insertBefore(textNode, span.nextSibling);
}
}
sourceIndex = end;
// Advance whichever sequence (or both) was exhausted by this step.
if (sourceIndex >= spanEnd) {
spanIndex += 2;
}
if (sourceIndex >= decEnd) {
decorationIndex += 2;
}
}
}
/** Maps language-specific file extensions to handlers. */
var langHandlerRegistry = {};

/** Register a language handler for the given file extensions.
  * The first handler registered for an extension wins; a later attempt to
  * register the same extension is ignored and reported with a console warning.
  * @param {function (Object)} handler a function from source code to a list
  *      of decorations.  Takes a single argument job which describes the
  *      state of the computation.   The single parameter has the form
  *      {@code {
  *        source: {string} as plain text.
  *        decorations: {Array.<number|string>} an array of style classes
  *                     preceded by the position at which they start in
  *                     job.source in order.
  *                     The language handler should assign this field.
  *        basePos: {int} the position of source in the larger source chunk.
  *                 All positions in the output decorations array are relative
  *                 to the larger source chunk.
  *      } }
  * @param {Array.<string>} fileExtensions
  */
function registerLangHandler(handler, fileExtensions) {
  for (var i = fileExtensions.length; --i >= 0;) {
    var ext = fileExtensions[i];
    if (!langHandlerRegistry.hasOwnProperty(ext)) {
      langHandlerRegistry[ext] = handler;
    } else if ('console' in window) {
      console['warn']('cannot override language handler %s', ext);
    }
  }
}
// Looks up the handler registered for `extension`.  When the extension is
// missing or unknown, falls back to the 'default-markup' handler if the first
// non-whitespace character of `source` is a '<', and to the 'default-code'
// handler otherwise.
function langHandlerForExtension(extension, source) {
  var isRegistered =
      Boolean(extension) && langHandlerRegistry.hasOwnProperty(extension);
  if (isRegistered) {
    return langHandlerRegistry[extension];
  }
  var looksLikeMarkup = /^\s*</.test(source);
  if (looksLikeMarkup) {
    return langHandlerRegistry['default-markup'];
  }
  return langHandlerRegistry['default-code'];
}
// The generic source decorator is the fallback for anything that does not
// look like markup.
registerLangHandler(decorateSource, ['default-code']);
// NOTE(review): the call below is damaged in this copy of the file.  The regex
// of the `'lang-'` rule for `<% ... %>` opens a group that is never closed, so
// the literal does not parse, and the rule list jumps from markup-level rules
// (declarations, comments) straight to attribute-level rules before being
// registered under 'in.tag' only.  No registration for 'default-markup' is
// visible in this part of the file even though langHandlerForExtension falls
// back to it.  The comment rule also ends in an empty alternative after a lazy
// quantifier (`[\s\S]*?(?:-\->|)`), which as written matches only the opening
// `<!--`; presumably an end-of-input anchor was lost -- confirm against a
// pristine copy.
registerLangHandler(
createSimpleLexer(
[],
[
[PR_PLAIN, /^[^<?]+/],
[PR_DECLARATION, /^<!\w[^>]*(?:>|)/],
[PR_COMMENT, /^<\!--[\s\S]*?(?:-\->|)/],
['lang-', /^<%([\s\S]+?)(?:%>|/i],
[PR_ATTRIB_NAME, /^(?!style[\s=]|on)[a-z](?:[\w:-]*\w)?/i],
['lang-uq.val', /^=\s*([^>\'\"\s]*(?:[^>\'\"\s\/]|\/(?=\s)))/],
[PR_PUNCTUATION, /^[=<>\/]+/],
['lang-js', /^on\w+\s*=\s*\"([^\"]+)\"/i],
['lang-js', /^on\w+\s*=\s*\'([^\']+)\'/i],
['lang-js', /^on\w+\s*=\s*([^\"\'>\s]+)/i],
['lang-css', /^style\s*=\s*\"([^\"]+)\"/i],
['lang-css', /^style\s*=\s*\'([^\']+)\'/i],
['lang-css', /^style\s*=\s*([^\"\'>\s]+)/i]
]),
['in.tag']);
// Unquoted attribute values are styled as a single attribute-value token.
registerLangHandler(
    createSimpleLexer([], [[PR_ATTRIB_VALUE, /^[\s\S]+/]]), ['uq.val']);
// C, C++ and friends.
registerLangHandler(sourceDecorator({
  'keywords': CPP_KEYWORDS,
  'hashComments': true,
  'cStyleComments': true
}), ['c', 'cc', 'cpp', 'cxx', 'cyc', 'm']);
registerLangHandler(sourceDecorator({
  'keywords': 'null true false'
}), ['json']);
registerLangHandler(sourceDecorator({
  'keywords': CSHARP_KEYWORDS,
  'hashComments': true,
  'cStyleComments': true,
  'verbatimStrings': true
}), ['cs']);
registerLangHandler(sourceDecorator({
  'keywords': JAVA_KEYWORDS,
  'cStyleComments': true
}), ['java']);
registerLangHandler(sourceDecorator({
  'keywords': SH_KEYWORDS,
  'hashComments': true,
  'multiLineStrings': true
}), ['bsh', 'csh', 'sh']);
registerLangHandler(sourceDecorator({
  'keywords': PYTHON_KEYWORDS,
  'hashComments': true,
  'multiLineStrings': true,
  'tripleQuotedStrings': true
}), ['cv', 'py']);
registerLangHandler(sourceDecorator({
  'keywords': PERL_KEYWORDS,
  'hashComments': true,
  'multiLineStrings': true,
  'regexLiterals': true
}), ['perl', 'pl', 'pm']);
registerLangHandler(sourceDecorator({
  'keywords': RUBY_KEYWORDS,
  'hashComments': true,
  'multiLineStrings': true,
  'regexLiterals': true
}), ['rb']);
registerLangHandler(sourceDecorator({
  'keywords': JSCRIPT_KEYWORDS,
  'cStyleComments': true,
  'regexLiterals': true
}), ['js']);
registerLangHandler(sourceDecorator({
  'keywords': COFFEE_KEYWORDS,
  'hashComments': 3,  // ### style block comments
  'cStyleComments': true,
  // Spelled 'multiLineStrings' like every other registration and like the
  // key sourceDecorator reads; the previous 'multilineStrings' was never
  // consulted.
  'multiLineStrings': true,
  'tripleQuotedStrings': true,
  'regexLiterals': true
}), ['coffee']);
// The body of a regex literal is styled as one string token.
registerLangHandler(createSimpleLexer([], [[PR_STRING, /^[\s\S]+/]]), ['regex']);
/**
 * Runs a single pretty-printing job from start to finish.
 *
 * The job's sourceNode is passed to extractSourceSpans; the resulting plain
 * text and spans are stored on the job (as `source` and `spans`, with
 * `basePos` reset to 0); the handler returned by langHandlerForExtension is
 * invoked with the job; and finally recombineTagsAndDecorations is called
 * with the job.
 *
 * Any exception raised along the way is caught and, when a console is
 * available, logged (the stack if there is one, otherwise the value itself).
 * It is never rethrown, so a failing job does not interrupt the caller.
 *
 * @param {Object} job  must carry `sourceNode`; `langExtension` is optional
 *     and is forwarded to langHandlerForExtension as-is.
 */
function applyDecorator(job) {
  var opt_langExtension = job.langExtension;

  try {
    // Extract tags, and convert the source code to plain text.
    var sourceAndSpans = extractSourceSpans(job.sourceNode);
    /** Plain text. @type {string} */
    var source = sourceAndSpans.source;
    job.source = source;
    job.spans = sourceAndSpans.spans;
    job.basePos = 0;

    // Apply the appropriate language handler
    langHandlerForExtension(opt_langExtension, source)(job);

    // Integrate the decorations and tags back into the source code,
    // modifying the sourceNode in place.
    recombineTagsAndDecorations(job);
  } catch (e) {
    // Best effort: report the failure instead of propagating it.
    if ('console' in window) {
      console['log'](e && e['stack'] ? e['stack'] : e);
    }
  }
}
/**
 * Pretty prints a chunk of HTML and returns the decorated markup.
 *
 * The markup is loaded into a detached PRE element, optionally line-numbered
 * via numberLines, decorated via applyDecorator, and the element's resulting
 * innerHTML is returned.
 *
 * @param sourceCodeHtml {string} The HTML to pretty print.
 * @param opt_langExtension {string} The language name to use.
 *     Typically, a filename extension like 'cpp' or 'java'.
 * @param opt_numberLines {number|boolean} True to number lines,
 *     or the 1-indexed number of the first line in sourceCodeHtml.
 * @return {string} the innerHTML of the container after decoration.
 */
function prettyPrintOne(sourceCodeHtml, opt_langExtension, opt_numberLines) {
  var container = document.createElement('PRE');
  // This could cause images to load and onload listeners to fire.
  // E.g. <img onerror="alert(1337)" src="nosuchimage.png">.
  // We assume that the inner HTML is from a trusted source.
  container.innerHTML = sourceCodeHtml;
  // Any truthy value (true or a starting line number) requests numbering.
  if (opt_numberLines) {
    numberLines(container, opt_numberLines);
  }

  var job = {
    langExtension: opt_langExtension,
    numberLines: opt_numberLines,
    sourceNode: container
  };
  applyDecorator(job);
  return container.innerHTML;
}
/**
 * Decorates every <pre>, <code> and <xmp> element in the document whose
 * class attribute contains "prettyprint".
 *
 * Per element, an optional `lang-<ext>` class selects the language handler
 * and an optional `linenums` / `linenums:<n>` class requests line numbering.
 * Elements that sit inside another prettyprint <pre>/<code>/<xmp> are
 * skipped so that their content is not decorated twice.
 *
 * When window['PR_SHOULD_USE_CONTINUATION'] is truthy the work is done in
 * slices of at most 250 ms, rescheduled with setTimeout, so that a large
 * page does not block the browser; otherwise everything runs in one pass.
 *
 * @param {Function=} opt_whenDone  called without arguments once every
 *     element has been processed.
 */
function prettyPrint(opt_whenDone) {
  function byTagName(tn) { return document.getElementsByTagName(tn); }

  // Element.tagName is upper case in HTML documents and keeps the authored
  // case in XML/XHTML ones, so the tag test has to ignore case.
  var codeTagNamePattern = /^(?:pre|code|xmp)$/i;

  // True when some ancestor of `node` is itself a prettyprint code element.
  function hasPrettyPrintAncestor(node) {
    for (var p = node.parentNode; p; p = p.parentNode) {
      if (codeTagNamePattern.test(p.tagName) &&
          p.className && p.className.indexOf('prettyprint') >= 0) {
        return true;
      }
    }
    return false;
  }

  // fetch a list of nodes to rewrite
  var codeSegments = [byTagName('pre'), byTagName('code'), byTagName('xmp')];
  // Copy the node lists into a plain array before any rewriting starts.
  var elements = [];
  for (var i = 0; i < codeSegments.length; ++i) {
    for (var j = 0, n = codeSegments[i].length; j < n; ++j) {
      elements.push(codeSegments[i][j]);
    }
  }
  codeSegments = null;

  // Fall back to new Date().getTime() where Date.now is unavailable.
  var clock = Date;
  if (!clock['now']) {
    clock = { 'now': function () { return (new Date).getTime(); } };
  }

  // The loop is broken into a series of continuations to make sure that we
  // don't make the browser unresponsive when rewriting a large page.
  var k = 0;

  function doWork() {
    var endTime = (window['PR_SHOULD_USE_CONTINUATION'] ?
                   clock.now() + 250 /* ms */ :
                   Infinity);
    for (; k < elements.length && clock.now() < endTime; k++) {
      var cs = elements[k];
      if (!(cs.className && cs.className.indexOf('prettyprint') >= 0)) {
        continue;
      }

      // If the classes includes a language extensions, use it.
      // Language extensions can be specified like
      //     <pre class="prettyprint lang-cpp">
      // the language extension "cpp" is used to find a language handler as
      // passed to PR.registerLangHandler.
      var langExtension = cs.className.match(/\blang-(\w+)\b/);
      if (langExtension) { langExtension = langExtension[1]; }

      // make sure this is not nested in an already prettified element
      if (hasPrettyPrintAncestor(cs)) {
        continue;
      }

      // Look for a class like linenums or linenums:<n> where <n> is the
      // 1-indexed number of the first line.
      var lineNums = cs.className.match(/\blinenums\b(?::(\d+))?/);
      lineNums = lineNums
          ? lineNums[1] && lineNums[1].length ? +lineNums[1] : true
          : false;
      if (lineNums) { numberLines(cs, lineNums); }

      // do the pretty printing
      applyDecorator({
        langExtension: langExtension,
        sourceNode: cs,
        numberLines: lineNums
      });
    }
    if (k < elements.length) {
      // finish up in a continuation
      setTimeout(doWork, 250);
    } else if (opt_whenDone) {
      opt_whenDone();
    }
  }

  doWork();
}
// Publish the public entry points on the window object. Property names stay
// quoted string keys, as in the rest of the file (presumably so that a
// property-renaming minifier leaves them intact -- confirm with the build).
window['prettyPrintOne'] = prettyPrintOne;
window['prettyPrint'] = prettyPrint;

// The PR namespace bundles the lexer-building helpers together with the
// style-class constants.
var prNamespace = {};
prNamespace['createSimpleLexer'] = createSimpleLexer;
prNamespace['registerLangHandler'] = registerLangHandler;
prNamespace['sourceDecorator'] = sourceDecorator;
prNamespace['PR_ATTRIB_NAME'] = PR_ATTRIB_NAME;
prNamespace['PR_ATTRIB_VALUE'] = PR_ATTRIB_VALUE;
prNamespace['PR_COMMENT'] = PR_COMMENT;
prNamespace['PR_DECLARATION'] = PR_DECLARATION;
prNamespace['PR_KEYWORD'] = PR_KEYWORD;
prNamespace['PR_LITERAL'] = PR_LITERAL;
prNamespace['PR_NOCODE'] = PR_NOCODE;
prNamespace['PR_PLAIN'] = PR_PLAIN;
prNamespace['PR_PUNCTUATION'] = PR_PUNCTUATION;
prNamespace['PR_SOURCE'] = PR_SOURCE;
prNamespace['PR_STRING'] = PR_STRING;
prNamespace['PR_TAG'] = PR_TAG;
prNamespace['PR_TYPE'] = PR_TYPE;
window['PR'] = prNamespace;
})();
// (C) Æliens
// 04/09/2009
// You may not copy or print any of this material without explicit permission of the author or the publisher.
// In case of other copyright issues, contact the author.