MarkdownSharp
1.12
|
/*
 * MarkdownSharp
 * -------------
 * a C# Markdown processor
 *
 * Markdown is a text-to-HTML conversion tool for web writers
 * Copyright (c) 2004 John Gruber
 * http://daringfireball.net/projects/markdown/
 *
 * Markdown.NET
 * Copyright (c) 2004-2009 Milan Negovan
 * http://www.aspnetresources.com
 * http://aspnetresources.com/blog/markdown_announced.aspx
 *
 * MarkdownSharp
 * Copyright (c) 2009-2010 Jeff Atwood
 * http://stackoverflow.com
 * http://www.codinghorror.com/blog/
 * http://code.google.com/p/markdownsharp/
 *
 * History: Milan ported the Markdown processor to C#. He granted license to me so I can open source it
 * and let the community contribute to and improve MarkdownSharp.
 *
 */

#region Copyright and license

/*

Copyright (c) 2009 - 2010 Jeff Atwood

http://www.opensource.org/licenses/mit-license.php

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

Copyright (c) 2003-2004 John Gruber
<http://daringfireball.net/>
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright notice,
  this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in the
  documentation and/or other materials provided with the distribution.

* Neither the name "Markdown" nor the names of its contributors may
  be used to endorse or promote products derived from this software
  without specific prior written permission.

This software is provided by the copyright holders and contributors "as
is" and any express or implied warranties, including, but not limited
to, the implied warranties of merchantability and fitness for a
particular purpose are disclaimed. In no event shall the copyright owner
or contributors be liable for any direct, indirect, incidental, special,
exemplary, or consequential damages (including, but not limited to,
procurement of substitute goods or services; loss of use, data, or
profits; or business interruption) however caused and on any theory of
liability, whether in contract, strict liability, or tort (including
negligence or otherwise) arising in any way out of the use of this
software, even if advised of the possibility of such damage.
*/

#endregion

using System;
using System.Collections.Generic;
using System.Configuration;
using System.Text;
using System.Text.RegularExpressions;

namespace MarkdownSharp
{

    /// <summary>
    /// Settings bag consumed by <see cref="Markdown(MarkdownOptions)"/>, which copies
    /// every property into the corresponding <see cref="Markdown"/> setting.
    /// </summary>
    /// <remarks>
    /// NOTE(review): apart from <see cref="EmptyElementSuffix"/>, the code that acts on
    /// these switches is not in this part of the file; the one-line descriptions below
    /// are inferred from the property names and should be confirmed.
    /// </remarks>
    public class MarkdownOptions
    {
        /// <summary>Presumably turns bare URLs into links (confirm against DoAutoLinks).</summary>
        public bool AutoHyperlink { get; set; }

        /// <summary>Presumably turns single newlines into line breaks (confirm against DoHardBreaks).</summary>
        public bool AutoNewlines { get; set; }

        /// <summary>
        /// Text appended after the attributes of generated empty elements
        /// (<c>&lt;img ...</c> and <c>&lt;hr</c>) to close them, e.g. <c>" /&gt;"</c>.
        /// </summary>
        public string EmptyElementSuffix { get; set; }

        /// <summary>Presumably enables extra encoding of troublesome URL characters (confirm against EncodeProblemUrlChars).</summary>
        public bool EncodeProblemUrlCharacters { get; set; }

        /// <summary>Presumably enables linking of e-mail addresses (confirm against DoAutoLinks).</summary>
        public bool LinkEmails { get; set; }

        /// <summary>Presumably selects stricter bold/italic matching (confirm against DoItalicsAndBold).</summary>
        public bool StrictBoldItalic { get; set; }
    }


    /// <summary>
    /// Markdown-to-HTML processor. An instance keeps per-document state (link
    /// definitions, hashed HTML blocks, list nesting level) in instance fields that
    /// <see cref="Transform"/> resets on entry and exit.
    /// </summary>
    public class Markdown
    {
        private const string _version = "1.13";

        #region Constructors and Options

        /// <summary>
        /// Creates a processor with the built-in defaults; no configuration file is read.
        /// </summary>
        public Markdown() : this(false)
        {
        }

        /// <summary>
        /// Creates a processor, optionally overriding the defaults from application settings.
        /// </summary>
        /// <param name="loadOptionsFromConfigFile">
        /// When true, the keys <c>Markdown.AutoHyperlink</c>, <c>Markdown.AutoNewlines</c>,
        /// <c>Markdown.EmptyElementSuffix</c>, <c>Markdown.EncodeProblemUrlCharacters</c>,
        /// <c>Markdown.LinkEmails</c> and <c>Markdown.StrictBoldItalic</c> are read from
        /// <see cref="ConfigurationManager.AppSettings"/>; boolean values are parsed with
        /// <see cref="Convert.ToBoolean(string)"/>. Other keys are ignored.
        /// </param>
        public Markdown(bool loadOptionsFromConfigFile)
        {
            if (!loadOptionsFromConfigFile) return;

            var settings = ConfigurationManager.AppSettings;
            foreach (string key in settings.Keys)
            {
                switch (key)
                {
                    case "Markdown.AutoHyperlink":
                        _autoHyperlink = Convert.ToBoolean(settings[key]);
                        break;
                    case "Markdown.AutoNewlines":
                        _autoNewlines = Convert.ToBoolean(settings[key]);
                        break;
                    case "Markdown.EmptyElementSuffix":
                        _emptyElementSuffix = settings[key];
                        break;
                    case "Markdown.EncodeProblemUrlCharacters":
                        _encodeProblemUrlCharacters = Convert.ToBoolean(settings[key]);
                        break;
                    case "Markdown.LinkEmails":
                        _linkEmails = Convert.ToBoolean(settings[key]);
                        break;
                    case "Markdown.StrictBoldItalic":
                        _strictBoldItalic = Convert.ToBoolean(settings[key]);
                        break;
                }
            }
        }

        /// <summary>
        /// Creates a processor whose six settings are all taken from <paramref name="options"/>.
        /// </summary>
        /// <remarks>
        /// Every setting is overwritten, including ones the caller left unset on the
        /// options object (so e.g. <see cref="EmptyElementSuffix"/> becomes null and
        /// <see cref="LinkEmails"/> becomes false unless explicitly assigned).
        /// </remarks>
        /// <param name="options">Settings to copy; must not be null.</param>
        /// <exception cref="ArgumentNullException"><paramref name="options"/> is null.</exception>
        public Markdown(MarkdownOptions options)
        {
            if (options == null)
                throw new ArgumentNullException("options");

            _autoHyperlink = options.AutoHyperlink;
            _autoNewlines = options.AutoNewlines;
            _emptyElementSuffix = options.EmptyElementSuffix;
            _encodeProblemUrlCharacters = options.EncodeProblemUrlCharacters;
            _linkEmails = options.LinkEmails;
            _strictBoldItalic = options.StrictBoldItalic;
        }


        /// <summary>
        /// Text used to close generated empty elements (<c>&lt;img ...</c>, <c>&lt;hr</c>).
        /// Defaults to <c>" /&gt;"</c>.
        /// </summary>
        public string EmptyElementSuffix
        {
            get { return _emptyElementSuffix; }
            set { _emptyElementSuffix = value; }
        }
        private string _emptyElementSuffix = " />";

        /// <summary>
        /// LinkEmails setting; defaults to true. Consumed outside this part of the file.
        /// </summary>
        public bool LinkEmails
        {
            get { return _linkEmails; }
            set { _linkEmails = value; }
        }
        private bool _linkEmails = true;

        /// <summary>
        /// StrictBoldItalic setting; defaults to false. Consumed outside this part of the file.
        /// </summary>
        public bool StrictBoldItalic
        {
            get { return _strictBoldItalic; }
            set { _strictBoldItalic = value; }
        }
        private bool _strictBoldItalic = false;

        /// <summary>
        /// AutoNewlines setting; defaults to false. Consumed outside this part of the file.
        /// (The property is spelled <c>AutoNewLines</c> here but <c>AutoNewlines</c> on
        /// <see cref="MarkdownOptions"/>; both names are public API and are kept.)
        /// </summary>
        public bool AutoNewLines
        {
            get { return _autoNewlines; }
            set { _autoNewlines = value; }
        }
        private bool _autoNewlines = false;

        /// <summary>
        /// AutoHyperlink setting; defaults to false. Consumed outside this part of the file.
        /// </summary>
        public bool AutoHyperlink
        {
            get { return _autoHyperlink; }
            set { _autoHyperlink = value; }
        }
        private bool _autoHyperlink = false;

        /// <summary>
        /// EncodeProblemUrlCharacters setting; defaults to false. Consumed outside this part of the file.
        /// </summary>
        public bool EncodeProblemUrlCharacters
        {
            get { return _encodeProblemUrlCharacters; }
            set { _encodeProblemUrlCharacters = value; }
        }
        private bool _encodeProblemUrlCharacters = false;

        #endregion

        private enum TokenType { Text, Tag }

        /// <summary>One piece of tokenized input: either a run of text or a single tag.</summary>
        private struct Token
        {
            public Token(TokenType type, string value)
            {
                this.Type = type;
                this.Value = value;
            }
            public TokenType Type;
            public string Value;
        }

        /// <summary>
        /// Number of nesting levels the generated regexes unroll for brackets,
        /// parentheses and nested HTML tags.
        /// </summary>
        private const int _nestDepth = 6;

        /// <summary>
        /// Tab width in spaces; code blocks need this many leading spaces and other
        /// block constructs may be indented by at most one less.
        /// </summary>
        private const int _tabWidth = 4;

        private const string _markerUL = @"[*+-]";
        private const string _markerOL = @"\d+[.]";

        private static readonly Dictionary<string, string> _escapeTable;
        private static readonly Dictionary<string, string> _invertedEscapeTable;
        private static readonly Dictionary<string, string> _backslashEscapeTable;

        private readonly Dictionary<string, string> _urls = new Dictionary<string, string>();
        private readonly Dictionary<string, string> _titles = new Dictionary<string, string>();
        private readonly Dictionary<string, string> _htmlBlocks = new Dictionary<string, string>();

        private int _listLevel;

        /// <summary>
        /// Builds the escape tables and the backslash-escape regex for the characters
        /// <c>\`*_{}[]()&gt;#+-.!</c>.
        /// </summary>
        static Markdown()
        {
            // Table of hash values for escaped characters:
            _escapeTable = new Dictionary<string, string>();
            _invertedEscapeTable = new Dictionary<string, string>();
            // Table of hash value for backslash escaped characters:
            _backslashEscapeTable = new Dictionary<string, string>();

            string backslashPattern = "";

            foreach (char c in @"\`*_{}[]()>#+-.!")
            {
                string key = c.ToString();
                string hash = GetHashKey(key);
                _escapeTable.Add(key, hash);
                _invertedEscapeTable.Add(hash, key);
                _backslashEscapeTable.Add(@"\" + key, hash);
                backslashPattern += Regex.Escape(@"\" + key) + "|";
            }

            // Drop the trailing "|" left by the loop.
            _backslashEscapes = new Regex(backslashPattern.Substring(0, backslashPattern.Length - 1), RegexOptions.Compiled);
        }

        /// <summary>
        /// Version string of this processor.
        /// </summary>
        public string Version
        {
            get { return _version; }
        }

        /// <summary>
        /// Converts Markdown-formatted text to HTML.
        /// </summary>
        /// <param name="text">Markdown source; may be null or empty.</param>
        /// <returns>
        /// An empty string when <paramref name="text"/> is null or empty; otherwise the
        /// transformed text followed by a single newline.
        /// </returns>
        public string Transform(string text)
        {
            if (String.IsNullOrEmpty(text)) return "";

            Setup();

            text = Normalize(text);

            text = HashHTMLBlocks(text);
            text = StripLinkDefinitions(text);
            text = RunBlockGamut(text);
            text = Unescape(text);

            Cleanup();

            return text + "\n";
        }


        /// <summary>
        /// Applies the block-level transformations (headers, rules, lists, code blocks,
        /// block quotes), hashes the resulting block markup, then forms paragraphs.
        /// </summary>
        private string RunBlockGamut(string text)
        {
            text = DoHeaders(text);
            text = DoHorizontalRules(text);
            text = DoLists(text);
            text = DoCodeBlocks(text);
            text = DoBlockQuotes(text);

            // We already ran HashHTMLBlocks() before, in Transform(), but that
            // was to escape raw HTML in the original Markdown source. This time,
            // we're escaping the markup we've just created, so that we don't wrap
            // <p> tags around block-level tags.
            text = HashHTMLBlocks(text);

            text = FormParagraphs(text);

            return text;
        }


        /// <summary>
        /// Applies the span-level transformations to the text inside a single block.
        /// </summary>
        private string RunSpanGamut(string text)
        {
            text = DoCodeSpans(text);
            text = EscapeSpecialCharsWithinTagAttributes(text);
            text = EscapeBackslashes(text);

            // Images must come first, because ![foo][f] looks like an anchor.
            text = DoImages(text);
            text = DoAnchors(text);

            // Must come after DoAnchors(), because you can use < and >
            // delimiters in inline links like [this](<url>).
            text = DoAutoLinks(text);

            text = EncodeAmpsAndAngles(text);
            text = DoItalicsAndBold(text);
            text = DoHardBreaks(text);

            return text;
        }

        private static readonly Regex _newlinesLeadingTrailing = new Regex(@"^\n+|\n+\z", RegexOptions.Compiled);
        private static readonly Regex _newlinesMultiple = new Regex(@"\n{2,}", RegexOptions.Compiled);
        private static readonly Regex _leadingWhitespace = new Regex(@"^[ ]*", RegexOptions.Compiled);

        /// <summary>
        /// Splits the text on runs of two or more newlines; chunks that are keys of
        /// hashed HTML blocks are replaced by the stored block, all other chunks get
        /// span processing and are wrapped in <c>&lt;p&gt;</c> tags.
        /// </summary>
        private string FormParagraphs(string text)
        {
            // split on two or more newlines
            string[] grafs = _newlinesMultiple.Split(_newlinesLeadingTrailing.Replace(text, ""));

            for (int i = 0; i < grafs.Length; i++)
            {
                string htmlBlock;

                // Ordinal comparison: "\x1A" is a marker byte, not linguistic text.
                // TryGetValue keeps a stray \x1A in the input from throwing
                // KeyNotFoundException; such a chunk is treated as a normal paragraph.
                if (grafs[i].StartsWith("\x1A", StringComparison.Ordinal)
                    && _htmlBlocks.TryGetValue(grafs[i], out htmlBlock))
                {
                    // unhashify HTML blocks
                    grafs[i] = htmlBlock;
                }
                else
                {
                    // do span level processing inside the block, then wrap result in <p> tags
                    grafs[i] = _leadingWhitespace.Replace(RunSpanGamut(grafs[i]), "<p>") + "</p>";
                }
            }

            return string.Join("\n\n", grafs);
        }


        /// <summary>
        /// Resets all per-document state so consecutive <see cref="Transform"/> calls
        /// on the same instance do not see each other's link definitions or HTML blocks.
        /// </summary>
        private void Setup()
        {
            // Clear the global hashes. If we don't clear these, you get conflicts
            // from other articles when generating a page which contains more than
            // one article (e.g. an index page that shows the N most recent
            // articles):
            _urls.Clear();
            _titles.Clear();
            _htmlBlocks.Clear();
            _listLevel = 0;
        }

        /// <summary>Releases per-document state after a transform (same work as <see cref="Setup"/>).</summary>
        private void Cleanup()
        {
            Setup();
        }

        private static string _nestedBracketsPattern;

        /// <summary>
        /// Returns (building on first use) a regex fragment that matches text with
        /// balanced square brackets nested up to <see cref="_nestDepth"/> levels.
        /// </summary>
        private static string GetNestedBracketsPattern()
        {
            // in other words [this] and [this[also]] and [this[also[too]]]
            // up to _nestDepth
            if (_nestedBracketsPattern == null)
                _nestedBracketsPattern =
                    RepeatString(@"
                    (?>              # Atomic matching
                       [^\[\]]+      # Anything other than brackets
                     |
                       \[
                           ", _nestDepth) + RepeatString(
                    @" \]
                    )*"
                    , _nestDepth);
            return _nestedBracketsPattern;
        }

        private static string _nestedParensPattern;

        /// <summary>
        /// Returns (building on first use) a regex fragment that matches text with
        /// balanced parentheses nested up to <see cref="_nestDepth"/> levels; the
        /// fragment does not match whitespace.
        /// </summary>
        private static string GetNestedParensPattern()
        {
            // in other words (this) and (this(also)) and (this(also(too)))
            // up to _nestDepth
            if (_nestedParensPattern == null)
                _nestedParensPattern =
                    RepeatString(@"
                    (?>              # Atomic matching
                       [^()\s]+      # Anything other than parens or whitespace
                     |
                       \(
                           ", _nestDepth) + RepeatString(
                    @" \)
                    )*"
                    , _nestDepth);
            return _nestedParensPattern;
        }

        private static readonly Regex _linkDef = new Regex(string.Format(@"
                        ^[ ]{{0,{0}}}\[(.+)\]:  # id = $1
                          [ ]*
                          \n?                   # maybe *one* newline
                          [ ]*
                        <?(\S+?)>?              # url = $2
                          [ ]*
                          \n?                   # maybe one newline
                          [ ]*
                        (?:
                            (?<=\s)             # lookbehind for whitespace
                            [""(]
                            (.+?)               # title = $3
                            ["")]
                            [ ]*
                        )?                      # title is optional
                        (?:\n+|\Z)", _tabWidth - 1), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

        /// <summary>
        /// Removes link definitions (<c>[id]: url "optional title"</c>) from the text,
        /// recording each URL and title under its lower-cased id.
        /// </summary>
        private string StripLinkDefinitions(string text)
        {
            return _linkDef.Replace(text, new MatchEvaluator(LinkEvaluator));
        }

        /// <summary>Stores one matched link definition and replaces it with nothing.</summary>
        private string LinkEvaluator(Match match)
        {
            string linkID = match.Groups[1].Value.ToLowerInvariant();
            _urls[linkID] = EncodeAmpsAndAngles(match.Groups[2].Value);

            if (match.Groups[3].Length > 0)
                // Titles end up inside a double-quoted attribute, so quotes are entity-encoded.
                _titles[linkID] = match.Groups[3].Value.Replace("\"", "&quot;");

            return "";
        }

        // compiling this monster regex results in worse performance. trust me.
        private static readonly Regex _blocksHtml = new Regex(GetBlockPattern(), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);


        /// <summary>
        /// Builds the regex source that recognises block-level HTML (including
        /// standalone <c>&lt;hr&gt;</c>, comments and <c>&lt;? ?&gt;</c> / <c>&lt;% %&gt;</c>
        /// processing instructions). Group 1 of a match is the whole block.
        /// </summary>
        private static string GetBlockPattern()
        {

            // Hashify HTML blocks:
            // We only want to do this for block-level HTML tags, such as headers,
            // lists, and tables. That's because we still want to wrap <p>s around
            // "paragraphs" that are wrapped in non-block-level tags, such as anchors,
            // phrase emphasis, and spans. The list of tags we're looking for is
            // hard-coded:
            //
            // *  List "a" is made of tags which can be both inline or block-level.
            //    These will be treated block-level when the start tag is alone on
            //    its line, otherwise they're not matched here and will be taken as
            //    inline later.
            // *  List "b" is made of tags which are always block-level;
            //
            string blockTagsA = "ins|del";
            string blockTagsB = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|script|noscript|form|fieldset|iframe|math";

            // Regular expression for the content of a block tag.
            string attr = @"
            (?>				            # optional tag attributes
              \s			            # starts with whitespace
              (?>
                [^>""/]+	            # text outside quotes
              |
                /+(?!>)		            # slash not followed by >
              |
                ""[^""]*""		        # text inside double quotes (tolerate >)
              |
                '[^']*'	                # text inside single quotes (tolerate >)
              )*
            )?
            ";

            string content = RepeatString(@"
                (?>
                  [^<]+			        # content without tag
                |
                  <\2			        # nested opening tag
                    " + attr + @"       # attributes
                  (?>
                      />
                  |
                      >", _nestDepth) +   // end of opening tag
                      ".*?" +             // last level nested tag content
            RepeatString(@"
                      </\2\s*>	        # closing nested tag
                  )
                  |
                  <(?!/\2\s*>           # other tags with a different name
                  )
                )*", _nestDepth);

            string content2 = content.Replace(@"\2", @"\3");

            // First, look for nested blocks, e.g.:
            // 	<div>
            // 		<div>
            // 		tags for inner block must be indented.
            // 		</div>
            // 	</div>
            //
            // The outermost tags must start at the left margin for this to match, and
            // the inner nested divs must be indented.
            // We need to do this before the next, more liberal match, because the next
            // match will start at the first `<div>` and stop at the first `</div>`.
            //
            // Group numbering in the assembled pattern: 1 = whole block, 2 = list "b"
            // tag, 3 = list "a" tag, 4 = hr. The processing-instruction alternative
            // uses a *named* group: it previously back-referenced \2, which belongs to
            // a different alternative and therefore never matched. Named groups are
            // numbered after all unnamed ones, so groups 1-4 are unaffected.
            string pattern = @"
            (?>
                  (?>
                    (?<=\n)     # Starting after a blank line
                    |           # or
                    \A\n?       # the beginning of the doc
                  )
                  (             # save in $1

                    # Match from `\n<tag>` to `</tag>\n`, handling nested tags
                    # in between.

                        [ ]{0,$less_than_tab}
                        <($block_tags_b_re)   # start tag = $2
                        $attr>                # attributes followed by > and \n
                        $content              # content, support nesting
                        </\2>                 # the matching end tag
                        [ ]*                  # trailing spaces
                        (?=\n+|\Z)            # followed by a newline or end of document

                  | # Special version for tags of group a.

                        [ ]{0,$less_than_tab}
                        <($block_tags_a_re)   # start tag = $3
                        $attr>[ ]*\n          # attributes followed by >
                        $content2             # content, support nesting
                        </\3>                 # the matching end tag
                        [ ]*                  # trailing spaces
                        (?=\n+|\Z)            # followed by a newline or end of document

                  | # Special case just for <hr />. It was easier to make a special
                    # case than to make the other regex more complicated.

                        [ ]{0,$less_than_tab}
                        <(hr)                 # start tag = $4
                        $attr                 # attributes
                        /?>                   # the matching end tag
                        [ ]*
                        (?=\n{2,}|\Z)         # followed by a blank line or end of document

                  | # Special case for standalone HTML comments:

                      [ ]{0,$less_than_tab}
                      (?s:
                        <!-- .*? -->
                      )
                      [ ]*
                      (?=\n{2,}|\Z)            # followed by a blank line or end of document

                  | # PHP and ASP-style processor instructions (<? and <%)

                      [ ]{0,$less_than_tab}
                      (?s:
                        <(?<pi>[?%])          # opening delimiter character
                        .*?
                        \k<pi>>               # same character, then >
                      )
                      [ ]*
                      (?=\n{2,}|\Z)            # followed by a blank line or end of document

                  )
            )";

            pattern = pattern.Replace("$less_than_tab", (_tabWidth - 1).ToString());
            pattern = pattern.Replace("$block_tags_b_re", blockTagsB);
            pattern = pattern.Replace("$block_tags_a_re", blockTagsA);
            pattern = pattern.Replace("$attr", attr);
            // "$content2" must be substituted before "$content", which is its prefix.
            pattern = pattern.Replace("$content2", content2);
            pattern = pattern.Replace("$content", content);

            return pattern;
        }

        /// <summary>
        /// Replaces each block of HTML matched by the block pattern with a hash key
        /// (surrounded by blank lines) and remembers the original block under that key.
        /// </summary>
        private string HashHTMLBlocks(string text)
        {
            return _blocksHtml.Replace(text, new MatchEvaluator(HtmlEvaluator));
        }

        /// <summary>Stores one matched HTML block and returns its placeholder.</summary>
        private string HtmlEvaluator(Match match)
        {
            string text = match.Groups[1].Value;
            string key = GetHashKey(text);
            _htmlBlocks[key] = text;

            return string.Concat("\n\n", key, "\n\n");
        }

        /// <summary>
        /// Builds a placeholder key for <paramref name="s"/>: the absolute value of its
        /// hash code, delimited by <c>\x1A</c> characters.
        /// </summary>
        private static string GetHashKey(string s)
        {
            // Widen before Math.Abs: Math.Abs(int.MinValue) throws OverflowException.
            // Every other hash code yields exactly the same key as before.
            return "\x1A" + Math.Abs((long)s.GetHashCode()).ToString() + "\x1A";
        }

        private static readonly Regex _htmlTokens = new Regex(@"
            (<!(?:--.*?--\s*)+>)|        # match <!-- foo -->
            (<\?.*?\?>)|                 # match <?foo?> " +
            RepeatString(@"
            (<[A-Za-z\/!$](?:[^<>]|", _nestDepth) + RepeatString(@")*>)", _nestDepth) +
                                       " # match <tag> and </tag>",
            RegexOptions.Multiline | RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

        /// <summary>
        /// Splits <paramref name="text"/> into an ordered list of tokens, alternating
        /// between runs of plain text and individual tags/comments/processing instructions.
        /// </summary>
        private List<Token> TokenizeHTML(string text)
        {
            int pos = 0;
            int tagStart = 0;
            var tokens = new List<Token>();

            // this regex is derived from the _tokenize() subroutine in Brad Choate's MTRegex plugin.
            // http://www.bradchoate.com/past/mtregex.php
            foreach (Match m in _htmlTokens.Matches(text))
            {
                tagStart = m.Index;

                // Text between the previous tag (or start of input) and this tag.
                if (pos < tagStart)
                    tokens.Add(new Token(TokenType.Text, text.Substring(pos, tagStart - pos)));

                tokens.Add(new Token(TokenType.Tag, m.Value));
                pos = tagStart + m.Length;
            }

            // Trailing text after the last tag.
            if (pos < text.Length)
                tokens.Add(new Token(TokenType.Text, text.Substring(pos, text.Length - pos)));

            return tokens;
        }


        private static readonly Regex _anchorRef = new Regex(string.Format(@"
            (                               # wrap whole match in $1
                \[
                    ({0})                   # link text = $2
                \]

                [ ]?                        # one optional space
                (?:\n[ ]*)?                 # one optional newline followed by spaces

                \[
                    (.*?)                   # id = $3
                \]
            )", GetNestedBracketsPattern()), RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

        private static readonly Regex _anchorInline = new Regex(string.Format(@"
                (                           # wrap whole match in $1
                    \[
                        ({0})               # link text = $2
                    \]
                    \(                      # literal paren
                        [ ]*
                        ({1})               # href = $3
                        [ ]*
                        (                   # $4
                        (['""])           # quote char = $5
                        (.*?)               # title = $6
                        \5                  # matching quote
                        [ ]*                # ignore any spaces between closing quote and )
                        )?                  # title is optional
                    \)
                )", GetNestedBracketsPattern(), GetNestedParensPattern()),
                  RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

        private static readonly Regex _anchorRefShortcut = new Regex(@"
            (                               # wrap whole match in $1
              \[
                 ([^\[\]]+)                 # link text = $2; can't contain [ or ]
              \]
            )", RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

        /// <summary>
        /// Turns Markdown link syntax into <c>&lt;a&gt;</c> tags: reference links,
        /// then inline links, then reference shortcuts.
        /// </summary>
        private string DoAnchors(string text)
        {
            // First, handle reference-style links: [link text] [id]
            text = _anchorRef.Replace(text, new MatchEvaluator(AnchorRefEvaluator));

            // Next, inline-style links: [link text](url "optional title")
            text = _anchorInline.Replace(text, new MatchEvaluator(AnchorInlineEvaluator));

            //  Last, handle reference-style shortcuts: [link text]
            //  These must come last in case you've also got [link test][1]
            //  or [link test](/foo)
            text = _anchorRefShortcut.Replace(text, new MatchEvaluator(AnchorRefShortcutEvaluator));
            return text;
        }

        /// <summary>
        /// Builds the anchor for a <c>[text][id]</c> match; the match is returned
        /// unchanged when the id has no stored definition.
        /// </summary>
        private string AnchorRefEvaluator(Match match)
        {
            string wholeMatch = match.Groups[1].Value;
            string linkText = match.Groups[2].Value;
            string linkID = match.Groups[3].Value.ToLowerInvariant();

            string result;

            // for shortcut links like [this][].
            if (linkID == "")
                linkID = linkText.ToLowerInvariant();

            string url;
            if (_urls.TryGetValue(linkID, out url))
            {
                url = EncodeProblemUrlChars(url);
                url = EscapeBoldItalic(url);
                result = "<a href=\"" + url + "\"";

                string title;
                if (_titles.TryGetValue(linkID, out title))
                {
                    title = EscapeBoldItalic(title);
                    result += " title=\"" + title + "\"";
                }

                result += ">" + linkText + "</a>";
            }
            else
                result = wholeMatch;

            return result;
        }

        /// <summary>
        /// Builds the anchor for a <c>[text]</c> shortcut match, using the link text
        /// itself as the id; the match is returned unchanged when no definition exists.
        /// </summary>
        private string AnchorRefShortcutEvaluator(Match match)
        {
            string wholeMatch = match.Groups[1].Value;
            string linkText = match.Groups[2].Value;
            string linkID = Regex.Replace(linkText.ToLowerInvariant(), @"[ ]*\n[ ]*", " ");  // lower case and remove newlines / extra spaces

            string result;

            string url;
            if (_urls.TryGetValue(linkID, out url))
            {
                url = EncodeProblemUrlChars(url);
                url = EscapeBoldItalic(url);
                result = "<a href=\"" + url + "\"";

                string title;
                if (_titles.TryGetValue(linkID, out title))
                {
                    title = EscapeBoldItalic(title);
                    result += " title=\"" + title + "\"";
                }

                result += ">" + linkText + "</a>";
            }
            else
                result = wholeMatch;

            return result;
        }


        /// <summary>Builds the anchor for a <c>[text](url "title")</c> match.</summary>
        private string AnchorInlineEvaluator(Match match)
        {
            string linkText = match.Groups[2].Value;
            string url = match.Groups[3].Value;
            string title = match.Groups[6].Value;
            string result;

            url = EncodeProblemUrlChars(url);
            url = EscapeBoldItalic(url);
            if (url.StartsWith("<", StringComparison.Ordinal) && url.EndsWith(">", StringComparison.Ordinal))
                url = url.Substring(1, url.Length - 2); // remove <>'s surrounding URL, if present

            result = string.Format("<a href=\"{0}\"", url);

            if (!String.IsNullOrEmpty(title))
            {
                // The title is emitted inside a double-quoted attribute.
                title = title.Replace("\"", "&quot;");
                title = EscapeBoldItalic(title);
                result += string.Format(" title=\"{0}\"", title);
            }

            result += string.Format(">{0}</a>", linkText);
            return result;
        }

        private static readonly Regex _imagesRef = new Regex(@"
                    (               # wrap whole match in $1
                    !\[
                        (.*?)       # alt text = $2
                    \]

                    [ ]?            # one optional space
                    (?:\n[ ]*)?     # one optional newline followed by spaces

                    \[
                        (.*?)       # id = $3
                    \]

                    )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

        private static readonly Regex _imagesInline = new Regex(String.Format(@"
              (                     # wrap whole match in $1
                !\[
                    (.*?)           # alt text = $2
                \]
                \s?                 # one optional whitespace character
                \(                  # literal paren
                    [ ]*
                    ({0})           # href = $3
                    [ ]*
                    (               # $4
                    (['""])       # quote char = $5
                    (.*?)           # title = $6
                    \5              # matching quote
                    [ ]*
                    )?              # title is optional
                \)
              )", GetNestedParensPattern()),
                  RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

        /// <summary>
        /// Turns Markdown image syntax into <c>&lt;img&gt;</c> tags: reference-style
        /// images first, then inline images.
        /// </summary>
        private string DoImages(string text)
        {
            // First, handle reference-style labeled images: ![alt text][id]
            text = _imagesRef.Replace(text, new MatchEvaluator(ImageReferenceEvaluator));

            // Next, handle inline images:  ![alt text](url "optional title")
            // Don't forget: encode * and _
            text = _imagesInline.Replace(text, new MatchEvaluator(ImageInlineEvaluator));

            return text;
        }

        /// <summary>
        /// Builds the <c>&lt;img&gt;</c> tag for a <c>![alt][id]</c> match; the match is
        /// returned unchanged when the id has no stored definition.
        /// </summary>
        private string ImageReferenceEvaluator(Match match)
        {
            string wholeMatch = match.Groups[1].Value;
            string altText = match.Groups[2].Value;
            string linkID = match.Groups[3].Value.ToLowerInvariant();
            string result;

            // for shortcut links like ![this][].
            if (linkID == "")
                linkID = altText.ToLowerInvariant();

            // The alt text is emitted inside a double-quoted attribute.
            altText = altText.Replace("\"", "&quot;");

            string url;
            if (_urls.TryGetValue(linkID, out url))
            {
                url = EncodeProblemUrlChars(url);
                url = EscapeBoldItalic(url);
                result = string.Format("<img src=\"{0}\" alt=\"{1}\"", url, altText);

                string title;
                if (_titles.TryGetValue(linkID, out title))
                {
                    title = EscapeBoldItalic(title);

                    result += string.Format(" title=\"{0}\"", title);
                }

                result += _emptyElementSuffix;
            }
            else
            {
                // If there's no such link ID, leave intact:
                result = wholeMatch;
            }

            return result;
        }

        /// <summary>Builds the <c>&lt;img&gt;</c> tag for a <c>![alt](url "title")</c> match.</summary>
        private string ImageInlineEvaluator(Match match)
        {
            string alt = match.Groups[2].Value;
            string url = match.Groups[3].Value;
            string title = match.Groups[6].Value;
            string result;

            // Both values are emitted inside double-quoted attributes.
            alt = alt.Replace("\"", "&quot;");
            title = title.Replace("\"", "&quot;");

            if (url.StartsWith("<", StringComparison.Ordinal) && url.EndsWith(">", StringComparison.Ordinal))
                url = url.Substring(1, url.Length - 2);    // Remove <>'s surrounding URL, if present
            url = EncodeProblemUrlChars(url);
            url = EscapeBoldItalic(url);

            result = string.Format("<img src=\"{0}\" alt=\"{1}\"", url, alt);

            if (!String.IsNullOrEmpty(title))
            {
                title = EscapeBoldItalic(title);
                result += string.Format(" title=\"{0}\"", title);
            }

            result += _emptyElementSuffix;

            return result;
        }

        private static readonly Regex _headerSetext = new Regex(@"
                ^(.+?)              # $1 = Header text
                [ ]*
                \n
                (=+|-+)             # $2 = string of ='s or -'s
                [ ]*
                \n+",
            RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

        private static readonly Regex _headerAtx = new Regex(@"
                ^(\#{1,6})  # $1 = string of #'s
                [ ]*
                (.+?)       # $2 = Header text
                [ ]*
                \#*         # optional closing #'s (not counted)
                \n+",
            RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

        /// <summary>
        /// Converts Setext-style headers (text underlined with <c>=</c> or <c>-</c>)
        /// and atx-style headers (1-6 leading <c>#</c>) into <c>&lt;hN&gt;</c> elements.
        /// </summary>
        private string DoHeaders(string text)
        {
            text = _headerSetext.Replace(text, new MatchEvaluator(SetextHeaderEvaluator));
            text = _headerAtx.Replace(text, new MatchEvaluator(AtxHeaderEvaluator));
            return text;
        }

        /// <summary>Emits <c>h1</c> for an <c>=</c> underline and <c>h2</c> for a <c>-</c> underline.</summary>
        private string SetextHeaderEvaluator(Match match)
        {
            string header = match.Groups[1].Value;
            int level = match.Groups[2].Value.StartsWith("=", StringComparison.Ordinal) ? 1 : 2;
            return string.Format("<h{1}>{0}</h{1}>\n\n", RunSpanGamut(header), level);
        }

        /// <summary>Emits a header whose level equals the number of leading <c>#</c> characters.</summary>
        private string AtxHeaderEvaluator(Match match)
        {
            string header = match.Groups[2].Value;
            int level = match.Groups[1].Value.Length;
            return string.Format("<h{1}>{0}</h{1}>\n\n", RunSpanGamut(header), level);
        }


        private static readonly Regex _horizontalRules = new Regex(@"
            ^[ ]{0,3}         # Leading space
                ([-*_])       # $1: First marker
                (?>           # Repeated marker group
                    [ ]{0,2}  # Zero, one, or two spaces.
                    \1        # Marker character
                ){2,}         # Group repeated at least twice
                [ ]*          # Trailing spaces
                $             # End of line.
            ", RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

        /// <summary>
        /// Replaces lines made of three or more <c>-</c>, <c>*</c> or <c>_</c> markers
        /// with an <c>&lt;hr</c> element closed by <see cref="EmptyElementSuffix"/>.
        /// </summary>
        private string DoHorizontalRules(string text)
        {
            return _horizontalRules.Replace(text, "<hr" + _emptyElementSuffix + "\n");
        }

        private static readonly string _wholeList = string.Format(@"
            (                               # $1 = whole list
              (                             # $2
                [ ]{{0,{1}}}
                ({0})                       # $3 = first list item marker
                [ ]+
              )
              (?s:.+?)
              (                             # $4
                  \z
                |
                  \n{{2,}}
                  (?=\S)
                  (?!                       # Negative lookahead for another list item marker
                    [ ]*
                    {0}[ ]+
                  )
              )
            )", string.Format("(?:{0}|{1})", _markerUL, _markerOL), _tabWidth - 1);

        private static readonly Regex _listNested = new Regex(@"^" + _wholeList,
            RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

        private static readonly Regex _listTopLevel = new Regex(@"(?:(?<=\n\n)|\A\n?)" + _wholeList,
            RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

        /// <summary>
        /// Converts Markdown lists into <c>&lt;ul&gt;</c>/<c>&lt;ol&gt;</c> markup, using
        /// the nested-list pattern when already inside a list.
        /// </summary>
        private string DoLists(string text)
        {
            // We use a different prefix before nested lists than top-level lists.
            // See extended comment in ProcessListItems().
            if (_listLevel > 0)
                text = _listNested.Replace(text, new MatchEvaluator(ListEvaluator));
            else
                text = _listTopLevel.Replace(text, new MatchEvaluator(ListEvaluator));

            return text;
        }

        /// <summary>Wraps one matched list in <c>ul</c> or <c>ol</c> according to its first marker.</summary>
        private string ListEvaluator(Match match)
        {
            string list = match.Groups[1].Value;
            string listType = Regex.IsMatch(match.Groups[3].Value, _markerUL) ? "ul" : "ol";
            string result;

            // Turn double returns into triple returns, so that we can make a
            // paragraph for the last item in a list, if necessary:
            list = Regex.Replace(list, @"\n{2,}", "\n\n\n");
            result = ProcessListItems(list, listType == "ul" ? _markerUL : _markerOL);

            result = string.Format("<{0}>\n{1}</{0}>\n", listType, result);
            return result;
        }

        /// <summary>
        /// Converts the items of a single list (the text between the list tags) into
        /// <c>&lt;li&gt;</c> elements; <paramref name="marker"/> is the regex for this
        /// list's item marker.
        /// </summary>
        private string ProcessListItems(string list, string marker)
        {
            // The listLevel global keeps track of when we're inside a list.
            // Each time we enter a list, we increment it; when we leave a list,
            // we decrement. If it's zero, we're not in a list anymore.

            // We do this because when we're not inside a list, we want to treat
            // something like this:

            //    I recommend upgrading to version
            //    8. Oops, now this line is treated
            //    as a sub-list.

            // As a single paragraph, despite the fact that the second line starts
            // with a digit-period-space sequence.

            // Whereas when we're inside a list (or sub-list), that line will be
            // treated as the start of a sub-list. What a kludge, huh? This is
            // an aspect of Markdown's syntax that's hard to parse perfectly
            // without resorting to mind-reading. Perhaps the solution is to
            // change the syntax rules such that sub-lists must start with a
            // starting cardinal number; e.g. "1." or "a.".

            _listLevel++;

            // Trim trailing blank lines:
            list = Regex.Replace(list, @"\n{2,}\z", "\n");

            string pattern = string.Format(
              @"(\n)?                      # leading line = $1
                (^[ ]*)                    # leading whitespace = $2
                ({0}) [ ]+                 # list marker = $3
                ((?s:.+?)                  # list item text = $4
                (\n{{1,2}}))
                (?= \n* (\z | \2 ({0}) [ ]+))", marker);

            list = Regex.Replace(list, pattern, new MatchEvaluator(ListItemEvaluator),
                                  RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
            _listLevel--;
            return list;
        }

        /// <summary>
        /// Renders one list item: block processing when the item is preceded by a blank
        /// line or contains one, otherwise sub-list and span processing only.
        /// </summary>
        private string ListItemEvaluator(Match match)
        {
            string item = match.Groups[4].Value;
            string leadingLine = match.Groups[1].Value;

            if (!String.IsNullOrEmpty(leadingLine) || Regex.IsMatch(item, @"\n{2,}"))
                // we could correct any bad indentation here..
                item = RunBlockGamut(Outdent(item) + "\n");
            else
            {
                // recursion for sub-lists
                item = DoLists(Outdent(item));
                item = item.TrimEnd('\n');
                item = RunSpanGamut(item);
            }

            return string.Format("<li>{0}</li>\n", item);
        }


        private static readonly Regex _codeBlock = new Regex(string.Format(@"
                    (?:\n\n|\A\n?)
                    (                        # $1 = the code block -- one or more lines, starting with a space
                    (?:
                        (?:[ ]{{{0}}})       # Lines must start with a tab-width of spaces
                        .*\n+
                    )+
                    )
                    ((?=^[ ]{{0,{0}}}\S)|\Z) # Lookahead for non-space at line-start, or end of doc",
                    _tabWidth), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

        /// <summary>
        /// Converts indented blocks (each line starting with a tab-width of spaces) into
        /// <c>&lt;pre&gt;&lt;code&gt;</c> blocks.
        /// </summary>
        private string DoCodeBlocks(string text)
        {
            text = _codeBlock.Replace(text, new MatchEvaluator(CodeBlockEvaluator));
            return text;
        }

        /// <summary>Outdents, encodes and wraps one matched code block.</summary>
        private string CodeBlockEvaluator(Match match)
        {
            string codeBlock = match.Groups[1].Value;

            codeBlock = EncodeCode(Outdent(codeBlock));
            codeBlock = _newlinesLeadingTrailing.Replace(codeBlock, "");

            return string.Concat("\n\n<pre><code>", codeBlock, "\n</code></pre>\n\n");
        }

        private static readonly Regex _codeSpan = new Regex(@"
                    (?<!\\)   # Character before opening ` can't be a backslash
                    (`+)      # $1 = Opening run of `
                    (.+?)     # $2 = The code block
                    (?<!`)
                    \1
                    (?!`)", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

        private string DoCodeSpans(string text)
        {
            //    * You can use multiple backticks as the delimiters if you want to
            //        include literal backticks in the code span. So, this input:
            //
            //        Just type ``foo `bar` baz`` at the prompt.
            //
            //        Will translate to:
            //
            //          <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
            //
            //        There's no arbitrary limit to the number of backticks you
            //        can use as delimiters. If you need three consecutive backticks
            //        in your code, use four for delimiters, etc.
            //
            //    * You can use spaces to get literal backticks at the edges:
            //
            //          ... type `` `bar` `` ...
            //
            //        Turns to:
            //
            //          ... type <code>`bar`</code> ...
01307 // 01308 01309 return _codeSpan.Replace(text, new MatchEvaluator(CodeSpanEvaluator)); 01310 } 01311 01312 private string CodeSpanEvaluator(Match match) 01313 { 01314 string span = match.Groups[2].Value; 01315 span = Regex.Replace(span, @"^[ ]*", ""); // leading whitespace 01316 span = Regex.Replace(span, @"[ ]*$", ""); // trailing whitespace 01317 span = EncodeCode(span); 01318 01319 return string.Concat("<code>", span, "</code>"); 01320 } 01321 01322 01323 private static Regex _bold = new Regex(@"(\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1", 01324 RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); 01325 private static Regex _strictBold = new Regex(@"([\W_]|^) (\*\*|__) (?=\S) ([^\r]*?\S[\*_]*) \2 ([\W_]|$)", 01326 RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); 01327 01328 private static Regex _italic = new Regex(@"(\*|_) (?=\S) (.+?) (?<=\S) \1", 01329 RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); 01330 private static Regex _strictItalic = new Regex(@"([\W_]|^) (\*|_) (?=\S) ([^\r\*_]*?\S) \2 ([\W_]|$)", 01331 RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled); 01332 01336 private string DoItalicsAndBold(string text) 01337 { 01338 01339 // <strong> must go first, then <em> 01340 if (_strictBoldItalic) 01341 { 01342 text = _strictBold.Replace(text, "$1<strong>$3</strong>$4"); 01343 text = _strictItalic.Replace(text, "$1<em>$3</em>$4"); 01344 } 01345 else 01346 { 01347 text = _bold.Replace(text, "<strong>$2</strong>"); 01348 text = _italic.Replace(text, "<em>$2</em>"); 01349 } 01350 return text; 01351 } 01352 01356 private string DoHardBreaks(string text) 01357 { 01358 if (_autoNewlines) 01359 text = Regex.Replace(text, @"\n", string.Format("<br{0}\n", _emptyElementSuffix)); 01360 else 01361 text = Regex.Replace(text, @" {2,}\n", string.Format("<br{0}\n", _emptyElementSuffix)); 01362 return text; 01363 } 
private static Regex _blockquote = new Regex(@"
            (                           # Wrap whole match in $1
                (
                ^[ ]*>[ ]?              # '>' at the start of a line
                    .+\n                # rest of the first line
                (.+\n)*                 # subsequent consecutive lines
                \n*                     # blanks
                )+
            )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline | RegexOptions.Compiled);

        /// <summary>
        /// Replaces Markdown block quotes with <c>blockquote</c> elements.
        /// </summary>
        private string DoBlockQuotes(string text)
        {
            return _blockquote.Replace(text, new MatchEvaluator(BlockQuoteEvaluator));
        }

        /// <summary>
        /// Builds the HTML for one block quote: strips one level of '>' quoting, runs the
        /// block gamut on the content, indents the result, and wraps it in
        /// <c>blockquote</c> tags.
        /// </summary>
        private string BlockQuoteEvaluator(Match match)
        {
            string bq = match.Groups[1].Value;

            bq = Regex.Replace(bq, @"^[ ]*>[ ]?", "", RegexOptions.Multiline);   // trim one level of quoting
            bq = Regex.Replace(bq, @"^[ ]+$", "", RegexOptions.Multiline);       // trim whitespace-only lines
            bq = RunBlockGamut(bq);                                              // recurse

            // Indent every line of the generated HTML.
            // NOTE(review): the indent here and the matching strip in BlockQuoteEvaluator2
            // are a single space in this listing; confirm the intended width against the
            // project repository, since whitespace runs may have been collapsed.
            bq = Regex.Replace(bq, @"^", " ", RegexOptions.Multiline);

            // These leading spaces screw with <pre> content, so we need to fix that:
            bq = Regex.Replace(bq, @"(\s*<pre>.+?</pre>)", new MatchEvaluator(BlockQuoteEvaluator2), RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);

            return string.Format("<blockquote>\n{0}\n</blockquote>\n\n", bq);
        }

        /// <summary>
        /// Removes the indent that <c>BlockQuoteEvaluator</c> added from each line of a
        /// <c>pre</c> section.
        /// </summary>
        private string BlockQuoteEvaluator2(Match match)
        {
            return Regex.Replace(match.Groups[1].Value, @"^ ", "", RegexOptions.Multiline);
        }

        private static Regex _autolinkBare = new Regex(@"(^|\s)(https?|ftp)(://[-A-Z0-9+&@#/%?=~_|\[\]\(\)!:,\.;]*[-A-Z0-9+&@#/%=~_|\[\]])($|\W)",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        /// <summary>
        /// Turns angle-bracketed URLs into links. Bare URLs are linked as well when
        /// <c>_autoHyperlink</c> is set, and angle-bracketed e-mail addresses when
        /// <c>_linkEmails</c> is set.
        /// </summary>
        private string DoAutoLinks(string text)
        {
            if (_autoHyperlink)
            {
                // fixup arbitrary URLs by adding Markdown < > so they get linked as well
                // note that at this point, all other URL in the text are already hyperlinked as <a href=""></a>
                // *except* for the <http://www.foo.com> case
                text = _autolinkBare.Replace(text, @"$1<$2$3>$4");
            }

            // Hyperlinks: <http://foo.com>
            text = Regex.Replace(text, "<((https?|ftp):[^'\">\\s]+)>", new MatchEvaluator(HyperlinkEvaluator));

            if (_linkEmails)
            {
                // Email addresses: <address@domain.foo>
                string pattern =
                    @"<
                      (?:mailto:)?
                      (
                        [-.\w]+
                        \@
                        [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
                      )
                      >";
                text = Regex.Replace(text, pattern, new MatchEvaluator(EmailEvaluator), RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);
            }

            return text;
        }

        /// <summary>
        /// Builds an anchor whose target and visible text are both the matched URL (group 1).
        /// </summary>
        private string HyperlinkEvaluator(Match match)
        {
            string link = match.Groups[1].Value;
            return string.Format("<a href=\"{0}\">{0}</a>", link);
        }

        /// <summary>
        /// Builds an obfuscated mailto link for the matched e-mail address (group 1).
        /// </summary>
        private string EmailEvaluator(Match match)
        {
            string email = Unescape(match.Groups[1].Value);

            //
            //    Input: an email address, e.g. "foo@example.com"
            //
            //    Output: the email address as a mailto link, with each character
            //            of the address encoded as either a decimal or hex entity, in
            //            the hopes of foiling most address harvesting spam bots. E.g.:
            //
            //      <a href="&#x6d;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;...">&#102;&#111;&#111;&#64;...</a>
            //
            //    Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
            //    mailing list: <http://tinyurl.com/yu7ue>
            //
            email = "mailto:" + email;

            // leave ':' alone (to spot mailto: later)
            email = EncodeEmailAddress(email);

            email = string.Format("<a href=\"{0}\">{0}</a>", email);

            // strip the mailto: from the visible part
            email = Regex.Replace(email, "\">.+?:", "\">");
            return email;
        }


        private static Regex _outDent = new Regex(@"^[ ]{1," + _tabWidth + @"}", RegexOptions.Multiline | RegexOptions.Compiled);

        /// <summary>
        /// Removes up to <c>_tabWidth</c> leading spaces from every line of the block.
        /// </summary>
        private string Outdent(string block)
        {
            return _outDent.Replace(block, "");
        }


        #region Encoding and Normalization


        /// <summary>
        /// Encodes an address character by character, picking at random between the raw
        /// character, a hex entity, and a decimal entity. ':' is always left raw and '@'
        /// is always written as an entity.
        /// </summary>
        private string EncodeEmailAddress(string addr)
        {
            var sb = new StringBuilder(addr.Length * 5);
            var rand = new Random();
            int r;
            foreach (char c in addr)
            {
                r = rand.Next(1, 100);
                if ((r > 90 || c == ':') && c != '@')
                    sb.Append(c);                         // raw character
                else if (r < 45)
                    sb.AppendFormat("&#x{0:x};", (int)c); // hex entity
                else
                    sb.AppendFormat("&#{0};", (int)c);    // decimal entity
            }
            return sb.ToString();
        }

        private static Regex _codeEncoder = new Regex(@"&|<|>|\\|\*|_|\{|\}|\[|\]", RegexOptions.Compiled);

        /// <summary>
        /// Encodes the characters matched by <c>_codeEncoder</c> so that code is emitted
        /// literally: HTML-significant characters become entities, the others are
        /// replaced by their <c>_escapeTable</c> entries.
        /// </summary>
        private string EncodeCode(string code)
        {
            return _codeEncoder.Replace(code, EncodeCodeEvaluator);
        }

        /// <summary>
        /// Returns the replacement for one character matched by <c>_codeEncoder</c>.
        /// </summary>
        private string EncodeCodeEvaluator(Match match)
        {
            switch (match.Value)
            {
                // Encode all ampersands; HTML entities are not
                // entities within a Markdown code span.
                case "&":
                    return "&amp;";
                // Do the angle bracket song and dance
                case "<":
                    return "&lt;";
                case ">":
                    return "&gt;";
                // escape characters that are magic in Markdown
                default:
                    return _escapeTable[match.Value];
            }
        }


        // An ampersand that does not start a complete entity: a decimal reference, a hex
        // reference or a named entity, each terminated by ';'. The alternatives are
        // grouped so that the ';' applies to all three.
        private static Regex _amps = new Regex(@"&(?!((#[0-9]+)|(#[xX][a-fA-F0-9]+)|([a-zA-Z][a-zA-Z0-9]*));)", RegexOptions.ExplicitCapture | RegexOptions.Compiled);

        // A '<' not followed by a letter, '/', '?', '$' or '!'.
        private static Regex _angles = new Regex(@"<(?![A-Za-z/?\$!])", RegexOptions.ExplicitCapture | RegexOptions.Compiled);

        /// <summary>
        /// Replaces ampersands matched by <c>_amps</c> and angle brackets matched by
        /// <c>_angles</c> with the corresponding HTML entities.
        /// </summary>
        private string EncodeAmpsAndAngles(string s)
        {
            s = _amps.Replace(s, "&amp;");
            s = _angles.Replace(s, "&lt;");
            return s;
        }

        // Not initialized here; it must be assigned elsewhere before EscapeBackslashes runs.
        private static Regex _backslashEscapes;

        /// <summary>
        /// Replaces every <c>_backslashEscapes</c> match with its entry in
        /// <c>_backslashEscapeTable</c>.
        /// </summary>
        private string EscapeBackslashes(string s)
        {
            return _backslashEscapes.Replace(s, new MatchEvaluator(EscapeBackslashesEvaluator));
        }

        private string EscapeBackslashesEvaluator(Match match)
        {
            return _backslashEscapeTable[match.Value];
        }

        // A run of digits delimited by \x1A on both sides.
        private static Regex _unescapes = new Regex("\x1A\\d+\x1A", RegexOptions.Compiled);

        /// <summary>
        /// Replaces every <c>_unescapes</c> match with its entry in
        /// <c>_invertedEscapeTable</c>.
        /// </summary>
        private string Unescape(string s)
        {
            return _unescapes.Replace(s, new MatchEvaluator(UnescapeEvaluator));
        }

        private string UnescapeEvaluator(Match match)
        {
            return _invertedEscapeTable[match.Value];
        }


        /// <summary>
        /// Replaces '*' and '_' with their <c>_escapeTable</c> entries.
        /// </summary>
        private string EscapeBoldItalic(string s)
        {
            s = s.Replace("*", _escapeTable["*"]);
            s = s.Replace("_", _escapeTable["_"]);
            return s;
        }

        private static char[] _problemUrlChars = @"""'*()[]$:".ToCharArray();

        /// <summary>
        /// Percent-encodes the characters listed in <c>_problemUrlChars</c>. Returns the
        /// URL unchanged when <c>_encodeProblemUrlCharacters</c> is not set.
        /// </summary>
        private string EncodeProblemUrlChars(string url)
        {
            if (!_encodeProblemUrlCharacters) return url;

            var sb = new StringBuilder(url.Length);
            bool encode;
            char c;

            for (int i = 0; i < url.Length; i++)
            {
                c = url[i];
                encode = Array.IndexOf(_problemUrlChars, c) != -1;

                // A ':' followed by '/' or a digit is left alone.
                if (encode && c == ':' && i < url.Length - 1)
                    encode = !(url[i + 1] == '/') && !(url[i + 1] >= '0' && url[i + 1] <= '9');

                if (encode)
                    sb.AppendFormat("%{0:x}", (byte)c);
                else
                    sb.Append(c);
            }

            return sb.ToString();
        }


        /// <summary>
        /// Within tag tokens only, replaces backslashes, embedded code tags, '*' and '_'
        /// with their <c>_escapeTable</c> entries. All other tokens are copied unchanged.
        /// </summary>
        private string EscapeSpecialCharsWithinTagAttributes(string text)
        {
            var tokens = TokenizeHTML(text);

            // now, rebuild text from the tokens
            var sb = new StringBuilder(text.Length);

            foreach (var token in tokens)
            {
                string value = token.Value;

                if (token.Type == TokenType.Tag)
                {
                    value = value.Replace(@"\", _escapeTable[@"\"]);
                    value = Regex.Replace(value, "(?<=.)</?code>(?=.)", _escapeTable[@"`"]);
                    value = EscapeBoldItalic(value);
                }

                sb.Append(value);
            }

            return sb.ToString();
        }

        /// <summary>
        /// Normalizes the input in a single pass: "\r\n" and lone "\r" become "\n", tabs
        /// are expanded with spaces to the next multiple of <c>_tabWidth</c>, \x1A
        /// characters are dropped, and lines holding only spaces or tabs are emptied.
        /// The result always ends with three newlines.
        /// </summary>
        private string Normalize(string text)
        {
            var output = new StringBuilder(text.Length);
            var line = new StringBuilder();

            // Becomes true once the current line holds a character other than a space
            // or tab; lines that never do are written out as empty lines.
            bool valid = false;

            for (int i = 0; i < text.Length; i++)
            {
                switch (text[i])
                {
                    case '\n':
                        if (valid) output.Append(line);
                        output.Append('\n');
                        line.Length = 0; valid = false;
                        break;
                    case '\r':
                        // A '\r' directly before '\n' is skipped (the '\n' ends the line),
                        // as is a '\r' in the very last position (the final flush ends it).
                        if ((i < text.Length - 1) && (text[i + 1] != '\n'))
                        {
                            if (valid) output.Append(line);
                            output.Append('\n');
                            line.Length = 0; valid = false;
                        }
                        break;
                    case '\t':
                        int width = (_tabWidth - line.Length % _tabWidth);
                        for (int k = 0; k < width; k++)
                            line.Append(' ');
                        break;
                    case '\x1A':
                        break;
                    default:
                        if (!valid && text[i] != ' ') valid = true;
                        line.Append(text[i]);
                        break;
                }
            }

            if (valid) output.Append(line);
            output.Append('\n');

            // add two newlines to the end before return
            return output.Append("\n\n").ToString();
        }

        #endregion

        /// <summary>
        /// Returns <paramref name="text"/> concatenated <paramref name="count"/> times.
        /// </summary>
        private static string RepeatString(string text, int count)
        {
            var sb = new StringBuilder(text.Length * count);
            for (int i = 0; i < count; i++)
                sb.Append(text);
            return sb.ToString();
        }

    }
}