MarkdownSharp: src/Markdown.cs ソースファイル

00001 /*
00002  * MarkdownSharp
00003  * -------------
00004  * a C# Markdown processor
00005  * 
00006  * Markdown is a text-to-HTML conversion tool for web writers
00007  * Copyright (c) 2004 John Gruber
00008  * http://daringfireball.net/projects/markdown/
00009  * 
00010  * Markdown.NET
00011  * Copyright (c) 2004-2009 Milan Negovan
00012  * http://www.aspnetresources.com
00013  * http://aspnetresources.com/blog/markdown_announced.aspx
00014  * 
00015  * MarkdownSharp
00016  * Copyright (c) 2009-2010 Jeff Atwood
00017  * http://stackoverflow.com
00018  * http://www.codinghorror.com/blog/
00019  * http://code.google.com/p/markdownsharp/
00020  * 
00021  * History: Milan ported the Markdown processor to C#. He granted license to me so I can open source it
00022  * and let the community contribute to and improve MarkdownSharp.
00023  * 
00024  */
00025 
00026 #region Copyright and license
00027 
00028 /*
00029 
00030 Copyright (c) 2009 - 2010 Jeff Atwood
00031 
00032 http://www.opensource.org/licenses/mit-license.php
00033   
00034 Permission is hereby granted, free of charge, to any person obtaining a copy
00035 of this software and associated documentation files (the "Software"), to deal
00036 in the Software without restriction, including without limitation the rights
00037 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00038 copies of the Software, and to permit persons to whom the Software is
00039 furnished to do so, subject to the following conditions:
00040 
00041 The above copyright notice and this permission notice shall be included in
00042 all copies or substantial portions of the Software.
00043 
00044 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00045 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00046 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00047 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00048 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00049 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00050 THE SOFTWARE.
00051 
00052 Copyright (c) 2003-2004 John Gruber
00053 <http://daringfireball.net/>   
00054 All rights reserved.
00055 
00056 Redistribution and use in source and binary forms, with or without
00057 modification, are permitted provided that the following conditions are
00058 met:
00059 
00060 * Redistributions of source code must retain the above copyright notice,
00061   this list of conditions and the following disclaimer.
00062 
00063 * Redistributions in binary form must reproduce the above copyright
00064   notice, this list of conditions and the following disclaimer in the
00065   documentation and/or other materials provided with the distribution.
00066 
00067 * Neither the name "Markdown" nor the names of its contributors may
00068   be used to endorse or promote products derived from this software
00069   without specific prior written permission.
00070 
00071 This software is provided by the copyright holders and contributors "as
00072 is" and any express or implied warranties, including, but not limited
00073 to, the implied warranties of merchantability and fitness for a
00074 particular purpose are disclaimed. In no event shall the copyright owner
00075 or contributors be liable for any direct, indirect, incidental, special,
00076 exemplary, or consequential damages (including, but not limited to,
00077 procurement of substitute goods or services; loss of use, data, or
00078 profits; or business interruption) however caused and on any theory of
00079 liability, whether in contract, strict liability, or tort (including
00080 negligence or otherwise) arising in any way out of the use of this
00081 software, even if advised of the possibility of such damage.
00082 */
00083 
00084 #endregion
00085 
00086 using System;
00087 using System.Collections.Generic;
00088 using System.Configuration;
00089 using System.Text;
00090 using System.Text.RegularExpressions;
00091 
00092 namespace MarkdownSharp
00093 {
00094 
00095     public class MarkdownOptions
00096     {
00101         public bool AutoHyperlink { get; set; }
00106         public bool AutoNewlines { get; set; }
00110         public string EmptyElementSuffix { get; set; }
00115         public bool EncodeProblemUrlCharacters { get; set; }
00120         public bool LinkEmails { get; set; }
00125         public bool StrictBoldItalic { get; set; }
00126     }
00127 
00128 
00134     public class Markdown
00135     {
        // Version string reported through the Version property.
        private const string _version = "1.13";
00137 
00138         #region Constructors and Options
00139 
00143         public Markdown() : this(false)
00144         {
00145         }
00146 
00159         public Markdown(bool loadOptionsFromConfigFile)
00160         {
00161             if (!loadOptionsFromConfigFile) return;
00162 
00163             var settings = ConfigurationManager.AppSettings;
00164             foreach (string key in settings.Keys)
00165             {
00166                 switch (key)
00167                 {
00168                     case "Markdown.AutoHyperlink":
00169                         _autoHyperlink = Convert.ToBoolean(settings[key]);
00170                         break;
00171                     case "Markdown.AutoNewlines":
00172                         _autoNewlines = Convert.ToBoolean(settings[key]);
00173                         break;
00174                     case "Markdown.EmptyElementSuffix":
00175                         _emptyElementSuffix = settings[key];
00176                         break;
00177                     case "Markdown.EncodeProblemUrlCharacters":
00178                         _encodeProblemUrlCharacters = Convert.ToBoolean(settings[key]);
00179                         break;
00180                     case "Markdown.LinkEmails":
00181                         _linkEmails = Convert.ToBoolean(settings[key]);
00182                         break;
00183                     case "Markdown.StrictBoldItalic":
00184                         _strictBoldItalic = Convert.ToBoolean(settings[key]);
00185                         break;
00186                 }
00187             }
00188         }
00189 
00193         public Markdown(MarkdownOptions options)
00194         {
00195             _autoHyperlink = options.AutoHyperlink;
00196             _autoNewlines = options.AutoNewlines;
00197             _emptyElementSuffix = options.EmptyElementSuffix;
00198             _encodeProblemUrlCharacters = options.EncodeProblemUrlCharacters;
00199             _linkEmails = options.LinkEmails;
00200             _strictBoldItalic = options.StrictBoldItalic;
00201         }
00202 
00203 
00207         public string EmptyElementSuffix
00208         {
00209             get { return _emptyElementSuffix; }
00210             set { _emptyElementSuffix = value; }
00211         }
00212         private string _emptyElementSuffix = " />";
00213 
00218         public bool LinkEmails
00219         {
00220             get { return _linkEmails; }
00221             set { _linkEmails = value; }
00222         }
00223         private bool _linkEmails = true;
00224 
00229         public bool StrictBoldItalic
00230         {
00231             get { return _strictBoldItalic; }
00232             set { _strictBoldItalic = value; }
00233         }
00234         private bool _strictBoldItalic = false;
00235 
00240         public bool AutoNewLines
00241         {
00242             get { return _autoNewlines; }
00243             set { _autoNewlines = value; }
00244         }
00245         private bool _autoNewlines = false;
00246 
00251         public bool AutoHyperlink
00252         {
00253             get { return _autoHyperlink; }
00254             set { _autoHyperlink = value; }
00255         }
00256         private bool _autoHyperlink = false;
00257 
00262         public bool EncodeProblemUrlCharacters
00263         {
00264             get { return _encodeProblemUrlCharacters; }
00265             set { _encodeProblemUrlCharacters = value; }
00266         }
00267         private bool _encodeProblemUrlCharacters = false;
00268 
00269         #endregion
00270 
00271         private enum TokenType { Text, Tag }
00272 
00273         private struct Token
00274         {
00275             public Token(TokenType type, string value)
00276             {
00277                 this.Type = type;
00278                 this.Value = value;
00279             }
00280             public TokenType Type;
00281             public string Value;
00282         }
00283 
        // Repeat count passed to RepeatString when the nested [] / () / tag
        // patterns are built.
        private const int _nestDepth = 6;

        // Tab width; the block patterns allow up to (_tabWidth - 1) leading spaces.
        private const int _tabWidth = 4;

        // Regex fragments for unordered and ordered list markers.
        private const string _markerUL = @"[*+-]";
        private const string _markerOL = @"\d+[.]";

        // Lookup tables filled once by the static constructor:
        //   _escapeTable          character               -> hash placeholder
        //   _invertedEscapeTable  hash placeholder        -> character
        //   _backslashEscapeTable backslash + character   -> hash placeholder
        private static readonly Dictionary<string, string> _escapeTable;
        private static readonly Dictionary<string, string> _invertedEscapeTable;
        private static readonly Dictionary<string, string> _backslashEscapeTable;        

        // Per-transform state, cleared by Setup(): link URLs and titles keyed by
        // lower-cased link id, and hashed HTML blocks keyed by their placeholder.
        private readonly Dictionary<string, string> _urls = new Dictionary<string, string>();
        private readonly Dictionary<string, string> _titles = new Dictionary<string, string>();
        private readonly Dictionary<string, string> _htmlBlocks = new Dictionary<string, string>();

        // Reset to 0 by Setup(); presumably tracks list nesting depth — confirm against DoLists.
        private int _listLevel;
00307 
00311         static Markdown()
00312         {
00313             // Table of hash values for escaped characters:
00314             _escapeTable = new Dictionary<string, string>();
00315             _invertedEscapeTable = new Dictionary<string, string>();
00316             // Table of hash value for backslash escaped characters:
00317             _backslashEscapeTable = new Dictionary<string, string>();
00318 
00319             string backslashPattern = "";
00320 
00321             foreach (char c in @"\`*_{}[]()>#+-.!")
00322             {
00323                 string key = c.ToString();
00324                 string hash = GetHashKey(key);
00325                 _escapeTable.Add(key, hash);
00326                 _invertedEscapeTable.Add(hash, key);
00327                 _backslashEscapeTable.Add(@"\" + key, hash);
00328                 backslashPattern += Regex.Escape(@"\" + key) + "|";
00329             }
00330 
00331             _backslashEscapes = new Regex(backslashPattern.Substring(0, backslashPattern.Length - 1), RegexOptions.Compiled);
00332         }
00333 
00338         public string Version
00339         {
00340             get { return _version; }
00341         }
00342 
00353         public string Transform(string text)
00354         {
00355             if (String.IsNullOrEmpty(text)) return "";
00356 
00357             Setup();
00358 
00359             text = Normalize(text);
00360            
00361             text = HashHTMLBlocks(text);
00362             text = StripLinkDefinitions(text);
00363             text = RunBlockGamut(text);
00364             text = Unescape(text);
00365 
00366             Cleanup();
00367 
00368             return text + "\n";
00369         }
00370 
00371 
00375         private string RunBlockGamut(string text)
00376         {
00377             text = DoHeaders(text);
00378             text = DoHorizontalRules(text);
00379             text = DoLists(text);
00380             text = DoCodeBlocks(text);
00381             text = DoBlockQuotes(text);
00382 
00383             // We already ran HashHTMLBlocks() before, in Markdown(), but that
00384             // was to escape raw HTML in the original Markdown source. This time,
00385             // we're escaping the markup we've just created, so that we don't wrap
00386             // <p> tags around block-level tags.
00387             text = HashHTMLBlocks(text);
00388 
00389             text = FormParagraphs(text);
00390 
00391             return text;
00392         }
00393 
00394 
00398         private string RunSpanGamut(string text)
00399         {
00400             text = DoCodeSpans(text);
00401             text = EscapeSpecialCharsWithinTagAttributes(text);
00402             text = EscapeBackslashes(text);
00403 
00404             // Images must come first, because ![foo][f] looks like an anchor.
00405             text = DoImages(text);
00406             text = DoAnchors(text);
00407 
00408             // Must come after DoAnchors(), because you can use < and >
00409             // delimiters in inline links like [this](<url>).
00410             text = DoAutoLinks(text);
00411 
00412             text = EncodeAmpsAndAngles(text);
00413             text = DoItalicsAndBold(text);
00414             text = DoHardBreaks(text);
00415 
00416             return text;
00417         }
00418 
00419         private static Regex _newlinesLeadingTrailing = new Regex(@"^\n+|\n+\z", RegexOptions.Compiled);
00420         private static Regex _newlinesMultiple = new Regex(@"\n{2,}", RegexOptions.Compiled);
00421         private static Regex _leadingWhitespace = new Regex(@"^[ ]*", RegexOptions.Compiled);
00422 
00427         private string FormParagraphs(string text)
00428         {
00429             // split on two or more newlines
00430             string[] grafs = _newlinesMultiple.Split(_newlinesLeadingTrailing.Replace(text, ""));
00431             
00432             for (int i = 0; i < grafs.Length; i++)
00433             {
00434                 if (grafs[i].StartsWith("\x1A"))
00435                 {
00436                     // unhashify HTML blocks
00437                     grafs[i] = _htmlBlocks[grafs[i]];
00438                 }
00439                 else
00440                 {
00441                     // do span level processing inside the block, then wrap result in <p> tags
00442                     grafs[i] = _leadingWhitespace.Replace(RunSpanGamut(grafs[i]), "<p>") + "</p>";
00443                 }
00444             }
00445 
00446             return string.Join("\n\n", grafs);
00447         }
00448 
00449 
00450         private void Setup()
00451         {
00452             // Clear the global hashes. If we don't clear these, you get conflicts
00453             // from other articles when generating a page which contains more than
00454             // one article (e.g. an index page that shows the N most recent
00455             // articles):
00456             _urls.Clear();
00457             _titles.Clear();
00458             _htmlBlocks.Clear();
00459             _listLevel = 0;
00460         }
00461 
00462         private void Cleanup()
00463         {
00464             Setup();
00465         }
00466 
00467         private static string _nestedBracketsPattern;
00468 
00473         private static string GetNestedBracketsPattern()
00474         {
00475             // in other words [this] and [this[also]] and [this[also[too]]]
00476             // up to _nestDepth
00477             if (_nestedBracketsPattern == null)
00478                 _nestedBracketsPattern =
00479                     RepeatString(@"
00480                     (?>              # Atomic matching
00481                        [^\[\]]+      # Anything other than brackets
00482                      |
00483                        \[
00484                            ", _nestDepth) + RepeatString(
00485                     @" \]
00486                     )*"
00487                     , _nestDepth);
00488             return _nestedBracketsPattern;
00489         }
00490 
00491         private static string _nestedParensPattern;
00492 
00497         private static string GetNestedParensPattern()
00498         {
00499             // in other words (this) and (this(also)) and (this(also(too)))
00500             // up to _nestDepth
00501             if (_nestedParensPattern == null)
00502                 _nestedParensPattern =
00503                     RepeatString(@"
00504                     (?>              # Atomic matching
00505                        [^()\s]+      # Anything other than parens or whitespace
00506                      |
00507                        \(
00508                            ", _nestDepth) + RepeatString(
00509                     @" \)
00510                     )*"
00511                     , _nestDepth);
00512             return _nestedParensPattern;
00513         }
00514 
00515         private static Regex _linkDef = new Regex(string.Format(@"
00516                         ^[ ]{{0,{0}}}\[(.+)\]:  # id = $1
00517                           [ ]*
00518                           \n?                   # maybe *one* newline
00519                           [ ]*
00520                         <?(\S+?)>?              # url = $2
00521                           [ ]*
00522                           \n?                   # maybe one newline
00523                           [ ]*
00524                         (?:
00525                             (?<=\s)             # lookbehind for whitespace
00526                             [""(]
00527                             (.+?)               # title = $3
00528                             ["")]
00529                             [ ]*
00530                         )?                      # title is optional
00531                         (?:\n+|\Z)", _tabWidth - 1), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
00532 
00539         private string StripLinkDefinitions(string text)
00540         {
00541             return _linkDef.Replace(text, new MatchEvaluator(LinkEvaluator));
00542         }
00543 
00544         private string LinkEvaluator(Match match)
00545         {
00546             string linkID = match.Groups[1].Value.ToLowerInvariant();
00547             _urls[linkID] = EncodeAmpsAndAngles(match.Groups[2].Value);
00548 
00549             if (match.Groups[3] != null && match.Groups[3].Length > 0)
00550                 _titles[linkID] = match.Groups[3].Value.Replace("\"", "&quot;");
00551 
00552             return "";
00553         }
00554 
00555         // compiling this monster regex results in worse performance. trust me.
00556         private static Regex _blocksHtml = new Regex(GetBlockPattern(), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
00557 
00558 
00562         private static string GetBlockPattern()
00563         {
00564 
00565             // Hashify HTML blocks:
00566             // We only want to do this for block-level HTML tags, such as headers,
00567             // lists, and tables. That's because we still want to wrap <p>s around
00568             // "paragraphs" that are wrapped in non-block-level tags, such as anchors,
00569             // phrase emphasis, and spans. The list of tags we're looking for is
00570             // hard-coded:
00571             //
00572             // *  List "a" is made of tags which can be both inline or block-level.
00573             //    These will be treated block-level when the start tag is alone on 
00574             //    its line, otherwise they're not matched here and will be taken as 
00575             //    inline later.
00576             // *  List "b" is made of tags which are always block-level;
00577             //
00578             string blockTagsA = "ins|del";
00579             string blockTagsB = "p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|script|noscript|form|fieldset|iframe|math";
00580 
00581             // Regular expression for the content of a block tag.
00582             string attr = @"
00583             (?>                         # optional tag attributes
00584               \s                        # starts with whitespace
00585               (?>
00586                 [^>""/]+                # text outside quotes
00587               |
00588                 /+(?!>)                 # slash not followed by >
00589               |
00590                 ""[^""]*""              # text inside double quotes (tolerate >)
00591               |
00592                 '[^']*'                 # text inside single quotes (tolerate >)
00593               )*
00594             )?  
00595             ";
00596 
00597             string content = RepeatString(@"
00598                 (?>
00599                   [^<]+                 # content without tag
00600                 |
00601                   <\2                   # nested opening tag
00602                     " + attr + @"       # attributes
00603                   (?>
00604                       />
00605                   |
00606                       >", _nestDepth) +   // end of opening tag
00607                       ".*?" +             // last level nested tag content
00608             RepeatString(@"
00609                       </\2\s*>          # closing nested tag
00610                   )
00611                   |             
00612                   <(?!/\2\s*>           # other tags with a different name
00613                   )
00614                 )*", _nestDepth);
00615 
00616             string content2 = content.Replace(@"\2", @"\3");
00617 
00618             // First, look for nested blocks, e.g.:
00619             //  <div>
00620             //      <div>
00621             //      tags for inner block must be indented.
00622             //      </div>
00623             //  </div>
00624             //
00625             // The outermost tags must start at the left margin for this to match, and
00626             // the inner nested divs must be indented.
00627             // We need to do this before the next, more liberal match, because the next
00628             // match will start at the first `<div>` and stop at the first `</div>`.
00629             string pattern = @"
00630             (?>
00631                   (?>
00632                     (?<=\n)     # Starting after a blank line
00633                     |           # or
00634                     \A\n?       # the beginning of the doc
00635                   )
00636                   (             # save in $1
00637 
00638                     # Match from `\n<tag>` to `</tag>\n`, handling nested tags 
00639                     # in between.
00640                       
00641                         [ ]{0,$less_than_tab}
00642                         <($block_tags_b_re)   # start tag = $2
00643                         $attr>                # attributes followed by > and \n
00644                         $content              # content, support nesting
00645                         </\2>                 # the matching end tag
00646                         [ ]*                  # trailing spaces
00647                         (?=\n+|\Z)            # followed by a newline or end of document
00648 
00649                   | # Special version for tags of group a.
00650 
00651                         [ ]{0,$less_than_tab}
00652                         <($block_tags_a_re)   # start tag = $3
00653                         $attr>[ ]*\n          # attributes followed by >
00654                         $content2             # content, support nesting
00655                         </\3>                 # the matching end tag
00656                         [ ]*                  # trailing spaces
00657                         (?=\n+|\Z)            # followed by a newline or end of document
00658                       
00659                   | # Special case just for <hr />. It was easier to make a special 
00660                     # case than to make the other regex more complicated.
00661                   
00662                         [ ]{0,$less_than_tab}
00663                         <(hr)                 # start tag = $2
00664                         $attr                 # attributes
00665                         /?>                   # the matching end tag
00666                         [ ]*
00667                         (?=\n{2,}|\Z)         # followed by a blank line or end of document
00668                   
00669                   | # Special case for standalone HTML comments:
00670                   
00671                       [ ]{0,$less_than_tab}
00672                       (?s:
00673                         <!-- .*? -->
00674                       )
00675                       [ ]*
00676                       (?=\n{2,}|\Z)            # followed by a blank line or end of document
00677                   
00678                   | # PHP and ASP-style processor instructions (<? and <%)
00679                   
00680                       [ ]{0,$less_than_tab}
00681                       (?s:
00682                         <([?%])                # $2
00683                         .*?
00684                         \2>
00685                       )
00686                       [ ]*
00687                       (?=\n{2,}|\Z)            # followed by a blank line or end of document
00688                       
00689                   )
00690             )";
00691 
00692             pattern = pattern.Replace("$less_than_tab", (_tabWidth - 1).ToString());
00693             pattern = pattern.Replace("$block_tags_b_re", blockTagsB);
00694             pattern = pattern.Replace("$block_tags_a_re", blockTagsA);
00695             pattern = pattern.Replace("$attr", attr);
00696             pattern = pattern.Replace("$content2", content2);
00697             pattern = pattern.Replace("$content", content);
00698 
00699             return pattern;
00700         }
00701 
00705         private string HashHTMLBlocks(string text)
00706         {
00707             return _blocksHtml.Replace(text, new MatchEvaluator(HtmlEvaluator));
00708         }
00709 
00710         private string HtmlEvaluator(Match match)
00711         {
00712             string text = match.Groups[1].Value;
00713             string key = GetHashKey(text);
00714             _htmlBlocks[key] = text;
00715 
00716             return string.Concat("\n\n", key, "\n\n");
00717         }
00718 
00719         private static string GetHashKey(string s)
00720         {
00721             return "\x1A" + Math.Abs(s.GetHashCode()).ToString() + "\x1A";
00722         }
00723 
00724         private static Regex _htmlTokens = new Regex(@"
00725             (<!(?:--.*?--\s*)+>)|        # match <!-- foo -->
00726             (<\?.*?\?>)|                 # match <?foo?> " +
00727             RepeatString(@" 
00728             (<[A-Za-z\/!$](?:[^<>]|", _nestDepth) + RepeatString(@")*>)", _nestDepth) +
00729                                        " # match <tag> and </tag>",
00730             RegexOptions.Multiline | RegexOptions.Singleline | RegexOptions.ExplicitCapture | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
00731 
00739         private List<Token> TokenizeHTML(string text)
00740         {
00741             int pos = 0;
00742             int tagStart = 0;
00743             var tokens = new List<Token>();
00744 
00745             // this regex is derived from the _tokenize() subroutine in Brad Choate's MTRegex plugin.
00746             // http://www.bradchoate.com/past/mtregex.php
00747             foreach (Match m in _htmlTokens.Matches(text))
00748             {
00749                 tagStart = m.Index;
00750 
00751                 if (pos < tagStart)
00752                     tokens.Add(new Token(TokenType.Text, text.Substring(pos, tagStart - pos)));
00753 
00754                 tokens.Add(new Token(TokenType.Tag, m.Value));
00755                 pos = tagStart + m.Length;
00756             }
00757 
00758             if (pos < text.Length)
00759                 tokens.Add(new Token(TokenType.Text, text.Substring(pos, text.Length - pos)));
00760 
00761             return tokens;
00762         }
00763 
00764 
00765         private static Regex _anchorRef = new Regex(string.Format(@"
00766             (                               # wrap whole match in $1
00767                 \[
00768                     ({0})                   # link text = $2
00769                 \]
00770 
00771                 [ ]?                        # one optional space
00772                 (?:\n[ ]*)?                 # one optional newline followed by spaces
00773 
00774                 \[
00775                     (.*?)                   # id = $3
00776                 \]
00777             )", GetNestedBracketsPattern()), RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
00778 
00779         private static Regex _anchorInline = new Regex(string.Format(@"
00780                 (                           # wrap whole match in $1
00781                     \[
00782                         ({0})               # link text = $2
00783                     \]
00784                     \(                      # literal paren
00785                         [ ]*
00786                         ({1})               # href = $3
00787                         [ ]*
00788                         (                   # $4
00789                         (['""])           # quote char = $5
00790                         (.*?)               # title = $6
00791                         \5                  # matching quote
00792                         [ ]*                # ignore any spaces between closing quote and )
00793                         )?                  # title is optional
00794                     \)
00795                 )", GetNestedBracketsPattern(), GetNestedParensPattern()),
00796                   RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
00797 
00798         private static Regex _anchorRefShortcut = new Regex(@"
00799             (                               # wrap whole match in $1
00800               \[
00801                  ([^\[\]]+)                 # link text = $2; can't contain [ or ]
00802               \]
00803             )", RegexOptions.Singleline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
00804 
00813         private string DoAnchors(string text)
00814         {
00815             // First, handle reference-style links: [link text] [id]
00816             text = _anchorRef.Replace(text, new MatchEvaluator(AnchorRefEvaluator));
00817 
00818             // Next, inline-style links: [link text](url "optional title") or [link text](url "optional title")
00819             text = _anchorInline.Replace(text, new MatchEvaluator(AnchorInlineEvaluator));
00820 
00821             //  Last, handle reference-style shortcuts: [link text]
00822             //  These must come last in case you've also got [link test][1]
00823             //  or [link test](/foo)
00824             text = _anchorRefShortcut.Replace(text, new MatchEvaluator(AnchorRefShortcutEvaluator));
00825             return text;
00826         }
00827 
/// <summary>
/// Match evaluator for reference-style links. Group 1 is used as the whole
/// match, group 2 as the link text and group 3 as the link id.
/// </summary>
/// <param name="match">The regex match to convert.</param>
/// <returns>
/// An anchor element when the id is a key of _urls; otherwise the text of
/// group 1, unchanged.
/// </returns>
private string AnchorRefEvaluator(Match match)
{
    string label = match.Groups[2].Value;
    string key = match.Groups[3].Value.ToLowerInvariant();

    // An empty id, as in [this][], means the link text doubles as the id.
    if (key.Length == 0)
        key = label.ToLowerInvariant();

    string target;
    if (!_urls.TryGetValue(key, out target))
        return match.Groups[1].Value;   // unknown id: leave the input intact

    var anchor = new StringBuilder("<a href=\"");
    anchor.Append(EscapeBoldItalic(EncodeProblemUrlChars(target)));
    anchor.Append('"');

    // The title attribute is emitted only when one is registered for this id.
    string tooltip;
    if (_titles.TryGetValue(key, out tooltip))
    {
        anchor.Append(" title=\"");
        anchor.Append(EscapeBoldItalic(tooltip));
        anchor.Append('"');
    }

    anchor.Append('>').Append(label).Append("</a>");
    return anchor.ToString();
}
00862 
/// <summary>
/// Match evaluator for reference-style shortcut links, where the link text
/// itself serves as the id. Group 1 is used as the whole match and group 2
/// as the link text.
/// </summary>
/// <param name="match">The regex match to convert.</param>
/// <returns>
/// An anchor element when the derived id is a key of _urls; otherwise the
/// text of group 1, unchanged.
/// </returns>
private string AnchorRefShortcutEvaluator(Match match)
{
    string label = match.Groups[2].Value;

    // The id is the lower-cased link text with every newline (and the spaces
    // around it) collapsed to a single space.
    string key = Regex.Replace(label.ToLowerInvariant(), @"[ ]*\n[ ]*", " ");

    string target;
    if (!_urls.TryGetValue(key, out target))
        return match.Groups[1].Value;

    string href = EscapeBoldItalic(EncodeProblemUrlChars(target));
    string markup = "<a href=\"" + href + "\"";

    string tooltip;
    if (_titles.TryGetValue(key, out tooltip))
        markup += " title=\"" + EscapeBoldItalic(tooltip) + "\"";

    return markup + ">" + label + "</a>";
}
00893 
00894 
/// <summary>
/// Match evaluator for inline-style links. Group 2 is used as the link text,
/// group 3 as the URL and group 6 as the optional title.
/// </summary>
/// <param name="match">The regex match to convert.</param>
/// <returns>The anchor element built from the match.</returns>
private string AnchorInlineEvaluator(Match match)
{
    string href = EscapeBoldItalic(EncodeProblemUrlChars(match.Groups[3].Value));

    // Drop a surrounding <...> pair from the URL, if present.
    if (href.StartsWith("<") && href.EndsWith(">"))
        href = href.Substring(1, href.Length - 2);

    string titleAttribute = "";
    string rawTitle = match.Groups[6].Value;
    if (!String.IsNullOrEmpty(rawTitle))
    {
        // Double quotes would terminate the attribute value early.
        string safeTitle = EscapeBoldItalic(rawTitle.Replace("\"", "&quot;"));
        titleAttribute = " title=\"" + safeTitle + "\"";
    }

    return "<a href=\"" + href + "\"" + titleAttribute + ">" + match.Groups[2].Value + "</a>";
}
00919 
/// <summary>
/// Matches reference-style images: an exclamation mark, bracketed alt text,
/// optional whitespace, then a bracketed id.
/// </summary>
private static Regex _imagesRef = new Regex(@"
                    (               # wrap whole match in $1
                    !\[
                        (.*?)       # alt text = $2
                    \]

                    [ ]?            # one optional space
                    (?:\n[ ]*)?     # one optional newline followed by spaces

                    \[
                        (.*?)       # id = $3
                    \]

                    )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

/// <summary>
/// Matches inline images: an exclamation mark, bracketed alt text, then a
/// parenthesised URL with an optional quoted title. The URL sub-pattern is
/// supplied by GetNestedParensPattern().
/// </summary>
private static Regex _imagesInline = new Regex(String.Format(@"
              (                     # wrap whole match in $1
                !\[
                    (.*?)           # alt text = $2
                \]
                \s?                 # one optional whitespace character
                \(                  # literal paren
                    [ ]*
                    ({0})           # href = $3
                    [ ]*
                    (               # $4
                    (['""])       # quote char = $5
                    (.*?)           # title = $6
                    \5              # matching quote
                    [ ]*
                    )?              # title is optional
                \)
              )", GetNestedParensPattern()),
                  RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

/// <summary>
/// Converts Markdown image syntax in <paramref name="text"/> to img elements.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <returns>The text with both image forms replaced.</returns>
private string DoImages(string text)
{
    // Reference-style images (![alt text][id]) are handled first ...
    string afterReferences = _imagesRef.Replace(text, ImageReferenceEvaluator);

    // ... then inline images (![alt text](url "optional title")).
    return _imagesInline.Replace(afterReferences, ImageInlineEvaluator);
}
00973 
/// <summary>
/// Match evaluator for reference-style images. Group 1 is used as the whole
/// match, group 2 as the alt text and group 3 as the id.
/// </summary>
/// <param name="match">The regex match to convert.</param>
/// <returns>
/// An img element when the id is a key of _urls; otherwise the text of
/// group 1, unchanged.
/// </returns>
private string ImageReferenceEvaluator(Match match)
{
    string rawAlt = match.Groups[2].Value;
    string key = match.Groups[3].Value.ToLowerInvariant();

    // An empty id, as in ![this][], falls back to the (unescaped) alt text.
    if (key.Length == 0)
        key = rawAlt.ToLowerInvariant();

    string source;
    if (!_urls.TryGetValue(key, out source))
    {
        // No such id: leave the input intact.
        return match.Groups[1].Value;
    }

    string alt = rawAlt.Replace("\"", "&quot;");
    string src = EscapeBoldItalic(EncodeProblemUrlChars(source));
    string markup = "<img src=\"" + src + "\" alt=\"" + alt + "\"";

    string tooltip;
    if (_titles.TryGetValue(key, out tooltip))
        markup += " title=\"" + EscapeBoldItalic(tooltip) + "\"";

    return markup + _emptyElementSuffix;
}
01012 
/// <summary>
/// Match evaluator for inline images. Group 2 is used as the alt text,
/// group 3 as the URL and group 6 as the optional title.
/// </summary>
/// <param name="match">The regex match to convert.</param>
/// <returns>The img element built from the match.</returns>
private string ImageInlineEvaluator(Match match)
{
    // Double quotes would terminate the attribute values early.
    string altText = match.Groups[2].Value.Replace("\"", "&quot;");
    string caption = match.Groups[6].Value.Replace("\"", "&quot;");

    string source = match.Groups[3].Value;
    bool wrappedInAngles = source.StartsWith("<") && source.EndsWith(">");
    if (wrappedInAngles)
        source = source.Substring(1, source.Length - 2);
    source = EscapeBoldItalic(EncodeProblemUrlChars(source));

    string markup = "<img src=\"" + source + "\" alt=\"" + altText + "\"";

    if (!String.IsNullOrEmpty(caption))
        markup += " title=\"" + EscapeBoldItalic(caption) + "\"";

    return markup + _emptyElementSuffix;
}
01040 
/// <summary>
/// Matches a Setext-style header: a line of text followed by a line made
/// only of '=' characters or only of '-' characters. Group 1 captures the
/// header text and group 2 the underline.
/// </summary>
private static Regex _headerSetext = new Regex(@"
                ^(.+?)      # $1 = Header text
                [ ]*
                \n
                (=+|-+)     # $2 = string of ='s or -'s
                [ ]*
                \n+",
    RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

/// <summary>
/// Matches an atx-style header: one to six '#' characters, the header text,
/// and optional closing '#' characters. Group 1 captures the leading '#'
/// run and group 2 the header text.
/// </summary>
private static Regex _headerAtx = new Regex(@"
                ^(\#{1,6})  # $1 = string of #'s
                [ ]*
                (.+?)       # $2 = Header text
                [ ]*
                \#*         # optional closing #'s (not counted)
                \n+",
    RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);
01058 
/// <summary>
/// Converts Markdown headers in <paramref name="text"/>: Setext-style headers
/// are replaced first, then atx-style headers.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <returns>The text with both header forms replaced.</returns>
private string DoHeaders(string text)
{
    string afterSetext = _headerSetext.Replace(text, SetextHeaderEvaluator);
    return _headerAtx.Replace(afterSetext, AtxHeaderEvaluator);
}
01081 
/// <summary>
/// Match evaluator for Setext-style headers. Group 1 is the header text and
/// group 2 the underline; an underline starting with '=' yields h1, anything
/// else yields h2.
/// </summary>
/// <param name="match">The regex match to convert.</param>
/// <returns>The header element followed by two newlines.</returns>
private string SetextHeaderEvaluator(Match match)
{
    string tag = match.Groups[2].Value.StartsWith("=") ? "h1" : "h2";
    string content = RunSpanGamut(match.Groups[1].Value);
    return "<" + tag + ">" + content + "</" + tag + ">\n\n";
}
01088 
/// <summary>
/// Match evaluator for atx-style headers. The header level is the length of
/// group 1 and the header text is group 2.
/// </summary>
/// <param name="match">The regex match to convert.</param>
/// <returns>The header element followed by two newlines.</returns>
private string AtxHeaderEvaluator(Match match)
{
    int depth = match.Groups[1].Value.Length;
    string content = RunSpanGamut(match.Groups[2].Value);
    return "<h" + depth + ">" + content + "</h" + depth + ">\n\n";
}
01095 
01096 
/// <summary>
/// Matches a whole line consisting of three or more identical markers
/// ('-', '*' or '_'), each optionally separated by up to two spaces, with up
/// to three leading spaces.
/// </summary>
private static Regex _horizontalRules = new Regex(@"
            ^[ ]{0,3}         # Leading space
                ([-*_])       # $1: First marker
                (?>           # Repeated marker group
                    [ ]{0,2}  # Zero, one, or two spaces.
                    \1        # Marker character
                ){2,}         # Group repeated at least twice
                [ ]*          # Trailing spaces
                $             # End of line.
            ", RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

/// <summary>
/// Replaces every horizontal-rule line in <paramref name="text"/> with an hr
/// element followed by a newline.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <returns>The text with horizontal rules replaced.</returns>
private string DoHorizontalRules(string text)
{
    string rule = string.Concat("<hr", _emptyElementSuffix, "\n");
    return _horizontalRules.Replace(text, rule);
}
01121 
/// <summary>
/// Pattern text for a whole list: a first item marker (built from _markerUL
/// and _markerOL) indented by at most _tabWidth - 1 spaces, followed by
/// everything up to the end of input or a blank-line gap that is not
/// followed by another list item marker.
/// </summary>
private static string _wholeList = string.Format(@"
            (                               # $1 = whole list
              (                             # $2
                [ ]{{0,{1}}}
                ({0})                       # $3 = first list item marker
                [ ]+
              )
              (?s:.+?)
              (                             # $4
                  \z
                |
                  \n{{2,}}
                  (?=\S)
                  (?!                       # Negative lookahead for another list item marker
                    [ ]*
                    {0}[ ]+
                  )
              )
            )", string.Format("(?:{0}|{1})", _markerUL, _markerOL), _tabWidth - 1);

/// <summary>List pattern anchored at the start of any line.</summary>
private static Regex _listNested = new Regex(@"^" + _wholeList,
    RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

/// <summary>
/// List pattern anchored after a blank line or at the start of the input.
/// </summary>
private static Regex _listTopLevel = new Regex(@"(?:(?<=\n\n)|\A\n?)" + _wholeList,
    RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

/// <summary>
/// Converts Markdown lists in <paramref name="text"/> using ListEvaluator.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <returns>The text with lists replaced.</returns>
private string DoLists(string text)
{
    // Inside a list (_listLevel > 0) the nested pattern is used; at the top
    // level the stricter top-level pattern applies.
    Regex listPattern = _listLevel > 0 ? _listNested : _listTopLevel;
    return listPattern.Replace(text, ListEvaluator);
}
01162 
/// <summary>
/// Match evaluator for a whole list. Group 1 is the list text and group 3
/// the first item marker, which decides between ul and ol.
/// </summary>
/// <param name="match">The regex match to convert.</param>
/// <returns>The list element, with its items processed by ProcessListItems.</returns>
private string ListEvaluator(Match match)
{
    bool unordered = Regex.IsMatch(match.Groups[3].Value, _markerUL);
    string tag = unordered ? "ul" : "ol";

    // Runs of two or more newlines become exactly three, so that a paragraph
    // can be made for the last item in the list if necessary.
    string body = Regex.Replace(match.Groups[1].Value, @"\n{2,}", "\n\n\n");
    string items = ProcessListItems(body, unordered ? _markerUL : _markerOL);

    return "<" + tag + ">\n" + items + "</" + tag + ">\n";
}
01177 
/// <summary>
/// Converts the items of a single list into li elements via ListItemEvaluator.
/// </summary>
/// <param name="list">The text of one whole list.</param>
/// <param name="marker">The regex pattern text that matches an item marker of this list.</param>
/// <returns>The list text with each item replaced.</returns>
private string ProcessListItems(string list, string marker)
{
    // _listLevel tracks how deeply we are nested inside lists: it is
    // incremented on entry and decremented on exit, so zero means "not in a
    // list".
    //
    // The counter exists because outside a list we want text such as
    //
    //    I recommend upgrading to version
    //    8. Oops, now this line is treated
    //    as a sub-list.
    //
    // to stay a single paragraph even though its second line starts with a
    // digit-period-space sequence, whereas inside a list (or sub-list) that
    // same line starts a sub-list. This is a kludge: the syntax is hard to
    // parse perfectly here.

    _listLevel++;
    try
    {
        // Trim trailing blank lines:
        list = Regex.Replace(list, @"\n{2,}\z", "\n");

        string pattern = string.Format(
          @"(\n)?                      # leading line = $1
                (^[ ]*)                    # leading whitespace = $2
                ({0}) [ ]+                 # list marker = $3
                ((?s:.+?)                  # list item text = $4
                (\n{{1,2}}))      
                (?= \n* (\z | \2 ({0}) [ ]+))", marker);

        return Regex.Replace(list, pattern, new MatchEvaluator(ListItemEvaluator),
                             RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
    }
    finally
    {
        // Restore the nesting level even if an evaluator throws; otherwise
        // later DoLists calls on this instance would keep using the
        // nested-list pattern.
        _listLevel--;
    }
}
01223 
/// <summary>
/// Match evaluator for a single list item. Group 4 is the item text and
/// group 1 the optional leading blank line.
/// </summary>
/// <param name="match">The regex match to convert.</param>
/// <returns>The li element followed by a newline.</returns>
private string ListItemEvaluator(Match match)
{
    string body = match.Groups[4].Value;
    bool hasLeadingLine = match.Groups[1].Value.Length > 0;

    string content;
    if (hasLeadingLine || Regex.IsMatch(body, @"\n{2,}"))
    {
        // Items preceded by, or containing, a blank line get full block
        // processing. Bad indentation could be corrected here.
        content = RunBlockGamut(Outdent(body) + "\n");
    }
    else
    {
        // Otherwise recurse for sub-lists, then apply span-level processing.
        string nested = DoLists(Outdent(body)).TrimEnd('\n');
        content = RunSpanGamut(nested);
    }

    return "<li>" + content + "</li>\n";
}
01242 
01243 
/// <summary>
/// Matches an indented code block: after a blank line or at the start of the
/// input, one or more lines that each begin with _tabWidth spaces, up to a
/// less-indented non-blank line or the end of the document.
/// </summary>
private static Regex _codeBlock = new Regex(string.Format(@"
                    (?:\n\n|\A\n?)
                    (                        # $1 = the code block -- one or more lines, starting with a space
                    (?:
                        (?:[ ]{{{0}}})       # Lines must start with a tab-width of spaces
                        .*\n+
                    )+
                    )
                    ((?=^[ ]{{0,{0}}}\S)|\Z) # Lookahead for non-space at line-start, or end of doc",
                    _tabWidth), RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

/// <summary>
/// Replaces indented code blocks in <paramref name="text"/> using
/// CodeBlockEvaluator.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <returns>The text with code blocks replaced.</returns>
private string DoCodeBlocks(string text)
{
    return _codeBlock.Replace(text, CodeBlockEvaluator);
}
01263 
/// <summary>
/// Match evaluator for indented code blocks. Group 1 is outdented, encoded
/// with EncodeCode, stripped via _newlinesLeadingTrailing and wrapped in
/// pre/code tags.
/// </summary>
/// <param name="match">The regex match to convert.</param>
/// <returns>The pre/code markup surrounded by blank lines.</returns>
private string CodeBlockEvaluator(Match match)
{
    string encoded = EncodeCode(Outdent(match.Groups[1].Value));
    string trimmed = _newlinesLeadingTrailing.Replace(encoded, "");

    return "\n\n<pre><code>" + trimmed + "\n</code></pre>\n\n";
}
01273 
/// <summary>
/// Matches a code span: a run of backticks not preceded by a backslash, the
/// span content, and a closing run of the same length that is neither
/// preceded nor followed by another backtick.
/// </summary>
private static Regex _codeSpan = new Regex(@"
                    (?<!\\)   # Character before opening ` can't be a backslash
                    (`+)      # $1 = Opening run of `
                    (.+?)     # $2 = The code block
                    (?<!`)
                    \1
                    (?!`)", RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

/// <summary>
/// Replaces backtick-delimited code spans in <paramref name="text"/> using
/// CodeSpanEvaluator.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <returns>The text with code spans replaced.</returns>
private string DoCodeSpans(string text)
{
    // Multiple backticks may be used as delimiters so the span can contain
    // literal backticks. For example
    //
    //     Just type ``foo `bar` baz`` at the prompt.
    //
    // becomes
    //
    //     <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
    //
    // Any number of backticks can be used as delimiters: to include three
    // consecutive backticks in the code, delimit with four, and so on.
    //
    // Spaces just inside the delimiters allow literal backticks at the edges:
    //
    //     ... type `` `bar` `` ...
    //
    // becomes
    //
    //     ... type <code>`bar`</code> ...

    return _codeSpan.Replace(text, CodeSpanEvaluator);
}
01311 
/// <summary>
/// Match evaluator for code spans. Group 2 has its leading and trailing
/// spaces removed, is encoded with EncodeCode and wrapped in code tags.
/// </summary>
/// <param name="match">The regex match to convert.</param>
/// <returns>The code element.</returns>
private string CodeSpanEvaluator(Match match)
{
    string withoutLeading = Regex.Replace(match.Groups[2].Value, @"^[ ]*", "");
    string withoutTrailing = Regex.Replace(withoutLeading, @"[ ]*$", "");

    return "<code>" + EncodeCode(withoutTrailing) + "</code>";
}
01321 
01322 
/// <summary>Bold: text delimited by a matching pair of ** or __.</summary>
private static Regex _bold = new Regex(@"(\*\*|__) (?=\S) (.+?[*_]*) (?<=\S) \1",
    RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

/// <summary>
/// Strict bold: as _bold, but the delimiters must be bounded by a non-word
/// character, an underscore, or the start/end of the input.
/// </summary>
private static Regex _strictBold = new Regex(@"([\W_]|^) (\*\*|__) (?=\S) ([^\r]*?\S[\*_]*) \2 ([\W_]|$)",
    RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

/// <summary>Italic: text delimited by a matching pair of * or _.</summary>
private static Regex _italic = new Regex(@"(\*|_) (?=\S) (.+?) (?<=\S) \1",
    RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

/// <summary>
/// Strict italic: as _italic, but with the same boundary requirement as
/// _strictBold and no * or _ inside the emphasized text.
/// </summary>
private static Regex _strictItalic = new Regex(@"([\W_]|^) (\*|_) (?=\S) ([^\r\*_]*?\S) \2 ([\W_]|$)",
    RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline | RegexOptions.Compiled);

/// <summary>
/// Converts Markdown emphasis in <paramref name="text"/> to strong and em
/// elements, using the strict patterns when _strictBoldItalic is set.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <returns>The text with emphasis replaced.</returns>
private string DoItalicsAndBold(string text)
{
    // In both modes <strong> must be produced before <em>.
    if (!_strictBoldItalic)
    {
        string bolded = _bold.Replace(text, "<strong>$2</strong>");
        return _italic.Replace(bolded, "<em>$2</em>");
    }

    string strictlyBolded = _strictBold.Replace(text, "$1<strong>$3</strong>$4");
    return _strictItalic.Replace(strictlyBolded, "$1<em>$3</em>$4");
}
01352 
/// <summary>
/// Inserts br elements before newlines in <paramref name="text"/>. When
/// _autoNewlines is set every newline gets one; otherwise only newlines
/// preceded by two or more spaces do (and those spaces are consumed).
/// </summary>
/// <param name="text">The text to scan.</param>
/// <returns>The text with hard breaks inserted.</returns>
private string DoHardBreaks(string text)
{
    string lineBreak = string.Format("<br{0}\n", _emptyElementSuffix);
    string breakPattern = _autoNewlines ? @"\n" : @" {2,}\n";
    return Regex.Replace(text, breakPattern, lineBreak);
}
01364 
/// <summary>
/// Matches one or more consecutive blockquote paragraphs: a line starting
/// with '>' (after optional spaces), any directly following non-empty lines,
/// and trailing blank lines.
/// </summary>
private static Regex _blockquote = new Regex(@"
            (                           # Wrap whole match in $1
                (
                ^[ ]*>[ ]?              # '>' at the start of a line
                    .+\n                # rest of the first line
                (.+\n)*                 # subsequent consecutive lines
                \n*                     # blanks
                )+
            )", RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline | RegexOptions.Compiled);

/// <summary>
/// Replaces blockquotes in <paramref name="text"/> using BlockQuoteEvaluator.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <returns>The text with blockquotes replaced.</returns>
private string DoBlockQuotes(string text)
{
    MatchEvaluator toBlockquote = BlockQuoteEvaluator;
    return _blockquote.Replace(text, toBlockquote);
}
01382 
/// <summary>
/// Match evaluator for blockquotes. Strips one level of quoting from group 1,
/// runs the block gamut on the result, indents it by two spaces and wraps it
/// in a blockquote element.
/// </summary>
/// <param name="match">The regex match to convert.</param>
/// <returns>The blockquote element followed by two newlines.</returns>
private string BlockQuoteEvaluator(Match match)
{
    const RegexOptions perLine = RegexOptions.Multiline;

    // Trim one level of quoting, then blank out whitespace-only lines.
    string unquoted = Regex.Replace(match.Groups[1].Value, @"^[ ]*>[ ]?", "", perLine);
    string tidied = Regex.Replace(unquoted, @"^[ ]+$", "", perLine);

    // Recurse: the quoted content is itself block-level Markdown.
    string rendered = RunBlockGamut(tidied);

    // Indent every line of the rendered content by two spaces.
    string indented = Regex.Replace(rendered, @"^", "  ", perLine);

    // The added indentation would corrupt <pre> content, so undo it there.
    string repaired = Regex.Replace(indented, @"(\s*<pre>.+?</pre>)", BlockQuoteEvaluator2,
                                    RegexOptions.IgnorePatternWhitespace | RegexOptions.Singleline);

    return "<blockquote>\n" + repaired + "\n</blockquote>\n\n";
}
01398 
/// <summary>
/// Match evaluator that removes two leading spaces from every line of
/// group 1 that starts with them.
/// </summary>
/// <param name="match">The regex match whose group 1 is to be un-indented.</param>
/// <returns>Group 1 with the two-space line prefix removed.</returns>
private string BlockQuoteEvaluator2(Match match)
{
    string preBlock = match.Groups[1].Value;
    string unindented = Regex.Replace(preBlock, @"^  ", "", RegexOptions.Multiline);
    return unindented;
}
01403 
/// <summary>
/// Matches a bare http, https or ftp URL that is preceded by whitespace or
/// the start of the input. Group 1 is the preceding character, groups 2 and
/// 3 the URL, and group 4 the following character.
/// </summary>
private static Regex _autolinkBare = new Regex(@"(^|\s)(https?|ftp)(://[-A-Z0-9+&@#/%?=~_|\[\]\(\)!:,\.;]*[-A-Z0-9+&@#/%=~_|\[\]])($|\W)",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>
/// Converts angle-bracketed URLs, and optionally bare URLs and
/// angle-bracketed e-mail addresses, in <paramref name="text"/> to links.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <returns>The text with automatic links replaced.</returns>
private string DoAutoLinks(string text)
{
    if (_autoHyperlink)
    {
        // Wrap bare URLs in < > so the next step links them as well. At this
        // point every other URL in the text has already been hyperlinked,
        // except for the <http://www.foo.com> form.
        text = _autolinkBare.Replace(text, @"$1<$2$3>$4");
    }

    // Hyperlinks: <http://foo.com>
    string linked = Regex.Replace(text, "<((https?|ftp):[^'\">\\s]+)>", HyperlinkEvaluator);

    if (!_linkEmails)
        return linked;

    // Email addresses: <address@domain.foo>
    const string emailPattern =
                    @"<
                      (?:mailto:)?
                      (
                        [-.\w]+
                        \@
                        [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
                      )
                      >";

    return Regex.Replace(linked, emailPattern, EmailEvaluator,
                         RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);
}
01444 
/// <summary>
/// Match evaluator that turns group 1 into an anchor element whose href and
/// visible text are both that group's value.
/// </summary>
/// <param name="match">The regex match to convert.</param>
/// <returns>The anchor element.</returns>
private string HyperlinkEvaluator(Match match)
{
    string address = match.Groups[1].Value;
    return "<a href=\"" + address + "\">" + address + "</a>";
}
01450 
/// <summary>
/// Match evaluator that turns the e-mail address in group 1 into an
/// obfuscated mailto link.
/// </summary>
/// <param name="match">The regex match to convert.</param>
/// <returns>
/// An anchor element whose href is the encoded "mailto:" address and whose
/// visible text is the encoded address without the "mailto:" prefix.
/// </returns>
private string EmailEvaluator(Match match)
{
    // Input:  an email address, e.g. "foo@example.com"
    //
    // Output: the address as a mailto link, with characters of the address
    //         encoded as decimal or hex entities in the hope of foiling most
    //         address-harvesting spam bots, e.g.
    //
    //   <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
    //     x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
    //     &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
    //
    // Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
    // mailing list: <http://tinyurl.com/yu7ue>
    string address = Unescape(match.Groups[1].Value);

    // EncodeEmailAddress leaves ':' alone, which lets the prefix be found below.
    string encoded = EncodeEmailAddress("mailto:" + address);
    string anchor = "<a href=\"" + encoded + "\">" + encoded + "</a>";

    // Strip the mailto: from the visible part.
    return Regex.Replace(anchor, "\">.+?:", "\">");
}
01480 
01481 
/// <summary>Matches between one and _tabWidth spaces at the start of a line.</summary>
private static Regex _outDent = new Regex(@"^[ ]{1," + _tabWidth + @"}", RegexOptions.Multiline | RegexOptions.Compiled);

/// <summary>
/// Removes one level of indentation (up to _tabWidth leading spaces) from
/// every line of <paramref name="block"/>.
/// </summary>
/// <param name="block">The text to outdent.</param>
/// <returns>The outdented text.</returns>
private string Outdent(string block)
{
    string outdented = _outDent.Replace(block, String.Empty);
    return outdented;
}
01491 
01492 
01493         #region Encoding and Normalization
01494 
01495 
/// <summary>
/// Obfuscates <paramref name="addr"/> by randomly writing each character as
/// itself, as a hex entity or as a decimal entity. ':' is always kept
/// literal and '@' is always written as an entity.
/// </summary>
/// <param name="addr">The address (or mailto: URL) to encode.</param>
/// <returns>The encoded address.</returns>
private string EncodeEmailAddress(string addr)
{
    var encoded = new StringBuilder(addr.Length * 5);
    var dice = new Random();

    for (int pos = 0; pos < addr.Length; pos++)
    {
        char ch = addr[pos];
        int roll = dice.Next(1, 100);

        bool keepLiteral = ch != '@' && (ch == ':' || roll > 90);
        if (keepLiteral)
        {
            encoded.Append(ch);                             // m
            continue;
        }

        string entityFormat = roll < 45
            ? "&#x{0:x};"                                   // &#x6D
            : "&#{0};";                                     // &#109
        encoded.AppendFormat(entityFormat, (int)ch);
    }

    return encoded.ToString();
}
01518 
/// <summary>
/// Matches each character that EncodeCodeEvaluator rewrites:
/// &amp; &lt; &gt; \ * _ { } [ ]
/// </summary>
private static Regex _codeEncoder = new Regex(@"&|<|>|\\|\*|_|\{|\}|\[|\]", RegexOptions.Compiled);

/// <summary>
/// Encodes the characters matched by _codeEncoder in <paramref name="code"/>
/// using EncodeCodeEvaluator.
/// </summary>
/// <param name="code">The code text to encode.</param>
/// <returns>The encoded text.</returns>
private string EncodeCode(string code)
{
    MatchEvaluator encodeOne = EncodeCodeEvaluator;
    return _codeEncoder.Replace(code, encodeOne);
}
/// <summary>
/// Match evaluator that maps a single matched character to its encoded form.
/// </summary>
/// <param name="match">A match whose value is the character to encode.</param>
/// <returns>
/// An HTML entity for &amp;, &lt; and &gt;; for anything else, the entry of
/// _escapeTable keyed by the matched text.
/// </returns>
private string EncodeCodeEvaluator(Match match)
{
    string token = match.Value;

    // Encode all ampersands; HTML entities are not entities within a
    // Markdown code span.
    if (token == "&")
        return "&amp;";

    // Angle brackets must not be read as markup.
    if (token == "<")
        return "&lt;";
    if (token == ">")
        return "&gt;";

    // Everything else is a character that is magic in Markdown.
    return _escapeTable[token];
}
01546 
01547 
/// <summary>
/// Matches an ampersand that does not start a complete entity reference. A
/// complete reference is a decimal (&amp;#123;), hexadecimal (&amp;#x1F;) or
/// named (&amp;copy;) form terminated by ';'. The alternatives are grouped so
/// the terminating ';' is required for every form, not only the named one.
/// </summary>
private static Regex _amps = new Regex(@"&(?!((#[0-9]+)|(#[xX][a-fA-F0-9]+)|([a-zA-Z][a-zA-Z0-9]*));)", RegexOptions.ExplicitCapture | RegexOptions.Compiled);

/// <summary>
/// Matches a '&lt;' that is not followed by a letter, '/', '?', '$' or '!'.
/// </summary>
private static Regex _angles = new Regex(@"<(?![A-Za-z/?\$!])", RegexOptions.ExplicitCapture | RegexOptions.Compiled);

/// <summary>
/// Encodes stray ampersands and left angle brackets in <paramref name="s"/>
/// as &amp;amp; and &amp;lt;, leaving complete entity references and
/// tag-like sequences untouched.
/// </summary>
/// <param name="s">The text to encode.</param>
/// <returns>The encoded text.</returns>
private string EncodeAmpsAndAngles(string s)
{
    s = _amps.Replace(s, "&amp;");
    s = _angles.Replace(s, "&lt;");
    return s;
}
01560 
// Pattern of the backslash escape sequences handled by EscapeBackslashes.
// It has no initializer here; presumably it is assigned elsewhere in the
// class - confirm before relying on it.
private static Regex _backslashEscapes; 

/// <summary>
/// Replaces every match of _backslashEscapes in <paramref name="s"/> using
/// EscapeBackslashesEvaluator.
/// </summary>
/// <param name="s">The text to process.</param>
/// <returns>The text with backslash escapes replaced.</returns>
private string EscapeBackslashes(string s)
{
    MatchEvaluator lookup = EscapeBackslashesEvaluator;
    return _backslashEscapes.Replace(s, lookup);
}
/// <summary>
/// Match evaluator that returns the _backslashEscapeTable entry keyed by the
/// matched text.
/// </summary>
/// <param name="match">The regex match to look up.</param>
/// <returns>The table entry for the matched text.</returns>
private string EscapeBackslashesEvaluator(Match match)
{
    string escapeSequence = match.Value;
    return _backslashEscapeTable[escapeSequence];
}
01574        
/// <summary>
/// Matches an escape placeholder: a run of digits enclosed in a pair of
/// \x1A characters.
/// </summary>
private static Regex _unescapes = new Regex("\x1A\\d+\x1A", RegexOptions.Compiled);

/// <summary>
/// Replaces every escape placeholder in <paramref name="s"/> using
/// UnescapeEvaluator.
/// </summary>
/// <param name="s">The text to process.</param>
/// <returns>The text with placeholders replaced.</returns>
private string Unescape(string s)
{
    MatchEvaluator restore = UnescapeEvaluator;
    return _unescapes.Replace(s, restore);
}
/// <summary>
/// Match evaluator that returns the _invertedEscapeTable entry keyed by the
/// matched placeholder.
/// </summary>
/// <param name="match">The regex match to look up.</param>
/// <returns>The table entry for the matched text.</returns>
private string UnescapeEvaluator(Match match)
{
    string placeholder = match.Value;
    return _invertedEscapeTable[placeholder];
}
01588 
01589 
/// <summary>
/// Replaces every '*' and '_' in <paramref name="s"/> with the corresponding
/// _escapeTable entry.
/// </summary>
/// <param name="s">The text to escape.</param>
/// <returns>The escaped text.</returns>
private string EscapeBoldItalic(string s)
{
    // Asterisks first, then underscores, as two chained replacements.
    return s.Replace("*", _escapeTable["*"])
            .Replace("_", _escapeTable["_"]);
}
01599 
/// <summary>Characters that EncodeProblemUrlChars percent-encodes.</summary>
private static char[] _problemUrlChars = @"""'*()[]$:".ToCharArray();

/// <summary>
/// Percent-encodes (lower-case hex) the characters of <paramref name="url"/>
/// listed in _problemUrlChars. Returns the URL untouched when
/// _encodeProblemUrlCharacters is false.
/// </summary>
/// <param name="url">The URL to encode.</param>
/// <returns>The encoded URL.</returns>
private string EncodeProblemUrlChars(string url)
{
    if (!_encodeProblemUrlCharacters) return url;

    var encoded = new StringBuilder(url.Length);
    int lastIndex = url.Length - 1;

    for (int pos = 0; pos <= lastIndex; pos++)
    {
        char ch = url[pos];
        bool mustEncode = Array.IndexOf(_problemUrlChars, ch) >= 0;

        // A colon followed by '/' or a digit is kept as-is; a colon in the
        // final position is always encoded.
        if (mustEncode && ch == ':' && pos < lastIndex)
        {
            char next = url[pos + 1];
            bool nextIsDigit = next >= '0' && next <= '9';
            mustEncode = next != '/' && !nextIsDigit;
        }

        if (mustEncode)
            encoded.AppendFormat("%{0:x}", (byte)ch);
        else
            encoded.Append(ch);
    }

    return encoded.ToString();
}
01628 
01629 
/// <summary>
/// Tokenizes <paramref name="text"/> with TokenizeHTML and, inside tag tokens
/// only, replaces backslashes, embedded code tags, '*' and '_' with their
/// _escapeTable entries. Other tokens are copied through unchanged.
/// </summary>
/// <param name="text">The text to process.</param>
/// <returns>The text rebuilt from the processed tokens.</returns>
private string EscapeSpecialCharsWithinTagAttributes(string text)
{
    var pieces = TokenizeHTML(text);

    // Rebuild the text from the tokens.
    var rebuilt = new StringBuilder(text.Length);

    foreach (var piece in pieces)
    {
        if (piece.Type != TokenType.Tag)
        {
            rebuilt.Append(piece.Value);
            continue;
        }

        string tag = piece.Value.Replace(@"\", _escapeTable[@"\"]);

        // A <code> or </code> with at least one character on each side is
        // replaced by the backtick escape.
        tag = Regex.Replace(tag, "(?<=.)</?code>(?=.)", _escapeTable[@"`"]);

        rebuilt.Append(EscapeBoldItalic(tag));
    }

    return rebuilt.ToString();
}
01660 
/// <summary>
/// Normalizes <paramref name="text"/> line by line: all line endings become
/// '\n', tabs are expanded to spaces up to the next multiple of _tabWidth,
/// '\x1A' characters are removed, and lines containing only spaces or tabs
/// are emptied. Three newlines are appended after the last line.
/// </summary>
/// <param name="text">The raw input text.</param>
/// <returns>The normalized text.</returns>
private string Normalize(string text)
{
    var normalized = new StringBuilder(text.Length);
    var currentLine = new StringBuilder();
    bool hasContent = false;            // true once the line holds a non-space character
    int lastIndex = text.Length - 1;

    for (int pos = 0; pos <= lastIndex; pos++)
    {
        char ch = text[pos];

        // A line ends at '\n', or at a '\r' that is neither the final
        // character nor followed by '\n' (that '\n' ends the line itself).
        bool endsLine = ch == '\n'
            || (ch == '\r' && pos < lastIndex && text[pos + 1] != '\n');

        if (endsLine)
        {
            if (hasContent) normalized.Append(currentLine);
            normalized.Append('\n');
            currentLine.Length = 0;
            hasContent = false;
        }
        else if (ch == '\t')
        {
            // Pad with spaces up to the next tab stop.
            int padding = _tabWidth - currentLine.Length % _tabWidth;
            currentLine.Append(' ', padding);
        }
        else if (ch != '\r' && ch != '\x1A')
        {
            // Remaining '\r' characters and all '\x1A' characters are dropped.
            if (ch != ' ') hasContent = true;
            currentLine.Append(ch);
        }
    }

    if (hasContent) normalized.Append(currentLine);

    // One newline to end the final line, plus two more before returning.
    return normalized.Append("\n\n\n").ToString();
}
01710 
01711         #endregion
01712 
/// <summary>
/// Returns <paramref name="text"/> concatenated with itself
/// <paramref name="count"/> times.
/// </summary>
/// <param name="text">The string to repeat.</param>
/// <param name="count">How many copies to produce.</param>
/// <returns>The repeated string; empty when <paramref name="count"/> is zero.</returns>
private static string RepeatString(string text, int count)
{
    var buffer = new StringBuilder(text.Length * count);
    int remaining = count;
    while (remaining-- > 0)
    {
        buffer.Append(text);
    }
    return buffer.ToString();
}
01723 
01724     }
01725 }