From 8d669d2263bdcfb870a0f734de9fe7c76e2ec266 Mon Sep 17 00:00:00 2001 From: Christian Neukirchen Date: Wed, 17 Sep 2008 16:31:21 +0200 Subject: Initial import of Trivium --- vendor/bluecloth.rb | 1144 +++++++++++++++++++++++++++++++++++++++++++++++++++ vendor/htemplate.rb | 68 +++ vendor/rubypants.rb | 490 ++++++++++++++++++++++ 3 files changed, 1702 insertions(+) create mode 100644 vendor/bluecloth.rb create mode 100644 vendor/htemplate.rb create mode 100644 vendor/rubypants.rb (limited to 'vendor') diff --git a/vendor/bluecloth.rb b/vendor/bluecloth.rb new file mode 100644 index 0000000..96266f2 --- /dev/null +++ b/vendor/bluecloth.rb @@ -0,0 +1,1144 @@ +#!/usr/bin/ruby +# +# Bluecloth is a Ruby implementation of Markdown, a text-to-HTML conversion +# tool. +# +# == Synopsis +# +# doc = BlueCloth::new " +# ## Test document ## +# +# Just a simple test. +# " +# +# puts doc.to_html +# +# == Authors +# +# * Michael Granger +# +# == Contributors +# +# * Martin Chase - Peer review, helpful suggestions +# * Florian Gross - Filter options, suggestions +# +# == Copyright +# +# Original version: +# Copyright (c) 2003-2004 John Gruber +# +# All rights reserved. +# +# Ruby port: +# Copyright (c) 2004 The FaerieMUD Consortium. +# +# BlueCloth is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 2 of the License, or (at your option) any later +# version. +# +# BlueCloth is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# == To-do +# +# * Refactor some of the larger uglier methods that have to do their own +# brute-force scanning because of lack of Perl features in Ruby's Regexp +# class. Alternately, could add a dependency on 'pcre' and use most Perl +# regexps. +# +# * Put the StringScanner in the render state for thread-safety. +# +# == Version +# +# $Id: bluecloth.rb 69 2004-08-25 05:27:15Z ged $ +# + +require 'digest/md5' +require 'logger' +require 'strscan' + + +### BlueCloth is a Ruby implementation of Markdown, a text-to-HTML conversion +### tool. +class BlueCloth < String + + ### Exception class for formatting errors. + class FormatError < RuntimeError + + ### Create a new FormatError with the given source +str+ and an optional + ### message about the +specific+ error. + def initialize( str, specific=nil ) + if specific + msg = "Bad markdown format near %p: %s" % [ str, specific ] + else + msg = "Bad markdown format near %p" % str + end + + super( msg ) + end + end + + + # Release Version + Version = '0.0.3' + + # SVN Revision + SvnRev = %q$Rev: 69 $ + + # SVN Id tag + SvnId = %q$Id: bluecloth.rb 69 2004-08-25 05:27:15Z ged $ + + # SVN URL + SvnUrl = %q$URL: svn+ssh://svn.faeriemud.org/usr/local/svn/BlueCloth/trunk/lib/bluecloth.rb $ + + + # Rendering state struct. Keeps track of URLs, titles, and HTML blocks + # midway through a render. I prefer this to the globals of the Perl version + # because globals make me break out in hives. Or something. + RenderState = Struct::new( "RenderState", :urls, :titles, :html_blocks, :log ) + + # Tab width for #detab! if none is specified + TabWidth = 4 + + # The tag-closing string -- set to '>' for HTML + EmptyElementSuffix = "/>"; + + # Table of MD5 sums for escaped characters + EscapeTable = {} + '\\`*_{}[]()#.!'.split(//).each {|char| + hash = Digest::MD5::hexdigest( char ) + + EscapeTable[ char ] = { + :md5 => hash, + :md5re => Regexp::new( hash ), + :re => Regexp::new( '\\\\' + Regexp::escape(char) ), + } + } + + + ################################################################# + ### I N S T A N C E M E T H O D S + ################################################################# + + ### Create a new BlueCloth string. + def initialize( content="", *restrictions ) + @log = Logger::new( $deferr ) + @log.level = $DEBUG ? + Logger::DEBUG : + ($VERBOSE ? Logger::INFO : Logger::WARN) + @scanner = nil + + # Add any restrictions, and set the line-folding attribute to reflect + # what happens by default. + @filter_html = nil + @filter_styles = nil + restrictions.flatten.each {|r| __send__("#{r}=", true) } + @fold_lines = true + + super( content ) + + @log.debug "String is: %p" % self + end + + + ###### + public + ###### + + # Filters for controlling what gets output for untrusted input. (But really, + # you're filtering bad stuff out of untrusted input at submission-time via + # untainting, aren't you?) + attr_accessor :filter_html, :filter_styles + + # RedCloth-compatibility accessor. Line-folding is part of Markdown syntax, + # so this isn't used by anything. + attr_accessor :fold_lines + + + ### Render Markdown-formatted text in this string object as HTML and return + ### it. The parameter is for compatibility with RedCloth, and is currently + ### unused, though that may change in the future. + def to_html( lite=false ) + + # Create a StringScanner we can reuse for various lexing tasks + @scanner = StringScanner::new( '' ) + + # Make a structure to carry around stuff that gets placeholdered out of + # the source. + rs = RenderState::new( {}, {}, {} ) + + # Make a copy of the string with normalized line endings, tabs turned to + # spaces, and a couple of guaranteed newlines at the end + text = self.gsub( /\r\n?/, "\n" ).detab + text += "\n\n" + @log.debug "Normalized line-endings: %p" % text + + # Filter HTML if we're asked to do so + if self.filter_html + text.gsub!( "<", "<" ) + text.gsub!( ">", ">" ) + @log.debug "Filtered HTML: %p" % text + end + + # Simplify blank lines + text.gsub!( /^ +$/, '' ) + @log.debug "Tabs -> spaces/blank lines stripped: %p" % text + + # Replace HTML blocks with placeholders + text = hide_html_blocks( text, rs ) + @log.debug "Hid HTML blocks: %p" % text + @log.debug "Render state: %p" % rs + + # Strip link definitions, store in render state + text = strip_link_definitions( text, rs ) + @log.debug "Stripped link definitions: %p" % text + @log.debug "Render state: %p" % rs + + # Escape meta-characters + text = escape_special_chars( text ) + @log.debug "Escaped special characters: %p" % text + + # Transform block-level constructs + text = apply_block_transforms( text, rs ) + @log.debug "After block-level transforms: %p" % text + + # Now swap back in all the escaped characters + text = unescape_special_chars( text ) + @log.debug "After unescaping special characters: %p" % text + + return text + end + + + ### Convert tabs in +str+ to spaces. + def detab( tabwidth=TabWidth ) + copy = self.dup + copy.detab!( tabwidth ) + return copy + end + + + ### Convert tabs to spaces in place and return self if any were converted. + def detab!( tabwidth=TabWidth ) + newstr = self.split( /\n/ ).collect {|line| + line.gsub( /(.*?)\t/ ) do + $1 + ' ' * (tabwidth - $1.length % tabwidth) + end + }.join("\n") + self.replace( newstr ) + end + + + ####### + #private + ####### + + ### Do block-level transforms on a copy of +str+ using the specified render + ### state +rs+ and return the results. + def apply_block_transforms( str, rs ) + # Port: This was called '_runBlockGamut' in the original + + @log.debug "Applying block transforms to:\n %p" % str + text = transform_headers( str, rs ) + text = transform_hrules( text, rs ) + text = transform_lists( text, rs ) + text = transform_code_blocks( text, rs ) + text = transform_block_quotes( text, rs ) + text = transform_auto_links( text, rs ) + text = hide_html_blocks( text, rs ) + + text = form_paragraphs( text, rs ) + + @log.debug "Done with block transforms:\n %p" % text + return text + end + + + ### Apply Markdown span transforms to a copy of the specified +str+ with the + ### given render state +rs+ and return it. + def apply_span_transforms( str, rs ) + @log.debug "Applying span transforms to:\n %p" % str + + str = transform_code_spans( str, rs ) + str = encode_html( str ) + str = transform_images( str, rs ) + str = transform_anchors( str, rs ) + str = transform_italic_and_bold( str, rs ) + + # Hard breaks + str.gsub!( / {2,}\n/, " + #
+ # tags for inner block must be indented. + #
+ # + StrictBlockRegex = %r{ + ^ # Start of line + <(#{StrictTagPattern}) # Start tag: \2 + \b # word break + (.*\n)*? # Any number of lines, minimal match + # Matching end tag + [ ]* # trailing spaces + $ # End of line or document + }ix + + # More-liberal block-matching + LooseBlockRegex = %r{ + ^ # Start of line + <(#{LooseTagPattern}) # start tag: \2 + \b # word break + (.*\n)*? # Any number of lines, minimal match + .* # Anything + Matching end tag + [ ]* # trailing spaces + $ # End of line or document + }ix + + # Special case for
. + HruleBlockRegex = %r{ + ( # $1 + \A\n? # Start of doc + optional \n + | # or + .*\n\n # anything + blank line + ) + ( # save in $2 + [ ]* # Any spaces +
])*? # Attributes + /?> # Tag close + $ # followed by a blank line or end of document + ) + }ix + + ### Replace all blocks of HTML in +str+ that start in the left margin with + ### tokens. + def hide_html_blocks( str, rs ) + @log.debug "Hiding HTML blocks in %p" % str + + # Tokenizer proc to pass to gsub + tokenize = lambda {|match| + key = Digest::MD5::hexdigest( match ) + rs.html_blocks[ key ] = match + @log.debug "Replacing %p with %p" % [ match, key ] + "\n\n#{key}\n\n" + } + + rval = str.dup + + @log.debug "Finding blocks with the strict regex..." + rval.gsub!( StrictBlockRegex, &tokenize ) + + @log.debug "Finding blocks with the loose regex..." + rval.gsub!( LooseBlockRegex, &tokenize ) + + @log.debug "Finding hrules..." + rval.gsub!( HruleBlockRegex ) {|match| $1 + tokenize[$2] } + + return rval + end + + + # Link defs are in the form: ^[id]: url "optional title" + LinkRegex = %r{ + ^[ ]*\[(.+)\]: # id = $1 + [ ]* + \n? # maybe *one* newline + [ ]* + ? # url = $2 + [ ]* + \n? # maybe one newline + [ ]* + (?: + # Titles are delimited by "quotes" or (parens). + ["(] + (.+?) # title = $3 + [")] # Matching ) or " + [ ]* + )? # title is optional + (?:\n+|\Z) + }x + + ### Strip link definitions from +str+, storing them in the given RenderState + ### +rs+. + def strip_link_definitions( str, rs ) + str.gsub( LinkRegex ) {|match| + id, url, title = $1, $2, $3 + + rs.urls[ id.downcase ] = encode_html( url ) + unless title.nil? + rs.titles[ id.downcase ] = title.gsub( /"/, """ ) + end + "" + } + end + + + ### Escape special characters in the given +str+ + def escape_special_chars( str ) + @log.debug " Escaping special characters" + text = '' + + # The original Markdown source has something called '$tags_to_skip' + # declared here, but it's never used, so I don't define it. + + tokenize_html( str ) {|token, str| + @log.debug " Adding %p token %p" % [ token, str ] + case token + + # Within tags, encode * and _ + when :tag + text += str. + gsub( /\*/, EscapeTable['*'][:md5] ). + gsub( /_/, EscapeTable['_'][:md5] ) + + # Encode backslashed stuff in regular text + when :text + text += encode_backslash_escapes( str ) + else + raise TypeError, "Unknown token type %p" % token + end + } + + @log.debug " Text with escapes is now: %p" % text + return text + end + + + ### Swap escaped special characters in a copy of the given +str+ and return + ### it. + def unescape_special_chars( str ) + EscapeTable.each {|char, hash| + @log.debug "Unescaping escaped %p with %p" % [ char, hash[:md5re] ] + str.gsub!( hash[:md5re], char ) + } + + return str + end + + + ### Return a copy of the given +str+ with any backslashed special character + ### in it replaced with MD5 placeholders. + def encode_backslash_escapes( str ) + # Make a copy with any double-escaped backslashes encoded + text = str.gsub( /\\\\/, EscapeTable['\\'][:md5] ) + + EscapeTable.each_pair {|char, esc| + next if char == '\\' + text.gsub!( esc[:re], esc[:md5] ) + } + + return text + end + + + ### Transform any Markdown-style horizontal rules in a copy of the specified + ### +str+ and return it. + def transform_hrules( str, rs ) + @log.debug " Transforming horizontal rules" + str.gsub( /^( ?[\-\*_] ?){3,}$/, "\n\n%s\n} % [ + list_type, + transform_list_items( list, rs ), + list_type, + ] + } + end + + + # Pattern for transforming list items + ListItemRegexp = %r{ + (\n)? # leading line = $1 + (^[ ]*) # leading whitespace = $2 + (#{ListMarkerAny}) [ ]+ # list marker = $3 + ((?m:.+?) # list item text = $4 + (\n{1,2})) + (?= \n* (\z | \2 (#{ListMarkerAny}) [ ]+)) + }x + + ### Transform list items in a copy of the given +str+ and return it. + def transform_list_items( str, rs ) + @log.debug " Transforming list items" + + # Trim trailing blank lines + str = str.sub( /\n{2,}\z/, "\n" ) + + str.gsub( ListItemRegexp ) {|line| + @log.debug " Found item line %p" % line + leading_line, item = $1, $4 + + if leading_line or /\n{2,}/.match( item ) + @log.debug " Found leading line or item has a blank" + item = apply_block_transforms( outdent(item), rs ) + else + # Recursion for sub-lists + @log.debug " Recursing for sublist" + item = transform_lists( outdent(item), rs ).chomp + item = apply_span_transforms( item, rs ) + end + + %{
  • %s
  • \n} % item + } + end + + + # Pattern for matching codeblocks + CodeBlockRegexp = %r{ + (?:\n\n|\A) + ( # $1 = the code block + (?: + (?:[ ]{#{TabWidth}} | \t) # a tab or tab-width of spaces + .*\n+ + )+ + ) + (^[ ]{0,#{TabWidth - 1}}\S|\Z) # Lookahead for non-space at + # line-start, or end of doc + }x + + ### Transform Markdown-style codeblocks in a copy of the specified +str+ and + ### return it. + def transform_code_blocks( str, rs ) + @log.debug " Transforming code blocks" + + str.gsub( CodeBlockRegexp ) {|block| + codeblock = $1 + remainder = $2 + + # Generate the codeblock + %{\n\n
    %s\n
    \n\n%s} % + [ encode_code( outdent(codeblock), rs ).rstrip, remainder ] + } + end + + + # Pattern for matching Markdown blockquote blocks + BlockQuoteRegexp = %r{ + (?: + ^[ ]*>[ ]? # '>' at the start of a line + .+\n # rest of the first line + (?:.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + }x + PreChunk = %r{ ( ^ \s*
     .+? 
    ) }xm + + ### Transform Markdown-style blockquotes in a copy of the specified +str+ + ### and return it. + def transform_block_quotes( str, rs ) + @log.debug " Transforming block quotes" + + str.gsub( BlockQuoteRegexp ) {|quote| + @log.debug "Making blockquote from %p" % quote + + quote.gsub!( /^ *> ?/, '' ) # Trim one level of quoting + quote.gsub!( /^ +$/, '' ) # Trim whitespace-only lines + + indent = " " * TabWidth + quoted = %{
    \n%s\n
    \n\n} % + apply_block_transforms( quote, rs ). + gsub( /^/, indent ). + gsub( PreChunk ) {|m| m.gsub(/^#{indent}/o, '') } + @log.debug "Blockquoted chunk is: %p" % quoted + quoted + } + end + + + AutoAnchorURLRegexp = /<((https?|ftp):[^'">\s]+)>/ + AutoAnchorEmailRegexp = %r{ + < + ( + [-.\w]+ + \@ + [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ + ) + > + }xi + + ### Transform URLs in a copy of the specified +str+ into links and return + ### it. + def transform_auto_links( str, rs ) + @log.debug " Transforming auto-links" + str.gsub( AutoAnchorURLRegexp, %{\\1}). + gsub( AutoAnchorEmailRegexp ) {|addr| + encode_email_address( unescape_special_chars($1) ) + } + end + + + # Encoder functions to turn characters of an email address into encoded + # entities. + Encoders = [ + lambda {|char| "&#%03d;" % char}, + lambda {|char| "&#x%X;" % char}, + lambda {|char| char.chr }, + ] + + ### Transform a copy of the given email +addr+ into an escaped version safer + ### for posting publicly. + def encode_email_address( addr ) + + rval = '' + ("mailto:" + addr).each_byte {|b| + case b + when ?: + rval += ":" + when ?@ + rval += Encoders[ rand(2) ][ b ] + else + r = rand(100) + rval += ( + r > 90 ? Encoders[2][ b ] : + r < 45 ? Encoders[1][ b ] : + Encoders[0][ b ] + ) + end + } + + return %{%s} % [ rval, rval.sub(/.+?:/, '') ] + end + + + # Regex for matching Setext-style headers + SetextHeaderRegexp = %r{ + (.+) # The title text ($1) + \n + ([\-=])+ # Match a line of = or -. Save only one in $2. + [ ]*\n+ + }x + + # Regexp for matching ATX-style headers + AtxHeaderRegexp = %r{ + ^(\#{1,6}) # $1 = string of #'s + [ ]* + (.+?) # $2 = Header text + [ ]* + \#* # optional closing #'s (not counted) + \n+ + }x + + ### Apply Markdown header transforms to a copy of the given +str+ amd render + ### state +rs+ and return the result. + def transform_headers( str, rs ) + @log.debug " Transforming headers" + + # Setext-style headers: + # Header 1 + # ======== + # + # Header 2 + # -------- + # + str. + gsub( SetextHeaderRegexp ) {|m| + @log.debug "Found setext-style header" + title, hdrchar = $1, $2 + title = apply_span_transforms( title, rs ) + + case hdrchar + when '=' + %[

    #{title}

    \n\n] + when '-' + %[

    #{title}

    \n\n] + else + title + end + }. + + gsub( AtxHeaderRegexp ) {|m| + @log.debug "Found ATX-style header" + hdrchars, title = $1, $2 + title = apply_span_transforms( title, rs ) + + level = hdrchars.length + %{%s\n\n} % [ level, title, level ] + } + end + + + ### Wrap all remaining paragraph-looking text in a copy of +str+ inside

    + ### tags and return it. + def form_paragraphs( str, rs ) + @log.debug " Forming paragraphs" + grafs = str. + sub( /\A\n+/, '' ). + sub( /\n+\z/, '' ). + split( /\n{2,}/ ) + + rval = grafs.collect {|graf| + + # Unhashify HTML blocks if this is a placeholder + if rs.html_blocks.key?( graf ) + rs.html_blocks[ graf ] + + # Otherwise, wrap in

    tags + else + apply_span_transforms(graf, rs). + sub( /^[ ]*/, '

    ' ) + '

    ' + end + }.join( "\n\n" ) + + @log.debug " Formed paragraphs: %p" % rval + return rval + end + + + # Pattern to match the linkid part of an anchor tag for reference-style + # links. + RefLinkIdRegex = %r{ + [ ]? # Optional leading space + (?:\n[ ]*)? # Optional newline + spaces + \[ + (.*?) # Id = $1 + \] + }x + + InlineLinkRegex = %r{ + \( # Literal paren + [ ]* # Zero or more spaces + ? # URI = $1 + [ ]* # Zero or more spaces + (?: # + ([\"\']) # Opening quote char = $2 + (.*?) # Title = $3 + \2 # Matching quote char + )? # Title is optional + \) + }x + + ### Apply Markdown anchor transforms to a copy of the specified +str+ with + ### the given render state +rs+ and return it. + def transform_anchors( str, rs ) + @log.debug " Transforming anchors" + @scanner.string = str.dup + text = '' + + # Scan the whole string + until @scanner.empty? + + if @scanner.scan( /\[/ ) + link = ''; linkid = '' + depth = 1 + startpos = @scanner.pos + @log.debug " Found a bracket-open at %d" % startpos + + # Scan the rest of the tag, allowing unlimited nested []s. If + # the scanner runs out of text before the opening bracket is + # closed, append the text and return (wasn't a valid anchor). + while depth.nonzero? + linktext = @scanner.scan_until( /\]|\[/ ) + + if linktext + @log.debug " Found a bracket at depth %d: %p" % [ depth, linktext ] + link += linktext + + # Decrement depth for each closing bracket + depth += ( linktext[-1, 1] == ']' ? -1 : 1 ) + @log.debug " Depth is now #{depth}" + + # If there's no more brackets, it must not be an anchor, so + # just abort. + else + @log.debug " Missing closing brace, assuming non-link." + link += @scanner.rest + @scanner.terminate + return text + '[' + link + end + end + link.slice!( -1 ) # Trim final ']' + @log.debug " Found leading link %p" % link + + # Look for a reference-style second part + if @scanner.scan( RefLinkIdRegex ) + linkid = @scanner[1] + linkid = link.dup if linkid.empty? + linkid.downcase! + @log.debug " Found a linkid: %p" % linkid + + # If there's a matching link in the link table, build an + # anchor tag for it. + if rs.urls.key?( linkid ) + @log.debug " Found link key in the link table: %p" % rs.urls[linkid] + url = escape_md( rs.urls[linkid] ) + + text += %{#{link}} + + # If the link referred to doesn't exist, just append the raw + # source to the result + else + @log.debug " Linkid %p not found in link table" % linkid + @log.debug " Appending original string instead: " + @log.debug "%p" % @scanner.string[ startpos-1 .. @scanner.pos-1 ] + text += @scanner.string[ startpos-1 .. @scanner.pos-1 ] + end + + # ...or for an inline style second part + elsif @scanner.scan( InlineLinkRegex ) + url = @scanner[1] + title = @scanner[3] + @log.debug " Found an inline link to %p" % url + + text += %{#{link}} + + # No linkid part: just append the first part as-is. + else + @log.debug "No linkid, so no anchor. Appending literal text." + text += @scanner.string[ startpos-1 .. @scanner.pos-1 ] + end # if linkid + + # Plain text + else + @log.debug " Scanning to the next link from %p" % @scanner.rest + text += @scanner.scan( /[^\[]+/ ) + end + + end # until @scanner.empty? + + return text + end + + + # Pattern to match strong emphasis in Markdown text + BoldRegexp = %r{ (\*\*|__) (\S|\S.+?\S) \1 }x + + # Pattern to match normal emphasis in Markdown text + ItalicRegexp = %r{ (\*|_) (\S|\S.+?\S) \1 }x + + ### Transform italic- and bold-encoded text in a copy of the specified +str+ + ### and return it. + def transform_italic_and_bold( str, rs ) + @log.debug " Transforming italic and bold" + + str. + gsub( BoldRegexp, %{\\2} ). + gsub( ItalicRegexp, %{\\2} ) + end + + + ### Transform backticked spans into spans. + def transform_code_spans( str, rs ) + @log.debug " Transforming code spans" + + # Set up the string scanner and just return the string unless there's at + # least one backtick. + @scanner.string = str.dup + unless @scanner.exist?( /`/ ) + @scanner.terminate + @log.debug "No backticks found for code span in %p" % str + return str + end + + @log.debug "Transforming code spans in %p" % str + + # Build the transformed text anew + text = '' + + # Scan to the end of the string + until @scanner.empty? + + # Scan up to an opening backtick + if pre = @scanner.scan_until( /.?(?=`)/m ) + text += pre + @log.debug "Found backtick at %d after '...%s'" % [ @scanner.pos, text[-10, 10] ] + + # Make a pattern to find the end of the span + opener = @scanner.scan( /`+/ ) + len = opener.length + closer = Regexp::new( opener ) + @log.debug "Scanning for end of code span with %p" % closer + + # Scan until the end of the closing backtick sequence. Chop the + # backticks off the resultant string, strip leading and trailing + # whitespace, and encode any enitites contained in it. + codespan = @scanner.scan_until( closer ) or + raise FormatError::new( @scanner.rest[0,20], + "No %p found before end" % opener ) + + @log.debug "Found close of code span at %d: %p" % [ @scanner.pos - len, codespan ] + codespan.slice!( -len, len ) + text += "%s" % + encode_code( codespan.strip, rs ) + + # If there's no more backticks, just append the rest of the string + # and move the scan pointer to the end + else + text += @scanner.rest + @scanner.terminate + end + end + + return text + end + + + # Next, handle inline images: ![alt text](url "optional title") + # Don't forget: encode * and _ + InlineImageRegexp = %r{ + ( # Whole match = $1 + !\[ (.*?) \] # alt text = $2 + \([ ]* + ? # source url = $3 + [ ]* + (?: # + (["']) # quote char = $4 + (.*?) # title = $5 + \4 # matching quote + [ ]* + )? # title is optional + \) + ) + }xs #" + + + # Reference-style images + ReferenceImageRegexp = %r{ + ( # Whole match = $1 + !\[ (.*?) \] # Alt text = $2 + [ ]? # Optional space + (?:\n[ ]*)? # One optional newline + spaces + \[ (.*?) \] # id = $3 + ) + }xs + + ### Turn image markup into image tags. + def transform_images( str, rs ) + @log.debug " Transforming images" % str + + # Handle reference-style labeled images: ![alt text][id] + str. + gsub( ReferenceImageRegexp ) {|match| + whole, alt, linkid = $1, $2, $3.downcase + @log.debug "Matched %p" % match + res = nil + alt.gsub!( /"/, '"' ) + + # for shortcut links like ![this][]. + linkid = alt.downcase if linkid.empty? + + if rs.urls.key?( linkid ) + url = escape_md( rs.urls[linkid] ) + @log.debug "Found url '%s' for linkid '%s' " % [ url, linkid ] + + # Build the tag + result = %{%s}, '>' ). + gsub( CodeEscapeRegexp ) {|match| EscapeTable[match][:md5]} + end + + + + ################################################################# + ### U T I L I T Y F U N C T I O N S + ################################################################# + + ### Escape any markdown characters in a copy of the given +str+ and return + ### it. + def escape_md( str ) + str. + gsub( /\*/, EscapeTable['*'][:md5] ). + gsub( /_/, EscapeTable['_'][:md5] ) + end + + + # Matching constructs for tokenizing X/HTML + HTMLCommentRegexp = %r{ }mx + XMLProcInstRegexp = %r{ <\? .*? \?> }mx + MetaTag = Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp ) + + HTMLTagOpenRegexp = %r{ < [a-z/!$] [^<>]* }imx + HTMLTagCloseRegexp = %r{ > }x + HTMLTagPart = Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp ) + + ### Break the HTML source in +str+ into a series of tokens and return + ### them. The tokens are just 2-element Array tuples with a type and the + ### actual content. If this function is called with a block, the type and + ### text parts of each token will be yielded to it one at a time as they are + ### extracted. + def tokenize_html( str ) + depth = 0 + tokens = [] + @scanner.string = str.dup + type, token = nil, nil + + until @scanner.empty? + @log.debug "Scanning from %p" % @scanner.rest + + # Match comments and PIs without nesting + if (( token = @scanner.scan(MetaTag) )) + type = :tag + + # Do nested matching for HTML tags + elsif (( token = @scanner.scan(HTMLTagOpenRegexp) )) + tagstart = @scanner.pos + @log.debug " Found the start of a plain tag at %d" % tagstart + + # Start the token with the opening angle + depth = 1 + type = :tag + + # Scan the rest of the tag, allowing unlimited nested <>s. If + # the scanner runs out of text before the tag is closed, raise + # an error. + while depth.nonzero? + + # Scan either an opener or a closer + chunk = @scanner.scan( HTMLTagPart ) or + raise "Malformed tag at character %d: %p" % + [ tagstart, token + @scanner.rest ] + + @log.debug " Found another part of the tag at depth %d: %p" % [ depth, chunk ] + + token += chunk + + # If the last character of the token so far is a closing + # angle bracket, decrement the depth. Otherwise increment + # it for a nested tag. + depth += ( token[-1, 1] == '>' ? -1 : 1 ) + @log.debug " Depth is now #{depth}" + end + + # Match text segments + else + @log.debug " Looking for a chunk of text" + type = :text + + # Scan forward, always matching at least one character to move + # the pointer beyond any non-tag '<'. + token = @scanner.scan_until( /[^<]+/m ) + end + + @log.debug " type: %p, token: %p" % [ type, token ] + + # If a block is given, feed it one token at a time. Add the token to + # the token list to be returned regardless. + if block_given? + yield( type, token ) + end + tokens << [ type, token ] + end + + return tokens + end + + + ### Return a copy of +str+ with angle brackets and ampersands HTML-encoded. + def encode_html( str ) + str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w+);)/i, "&" ). + gsub( %r{<(?![a-z/?\$!])}i, "<" ) + end + + + ### Return one level of line-leading tabs or spaces from a copy of +str+ and + ### return it. + def outdent( str ) + str.gsub( /^(\t|[ ]{1,#{TabWidth}})/, '') + end + +end # class BlueCloth + diff --git a/vendor/htemplate.rb b/vendor/htemplate.rb new file mode 100644 index 0000000..5a429a1 --- /dev/null +++ b/vendor/htemplate.rb @@ -0,0 +1,68 @@ +require 'strscan' + +class HTemplate + def initialize(source, filename=nil) + @source = source + @filename = filename + compile + end + + if defined? Rack::Utils + def escape_html(s) + Rack::Utils.escape_html s + end + else + require 'cgi' + def escape_html(s) + CGI.escapeHTML s + end + end + + def expand(data, output="", escape=nil, &block) + escape ||= lambda { |v| + escape_html v.to_s + } + @code.call(output, data, block, escape) + output + end + + def compile + code = "lambda { |output, data, block, __escape| data.instance_eval {\n" + + scanner = StringScanner.new(@source) + + until scanner.eos? + if scanner.bol? and scanner.scan(/\s*\$ (.*\n)/) + # raw line of code, $ if bla + code << scanner[1] + elsif scanner.scan(/\$(:?)(\{(.*?)\}|(@?[\w.!?]+))/) + # expression, ${foo} or $foo.bar + expr = scanner[3] || scanner[4] + + if scanner[1] == ":" # disable escaping? + code << %Q{output << (#{expr}).to_s;} + else + code << %Q{output << __escape[#{expr}];} + end + elsif scanner.scan(/\$\$/) # plain $ + code << %Q{output << '$';} + elsif scanner.scan(/\$#.*?(?:#\$|$\n?)/) # comment $#...#$ or $#... + # nothing + elsif scanner.scan(/([^\n$]+\n?)|([^\n$]*\n)/) # text + if scanner.matched =~ /\\$/ && scanner.bol? + code << %Q{output << #{scanner.matched.chop.chop.dump};} + else + code << %Q{output << #{scanner.matched.dump};} + end + + code << "\n" if scanner.bol? + else + raise "can't parse template: #{scanner.rest[0..20].dump}" + end + end + + code << "}}" + + @code = eval(code, nil, @filename || '(template)', 0) + end +end diff --git a/vendor/rubypants.rb b/vendor/rubypants.rb new file mode 100644 index 0000000..6c21324 --- /dev/null +++ b/vendor/rubypants.rb @@ -0,0 +1,490 @@ +# +# = RubyPants -- SmartyPants ported to Ruby +# +# Ported by Christian Neukirchen +# Copyright (C) 2004, 2006 Christian Neukirchen +# +# Incooporates ideas, comments and documentation by Chad Miller +# Copyright (C) 2004 Chad Miller +# +# Original SmartyPants by John Gruber +# Copyright (C) 2003 John Gruber +# + +# +# = RubyPants -- SmartyPants ported to Ruby +# +# == Synopsis +# +# RubyPants is a Ruby port of the smart-quotes library SmartyPants. +# +# The original "SmartyPants" is a free web publishing plug-in for +# Movable Type, Blosxom, and BBEdit that easily translates plain ASCII +# punctuation characters into "smart" typographic punctuation HTML +# entities. +# +# +# == Description +# +# RubyPants can perform the following transformations: +# +# * Straight quotes (" and ') into "curly" quote +# HTML entities +# * Backticks-style quotes (``like this'') into "curly" quote +# HTML entities +# * Dashes (-- and ---) into en- and em-dash +# entities +# * Three consecutive dots (... or . . .) into an +# ellipsis entity +# +# This means you can write, edit, and save your posts using plain old +# ASCII straight quotes, plain dashes, and plain dots, but your +# published posts (and final HTML output) will appear with smart +# quotes, em-dashes, and proper ellipses. +# +# RubyPants does not modify characters within
    ,
    +# , ,  or
    +#