summary refs log tree commit diff
path: root/vendor/bluecloth.rb
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/bluecloth.rb')
-rw-r--r--vendor/bluecloth.rb1144
1 files changed, 1144 insertions, 0 deletions
diff --git a/vendor/bluecloth.rb b/vendor/bluecloth.rb
new file mode 100644
index 0000000..96266f2
--- /dev/null
+++ b/vendor/bluecloth.rb
@@ -0,0 +1,1144 @@
+#!/usr/bin/ruby
+# 
+# Bluecloth is a Ruby implementation of Markdown, a text-to-HTML conversion
+# tool.
+# 
+# == Synopsis
+# 
+#   doc = BlueCloth::new "
+#     ## Test document ##
+#
+#     Just a simple test.
+#   "
+#
+#   puts doc.to_html
+# 
+# == Authors
+# 
+# * Michael Granger <ged@FaerieMUD.org>
+# 
+# == Contributors
+#
+# * Martin Chase <stillflame@FaerieMUD.org> - Peer review, helpful suggestions
+# * Florian Gross <flgr@ccan.de> - Filter options, suggestions
+#
+# == Copyright
+#
+# Original version:
+#   Copyright (c) 2003-2004 John Gruber
+#   <http://daringfireball.net/>  
+#   All rights reserved.
+#
+# Ruby port:
+#   Copyright (c) 2004 The FaerieMUD Consortium.
+# 
+# BlueCloth is free software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the Free Software
+# Foundation; either version 2 of the License, or (at your option) any later
+# version.
+# 
+# BlueCloth is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
+# 
+# == To-do
+#
+# * Refactor some of the larger uglier methods that have to do their own
+#   brute-force scanning because of lack of Perl features in Ruby's Regexp
+#   class. Alternately, could add a dependency on 'pcre' and use most Perl
+#   regexps.
+#
+# * Put the StringScanner in the render state for thread-safety.
+#
+# == Version
+#
+#  $Id: bluecloth.rb 69 2004-08-25 05:27:15Z ged $
+# 
+
+require 'digest/md5'
+require 'logger'
+require 'strscan'
+
+
+### BlueCloth is a Ruby implementation of Markdown, a text-to-HTML conversion
+### tool.
+class BlueCloth < String
+
+	### Exception class for formatting errors.
+	class FormatError < RuntimeError
+
+		### Create a new FormatError with the given source +str+ and an optional
+		### message about the +specific+ error.
+		def initialize( str, specific=nil )
+			if specific
+				msg = "Bad markdown format near %p: %s" % [ str, specific ]
+			else
+				msg = "Bad markdown format near %p" % str
+			end
+
+			super( msg )
+		end
+	end
+
+
+	# Release Version
+	Version = '0.0.3'
+
+	# SVN Revision
+	SvnRev = %q$Rev: 69 $
+
+	# SVN Id tag
+	SvnId = %q$Id: bluecloth.rb 69 2004-08-25 05:27:15Z ged $
+
+	# SVN URL
+	SvnUrl = %q$URL: svn+ssh://svn.faeriemud.org/usr/local/svn/BlueCloth/trunk/lib/bluecloth.rb $
+
+
+	# Rendering state struct. Keeps track of URLs, titles, and HTML blocks
+	# midway through a render. I prefer this to the globals of the Perl version
+	# because globals make me break out in hives. Or something.
+	RenderState = Struct::new( "RenderState", :urls, :titles, :html_blocks, :log )
+
+	# Tab width for #detab! if none is specified
+	TabWidth = 4
+
+	# The tag-closing string -- set to '>' for HTML
+	EmptyElementSuffix = "/>";
+
+	# Table of MD5 sums for escaped characters
+	EscapeTable = {}
+	'\\`*_{}[]()#.!'.split(//).each {|char|
+		hash = Digest::MD5::hexdigest( char )
+
+		EscapeTable[ char ] = {
+ 			:md5 => hash,
+			:md5re => Regexp::new( hash ),
+			:re  => Regexp::new( '\\\\' + Regexp::escape(char) ),
+		}
+	}
+
+
+	#################################################################
+	###	I N S T A N C E   M E T H O D S
+	#################################################################
+
+	### Create a new BlueCloth string from +content+; each name in +restrictions+ (e.g. :filter_html) is switched on through its "name=" writer.
+	def initialize( content="", *restrictions )
+		# Log to $stderr ($deferr is gone as of Ruby 1.9, which silently discarded
+		# every message); verbosity follows $DEBUG / $VERBOSE.
+		@log = Logger::new( $stderr )
+		@log.level = $DEBUG ? Logger::DEBUG : ($VERBOSE ? Logger::INFO : Logger::WARN)
+		@scanner = nil
+
+		# Add any restrictions, and set the line-folding attribute to reflect
+		# what happens by default.
+		@filter_html = nil
+		@filter_styles = nil
+		restrictions.flatten.each {|r| __send__("#{r}=", true) }
+		@fold_lines = true
+
+		super( content )
+
+		@log.debug "String is: %p" % self
+	end
+
+
+	######
+	public
+	######
+
+	# Filters for controlling what gets output for untrusted input. (But really,
+	# you're filtering bad stuff out of untrusted input at submission-time via
+	# untainting, aren't you?)
+	attr_accessor :filter_html, :filter_styles
+
+	# RedCloth-compatibility accessor. Line-folding is part of Markdown syntax,
+	# so this isn't used by anything.
+	attr_accessor :fold_lines
+
+
+	### Render Markdown-formatted text in this string object as HTML and return
+	### it. The parameter is for compatibility with RedCloth, and is currently
+	### unused, though that may change in the future.
+	def to_html( lite=false )
+
+		# Create a StringScanner we can reuse for various lexing tasks
+		@scanner = StringScanner::new( '' )
+
+		# Make a structure to carry around stuff that gets placeholdered out of
+		# the source.
+		rs = RenderState::new( {}, {}, {} )
+
+		# Make a BlueCloth copy with normalized line endings and tabs turned to
+		# spaces (String#gsub alone yields a plain String, without #detab, on Ruby 3+)
+		text = self.dup.replace( self.gsub(/\r\n?/, "\n") ).detab
+		text += "\n\n"	# a couple of guaranteed newlines at the end
+		@log.debug "Normalized line-endings: %p" % text
+
+		# Filter HTML if we're asked to do so
+		if self.filter_html
+			text.gsub!( "<", "&lt;" )
+			text.gsub!( ">", "&gt;" )
+			@log.debug "Filtered HTML: %p" % text
+		end
+
+		# Simplify blank lines
+		text.gsub!( /^ +$/, '' )
+		@log.debug "Tabs -> spaces/blank lines stripped: %p" % text
+
+		# Replace HTML blocks with placeholders
+		text = hide_html_blocks( text, rs )
+		@log.debug "Hid HTML blocks: %p" % text
+		@log.debug "Render state: %p" % rs
+
+		# Strip link definitions, store in render state
+		text = strip_link_definitions( text, rs )
+		@log.debug "Stripped link definitions: %p" % text
+		@log.debug "Render state: %p" % rs
+
+		# Escape meta-characters
+		text = escape_special_chars( text )
+		@log.debug "Escaped special characters: %p" % text
+
+		# Transform block-level constructs
+		text = apply_block_transforms( text, rs )
+		@log.debug "After block-level transforms: %p" % text
+
+		# Now swap back in all the escaped characters
+		text = unescape_special_chars( text )
+		@log.debug "After unescaping special characters: %p" % text
+
+		return text
+	end
+	
+
+	### Return a copy of this string with its tabs converted to spaces.
+	def detab( tabwidth=TabWidth )
+		copy = self.dup
+		copy.detab!( tabwidth )
+		return copy
+	end
+
+
+	### Convert tabs to spaces in place and return self.
+	def detab!( tabwidth=TabWidth )
+		newstr = self.split( /\n/ ).collect {|line|
+			line.gsub( /(.*?)\t/ ) do
+				$1 + ' ' * (tabwidth - $1.length % tabwidth)
+			end
+		}.join("\n")
+		self.replace( newstr )
+	end
+
+
+	#######
+	#private
+	#######
+
+	### Do block-level transforms on a copy of +str+ using the specified render
+	### state +rs+ and return the results.
+	def apply_block_transforms( str, rs )
+		# Port: This was called '_runBlockGamut' in the original
+
+		@log.debug "Applying block transforms to:\n  %p" % str
+		text = transform_headers( str, rs )
+		text = transform_hrules( text, rs )
+		text = transform_lists( text, rs )
+		text = transform_code_blocks( text, rs )
+		text = transform_block_quotes( text, rs )
+		text = transform_auto_links( text, rs )
+		text = hide_html_blocks( text, rs )
+
+		text = form_paragraphs( text, rs )
+
+		@log.debug "Done with block transforms:\n  %p" % text
+		return text
+	end
+
+
+	### Apply Markdown span transforms to a copy of the specified +str+ with the
+	### given render state +rs+ and return it.
+	def apply_span_transforms( str, rs )
+		@log.debug "Applying span transforms to:\n  %p" % str
+
+		str = transform_code_spans( str, rs )
+		str = encode_html( str )
+		str = transform_images( str, rs )
+		str = transform_anchors( str, rs )
+		str = transform_italic_and_bold( str, rs )
+
+		# Hard breaks
+		str.gsub!( / {2,}\n/, "<br#{EmptyElementSuffix}\n" )
+
+		@log.debug "Done with span transforms:\n  %p" % str
+		return str
+	end
+
+
+	# The list of tags which are considered block-level constructs and an
+	# alternation pattern suitable for use in regexps made from the list
+	StrictBlockTags = %w[ p div h[1-6] blockquote pre table dl ol ul script noscript
+		form fieldset iframe math ins del ]
+	StrictTagPattern = StrictBlockTags.join('|')
+
+	LooseBlockTags = StrictBlockTags - %w[ins del]
+	LooseTagPattern = LooseBlockTags.join('|')
+
+	# Nested blocks:
+	# 	<div>
+	# 		<div>
+	# 		tags for inner block must be indented.
+	# 		</div>
+	# 	</div>
+	StrictBlockRegex = %r{
+		^						# Start of line
+		<(#{StrictTagPattern})	# Start tag: \2
+		\b						# word break
+		(.*\n)*?				# Any number of lines, minimal match
+		</\1>					# Matching end tag
+		[ ]*					# trailing spaces
+		$						# End of line or document
+	  }ix
+
+	# More-liberal block-matching
+	LooseBlockRegex = %r{
+		^						# Start of line
+		<(#{LooseTagPattern})	# start tag: \2
+		\b						# word break
+		(.*\n)*?				# Any number of lines, minimal match
+		.*</\1>					# Anything + Matching end tag
+		[ ]*					# trailing spaces
+		$						# End of line or document
+	  }ix
+
+	# Special case for <hr />.
+	HruleBlockRegex = %r{
+		(						# $1
+			\A\n?				# Start of doc + optional \n
+			|					# or
+			.*\n\n				# anything + blank line
+		)
+		(						# save in $2
+			[ ]*				# Any spaces
+			<hr					# Tag open
+			\b					# Word break
+			([^<>])*?			# Attributes
+			/?>					# Tag close
+			$					# followed by a blank line or end of document
+		)
+	  }ix
+
+	### Replace all blocks of HTML in +str+ that start in the left margin with
+	### tokens.
+	def hide_html_blocks( str, rs )
+		@log.debug "Hiding HTML blocks in %p" % str
+		
+		# Tokenizer proc to pass to gsub
+		tokenize = lambda {|match|
+			key = Digest::MD5::hexdigest( match )
+			rs.html_blocks[ key ] = match
+			@log.debug "Replacing %p with %p" % [ match, key ]
+			"\n\n#{key}\n\n"
+		}
+
+		rval = str.dup
+
+		@log.debug "Finding blocks with the strict regex..."
+		rval.gsub!( StrictBlockRegex, &tokenize )
+
+		@log.debug "Finding blocks with the loose regex..."
+		rval.gsub!( LooseBlockRegex, &tokenize )
+
+		@log.debug "Finding hrules..."
+		rval.gsub!( HruleBlockRegex ) {|match| $1 + tokenize[$2] }
+
+		return rval
+	end
+
+
+	# Link defs are in the form: ^[id]: url "optional title"
+	LinkRegex = %r{
+		^[ ]*\[(.+)\]:		# id = $1
+		  [ ]*
+		  \n?				# maybe *one* newline
+		  [ ]*
+		<?(\S+?)>?				# url = $2
+		  [ ]*
+		  \n?				# maybe one newline
+		  [ ]*
+		(?:
+			# Titles are delimited by "quotes" or (parens).
+			["(]
+			(.+?)			# title = $3
+			[")]			# Matching ) or "
+			[ ]*
+		)?	# title is optional
+		(?:\n+|\Z)
+	  }x
+
+	### Strip link definitions from +str+, storing them in the given RenderState
+	### +rs+.
+	def strip_link_definitions( str, rs )
+		str.gsub( LinkRegex ) {|match|
+			id, url, title = $1, $2, $3
+
+			rs.urls[ id.downcase ] = encode_html( url )
+			unless title.nil?
+				rs.titles[ id.downcase ] = title.gsub( /"/, "&quot;" )
+			end
+			""
+		}
+	end
+
+
+	### Escape special characters in the given +str+
+	def escape_special_chars( str )
+		@log.debug "  Escaping special characters"
+		text = ''
+
+		# The original Markdown source has something called '$tags_to_skip'
+		# declared here, but it's never used, so I don't define it.
+
+		tokenize_html( str ) {|token, str|
+			@log.debug "   Adding %p token %p" % [ token, str ]
+			case token
+
+			# Within tags, encode * and _
+			when :tag
+				text += str.
+					gsub( /\*/, EscapeTable['*'][:md5] ).
+					gsub( /_/, EscapeTable['_'][:md5] )
+
+			# Encode backslashed stuff in regular text
+			when :text
+				text += encode_backslash_escapes( str )
+			else
+				raise TypeError, "Unknown token type %p" % token
+			end
+		}
+
+		@log.debug "  Text with escapes is now: %p" % text
+		return text
+	end
+
+
+	### Swap the MD5 placeholders in the given +str+ back to their special
+	### characters. The +str+ is modified in place and returned.
+	def unescape_special_chars( str )
+		EscapeTable.each {|char, hash|
+			@log.debug "Unescaping escaped %p with %p" % [ char, hash[:md5re] ]
+			str.gsub!( hash[:md5re], char )
+		}
+
+		return str
+	end
+
+
+	### Return a copy of the given +str+ with any backslashed special character
+	### in it replaced with MD5 placeholders.
+	def encode_backslash_escapes( str )
+		# Make a copy with any double-escaped backslashes encoded
+		text = str.gsub( /\\\\/, EscapeTable['\\'][:md5] )
+		
+		EscapeTable.each_pair {|char, esc|
+			next if char == '\\'
+			text.gsub!( esc[:re], esc[:md5] )
+		}
+
+		return text
+	end
+
+
+	### Transform any Markdown-style horizontal rules in a copy of the specified
+	### +str+ and return it.
+	def transform_hrules( str, rs )
+		@log.debug " Transforming horizontal rules"
+		str.gsub( /^( ?[\-\*_] ?){3,}$/, "\n<hr#{EmptyElementSuffix}\n" )
+	end
+
+
+
+	# Patterns to match and transform lists
+	ListMarkerOl = %r{\d+\.}
+	ListMarkerUl = %r{[*+-]}
+	ListMarkerAny = Regexp::union( ListMarkerOl, ListMarkerUl )
+
+	ListRegexp = %r{
+		  (?:
+			^[ ]{0,#{TabWidth - 1}}		# Indent < tab width
+			(#{ListMarkerAny})			# unordered or ordered ($1)
+			[ ]+						# At least one space
+		  )
+		  (?m:.+?)						# item content (include newlines)
+		  (?:
+			  \z						# Either EOF
+			|							#  or
+			  \n{2,}					# Blank line...
+			  (?=\S)					# ...followed by non-space
+			  (?![ ]*					# ...but not another item
+				(#{ListMarkerAny})
+			   [ ]+)
+		  )
+	  }x
+
+	### Transform Markdown-style lists in a copy of the specified +str+ and
+	### return it.
+	def transform_lists( str, rs )
+		@log.debug " Transforming lists at %p" % (str[0,100] + '...')
+
+		str.gsub( ListRegexp ) {|list|
+			@log.debug "  Found list %p" % list
+			bullet = $1
+			list_type = (ListMarkerUl.match(bullet) ? "ul" : "ol")
+			list.gsub!( /\n{2,}/, "\n\n\n" )
+
+			%{<%s>\n%s</%s>\n} % [
+				list_type,
+				transform_list_items( list, rs ),
+				list_type,
+			]
+		}
+	end
+
+
+	# Pattern for transforming list items
+	ListItemRegexp = %r{
+		(\n)?							# leading line = $1
+		(^[ ]*)							# leading whitespace = $2
+		(#{ListMarkerAny}) [ ]+			# list marker = $3
+		((?m:.+?)						# list item text   = $4
+		(\n{1,2}))
+		(?= \n* (\z | \2 (#{ListMarkerAny}) [ ]+))
+	  }x
+
+	### Transform list items in a copy of the given +str+ and return it.
+	def transform_list_items( str, rs )
+		@log.debug " Transforming list items"
+
+		# Trim trailing blank lines
+		str = str.sub( /\n{2,}\z/, "\n" )
+
+		str.gsub( ListItemRegexp ) {|line|
+			@log.debug "  Found item line %p" % line
+			leading_line, item = $1, $4
+
+			if leading_line or /\n{2,}/.match( item )
+				@log.debug "   Found leading line or item has a blank"
+				item = apply_block_transforms( outdent(item), rs )
+			else
+				# Recursion for sub-lists
+				@log.debug "   Recursing for sublist"
+				item = transform_lists( outdent(item), rs ).chomp
+				item = apply_span_transforms( item, rs )
+			end
+
+			%{<li>%s</li>\n} % item
+		}
+	end
+
+
+	# Pattern for matching codeblocks
+	CodeBlockRegexp = %r{
+		(?:\n\n|\A)
+		(									# $1 = the code block
+		  (?:
+			(?:[ ]{#{TabWidth}} | \t)		# a tab or tab-width of spaces
+			.*\n+
+		  )+
+		)
+		(^[ ]{0,#{TabWidth - 1}}\S|\Z)		# Lookahead for non-space at
+											# line-start, or end of doc
+	  }x
+
+	### Transform Markdown-style codeblocks in a copy of the specified +str+ and
+	### return it.
+	def transform_code_blocks( str, rs )
+		@log.debug " Transforming code blocks"
+
+		str.gsub( CodeBlockRegexp ) {|block|
+			codeblock = $1
+			remainder = $2
+
+			# Generate the codeblock
+			%{\n\n<pre><code>%s\n</code></pre>\n\n%s} %
+				[ encode_code( outdent(codeblock), rs ).rstrip, remainder ]
+		}
+	end
+
+
+	# Pattern for matching Markdown blockquote blocks
+	BlockQuoteRegexp = %r{
+		  (?:
+			^[ ]*>[ ]?		# '>' at the start of a line
+			  .+\n			# rest of the first line
+			(?:.+\n)*		# subsequent consecutive lines
+			\n*				# blanks
+		  )+
+	  }x
+	PreChunk = %r{ ( ^ \s* <pre> .+? </pre> ) }xm
+
+	### Transform Markdown-style blockquotes in a copy of the specified +str+
+	### and return it.
+	def transform_block_quotes( str, rs )
+		@log.debug " Transforming block quotes"
+
+		str.gsub( BlockQuoteRegexp ) {|quote|
+			@log.debug "Making blockquote from %p" % quote
+
+			quote.gsub!( /^ *> ?/, '' ) # Trim one level of quoting 
+			quote.gsub!( /^ +$/, '' )	# Trim whitespace-only lines
+
+			indent = " " * TabWidth
+			quoted = %{<blockquote>\n%s\n</blockquote>\n\n} %
+				apply_block_transforms( quote, rs ).
+				gsub( /^/, indent ).
+				gsub( PreChunk ) {|m| m.gsub(/^#{indent}/o, '') }
+			@log.debug "Blockquoted chunk is: %p" % quoted
+			quoted
+		}
+	end
+
+
+	AutoAnchorURLRegexp = /<((https?|ftp):[^'">\s]+)>/
+	AutoAnchorEmailRegexp = %r{
+		<
+		(
+			[-.\w]+
+			\@
+			[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
+		)
+		>
+	  }xi
+
+	### Transform URLs in a copy of the specified +str+ into links and return
+	### it.
+	def transform_auto_links( str, rs )
+		@log.debug " Transforming auto-links"
+		str.gsub( AutoAnchorURLRegexp, %{<a href="\\1">\\1</a>}).
+			gsub( AutoAnchorEmailRegexp ) {|addr|
+			encode_email_address( unescape_special_chars($1) )
+		}
+	end
+
+
+	# Encoder functions to turn characters of an email address into encoded
+	# entities.
+	Encoders = [
+		lambda {|char| "&#%03d;" % char},
+		lambda {|char| "&#x%X;" % char},
+		lambda {|char| char.chr },
+	]
+
+	### Transform a copy of the given email +addr+ into an escaped version safer
+	### for posting publicly.
+	def encode_email_address( addr )
+		# Match on byte values: ?: and ?@ are Strings on Ruby 1.9+ and never equal an Integer byte
+		rval = ''
+		("mailto:" + addr).each_byte {|b|
+			case b
+			when 0x3a	# ':' is kept literal so the "mailto:" prefix can be stripped below
+				rval += ":"
+			when 0x40	# '@' is always entity-encoded (decimal or hex)
+				rval += Encoders[ rand(2) ][ b ]
+			else
+				r = rand(100)
+				rval += (
+					r > 90 ? Encoders[2][ b ] :
+					r < 45 ? Encoders[1][ b ] :
+							 Encoders[0][ b ]
+				)
+			end
+		}
+
+		return %{<a href="%s">%s</a>} % [ rval, rval.sub(/.+?:/, '') ]
+	end
+
+
+	# Regex for matching Setext-style headers
+	SetextHeaderRegexp = %r{
+		(.+)			# The title text ($1)
+		\n
+		([\-=])+		# Match a line of = or -. Save only one in $2.
+		[ ]*\n+
+	   }x
+
+	# Regexp for matching ATX-style headers
+	AtxHeaderRegexp = %r{
+		^(\#{1,6})	# $1 = string of #'s
+		[ ]*
+		(.+?)		# $2 = Header text
+		[ ]*
+		\#*			# optional closing #'s (not counted)
+		\n+
+	  }x
+
+	### Apply Markdown header transforms to a copy of the given +str+ and render
+	### state +rs+ and return the result.
+	def transform_headers( str, rs )
+		@log.debug " Transforming headers"
+
+		# Setext-style headers:
+		#	  Header 1
+		#	  ========
+		#  
+		#	  Header 2
+		#	  --------
+		#
+		str.
+			gsub( SetextHeaderRegexp ) {|m|
+				@log.debug "Found setext-style header"
+				title, hdrchar = $1, $2
+				title = apply_span_transforms( title, rs )
+
+				case hdrchar
+				when '='
+					%[<h1>#{title}</h1>\n\n]
+				when '-'
+					%[<h2>#{title}</h2>\n\n]
+				else
+					title
+				end
+			}.
+
+			gsub( AtxHeaderRegexp ) {|m|
+				@log.debug "Found ATX-style header"
+				hdrchars, title = $1, $2
+				title = apply_span_transforms( title, rs )
+
+				level = hdrchars.length
+				%{<h%d>%s</h%d>\n\n} % [ level, title, level ]
+			}
+	end
+
+
+	### Wrap all remaining paragraph-looking text in a copy of +str+ inside <p>
+	### tags and return it.
+	def form_paragraphs( str, rs )
+		@log.debug " Forming paragraphs"
+		grafs = str.
+			sub( /\A\n+/, '' ).
+			sub( /\n+\z/, '' ).
+			split( /\n{2,}/ )
+
+		rval = grafs.collect {|graf|
+
+			# Unhashify HTML blocks if this is a placeholder
+			if rs.html_blocks.key?( graf )
+				rs.html_blocks[ graf ]
+
+			# Otherwise, wrap in <p> tags
+			else
+				apply_span_transforms(graf, rs).
+					sub( /^[ ]*/, '<p>' ) + '</p>'
+			end
+		}.join( "\n\n" )
+
+		@log.debug " Formed paragraphs: %p" % rval
+		return rval
+	end
+
+
+	# Pattern to match the linkid part of an anchor tag for reference-style
+	# links.
+	RefLinkIdRegex = %r{
+		[ ]?					# Optional leading space
+		(?:\n[ ]*)?				# Optional newline + spaces
+		\[
+			(.*?)				# Id = $1
+		\]
+	  }x
+
+	InlineLinkRegex = %r{
+		\(						# Literal paren
+			[ ]*				# Zero or more spaces
+			<?(.+?)>?			# URI = $1
+			[ ]*				# Zero or more spaces
+			(?:					# 
+				([\"\'])		# Opening quote char = $2
+				(.*?)			# Title = $3
+				\2				# Matching quote char
+			)?					# Title is optional
+		\)
+	  }x
+
+	### Apply Markdown anchor transforms to a copy of the specified +str+ with
+	### the given render state +rs+ and return it.
+	def transform_anchors( str, rs )
+		@log.debug " Transforming anchors"
+		@scanner.string = str.dup
+		text = ''
+
+		# Scan the whole string (eos? replaces the obsolete StringScanner#empty?)
+		until @scanner.eos?
+		
+			if @scanner.scan( /\[/ )
+				link = ''; linkid = ''
+				depth = 1
+				startpos = @scanner.pos
+				@log.debug " Found a bracket-open at %d" % startpos
+
+				# Scan the rest of the tag, allowing unlimited nested []s. If
+				# the scanner runs out of text before the opening bracket is
+				# closed, append the text and return (wasn't a valid anchor).
+				while depth.nonzero?
+					linktext = @scanner.scan_until( /\]|\[/ )
+
+					if linktext
+						@log.debug "  Found a bracket at depth %d: %p" % [ depth, linktext ]
+						link += linktext
+
+						# Decrement depth for each closing bracket
+						depth += ( linktext[-1, 1] == ']' ? -1 : 1 )
+						@log.debug "  Depth is now #{depth}"
+
+					# If there's no more brackets, it must not be an anchor, so
+					# just abort.
+					else
+						@log.debug "  Missing closing brace, assuming non-link."
+						link += @scanner.rest
+						@scanner.terminate
+						return text + '[' + link
+					end
+				end
+				link.slice!( -1 ) # Trim final ']'
+				@log.debug " Found leading link %p" % link
+
+				# Look for a reference-style second part
+				if @scanner.scan( RefLinkIdRegex )
+					linkid = @scanner[1]
+					linkid = link.dup if linkid.empty?
+					linkid.downcase!
+					@log.debug "  Found a linkid: %p" % linkid
+
+					# If there's a matching link in the link table, build an
+					# anchor tag for it.
+					if rs.urls.key?( linkid )
+						@log.debug "   Found link key in the link table: %p" % rs.urls[linkid]
+						url = escape_md( rs.urls[linkid] )
+
+						text += %{<a href="#{url}"}
+						if rs.titles.key?(linkid)
+							text += %{ title="%s"} % escape_md( rs.titles[linkid] )
+						end
+						text += %{>#{link}</a>}
+
+					# If the link referred to doesn't exist, just append the raw
+					# source to the result
+					else
+						@log.debug "  Linkid %p not found in link table" % linkid
+						@log.debug "  Appending original string instead: "
+						@log.debug "%p" % @scanner.string[ startpos-1 .. @scanner.pos-1 ]
+						text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
+					end
+
+				# ...or for an inline style second part
+				elsif @scanner.scan( InlineLinkRegex )
+					url = @scanner[1]
+					title = @scanner[3]
+					@log.debug "  Found an inline link to %p" % url
+
+					text += %{<a href="%s"} % escape_md( url )
+					if title
+						title.gsub!( /"/, "&quot;" )
+						text += %{ title="%s"} % escape_md( title )
+					end
+					text += %{>#{link}</a>}
+
+				# No linkid part: just append the first part as-is.
+				else
+					@log.debug "No linkid, so no anchor. Appending literal text."
+					text += @scanner.string[ startpos-1 .. @scanner.pos-1 ]
+				end # if linkid
+
+			# Plain text
+			else
+				@log.debug " Scanning to the next link from %p" % @scanner.rest
+				text += @scanner.scan( /[^\[]+/ )
+			end
+
+		end # until @scanner.eos?
+
+		return text
+	end
+
+
+	# Pattern to match strong emphasis in Markdown text
+	BoldRegexp = %r{ (\*\*|__) (\S|\S.+?\S) \1 }x
+
+	# Pattern to match normal emphasis in Markdown text
+	ItalicRegexp = %r{ (\*|_) (\S|\S.+?\S) \1 }x
+
+	### Transform italic- and bold-encoded text in a copy of the specified +str+
+	### and return it.
+	def transform_italic_and_bold( str, rs )
+		@log.debug " Transforming italic and bold"
+
+		str.
+			gsub( BoldRegexp, %{<strong>\\2</strong>} ).
+			gsub( ItalicRegexp, %{<em>\\2</em>} )
+	end
+
+	
+	### Transform backticked spans in +str+ into <code> spans and return the result; raises FormatError if a span is never closed.
+	def transform_code_spans( str, rs )
+		@log.debug " Transforming code spans"
+
+		# Set up the string scanner and just return the string unless there's at
+		# least one backtick.
+		@scanner.string = str.dup
+		unless @scanner.exist?( /`/ )
+			@scanner.terminate
+			@log.debug "No backticks found for code span in %p" % str
+			return str
+		end
+
+		@log.debug "Transforming code spans in %p" % str
+
+		# Build the transformed text anew
+		text = ''
+
+		# Scan to the end of the string (eos? replaces the obsolete #empty?)
+		until @scanner.eos?
+
+			# Scan up to an opening backtick
+			if pre = @scanner.scan_until( /.?(?=`)/m )
+				text += pre
+				@log.debug "Found backtick at %d after '...%s'" % [ @scanner.pos, text[-10, 10] ]
+
+				# Make a pattern to find the end of the span
+				opener = @scanner.scan( /`+/ )
+				len = opener.length
+				closer = Regexp::new( opener )
+				@log.debug "Scanning for end of code span with %p" % closer
+
+				# Scan until the end of the closing backtick sequence. Chop the
+				# backticks off the resultant string, strip leading and trailing
+				# whitespace, and encode any entities contained in it.
+				codespan = @scanner.scan_until( closer ) or
+					raise FormatError::new( @scanner.rest[0,20],
+						"No %p found before end" % opener )
+
+				@log.debug "Found close of code span at %d: %p" % [ @scanner.pos - len, codespan ]
+				codespan.slice!( -len, len )
+				text += "<code>%s</code>" %
+					encode_code( codespan.strip, rs )
+
+			# If there's no more backticks, just append the rest of the string
+			# and move the scan pointer to the end
+			else
+				text += @scanner.rest
+				@scanner.terminate
+			end
+		end
+
+		return text
+	end
+
+
+	# Next, handle inline images:  ![alt text](url "optional title")
+	# Don't forget: encode * and _
+	InlineImageRegexp = %r{
+		(					# Whole match = $1
+			!\[ (.*?) \]	# alt text = $2
+		  \([ ]*
+			<?(\S+?)>?		# source url = $3
+		    [ ]*
+			(?:				# 
+			  (["'])		# quote char = $4
+			  (.*?)			# title = $5
+			  \4			# matching quote
+			  [ ]*
+			)?				# title is optional
+		  \)
+		)
+	  }xs #"
+
+
+	# Reference-style images
+	ReferenceImageRegexp = %r{
+		(					# Whole match = $1
+			!\[ (.*?) \]	# Alt text = $2
+			[ ]?			# Optional space
+			(?:\n[ ]*)?		# One optional newline + spaces
+			\[ (.*?) \]		# id = $3
+		)
+	  }xs
+
+	### Turn reference-style and inline image markup in a copy of +str+ into <img> tags, resolving reference ids through the render state +rs+, and return it.
+	def transform_images( str, rs )
+		@log.debug " Transforming images"
+
+		# Handle reference-style labeled images: ![alt text][id]
+		str.
+			gsub( ReferenceImageRegexp ) {|match|
+				whole, alt, linkid = $1, $2, $3.downcase
+				@log.debug "Matched %p" % match
+				# Escape double quotes so the alt text is safe inside the attribute
+				alt.gsub!( /"/, '&quot;' )
+
+				# for shortcut links like ![this][].
+				linkid = alt.downcase if linkid.empty?
+
+				if rs.urls.key?( linkid )
+					url = escape_md( rs.urls[linkid] )
+					@log.debug "Found url '%s' for linkid '%s' " % [ url, linkid ]
+
+					# Build the tag
+					result = %{<img src="%s" alt="%s"} % [ url, alt ]
+					if rs.titles.key?( linkid )
+						result += %{ title="%s"} % escape_md( rs.titles[linkid] )
+					end
+					result += EmptyElementSuffix
+
+				else
+					result = whole
+				end
+
+				@log.debug "Replacing %p with %p" % [ match, result ]
+				result
+			}.
+
+			# Inline image style
+			gsub( InlineImageRegexp ) {|match|
+				@log.debug "Found inline image %p" % match
+				whole, alt, title = $1, $2, $5
+				url = escape_md( $3 )
+				alt.gsub!( /"/, '&quot;' )
+
+				# Build the tag
+				result = %{<img src="%s" alt="%s"} % [ url, alt ]
+				unless title.nil?
+					title.gsub!( /"/, '&quot;' )
+					result += %{ title="%s"} % escape_md( title )
+				end
+				result += EmptyElementSuffix
+
+				@log.debug "Replacing %p with %p" % [ match, result ]
+				result
+			}
+	end
+
+
+	# Regexp to match special characters in a code block
+	CodeEscapeRegexp = %r{( \* | _ | \{ | \} | \[ | \] | \\ )}x
+
+	### Escape any characters special to HTML and encode any characters special
+	### to Markdown in a copy of the given +str+ and return it.
+	def encode_code( str, rs )
+		str.gsub( %r{&}, '&amp;' ).
+			gsub( %r{<}, '&lt;' ).
+			gsub( %r{>}, '&gt;' ).
+			gsub( CodeEscapeRegexp ) {|match| EscapeTable[match][:md5]}
+	end
+				
+
+
+	#################################################################
+	###	U T I L I T Y   F U N C T I O N S
+	#################################################################
+
+	### Escape any markdown characters in a copy of the given +str+ and return
+	### it.
+	def escape_md( str )
+		str.
+			gsub( /\*/, EscapeTable['*'][:md5] ).
+			gsub( /_/,  EscapeTable['_'][:md5] )
+	end
+
+
+	# Matching constructs for tokenizing X/HTML
+	HTMLCommentRegexp  = %r{ <! ( -- .*? -- \s* )+ > }mx
+	XMLProcInstRegexp  = %r{ <\? .*? \?> }mx
+	MetaTag = Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp )
+
+	HTMLTagOpenRegexp  = %r{ < [a-z/!$] [^<>]* }imx
+	HTMLTagCloseRegexp = %r{ > }x
+	HTMLTagPart = Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp )
+
+	### Break the HTML source in +str+ into a series of tokens and return
+	### them. The tokens are just 2-element Array tuples with a type and the
+	### actual content. If this function is called with a block, the type and
+	### text parts of each token will be yielded to it one at a time as they are
+	### extracted.
+	def tokenize_html( str )
+		depth = 0
+		tokens = []
+		@scanner.string = str.dup
+		type, token = nil, nil
+
+		until @scanner.eos?
+			@log.debug "Scanning from %p" % @scanner.rest
+
+			# Match comments and PIs without nesting
+			if (( token = @scanner.scan(MetaTag) ))
+				type = :tag
+
+			# Do nested matching for HTML tags
+			elsif (( token = @scanner.scan(HTMLTagOpenRegexp) ))
+				tagstart = @scanner.pos
+				@log.debug " Found the start of a plain tag at %d" % tagstart
+
+				# Start the token with the opening angle
+				depth = 1
+				type = :tag
+
+				# Scan the rest of the tag, allowing unlimited nested <>s. If
+				# the scanner runs out of text before the tag is closed, raise
+				# an error.
+				while depth.nonzero?
+
+					# Scan either an opener or a closer
+					chunk = @scanner.scan( HTMLTagPart ) or
+						raise "Malformed tag at character %d: %p" % 
+							[ tagstart, token + @scanner.rest ]
+						
+					@log.debug "  Found another part of the tag at depth %d: %p" % [ depth, chunk ]
+
+					token += chunk
+
+					# If the last character of the token so far is a closing
+					# angle bracket, decrement the depth. Otherwise increment
+					# it for a nested tag.
+					depth += ( token[-1, 1] == '>' ? -1 : 1 )
+					@log.debug "  Depth is now #{depth}"
+				end
+
+			# Match text segments
+			else
+				@log.debug " Looking for a chunk of text"
+				type = :text
+
+				# Scan past any non-tag '<' to the end of the following text; if
+				# only '<'s remain, consume them so the token is never nil.
+				token = @scanner.scan_until( /[^<]+/m ) || @scanner.scan( /<+/ )
+			end
+
+			@log.debug " type: %p, token: %p" % [ type, token ]
+
+			# If a block is given, feed it one token at a time. Add the token to
+			# the token list to be returned regardless.
+			if block_given?
+				yield( type, token )
+			end
+			tokens << [ type, token ]
+		end
+
+		return tokens
+	end
+
+
+	### Return a copy of +str+ with angle brackets and ampersands HTML-encoded.
+	def encode_html( str )
+		str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w+);)/i, "&amp;" ).
+			gsub( %r{<(?![a-z/?\$!])}i, "&lt;" )
+	end
+
+	
+	### Remove one level of line-leading tabs or spaces from a copy of +str+ and
+	### return it.
+	def outdent( str )
+		str.gsub( /^(\t|[ ]{1,#{TabWidth}})/, '')
+	end
+	
+end # class BlueCloth
+