diff options
author | Christian Neukirchen <chneukirchen@gmail.com> | 2008-09-17 16:31:21 +0200 |
---|---|---|
committer | Christian Neukirchen <chneukirchen@gmail.com> | 2008-09-17 16:31:21 +0200 |
commit | 8d669d2263bdcfb870a0f734de9fe7c76e2ec266 (patch) | |
tree | f3b88744cde2aaf17c3a04decbc990d296c02dc1 | |
download | trivium-8d669d2263bdcfb870a0f734de9fe7c76e2ec266.tar.gz trivium-8d669d2263bdcfb870a0f734de9fe7c76e2ec266.tar.xz trivium-8d669d2263bdcfb870a0f734de9fe7c76e2ec266.zip |
Initial import of Trivium
-rw-r--r-- | data/style.css | 155 | ||||
-rw-r--r-- | data/trivium.png | bin | 0 -> 4712 bytes | |||
-rw-r--r-- | template/all.ht | 37 | ||||
-rw-r--r-- | template/atom.ht | 39 | ||||
-rw-r--r-- | template/front.ht | 49 | ||||
-rw-r--r-- | template/monthly.ht | 63 | ||||
-rw-r--r-- | template/single.ht | 51 | ||||
-rw-r--r-- | trivium.rb | 158 | ||||
-rw-r--r-- | vendor/bluecloth.rb | 1144 | ||||
-rw-r--r-- | vendor/htemplate.rb | 68 | ||||
-rw-r--r-- | vendor/rubypants.rb | 490 |
11 files changed, 2254 insertions, 0 deletions
diff --git a/data/style.css b/data/style.css new file mode 100644 index 0000000..296ba6a --- /dev/null +++ b/data/style.css @@ -0,0 +1,155 @@ +body { + font: 12px/16px Helvetica, Arial, sans-serif; + width: 40em; + margin: 2.5em 3em; + position: relative; +} + + +h1 { + width: 141px; + height: 64px; + margin: 0; + padding: 0 0 8px 0; +} + +h2 { + font: 26px/16px Helvetica, Arial, sans-serif; + margin: 0 2px 0 -2px; + letter-spacing: -1.5px; + display: inline; + position: relative; +} + +h3 { + font: 12px/16px Helvetica, Arial, sans-serif; + display: inline; + font-weight: bold; +} + + +h2 a, address a { + color: black; + text-decoration: none; +} + +h2 abbr { + border: none; + text-decoration: none; +} + +.entry-content a, .nav a, .all { + text-decoration: none; + xcolor: #cade0d; + color: #69CE0D; + background-color: #EBFBCF; + padding: 0px 1px; +} + +.entry-content a:hover, .nav a:hover, .all:hover, address a:hover { + text-decoration: underline; +} + + +.hentry { + margin-top: 12px; +} + +p { + margin: 0; + padding: 0; +} + +.entry-content { + display: inline; +} + +.entry-content > p + p { + text-indent: 1em; +} + +.entry-content p:first-child, +h3 + p { + display: inline; +} + +.entry-content > p + h3 { + margin-left: 1em; +} + +address { + text-align: right; + margin: 3em 0; + font-size: 11px; +} + + +.quick p:first-child { + margin-left: 1em; +} + +.quick p { + display: inline; +} + +.quick p + p:before { + content: "\2215"; + padding: 0 0.75em 0 0.5em; +} + + +blockquote { + font-style: italic; + margin-left: 1em; +} + +blockquote .source { + display: block; + margin-left: 2em; + font-style: normal; + text-transform: uppercase; + font-size: 11px; +} + +img { + border: none; +} + +pre { + margin-left: 1em; + font-size: 11px; +} + + +img.inline-math { + position: relative; + top: 5px; + margin-top: -100px; +} + +.math { + text-align: center; + margin: 0.5em 0em; +} + + +.nav { + font-size: 12px; + display: block; +} + +.top.nav { + position: absolute; + 
right: 0; + top: 35px; +} + +.bot.nav { + text-align: right; + margin: 2em 0 -1.5em 0; +} + +.nav a { + margin-left: 0.5em; + padding: 2px 4px; +} diff --git a/data/trivium.png b/data/trivium.png new file mode 100644 index 0000000..3df518a --- /dev/null +++ b/data/trivium.png Binary files differdiff --git a/template/all.ht b/template/all.ht new file mode 100644 index 0000000..39d91cb --- /dev/null +++ b/template/all.ht @@ -0,0 +1,37 @@ +<!DOCTYPE html> +<html> + <head> + <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> + <title>Trivium: grammar, logic, rhetoric</title> + <link rel="stylesheet" href="style.css"> + </head> + <body> + +<div class="main"> +<div class="wrapper"> +<h1><span class="title"> +<a href="./">Trivium: grammar, logic, rhetoric</a></span> +<span class="author">by +<a href="http://chneukirchen.org/">Christian Neukirchen</a></span></h1> + +$ self[:entries].each { |entry| + +<h2> +$# class="entry-title"> + <a href="${entry[:id]}" rel="bookmark"> + <abbr class="published" title="${entry[:date].iso8601}"> + ${entry[:title]} + </abbr> + </a> +</h2> + +$ } + +<address class="author vcard"> +Copyright © 2008 <a class="email fn" href="mailto:chneukirchen@gmail.com">Christian Neukirchen</a> +</address> + +</div> +</div> +</body> +</html> diff --git a/template/atom.ht b/template/atom.ht new file mode 100644 index 0000000..f7d262d --- /dev/null +++ b/template/atom.ht @@ -0,0 +1,39 @@ +<?xml version="1.0" encoding="utf-8"?> +<feed xmlns="http://www.w3.org/2005/Atom"> + <title type="text">Trivium: grammar, logic, rhetoric</title> + <link rel="alternate" type="text/html" + href="http://chneukirchen.org/trivium/" /> + <link rel="self" href="http://chneukirchen.org/trivium/index.atom" /> + <author> + <name>Christian Neukirchen</name> + <uri>http://chneukirchen.org/</uri> + <email>chneukirchen@gmail.com</email> + </author> + <id>tag:chneukirchen.org,2008:trivium-feed</id> + <generator version="0.1">trivium.rb</generator> + <rights type="xhtml"> 
+ <div xmlns="http://www.w3.org/1999/xhtml"> + Copyright © 2008 Christian Neukirchen, chneukirchen@gmail.com +Verbatim copying is permitted as long as this message is preserved. + </div> + </rights> + <updated>${self[:time].iso8601}</updated> +$ self[:entries].each { |entry| + <entry> + <title>${entry[:title]}</title> + <author> + <name>Christian Neukirchen</name> + <uri>http://chneukirchen.org/</uri> + <email>chneukirchen@gmail.com</email> + </author> + <link rel="alternate" type="text/html" + href="http://chneukirchen.org/trivium/${entry[:id]}" /> + <id>tag:chneukirchen.org,2008:trivium-${entry[:id]}</id> + <updated>${entry[:date].iso8601}</updated> + <published>${self[:time].iso8601}</published> + <content type="html"> +${format(entry)} + </content> + </entry> +$ } +</feed> diff --git a/template/front.ht b/template/front.ht new file mode 100644 index 0000000..45e2fc1 --- /dev/null +++ b/template/front.ht @@ -0,0 +1,49 @@ +<!DOCTYPE html> +<html> + <head> + <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> + <title>Trivium</title> + <link rel="stylesheet" href="style.css"> + <link rel="alternate" type="application/atom+xml" title="Atom" href="http://chneukirchen.org/trivium/index.atom"> + </head> + <body> + +<div class="hfeed"> +<h1><a href="./"><img src="trivium.png" alt="Trivium"></a></h1> + +<small class="top nav"> + $ if self[:entries].last[:prev_by_month] +<a class="prev" href="${self[:entries].last[:prev_by_month]}">« Previously</a> +$ end +</small> + +$ self[:entries].each { |entry| + +<div class="hentry"> +<h2 class="entry-title"> + <a href="${entry[:id]}" rel="bookmark"> + <abbr class="published" title="${entry[:date].iso8601}"> + ${entry[:title]} + </abbr> + </a> +</h2> +<div class="entry-content"> +$:{format entry} +</div> +</div> + +$ } + +<small class="bot nav"> + $ if self[:entries].last[:prev_by_month] +<a class="prev" href="${self[:entries].last[:prev_by_month]}">« Previously</a> +$ end +</small> + +<address class="author vcard"> 
+Copyright © 2008 <a class="email fn" href="mailto:chneukirchen@gmail.com">Christian Neukirchen</a> +</address> + +</div> +</body> +</html> diff --git a/template/monthly.ht b/template/monthly.ht new file mode 100644 index 0000000..e852b5f --- /dev/null +++ b/template/monthly.ht @@ -0,0 +1,63 @@ +<!DOCTYPE html> +<html> + <head> + <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> + <title>Trivium: ${self[:month]}</title> +$ if self[:entries].first[:prev_by_month] + <link rel="prev" href="${self[:entries].first[:prev_by_month]}"> +$ end +$ if self[:entries].first[:next_by_month] + <link rel="next" href="${self[:entries].first[:next_by_month]}"> +$ end + <link rel="stylesheet" href="style.css"> + <link rel="alternate" type="application/atom+xml" title="Atom" href="http://chneukirchen.org/trivium/index.atom"> + </head> + <body> + +<div class="hfeed"> +<h1><a href="./"><img src="trivium.png" alt="Trivium"></a></h1> + +<small class="top nav"> +$ if self[:entries].first[:prev_by_month] +<a class="prev" href="${self[:entries].first[:prev_by_month]}">« ${Time.parse(self[:entries].first[:prev_by_month]).strftime("%B %Y")}</a> +$ end + +$ if self[:entries].first[:next_by_month] +<a class="next" href="${self[:entries].first[:next_by_month]}">${Time.parse(self[:entries].first[:next_by_month]).strftime("%B %Y")} »</a> +$ end +</small> + +$ self[:entries].each { |entry| +<div class="hentry"> +<h2 class="entry-title"> + <a href="${entry[:id]}" rel="bookmark"> + <abbr class="published" title="${entry[:date].iso8601}"> + ${entry[:title]} + </abbr> + </a> +</h2> + +<div class="entry-content"> +$:{format entry} +</div> +</div> +$ } + +<small class="bot nav"> +$ if self[:entries].first[:prev_by_month] +<a class="prev" href="${self[:entries].first[:prev_by_month]}">« ${Time.parse(self[:entries].first[:prev_by_month]).strftime("%B %Y")}</a> +$ end + +$ if self[:entries].first[:next_by_month] +<a class="next" 
href="${self[:entries].first[:next_by_month]}">${Time.parse(self[:entries].first[:next_by_month]).strftime("%B %Y")} »</a> +$ end +</small> + +<address class="author vcard"> +Copyright © 2008 <a class="email fn" href="mailto:chneukirchen@gmail.com">Christian Neukirchen</a> +</address> + +</div> +</body> +</html> + diff --git a/template/single.ht b/template/single.ht new file mode 100644 index 0000000..3c21fa0 --- /dev/null +++ b/template/single.ht @@ -0,0 +1,51 @@ +<!DOCTYPE html> +<html> + <head> + <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> + <title>Trivium: ${self[:title]}</title> +$ if self[:prev_by_date] + <link rel="prev" href="${self[:prev_by_date]}"> +$ end +$ if self[:next_by_date] + <link rel="next" href="${self[:next_by_date]}"> +$ end + <link rel="stylesheet" href="style.css"> + <link rel="alternate" type="application/atom+xml" title="Atom" href="http://chneukirchen.org/trivium/index.atom"> + </head> + <body> + +<div class="hfeed"> +<h1><a href="./"><img src="trivium.png" alt="Trivium"></a></h1> + +<small class="top nav"> +$ if l=Entry[self[:prev_by_date]] +<a class="prev" href="${l[:id]}">« ${l[:title]}</a> +$ end + +$ if l=Entry[self[:next_by_date]] +<a class="next" href="${l[:id]}">${l[:title]} »</a> +$ end +</small> + +<div class="hentry"> +<h2 class="entry-title"> + <a href="${self[:id]}" rel="bookmark"> + <abbr class="published" title="${self[:date].iso8601}"> + ${self[:title]} + </abbr> + </a> +</h2> + +<div class="entry-content"> +$:{format self} +</div> +</div> + +<address class="author vcard"> +Copyright © 2008 <a class="email fn" href="mailto:chneukirchen@gmail.com">Christian Neukirchen</a> +</address> + +</div> +</body> +</html> + diff --git a/trivium.rb b/trivium.rb new file mode 100644 index 0000000..23d76a8 --- /dev/null +++ b/trivium.rb @@ -0,0 +1,158 @@ +require 'time'; require 'cgi' +$: << "vendor" +require 'bluecloth'; require 'rubypants'; require 'htemplate' +BlueCloth::EmptyElementSuffix.replace(">") + 
+Dir.mkdir("html") rescue true + +def File.write(name, content) + File.open(name, "wb") { |out| out << content } + puts name +end + +def dep(dst, *srcs) + yield dst unless srcs.all? { |src| + (File.mtime(src) < File.mtime(dst) rescue false) } +end + +def parse(f) + head, body = File.read(f).split("\n\n", 2) rescue (return nil) + entry = {:body => body, :id => File.basename(f, ".entry"), :file => f} + head.scan(/(\w+):\s*(.*)/) { entry[$1.downcase.to_sym] = $2 } + entry[:date] = Time.parse(entry[:date]) if entry[:date] + entry[:title] = entry[:date].strftime("%d%b%Y").downcase if entry[:date] + entry +end + +Entry = Hash.new { |h, k| h[k] = parse k } +ENTRIES = Dir.glob("entries/*.entry").map { |x| Entry[x] }. + sort_by { |f| f[:date] }.reverse +Entry.values.each { |v| Entry[v[:id]] = v } + +class SpanBlueCloth < BlueCloth + def apply_block_transforms(text, rs) + text # we don't do blocks + end +end + +class InlineMath < String + MATH_TEX = 'http://vuxu.org/~chris/mathtex/mathtex.cgi?' + + CGI.escape('\textstyle{}\usepackage{color}\color{white}\rule[-0.333em]{0.01pt}{1.2em}\color{black}') + + def to_html + gsub(/\$(.*?)\$/) { + html = CGI.escapeHTML($1) + formula = CGI.escape($1).gsub('+', '%20') + %{<img class="inline-math" alt="#{html}" src="#{MATH_TEX}#{formula}">} + } + end +end + +class Dots < String + MATH_TEX = "http://vuxu.org/~chris/mathtex/mathtex.cgi?" + + def to_html + gsub(/^\.(\w+)([^\n]*?)\n(.*?)^\.\1\.$/m) { + name, args, body = $1, $2, $3 + case name + when "link" + title, desc = body.split("|", 2) + # why does bluecloth need those div? + %{<p class="link"><span><a href="#{args.strip}">#{title.strip}</a>#{SpanBlueCloth.new(desc).to_html}</span></p>} + when "quote" + if args.strip.empty? + src = "" + else + src = %{<span class="source">— #{args.strip}</span>} + end + %{<div class="quote">#{BlueCloth.new((body + src).gsub(/^.*$/, '> \& ')).to_html}</div>} + when "math" + body << "\\eqno{#{args.strip}}" unless args.strip.empty? 
+ %{<div class="math"><img alt="#{CGI.escapeHTML body}" src="#{MATH_TEX}#{CGI.escape(body).gsub('+', '%20')}"></div>} + else + %{<div class="#{name}">#{BlueCloth.new(Dots.new(body).to_html).to_html}</div>} + end + } + end +end + +def format(e) + RubyPants.new( + BlueCloth.new( + Dots.new( + InlineMath.new( + e[:body] + ).to_html).to_html).to_html).to_html +end + +def template(template, data) + HTemplate.new(File.read(template), template).expand(data) +end + +def group(entries, &block) + r = {}; entries.each { |e| (r[block[e]] ||= []) << e }; r +end + +def inner_sort(group, &block) + group.each { |key, entries| entries.sort_by(&block) } +end + +def outer_sort(group, &block) + group.sort_by(&block) +end + +def chain(group, name) + group.each_with_index { |(key, entries), i| + entries.each { |e| + e[:"next_by_#{name}"] = group[i+1][0] if group[i+1] + e[:"prev_by_#{name}"] = group[i-1][0] if i > 0 + } + } +end + +def deps(e) + [e[:id], e[:next_by_date], e[:prev_by_date]].compact.map {|z| Entry[z][:file] } +end + +single_by_date = group(ENTRIES) { |e| e[:id] } +single_by_date = outer_sort(single_by_date) { |k,e| e.first[:date] } +chain(single_by_date, "date") + +single_by_date.each { |date, entries| + entry = entries.first + dep "html/#{entry[:id]}.html", "template/single.ht", *deps(entry) do |dst| + File.write(dst, template("template/single.ht", entry)) + end +} + +monthly = group(ENTRIES) { |e| e[:date].strftime("%Y-%m") } +inner_sort(monthly) { |e| e[:date] } +monthly = outer_sort(monthly) { |k,e| e.first[:date] } +chain(monthly, "month") + +monthly.each { |month, entries| + entry = entries.first + dep "html/#{month}.html", "template/monthly.ht", *deps(entry) do |dst| + File.write(dst, template("template/monthly.ht", :entries => entries, :month => month)) + end +} + +front = ENTRIES.first(2) #(10) +d = front.map { |e| e[:file] } +dep "html/index.html", "template/front.ht", *d do |dst| + File.write(dst, template("template/front.ht", :entries => front, :next => 
monthly.last)) +end + +feed = ENTRIES.first(20) +d = feed.map { |e| e[:file] } +dep "html/index.atom", "template/atom.ht", *d do |dst| + File.write(dst, template("template/atom.ht", + :entries => feed, :time => Time.now)) +end + +d = ENTRIES.map { |e| e[:file] } +dep "html/all.html", "template/all.ht", *d do |dst| + File.write(dst, template("template/all.ht", :entries => ENTRIES)) +end + +system "rsync -r data/ html" diff --git a/vendor/bluecloth.rb b/vendor/bluecloth.rb new file mode 100644 index 0000000..96266f2 --- /dev/null +++ b/vendor/bluecloth.rb @@ -0,0 +1,1144 @@ +#!/usr/bin/ruby +# +# Bluecloth is a Ruby implementation of Markdown, a text-to-HTML conversion +# tool. +# +# == Synopsis +# +# doc = BlueCloth::new " +# ## Test document ## +# +# Just a simple test. +# " +# +# puts doc.to_html +# +# == Authors +# +# * Michael Granger <ged@FaerieMUD.org> +# +# == Contributors +# +# * Martin Chase <stillflame@FaerieMUD.org> - Peer review, helpful suggestions +# * Florian Gross <flgr@ccan.de> - Filter options, suggestions +# +# == Copyright +# +# Original version: +# Copyright (c) 2003-2004 John Gruber +# <http://daringfireball.net/> +# All rights reserved. +# +# Ruby port: +# Copyright (c) 2004 The FaerieMUD Consortium. +# +# BlueCloth is free software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the Free Software +# Foundation; either version 2 of the License, or (at your option) any later +# version. +# +# BlueCloth is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR +# A PARTICULAR PURPOSE. See the GNU General Public License for more details. +# +# == To-do +# +# * Refactor some of the larger uglier methods that have to do their own +# brute-force scanning because of lack of Perl features in Ruby's Regexp +# class. Alternately, could add a dependency on 'pcre' and use most Perl +# regexps. 
+# +# * Put the StringScanner in the render state for thread-safety. +# +# == Version +# +# $Id: bluecloth.rb 69 2004-08-25 05:27:15Z ged $ +# + +require 'digest/md5' +require 'logger' +require 'strscan' + + +### BlueCloth is a Ruby implementation of Markdown, a text-to-HTML conversion +### tool. +class BlueCloth < String + + ### Exception class for formatting errors. + class FormatError < RuntimeError + + ### Create a new FormatError with the given source +str+ and an optional + ### message about the +specific+ error. + def initialize( str, specific=nil ) + if specific + msg = "Bad markdown format near %p: %s" % [ str, specific ] + else + msg = "Bad markdown format near %p" % str + end + + super( msg ) + end + end + + + # Release Version + Version = '0.0.3' + + # SVN Revision + SvnRev = %q$Rev: 69 $ + + # SVN Id tag + SvnId = %q$Id: bluecloth.rb 69 2004-08-25 05:27:15Z ged $ + + # SVN URL + SvnUrl = %q$URL: svn+ssh://svn.faeriemud.org/usr/local/svn/BlueCloth/trunk/lib/bluecloth.rb $ + + + # Rendering state struct. Keeps track of URLs, titles, and HTML blocks + # midway through a render. I prefer this to the globals of the Perl version + # because globals make me break out in hives. Or something. + RenderState = Struct::new( "RenderState", :urls, :titles, :html_blocks, :log ) + + # Tab width for #detab! if none is specified + TabWidth = 4 + + # The tag-closing string -- set to '>' for HTML + EmptyElementSuffix = "/>"; + + # Table of MD5 sums for escaped characters + EscapeTable = {} + '\\`*_{}[]()#.!'.split(//).each {|char| + hash = Digest::MD5::hexdigest( char ) + + EscapeTable[ char ] = { + :md5 => hash, + :md5re => Regexp::new( hash ), + :re => Regexp::new( '\\\\' + Regexp::escape(char) ), + } + } + + + ################################################################# + ### I N S T A N C E M E T H O D S + ################################################################# + + ### Create a new BlueCloth string. 
+ def initialize( content="", *restrictions ) + @log = Logger::new( $deferr ) + @log.level = $DEBUG ? + Logger::DEBUG : + ($VERBOSE ? Logger::INFO : Logger::WARN) + @scanner = nil + + # Add any restrictions, and set the line-folding attribute to reflect + # what happens by default. + @filter_html = nil + @filter_styles = nil + restrictions.flatten.each {|r| __send__("#{r}=", true) } + @fold_lines = true + + super( content ) + + @log.debug "String is: %p" % self + end + + + ###### + public + ###### + + # Filters for controlling what gets output for untrusted input. (But really, + # you're filtering bad stuff out of untrusted input at submission-time via + # untainting, aren't you?) + attr_accessor :filter_html, :filter_styles + + # RedCloth-compatibility accessor. Line-folding is part of Markdown syntax, + # so this isn't used by anything. + attr_accessor :fold_lines + + + ### Render Markdown-formatted text in this string object as HTML and return + ### it. The parameter is for compatibility with RedCloth, and is currently + ### unused, though that may change in the future. + def to_html( lite=false ) + + # Create a StringScanner we can reuse for various lexing tasks + @scanner = StringScanner::new( '' ) + + # Make a structure to carry around stuff that gets placeholdered out of + # the source. 
+ rs = RenderState::new( {}, {}, {} ) + + # Make a copy of the string with normalized line endings, tabs turned to + # spaces, and a couple of guaranteed newlines at the end + text = self.gsub( /\r\n?/, "\n" ).detab + text += "\n\n" + @log.debug "Normalized line-endings: %p" % text + + # Filter HTML if we're asked to do so + if self.filter_html + text.gsub!( "<", "&lt;" ) + text.gsub!( ">", "&gt;" ) + @log.debug "Filtered HTML: %p" % text + end + + # Simplify blank lines + text.gsub!( /^ +$/, '' ) + @log.debug "Tabs -> spaces/blank lines stripped: %p" % text + + # Replace HTML blocks with placeholders + text = hide_html_blocks( text, rs ) + @log.debug "Hid HTML blocks: %p" % text + @log.debug "Render state: %p" % rs + + # Strip link definitions, store in render state + text = strip_link_definitions( text, rs ) + @log.debug "Stripped link definitions: %p" % text + @log.debug "Render state: %p" % rs + + # Escape meta-characters + text = escape_special_chars( text ) + @log.debug "Escaped special characters: %p" % text + + # Transform block-level constructs + text = apply_block_transforms( text, rs ) + @log.debug "After block-level transforms: %p" % text + + # Now swap back in all the escaped characters + text = unescape_special_chars( text ) + @log.debug "After unescaping special characters: %p" % text + + return text + end + + + ### Convert tabs in +str+ to spaces. + def detab( tabwidth=TabWidth ) + copy = self.dup + copy.detab!( tabwidth ) + return copy + end + + + ### Convert tabs to spaces in place and return self if any were converted. + def detab!( tabwidth=TabWidth ) + newstr = self.split( /\n/ ).collect {|line| + line.gsub( /(.*?)\t/ ) do + $1 + ' ' * (tabwidth - $1.length % tabwidth) + end + }.join("\n") + self.replace( newstr ) + end + + + ####### + #private + ####### + + ### Do block-level transforms on a copy of +str+ using the specified render + ### state +rs+ and return the results. 
+ def apply_block_transforms( str, rs ) + # Port: This was called '_runBlockGamut' in the original + + @log.debug "Applying block transforms to:\n %p" % str + text = transform_headers( str, rs ) + text = transform_hrules( text, rs ) + text = transform_lists( text, rs ) + text = transform_code_blocks( text, rs ) + text = transform_block_quotes( text, rs ) + text = transform_auto_links( text, rs ) + text = hide_html_blocks( text, rs ) + + text = form_paragraphs( text, rs ) + + @log.debug "Done with block transforms:\n %p" % text + return text + end + + + ### Apply Markdown span transforms to a copy of the specified +str+ with the + ### given render state +rs+ and return it. + def apply_span_transforms( str, rs ) + @log.debug "Applying span transforms to:\n %p" % str + + str = transform_code_spans( str, rs ) + str = encode_html( str ) + str = transform_images( str, rs ) + str = transform_anchors( str, rs ) + str = transform_italic_and_bold( str, rs ) + + # Hard breaks + str.gsub!( / {2,}\n/, "<br#{EmptyElementSuffix}\n" ) + + @log.debug "Done with span transforms:\n %p" % str + return str + end + + + # The list of tags which are considered block-level constructs and an + # alternation pattern suitable for use in regexps made from the list + StrictBlockTags = %w[ p div h[1-6] blockquote pre table dl ol ul script noscript + form fieldset iframe math ins del ] + StrictTagPattern = StrictBlockTags.join('|') + + LooseBlockTags = StrictBlockTags - %w[ins del] + LooseTagPattern = LooseBlockTags.join('|') + + # Nested blocks: + # <div> + # <div> + # tags for inner block must be indented. + # </div> + # </div> + StrictBlockRegex = %r{ + ^ # Start of line + <(#{StrictTagPattern}) # Start tag: \2 + \b # word break + (.*\n)*? 
# Any number of lines, minimal match + </\1> # Matching end tag + [ ]* # trailing spaces + $ # End of line or document + }ix + + # More-liberal block-matching + LooseBlockRegex = %r{ + ^ # Start of line + <(#{LooseTagPattern}) # start tag: \2 + \b # word break + (.*\n)*? # Any number of lines, minimal match + .*</\1> # Anything + Matching end tag + [ ]* # trailing spaces + $ # End of line or document + }ix + + # Special case for <hr />. + HruleBlockRegex = %r{ + ( # $1 + \A\n? # Start of doc + optional \n + | # or + .*\n\n # anything + blank line + ) + ( # save in $2 + [ ]* # Any spaces + <hr # Tag open + \b # Word break + ([^<>])*? # Attributes + /?> # Tag close + $ # followed by a blank line or end of document + ) + }ix + + ### Replace all blocks of HTML in +str+ that start in the left margin with + ### tokens. + def hide_html_blocks( str, rs ) + @log.debug "Hiding HTML blocks in %p" % str + + # Tokenizer proc to pass to gsub + tokenize = lambda {|match| + key = Digest::MD5::hexdigest( match ) + rs.html_blocks[ key ] = match + @log.debug "Replacing %p with %p" % [ match, key ] + "\n\n#{key}\n\n" + } + + rval = str.dup + + @log.debug "Finding blocks with the strict regex..." + rval.gsub!( StrictBlockRegex, &tokenize ) + + @log.debug "Finding blocks with the loose regex..." + rval.gsub!( LooseBlockRegex, &tokenize ) + + @log.debug "Finding hrules..." + rval.gsub!( HruleBlockRegex ) {|match| $1 + tokenize[$2] } + + return rval + end + + + # Link defs are in the form: ^[id]: url "optional title" + LinkRegex = %r{ + ^[ ]*\[(.+)\]: # id = $1 + [ ]* + \n? # maybe *one* newline + [ ]* + <?(\S+?)>? # url = $2 + [ ]* + \n? # maybe one newline + [ ]* + (?: + # Titles are delimited by "quotes" or (parens). + ["(] + (.+?) # title = $3 + [")] # Matching ) or " + [ ]* + )? # title is optional + (?:\n+|\Z) + }x + + ### Strip link definitions from +str+, storing them in the given RenderState + ### +rs+. 
+ def strip_link_definitions( str, rs ) + str.gsub( LinkRegex ) {|match| + id, url, title = $1, $2, $3 + + rs.urls[ id.downcase ] = encode_html( url ) + unless title.nil? + rs.titles[ id.downcase ] = title.gsub( /"/, "&quot;" ) + end + "" + } + end + + + ### Escape special characters in the given +str+ + def escape_special_chars( str ) + @log.debug " Escaping special characters" + text = '' + + # The original Markdown source has something called '$tags_to_skip' + # declared here, but it's never used, so I don't define it. + + tokenize_html( str ) {|token, str| + @log.debug " Adding %p token %p" % [ token, str ] + case token + + # Within tags, encode * and _ + when :tag + text += str. + gsub( /\*/, EscapeTable['*'][:md5] ). + gsub( /_/, EscapeTable['_'][:md5] ) + + # Encode backslashed stuff in regular text + when :text + text += encode_backslash_escapes( str ) + else + raise TypeError, "Unknown token type %p" % token + end + } + + @log.debug " Text with escapes is now: %p" % text + return text + end + + + ### Swap escaped special characters in a copy of the given +str+ and return + ### it. + def unescape_special_chars( str ) + EscapeTable.each {|char, hash| + @log.debug "Unescaping escaped %p with %p" % [ char, hash[:md5re] ] + str.gsub!( hash[:md5re], char ) + } + + return str + end + + + ### Return a copy of the given +str+ with any backslashed special character + ### in it replaced with MD5 placeholders. + def encode_backslash_escapes( str ) + # Make a copy with any double-escaped backslashes encoded + text = str.gsub( /\\\\/, EscapeTable['\\'][:md5] ) + + EscapeTable.each_pair {|char, esc| + next if char == '\\' + text.gsub!( esc[:re], esc[:md5] ) + } + + return text + end + + + ### Transform any Markdown-style horizontal rules in a copy of the specified + ### +str+ and return it. 
+ def transform_hrules( str, rs ) + @log.debug " Transforming horizontal rules" + str.gsub( /^( ?[\-\*_] ?){3,}$/, "\n<hr#{EmptyElementSuffix}\n" ) + end + + + + # Patterns to match and transform lists + ListMarkerOl = %r{\d+\.} + ListMarkerUl = %r{[*+-]} + ListMarkerAny = Regexp::union( ListMarkerOl, ListMarkerUl ) + + ListRegexp = %r{ + (?: + ^[ ]{0,#{TabWidth - 1}} # Indent < tab width + (#{ListMarkerAny}) # unordered or ordered ($1) + [ ]+ # At least one space + ) + (?m:.+?) # item content (include newlines) + (?: + \z # Either EOF + | # or + \n{2,} # Blank line... + (?=\S) # ...followed by non-space + (?![ ]* # ...but not another item + (#{ListMarkerAny}) + [ ]+) + ) + }x + + ### Transform Markdown-style lists in a copy of the specified +str+ and + ### return it. + def transform_lists( str, rs ) + @log.debug " Transforming lists at %p" % (str[0,100] + '...') + + str.gsub( ListRegexp ) {|list| + @log.debug " Found list %p" % list + bullet = $1 + list_type = (ListMarkerUl.match(bullet) ? "ul" : "ol") + list.gsub!( /\n{2,}/, "\n\n\n" ) + + %{<%s>\n%s</%s>\n} % [ + list_type, + transform_list_items( list, rs ), + list_type, + ] + } + end + + + # Pattern for transforming list items + ListItemRegexp = %r{ + (\n)? # leading line = $1 + (^[ ]*) # leading whitespace = $2 + (#{ListMarkerAny}) [ ]+ # list marker = $3 + ((?m:.+?) # list item text = $4 + (\n{1,2})) + (?= \n* (\z | \2 (#{ListMarkerAny}) [ ]+)) + }x + + ### Transform list items in a copy of the given +str+ and return it. 
+ def transform_list_items( str, rs ) + @log.debug " Transforming list items" + + # Trim trailing blank lines + str = str.sub( /\n{2,}\z/, "\n" ) + + str.gsub( ListItemRegexp ) {|line| + @log.debug " Found item line %p" % line + leading_line, item = $1, $4 + + if leading_line or /\n{2,}/.match( item ) + @log.debug " Found leading line or item has a blank" + item = apply_block_transforms( outdent(item), rs ) + else + # Recursion for sub-lists + @log.debug " Recursing for sublist" + item = transform_lists( outdent(item), rs ).chomp + item = apply_span_transforms( item, rs ) + end + + %{<li>%s</li>\n} % item + } + end + + + # Pattern for matching codeblocks + CodeBlockRegexp = %r{ + (?:\n\n|\A) + ( # $1 = the code block + (?: + (?:[ ]{#{TabWidth}} | \t) # a tab or tab-width of spaces + .*\n+ + )+ + ) + (^[ ]{0,#{TabWidth - 1}}\S|\Z) # Lookahead for non-space at + # line-start, or end of doc + }x + + ### Transform Markdown-style codeblocks in a copy of the specified +str+ and + ### return it. + def transform_code_blocks( str, rs ) + @log.debug " Transforming code blocks" + + str.gsub( CodeBlockRegexp ) {|block| + codeblock = $1 + remainder = $2 + + # Generate the codeblock + %{\n\n<pre><code>%s\n</code></pre>\n\n%s} % + [ encode_code( outdent(codeblock), rs ).rstrip, remainder ] + } + end + + + # Pattern for matching Markdown blockquote blocks + BlockQuoteRegexp = %r{ + (?: + ^[ ]*>[ ]? # '>' at the start of a line + .+\n # rest of the first line + (?:.+\n)* # subsequent consecutive lines + \n* # blanks + )+ + }x + PreChunk = %r{ ( ^ \s* <pre> .+? </pre> ) }xm + + ### Transform Markdown-style blockquotes in a copy of the specified +str+ + ### and return it. 
+ def transform_block_quotes( str, rs ) + @log.debug " Transforming block quotes" + + str.gsub( BlockQuoteRegexp ) {|quote| + @log.debug "Making blockquote from %p" % quote + + quote.gsub!( /^ *> ?/, '' ) # Trim one level of quoting + quote.gsub!( /^ +$/, '' ) # Trim whitespace-only lines + + indent = " " * TabWidth + quoted = %{<blockquote>\n%s\n</blockquote>\n\n} % + apply_block_transforms( quote, rs ). + gsub( /^/, indent ). + gsub( PreChunk ) {|m| m.gsub(/^#{indent}/o, '') } + @log.debug "Blockquoted chunk is: %p" % quoted + quoted + } + end + + + AutoAnchorURLRegexp = /<((https?|ftp):[^'">\s]+)>/ + AutoAnchorEmailRegexp = %r{ + < + ( + [-.\w]+ + \@ + [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+ + ) + > + }xi + + ### Transform URLs in a copy of the specified +str+ into links and return + ### it. + def transform_auto_links( str, rs ) + @log.debug " Transforming auto-links" + str.gsub( AutoAnchorURLRegexp, %{<a href="\\1">\\1</a>}). + gsub( AutoAnchorEmailRegexp ) {|addr| + encode_email_address( unescape_special_chars($1) ) + } + end + + + # Encoder functions to turn characters of an email address into encoded + # entities. + Encoders = [ + lambda {|char| "&#%03d;" % char}, + lambda {|char| "&#x%X;" % char}, + lambda {|char| char.chr }, + ] + + ### Transform a copy of the given email +addr+ into an escaped version safer + ### for posting publicly. + def encode_email_address( addr ) + + rval = '' + ("mailto:" + addr).each_byte {|b| + case b + when ?: + rval += ":" + when ?@ + rval += Encoders[ rand(2) ][ b ] + else + r = rand(100) + rval += ( + r > 90 ? Encoders[2][ b ] : + r < 45 ? Encoders[1][ b ] : + Encoders[0][ b ] + ) + end + } + + return %{<a href="%s">%s</a>} % [ rval, rval.sub(/.+?:/, '') ] + end + + + # Regex for matching Setext-style headers + SetextHeaderRegexp = %r{ + (.+) # The title text ($1) + \n + ([\-=])+ # Match a line of = or -. Save only one in $2. 
+ [ ]*\n+ + }x + + # Regexp for matching ATX-style headers + AtxHeaderRegexp = %r{ + ^(\#{1,6}) # $1 = string of #'s + [ ]* + (.+?) # $2 = Header text + [ ]* + \#* # optional closing #'s (not counted) + \n+ + }x + + ### Apply Markdown header transforms to a copy of the given +str+ amd render + ### state +rs+ and return the result. + def transform_headers( str, rs ) + @log.debug " Transforming headers" + + # Setext-style headers: + # Header 1 + # ======== + # + # Header 2 + # -------- + # + str. + gsub( SetextHeaderRegexp ) {|m| + @log.debug "Found setext-style header" + title, hdrchar = $1, $2 + title = apply_span_transforms( title, rs ) + + case hdrchar + when '=' + %[<h1>#{title}</h1>\n\n] + when '-' + %[<h2>#{title}</h2>\n\n] + else + title + end + }. + + gsub( AtxHeaderRegexp ) {|m| + @log.debug "Found ATX-style header" + hdrchars, title = $1, $2 + title = apply_span_transforms( title, rs ) + + level = hdrchars.length + %{<h%d>%s</h%d>\n\n} % [ level, title, level ] + } + end + + + ### Wrap all remaining paragraph-looking text in a copy of +str+ inside <p> + ### tags and return it. + def form_paragraphs( str, rs ) + @log.debug " Forming paragraphs" + grafs = str. + sub( /\A\n+/, '' ). + sub( /\n+\z/, '' ). + split( /\n{2,}/ ) + + rval = grafs.collect {|graf| + + # Unhashify HTML blocks if this is a placeholder + if rs.html_blocks.key?( graf ) + rs.html_blocks[ graf ] + + # Otherwise, wrap in <p> tags + else + apply_span_transforms(graf, rs). + sub( /^[ ]*/, '<p>' ) + '</p>' + end + }.join( "\n\n" ) + + @log.debug " Formed paragraphs: %p" % rval + return rval + end + + + # Pattern to match the linkid part of an anchor tag for reference-style + # links. + RefLinkIdRegex = %r{ + [ ]? # Optional leading space + (?:\n[ ]*)? # Optional newline + spaces + \[ + (.*?) # Id = $1 + \] + }x + + InlineLinkRegex = %r{ + \( # Literal paren + [ ]* # Zero or more spaces + <?(.+?)>? # URI = $1 + [ ]* # Zero or more spaces + (?: # + ([\"\']) # Opening quote char = $2 + (.*?) 
# Title = $3 + \2 # Matching quote char + )? # Title is optional + \) + }x + + ### Apply Markdown anchor transforms to a copy of the specified +str+ with + ### the given render state +rs+ and return it. + def transform_anchors( str, rs ) + @log.debug " Transforming anchors" + @scanner.string = str.dup + text = '' + + # Scan the whole string + until @scanner.empty? + + if @scanner.scan( /\[/ ) + link = ''; linkid = '' + depth = 1 + startpos = @scanner.pos + @log.debug " Found a bracket-open at %d" % startpos + + # Scan the rest of the tag, allowing unlimited nested []s. If + # the scanner runs out of text before the opening bracket is + # closed, append the text and return (wasn't a valid anchor). + while depth.nonzero? + linktext = @scanner.scan_until( /\]|\[/ ) + + if linktext + @log.debug " Found a bracket at depth %d: %p" % [ depth, linktext ] + link += linktext + + # Decrement depth for each closing bracket + depth += ( linktext[-1, 1] == ']' ? -1 : 1 ) + @log.debug " Depth is now #{depth}" + + # If there's no more brackets, it must not be an anchor, so + # just abort. + else + @log.debug " Missing closing brace, assuming non-link." + link += @scanner.rest + @scanner.terminate + return text + '[' + link + end + end + link.slice!( -1 ) # Trim final ']' + @log.debug " Found leading link %p" % link + + # Look for a reference-style second part + if @scanner.scan( RefLinkIdRegex ) + linkid = @scanner[1] + linkid = link.dup if linkid.empty? + linkid.downcase! + @log.debug " Found a linkid: %p" % linkid + + # If there's a matching link in the link table, build an + # anchor tag for it. 
+ if rs.urls.key?( linkid ) + @log.debug " Found link key in the link table: %p" % rs.urls[linkid] + url = escape_md( rs.urls[linkid] ) + + text += %{<a href="#{url}"} + if rs.titles.key?(linkid) + text += %{ title="%s"} % escape_md( rs.titles[linkid] ) + end + text += %{>#{link}</a>} + + # If the link referred to doesn't exist, just append the raw + # source to the result + else + @log.debug " Linkid %p not found in link table" % linkid + @log.debug " Appending original string instead: " + @log.debug "%p" % @scanner.string[ startpos-1 .. @scanner.pos-1 ] + text += @scanner.string[ startpos-1 .. @scanner.pos-1 ] + end + + # ...or for an inline style second part + elsif @scanner.scan( InlineLinkRegex ) + url = @scanner[1] + title = @scanner[3] + @log.debug " Found an inline link to %p" % url + + text += %{<a href="%s"} % escape_md( url ) + if title + title.gsub!( /"/, """ ) + text += %{ title="%s"} % escape_md( title ) + end + text += %{>#{link}</a>} + + # No linkid part: just append the first part as-is. + else + @log.debug "No linkid, so no anchor. Appending literal text." + text += @scanner.string[ startpos-1 .. @scanner.pos-1 ] + end # if linkid + + # Plain text + else + @log.debug " Scanning to the next link from %p" % @scanner.rest + text += @scanner.scan( /[^\[]+/ ) + end + + end # until @scanner.empty? + + return text + end + + + # Pattern to match strong emphasis in Markdown text + BoldRegexp = %r{ (\*\*|__) (\S|\S.+?\S) \1 }x + + # Pattern to match normal emphasis in Markdown text + ItalicRegexp = %r{ (\*|_) (\S|\S.+?\S) \1 }x + + ### Transform italic- and bold-encoded text in a copy of the specified +str+ + ### and return it. + def transform_italic_and_bold( str, rs ) + @log.debug " Transforming italic and bold" + + str. + gsub( BoldRegexp, %{<strong>\\2</strong>} ). + gsub( ItalicRegexp, %{<em>\\2</em>} ) + end + + + ### Transform backticked spans into <code> spans. 
+ def transform_code_spans( str, rs ) + @log.debug " Transforming code spans" + + # Set up the string scanner and just return the string unless there's at + # least one backtick. + @scanner.string = str.dup + unless @scanner.exist?( /`/ ) + @scanner.terminate + @log.debug "No backticks found for code span in %p" % str + return str + end + + @log.debug "Transforming code spans in %p" % str + + # Build the transformed text anew + text = '' + + # Scan to the end of the string + until @scanner.empty? + + # Scan up to an opening backtick + if pre = @scanner.scan_until( /.?(?=`)/m ) + text += pre + @log.debug "Found backtick at %d after '...%s'" % [ @scanner.pos, text[-10, 10] ] + + # Make a pattern to find the end of the span + opener = @scanner.scan( /`+/ ) + len = opener.length + closer = Regexp::new( opener ) + @log.debug "Scanning for end of code span with %p" % closer + + # Scan until the end of the closing backtick sequence. Chop the + # backticks off the resultant string, strip leading and trailing + # whitespace, and encode any enitites contained in it. + codespan = @scanner.scan_until( closer ) or + raise FormatError::new( @scanner.rest[0,20], + "No %p found before end" % opener ) + + @log.debug "Found close of code span at %d: %p" % [ @scanner.pos - len, codespan ] + codespan.slice!( -len, len ) + text += "<code>%s</code>" % + encode_code( codespan.strip, rs ) + + # If there's no more backticks, just append the rest of the string + # and move the scan pointer to the end + else + text += @scanner.rest + @scanner.terminate + end + end + + return text + end + + + # Next, handle inline images: ![alt text](url "optional title") + # Don't forget: encode * and _ + InlineImageRegexp = %r{ + ( # Whole match = $1 + !\[ (.*?) \] # alt text = $2 + \([ ]* + <?(\S+?)>? # source url = $3 + [ ]* + (?: # + (["']) # quote char = $4 + (.*?) # title = $5 + \4 # matching quote + [ ]* + )? 
# title is optional + \) + ) + }xs #" + + + # Reference-style images + ReferenceImageRegexp = %r{ + ( # Whole match = $1 + !\[ (.*?) \] # Alt text = $2 + [ ]? # Optional space + (?:\n[ ]*)? # One optional newline + spaces + \[ (.*?) \] # id = $3 + ) + }xs + + ### Turn image markup into image tags. + def transform_images( str, rs ) + @log.debug " Transforming images" % str + + # Handle reference-style labeled images: ![alt text][id] + str. + gsub( ReferenceImageRegexp ) {|match| + whole, alt, linkid = $1, $2, $3.downcase + @log.debug "Matched %p" % match + res = nil + alt.gsub!( /"/, '"' ) + + # for shortcut links like ![this][]. + linkid = alt.downcase if linkid.empty? + + if rs.urls.key?( linkid ) + url = escape_md( rs.urls[linkid] ) + @log.debug "Found url '%s' for linkid '%s' " % [ url, linkid ] + + # Build the tag + result = %{<img src="%s" alt="%s"} % [ url, alt ] + if rs.titles.key?( linkid ) + result += %{ title="%s"} % escape_md( rs.titles[linkid] ) + end + result += EmptyElementSuffix + + else + result = whole + end + + @log.debug "Replacing %p with %p" % [ match, result ] + result + }. + + # Inline image style + gsub( InlineImageRegexp ) {|match| + @log.debug "Found inline image %p" % match + whole, alt, title = $1, $2, $5 + url = escape_md( $3 ) + alt.gsub!( /"/, '"' ) + + # Build the tag + result = %{<img src="%s" alt="%s"} % [ url, alt ] + unless title.nil? + title.gsub!( /"/, '"' ) + result += %{ title="%s"} % escape_md( title ) + end + result += EmptyElementSuffix + + @log.debug "Replacing %p with %p" % [ match, result ] + result + } + end + + + # Regexp to match special characters in a code block + CodeEscapeRegexp = %r{( \* | _ | \{ | \} | \[ | \] | \\ )}x + + ### Escape any characters special to HTML and encode any characters special + ### to Markdown in a copy of the given +str+ and return it. + def encode_code( str, rs ) + str.gsub( %r{&}, '&' ). + gsub( %r{<}, '<' ). + gsub( %r{>}, '>' ). 
+ gsub( CodeEscapeRegexp ) {|match| EscapeTable[match][:md5]} + end + + + + ################################################################# + ### U T I L I T Y F U N C T I O N S + ################################################################# + + ### Escape any markdown characters in a copy of the given +str+ and return + ### it. + def escape_md( str ) + str. + gsub( /\*/, EscapeTable['*'][:md5] ). + gsub( /_/, EscapeTable['_'][:md5] ) + end + + + # Matching constructs for tokenizing X/HTML + HTMLCommentRegexp = %r{ <! ( -- .*? -- \s* )+ > }mx + XMLProcInstRegexp = %r{ <\? .*? \?> }mx + MetaTag = Regexp::union( HTMLCommentRegexp, XMLProcInstRegexp ) + + HTMLTagOpenRegexp = %r{ < [a-z/!$] [^<>]* }imx + HTMLTagCloseRegexp = %r{ > }x + HTMLTagPart = Regexp::union( HTMLTagOpenRegexp, HTMLTagCloseRegexp ) + + ### Break the HTML source in +str+ into a series of tokens and return + ### them. The tokens are just 2-element Array tuples with a type and the + ### actual content. If this function is called with a block, the type and + ### text parts of each token will be yielded to it one at a time as they are + ### extracted. + def tokenize_html( str ) + depth = 0 + tokens = [] + @scanner.string = str.dup + type, token = nil, nil + + until @scanner.empty? + @log.debug "Scanning from %p" % @scanner.rest + + # Match comments and PIs without nesting + if (( token = @scanner.scan(MetaTag) )) + type = :tag + + # Do nested matching for HTML tags + elsif (( token = @scanner.scan(HTMLTagOpenRegexp) )) + tagstart = @scanner.pos + @log.debug " Found the start of a plain tag at %d" % tagstart + + # Start the token with the opening angle + depth = 1 + type = :tag + + # Scan the rest of the tag, allowing unlimited nested <>s. If + # the scanner runs out of text before the tag is closed, raise + # an error. + while depth.nonzero? 
+ + # Scan either an opener or a closer + chunk = @scanner.scan( HTMLTagPart ) or + raise "Malformed tag at character %d: %p" % + [ tagstart, token + @scanner.rest ] + + @log.debug " Found another part of the tag at depth %d: %p" % [ depth, chunk ] + + token += chunk + + # If the last character of the token so far is a closing + # angle bracket, decrement the depth. Otherwise increment + # it for a nested tag. + depth += ( token[-1, 1] == '>' ? -1 : 1 ) + @log.debug " Depth is now #{depth}" + end + + # Match text segments + else + @log.debug " Looking for a chunk of text" + type = :text + + # Scan forward, always matching at least one character to move + # the pointer beyond any non-tag '<'. + token = @scanner.scan_until( /[^<]+/m ) + end + + @log.debug " type: %p, token: %p" % [ type, token ] + + # If a block is given, feed it one token at a time. Add the token to + # the token list to be returned regardless. + if block_given? + yield( type, token ) + end + tokens << [ type, token ] + end + + return tokens + end + + + ### Return a copy of +str+ with angle brackets and ampersands HTML-encoded. + def encode_html( str ) + str.gsub( /&(?!#?[x]?(?:[0-9a-f]+|\w+);)/i, "&" ). + gsub( %r{<(?![a-z/?\$!])}i, "<" ) + end + + + ### Return one level of line-leading tabs or spaces from a copy of +str+ and + ### return it. + def outdent( str ) + str.gsub( /^(\t|[ ]{1,#{TabWidth}})/, '') + end + +end # class BlueCloth + diff --git a/vendor/htemplate.rb b/vendor/htemplate.rb new file mode 100644 index 0000000..5a429a1 --- /dev/null +++ b/vendor/htemplate.rb @@ -0,0 +1,68 @@ +require 'strscan' + +class HTemplate + def initialize(source, filename=nil) + @source = source + @filename = filename + compile + end + + if defined? 
Rack::Utils + def escape_html(s) + Rack::Utils.escape_html s + end + else + require 'cgi' + def escape_html(s) + CGI.escapeHTML s + end + end + + def expand(data, output="", escape=nil, &block) + escape ||= lambda { |v| + escape_html v.to_s + } + @code.call(output, data, block, escape) + output + end + + def compile + code = "lambda { |output, data, block, __escape| data.instance_eval {\n" + + scanner = StringScanner.new(@source) + + until scanner.eos? + if scanner.bol? and scanner.scan(/\s*\$ (.*\n)/) + # raw line of code, $ if bla + code << scanner[1] + elsif scanner.scan(/\$(:?)(\{(.*?)\}|(@?[\w.!?]+))/) + # expression, ${foo} or $foo.bar + expr = scanner[3] || scanner[4] + + if scanner[1] == ":" # disable escaping? + code << %Q{output << (#{expr}).to_s;} + else + code << %Q{output << __escape[#{expr}];} + end + elsif scanner.scan(/\$\$/) # plain $ + code << %Q{output << '$';} + elsif scanner.scan(/\$#.*?(?:#\$|$\n?)/) # comment $#...#$ or $#... + # nothing + elsif scanner.scan(/([^\n$]+\n?)|([^\n$]*\n)/) # text + if scanner.matched =~ /\\$/ && scanner.bol? + code << %Q{output << #{scanner.matched.chop.chop.dump};} + else + code << %Q{output << #{scanner.matched.dump};} + end + + code << "\n" if scanner.bol? 
+ else + raise "can't parse template: #{scanner.rest[0..20].dump}" + end + end + + code << "}}" + + @code = eval(code, nil, @filename || '(template)', 0) + end +end diff --git a/vendor/rubypants.rb b/vendor/rubypants.rb new file mode 100644 index 0000000..6c21324 --- /dev/null +++ b/vendor/rubypants.rb @@ -0,0 +1,490 @@ +# +# = RubyPants -- SmartyPants ported to Ruby +# +# Ported by Christian Neukirchen <mailto:chneukirchen@gmail.com> +# Copyright (C) 2004, 2006 Christian Neukirchen +# +# Incooporates ideas, comments and documentation by Chad Miller +# Copyright (C) 2004 Chad Miller +# +# Original SmartyPants by John Gruber +# Copyright (C) 2003 John Gruber +# + +# +# = RubyPants -- SmartyPants ported to Ruby +# +# == Synopsis +# +# RubyPants is a Ruby port of the smart-quotes library SmartyPants. +# +# The original "SmartyPants" is a free web publishing plug-in for +# Movable Type, Blosxom, and BBEdit that easily translates plain ASCII +# punctuation characters into "smart" typographic punctuation HTML +# entities. +# +# +# == Description +# +# RubyPants can perform the following transformations: +# +# * Straight quotes (<tt>"</tt> and <tt>'</tt>) into "curly" quote +# HTML entities +# * Backticks-style quotes (<tt>``like this''</tt>) into "curly" quote +# HTML entities +# * Dashes (<tt>--</tt> and <tt>---</tt>) into en- and em-dash +# entities +# * Three consecutive dots (<tt>...</tt> or <tt>. . .</tt>) into an +# ellipsis entity +# +# This means you can write, edit, and save your posts using plain old +# ASCII straight quotes, plain dashes, and plain dots, but your +# published posts (and final HTML output) will appear with smart +# quotes, em-dashes, and proper ellipses. +# +# RubyPants does not modify characters within <tt><pre></tt>, +# <tt><code></tt>, <tt><kbd></tt>, <tt><math></tt> or +# <tt><script></tt> tag blocks. 
Typically, these tags are used to +# display text where smart quotes and other "smart punctuation" would +# not be appropriate, such as source code or example markup. +# +# +# == Backslash Escapes +# +# If you need to use literal straight quotes (or plain hyphens and +# periods), RubyPants accepts the following backslash escape sequences +# to force non-smart punctuation. It does so by transforming the +# escape sequence into a decimal-encoded HTML entity: +# +# \\ \" \' \. \- \` +# +# This is useful, for example, when you want to use straight quotes as +# foot and inch marks: 6'2" tall; a 17" iMac. (Use <tt>6\'2\"</tt> +# resp. <tt>17\"</tt>.) +# +# +# == Algorithmic Shortcomings +# +# One situation in which quotes will get curled the wrong way is when +# apostrophes are used at the start of leading contractions. For +# example: +# +# 'Twas the night before Christmas. +# +# In the case above, RubyPants will turn the apostrophe into an +# opening single-quote, when in fact it should be a closing one. I +# don't think this problem can be solved in the general case--every +# word processor I've tried gets this wrong as well. In such cases, +# it's best to use the proper HTML entity for closing single-quotes +# ("<tt>’</tt>") by hand. +# +# +# == Bugs +# +# To file bug reports or feature requests (except see above) please +# send email to: mailto:chneukirchen@gmail.com +# +# If the bug involves quotes being curled the wrong way, please send +# example text to illustrate. +# +# +# == Authors +# +# John Gruber did all of the hard work of writing this software in +# Perl for Movable Type and almost all of this useful documentation. +# Chad Miller ported it to Python to use with Pyblosxom. +# +# Christian Neukirchen provided the Ruby port, as a general-purpose +# library that follows the *Cloth API. +# +# +# == Copyright and License +# +# === SmartyPants license: +# +# Copyright (c) 2003 John Gruber +# (http://daringfireball.net) +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# * Neither the name "SmartyPants" nor the names of its contributors +# may be used to endorse or promote products derived from this +# software without specific prior written permission. +# +# This software is provided by the copyright holders and contributors +# "as is" and any express or implied warranties, including, but not +# limited to, the implied warranties of merchantability and fitness +# for a particular purpose are disclaimed. In no event shall the +# copyright owner or contributors be liable for any direct, indirect, +# incidental, special, exemplary, or consequential damages (including, +# but not limited to, procurement of substitute goods or services; +# loss of use, data, or profits; or business interruption) however +# caused and on any theory of liability, whether in contract, strict +# liability, or tort (including negligence or otherwise) arising in +# any way out of the use of this software, even if advised of the +# possibility of such damage. +# +# === RubyPants license +# +# RubyPants is a derivative work of SmartyPants and smartypants.py. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# This software is provided by the copyright holders and contributors +# "as is" and any express or implied warranties, including, but not +# limited to, the implied warranties of merchantability and fitness +# for a particular purpose are disclaimed. In no event shall the +# copyright owner or contributors be liable for any direct, indirect, +# incidental, special, exemplary, or consequential damages (including, +# but not limited to, procurement of substitute goods or services; +# loss of use, data, or profits; or business interruption) however +# caused and on any theory of liability, whether in contract, strict +# liability, or tort (including negligence or otherwise) arising in +# any way out of the use of this software, even if advised of the +# possibility of such damage. +# +# +# == Links +# +# John Gruber:: http://daringfireball.net +# SmartyPants:: http://daringfireball.net/projects/smartypants +# +# Chad Miller:: http://web.chad.org +# +# Christian Neukirchen:: http://chneukirchen.org +# + + +class RubyPants < String + VERSION = "0.2" + + # Create a new RubyPants instance with the text in +string+. 
+ # + # Allowed elements in the options array: + # + # 0 :: do nothing + # 1 :: enable all, using only em-dash shortcuts + # 2 :: enable all, using old school en- and em-dash shortcuts (*default*) + # 3 :: enable all, using inverted old school en and em-dash shortcuts + # -1 :: stupefy (translate HTML entities to their ASCII-counterparts) + # + # If you don't like any of these defaults, you can pass symbols to change + # RubyPants' behavior: + # + # <tt>:quotes</tt> :: quotes + # <tt>:backticks</tt> :: backtick quotes (``double'' only) + # <tt>:allbackticks</tt> :: backtick quotes (``double'' and `single') + # <tt>:dashes</tt> :: dashes + # <tt>:oldschool</tt> :: old school dashes + # <tt>:inverted</tt> :: inverted old school dashes + # <tt>:ellipses</tt> :: ellipses + # <tt>:convertquotes</tt> :: convert <tt>"</tt> entities to + # <tt>"</tt> for Dreamweaver users + # <tt>:stupefy</tt> :: translate RubyPants HTML entities + # to their ASCII counterparts. + # + def initialize(string, options=[2]) + super string + @options = options.respond_to?(:to_ary) ? options.to_ary : [options] + end + + # Apply SmartyPants transformations. + def to_html + do_quotes = do_backticks = do_dashes = do_ellipses = do_stupify = nil + convert_quotes = false + + if @options.include? 0 + # Do nothing. + return self + elsif @options.include? 1 + # Do everything, turn all options on. + do_quotes = do_backticks = do_ellipses = true + do_dashes = :normal + elsif @options.include? 2 + # Do everything, turn all options on, use old school dash shorthand. + do_quotes = do_backticks = do_ellipses = true + do_dashes = :oldschool + elsif @options.include? 3 + # Do everything, turn all options on, use inverted old school + # dash shorthand. + do_quotes = do_backticks = do_ellipses = true + do_dashes = :inverted + elsif @options.include?(-1) + do_stupefy = true + else + do_quotes = @options.include? :quotes + do_backticks = @options.include? :backticks + do_backticks = :both if @options.include? 
:allbackticks + do_dashes = :normal if @options.include? :dashes + do_dashes = :oldschool if @options.include? :oldschool + do_dashes = :inverted if @options.include? :inverted + do_ellipses = @options.include? :ellipses + convert_quotes = @options.include? :convertquotes + do_stupefy = @options.include? :stupefy + end + + # Parse the HTML + tokens = tokenize + + # Keep track of when we're inside <pre> or <code> tags. + in_pre = false + + # Here is the result stored in. + result = "" + + # This is a cheat, used to get some context for one-character + # tokens that consist of just a quote char. What we do is remember + # the last character of the previous text token, to use as context + # to curl single- character quote tokens correctly. + prev_token_last_char = nil + + tokens.each { |token| + if token.first == :tag + result << token[1] + if token[1] =~ %r!<(/?)(?:pre|code|kbd|script|math)[\s>]! + in_pre = ($1 != "/") # Opening or closing tag? + end + else + t = token[1] + + # Remember last char of this token before processing. + last_char = t[-1].chr + + unless in_pre + t = process_escapes t + + t.gsub!(/"/, '"') if convert_quotes + + if do_dashes + t = educate_dashes t if do_dashes == :normal + t = educate_dashes_oldschool t if do_dashes == :oldschool + t = educate_dashes_inverted t if do_dashes == :inverted + end + + t = educate_ellipses t if do_ellipses + + # Note: backticks need to be processed before quotes. 
+ if do_backticks + t = educate_backticks t + t = educate_single_backticks t if do_backticks == :both + end + + if do_quotes + if t == "'" + # Special case: single-character ' token + if prev_token_last_char =~ /\S/ + t = "’" + else + t = "‘" + end + elsif t == '"' + # Special case: single-character " token + if prev_token_last_char =~ /\S/ + t = "”" + else + t = "“" + end + else + # Normal case: + t = educate_quotes t + end + end + + t = stupefy_entities t if do_stupefy + end + + prev_token_last_char = last_char + result << t + end + } + + # Done + result + end + + protected + + # Return the string, with after processing the following backslash + # escape sequences. This is useful if you want to force a "dumb" quote + # or other character to appear. + # + # Escaped are: + # \\ \" \' \. \- \` + # + def process_escapes(str) + str.gsub('\\\\', '\'). + gsub('\"', '"'). + gsub("\\\'", '''). + gsub('\.', '.'). + gsub('\-', '-'). + gsub('\`', '`') + end + + # The string, with each instance of "<tt>--</tt>" translated to an + # em-dash HTML entity. + # + def educate_dashes(str) + str.gsub(/--/, '—') + end + + # The string, with each instance of "<tt>--</tt>" translated to an + # en-dash HTML entity, and each "<tt>---</tt>" translated to an + # em-dash HTML entity. + # + def educate_dashes_oldschool(str) + str.gsub(/---/, '—').gsub(/--/, '–') + end + + # Return the string, with each instance of "<tt>--</tt>" translated + # to an em-dash HTML entity, and each "<tt>---</tt>" translated to + # an en-dash HTML entity. Two reasons why: First, unlike the en- and + # em-dash syntax supported by +educate_dashes_oldschool+, it's + # compatible with existing entries written before SmartyPants 1.1, + # back when "<tt>--</tt>" was only used for em-dashes. Second, + # em-dashes are more common than en-dashes, and so it sort of makes + # sense that the shortcut should be shorter to type. (Thanks to + # Aaron Swartz for the idea.) 
+ # + def educate_dashes_inverted(str) + str.gsub(/---/, '–').gsub(/--/, '—') + end + + # Return the string, with each instance of "<tt>...</tt>" translated + # to an ellipsis HTML entity. Also converts the case where there are + # spaces between the dots. + # + def educate_ellipses(str) + str.gsub('...', '…').gsub('. . .', '…') + end + + # Return the string, with "<tt>``backticks''</tt>"-style single quotes + # translated into HTML curly quote entities. + # + def educate_backticks(str) + str.gsub("``", '“').gsub("''", '”') + end + + # Return the string, with "<tt>`backticks'</tt>"-style single quotes + # translated into HTML curly quote entities. + # + def educate_single_backticks(str) + str.gsub("`", '‘').gsub("'", '’') + end + + # Return the string, with "educated" curly quote HTML entities. + # + def educate_quotes(str) + punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]' + + str = str.dup + + # Special case if the very first character is a quote followed by + # punctuation at a non-word-break. Close the quotes by brute + # force: + str.gsub!(/^'(?=#{punct_class}\B)/, '’') + str.gsub!(/^"(?=#{punct_class}\B)/, '”') + + # Special case for double sets of quotes, e.g.: + # <p>He said, "'Quoted' words in a larger quote."</p> + str.gsub!(/"'(?=\w)/, '“‘') + str.gsub!(/'"(?=\w)/, '‘“') + + # Special case for decade abbreviations (the '80s): + str.gsub!(/'(?=\d\ds)/, '’') + + close_class = %![^\ \t\r\n\\[\{\(\-]! 
+ dec_dashes = '–|—' + + # Get most opening single quotes: + str.gsub!(/(\s| |--|&[mn]dash;|#{dec_dashes}|ȁ[34];)'(?=\w)/, + '\1‘') + # Single closing quotes: + str.gsub!(/(#{close_class})'/, '\1’') + str.gsub!(/'(\s|s\b)/, '’\1') + # Any remaining single quotes should be opening ones: + str.gsub!(/'/, '‘') + + # Get most opening double quotes: + str.gsub!(/(\s| |--|&[mn]dash;|#{dec_dashes}|ȁ[34];)"(?=\w)/, + '\1“') + # Double closing quotes: + str.gsub!(/(#{close_class})"/, '\1”') + str.gsub!(/"\s/, '”\1') + # Any remaining quotes should be opening ones: + str.gsub!(/"/, '“') + + str + end + + # Return the string, with each RubyPants HTML entity translated to + # its ASCII counterpart. + # + # Note: This is not reversible (but exactly the same as in SmartyPants) + # + def stupefy_entities(str) + str. + gsub(/–/, '-'). # en-dash + gsub(/—/, '--'). # em-dash + + gsub(/‘/, "'"). # open single quote + gsub(/’/, "'"). # close single quote + + gsub(/“/, '"'). # open double quote + gsub(/”/, '"'). # close double quote + + gsub(/…/, '...') # ellipsis + end + + # Return an array of the tokens comprising the string. Each token is + # either a tag (possibly with nested, tags contained therein, such + # as <tt><a href="<MTFoo>"></tt>, or a run of text between + # tags. Each element of the array is a two-element array; the first + # is either :tag or :text; the second is the actual value. + # + # Based on the <tt>_tokenize()</tt> subroutine from Brad Choate's + # MTRegex plugin. <http://www.bradchoate.com/past/mtregex.php> + # + # This is actually the easier variant using tag_soup, as used by + # Chad Miller in the Python port of SmartyPants. + # + def tokenize + tag_soup = /\G([^<]*)(<[^>]*>)/ + + tokens = [] + + prev_end = 0 + scan(tag_soup) { + tokens << [:text, $1] if $1 != "" + tokens << [:tag, $2] + + prev_end = $~.end(0) + } + + if prev_end < size + tokens << [:text, self[prev_end..-1]] + end + + tokens + end +end |