# hikidoc2/hikidoc.rb
# Copyright (c) 2005, Kazuhiko <kazuhiko@fdiary.net>
# Copyright (c) 2007 Minero Aoki
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
# * Neither the name of the HikiDoc nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
require 'stringio'
require 'strscan'
require 'lineinput'
require 'uri'
begin
require 'syntax/convertors/html'
rescue LoadError
end
# HikiDoc converts Hiki-style wiki markup into HTML.
#
# Conversion runs in three stages:
#
# 1. Plugin blocks (<tt>{{...}}</tt>) are cut out of the source and replaced
#    by NUL-delimited numeric placeholders so that the markup parser never
#    looks inside them.
# 2. The remaining text is parsed line by line into block-level elements
#    (headers, lists, tables, ...), whose text is then parsed for inline
#    markup (links, emphasis, ...).
# 3. Placeholders left in the generated output are replaced by the output
#    object's rendering of an inline plugin.
#
# All markup generation is delegated to an output object (see HTMLOutput).
class HikiDoc
  Revision = %q$Rev: 44 $

  # Converts +src+ to HTML; void elements are closed with ">".
  #
  # options:: Hash; see #initialize for the recognised keys.
  # Returns the generated markup as a String.
  def self.to_html(src, options = {})
    new(HTMLOutput.new('>'), options).compile(src)
  end

  # Converts +src+ to XHTML; void elements are closed with " />".
  #
  # options:: Hash; see #initialize for the recognised keys.
  # Returns the generated markup as a String.
  def self.to_xhtml(src, options = {})
    new(HTMLOutput.new(' />'), options).compile(src)
  end

  # output::  object receiving the rendering calls (e.g. an HTMLOutput).
  # options:: Hash with optional keys
  #           :level         - heading level used for a single "!" (default 1)
  #           :plugin_syntax - callable taking the plugin source and returning
  #                            true when it is a complete plugin call
  #                            (default: #valid_plugin_syntax?)
  def initialize(output, options)
    @output = output
    @header_re = nil
    @level = options[:level] || 1
    @plugin_syntax = options[:plugin_syntax] || method(:valid_plugin_syntax?)
  end

  # Compiles the wiki text +src+ and returns the output object's result
  # string with plugin placeholders expanded.
  def compile(src)
    @output.reset
    escape_plugin_blocks(src) {|escaped|
      compile_blocks escaped
      @output.string
    }
  end

  private

  #
  # Plugin
  #

  # Default plugin validator: after removing every complete single- or
  # double-quoted string literal, no quote character may remain (i.e. the
  # plugin source contains no unterminated string).
  def valid_plugin_syntax?(code)
    /['"]/ !~ code.gsub(/'(?:[^\\']+|\\.)*'|"(?:[^\\"]+|\\.)*"/m, '')
  end

  # Replaces each plugin block in +text+ by "\0<index>\0", yields the
  # resulting text, and expands the placeholders that remain in the block's
  # return value into inline plugin markup.
  #
  # An escaped brace ("\{") is copied through untouched so that it never
  # opens a plugin; a "{{" without a valid closing "}}" is kept literally.
  def escape_plugin_blocks(text)
    s = StringScanner.new(text)
    buf = ''
    @plugin_blocks = []
    while chunk = s.scan_until(/\\\{|\{\{/)
      tail = chunk[-2, 2]
      chunk[-2, 2] = ''
      buf << chunk
      if tail == '\\{'
        buf << tail
      else
        # plugin
        if block = extract_plugin_block(s)
          @plugin_blocks.push block
          buf << "\0#{@plugin_blocks.size - 1}\0"
        else
          buf << '{{'
        end
      end
    end
    buf << s.rest.gsub(/\\\{/, '{')
    buf = yield(buf)
    buf.gsub(/\0(\d+)\0/) {
      @output.inline_plugin(plugin_block($1.to_i))
    }
  end

  # Turns placeholders in +str+ back into their original "{{...}}" source.
  # Used for preformatted text, where plugins must not be expanded.
  def restore_plugin_block(str)
    str.gsub(/\0(\d+)\0/) {
      '{{' + plugin_block($1.to_i) + '}}'
    }
  end

  # Returns the stored plugin source for placeholder +id+.
  # Raises RuntimeError when no such block was recorded.
  def plugin_block(id)
    @plugin_blocks[id] or raise "must not happen: #{id.inspect}"
  end

  # Scans forward from the scanner's current position for the closing "}}"
  # of a plugin. Each "}}" is tried in turn until the accumulated source is
  # accepted by the plugin validator, so "}}" inside a quoted string does not
  # end the plugin.
  #
  # Returns the plugin source without the braces, or nil (with the scanner
  # position restored) when no acceptable end is found.
  def extract_plugin_block(s)
    pos = s.pos
    buf = ''
    while chunk = s.scan_until(/\}\}/)
      buf << chunk
      buf.chomp!('}}')
      if @plugin_syntax.call(buf)
        return buf
      end
      buf << '}}'
    end
    s.pos = pos
    nil
  end

  #
  # Block Level
  #

  # Parses +src+ line by line and dispatches each run of lines to the
  # compiler of the block type its first line announces. Comment lines and
  # empty lines between blocks are dropped.
  #
  # NOTE(review): LineInput comes from the 'lineinput' library, which is not
  # part of this file; its peek/gets/while_match/span/break semantics are
  # assumed from how they are used here.
  def compile_blocks(src)
    f = LineInput.new(StringIO.new(src))
    while line = f.peek
      case line
      when COMMENT_RE
        f.gets
      when HEADER_RE
        compile_header f.gets
      when HRULE_RE
        f.gets
        compile_hrule
      when LIST_RE
        compile_list f
      when DLIST_RE
        compile_dlist f
      when TABLE_RE
        compile_table f
      when BLOCKQUOTE_RE
        compile_blockquote f
      when INDENTED_PRE_RE
        compile_indented_pre f
      when BLOCK_PRE_OPEN_RE
        compile_block_pre f
      else
        if /^$/ =~ line
          f.gets
          next
        end
        compile_paragraph f
      end
    end
  end

  # A line starting with "//" is a comment.
  COMMENT_RE = %r<\A//>

  # Consumes consecutive comment lines from +f+.
  def skip_comments(f)
    f.while_match(COMMENT_RE) do |line|
    end
  end

  # A header line starts with one or more "!".
  HEADER_RE = /\A!+/

  # Emits a headline. The number of leading "!" (capped so that the level
  # never exceeds 6) is added to the configured base level; surplus "!"
  # stay in the title.
  def compile_header(line)
    @header_re ||= /\A!{1,#{7 - @level}}/
    level = @level + (line.slice!(@header_re).size - 1)
    title = strip(line)
    @output.headline level, compile_inline(title)
  end

  # A horizontal rule is a line starting with exactly "----" at line end.
  HRULE_RE = /\A----$/

  def compile_hrule
    @output.hrule
  end

  ULIST = '*'
  OLIST = '#'
  # A list item starts with a run of "*" / "#"; the run length is its depth.
  LIST_RE = /\A#{Regexp.union(ULIST, OLIST)}+/

  # Emits a (possibly nested) list from consecutive list lines.
  #
  # +typestack+ holds the element type ('ul'/'ol') of every currently open
  # list so the matching close tag can be emitted when unwinding. The type
  # of an item is decided by its first marker character only.
  def compile_list(f)
    typestack = []
    level = 0
    @output.list_begin
    f.while_match(LIST_RE) do |line|
      list_type = (line[0,1] == ULIST ? 'ul' : 'ol')
      new_level = line.slice(LIST_RE).size
      item = strip(line.sub(LIST_RE, ''))
      if new_level > level
        # Deeper: open one list (and item) per additional level.
        (new_level - level).times do
          typestack.push list_type
          @output.list_open list_type
          @output.listitem_open
        end
        @output.listitem compile_inline(item)
      elsif new_level < level
        # Shallower: close the inner lists, then start a sibling item.
        (level - new_level).times do
          @output.listitem_close
          @output.list_close typestack.pop
        end
        @output.listitem_close
        @output.listitem_open
        @output.listitem compile_inline(item)
      elsif list_type == typestack.last
        # Same depth, same type: plain sibling item.
        @output.listitem_close
        @output.listitem_open
        @output.listitem compile_inline(item)
      else
        # Same depth, other type: replace the innermost list.
        @output.listitem_close
        @output.list_close typestack.pop
        @output.list_open list_type
        @output.listitem_open
        @output.listitem compile_inline(item)
        typestack.push list_type
      end
      level = new_level
      skip_comments f
    end
    level.times do
      @output.listitem_close
      @output.list_close typestack.pop
    end
    @output.list_end
  end

  # A definition list line starts with ":".
  DLIST_RE = /\A:/

  # Emits a definition list from consecutive ":term:description" lines.
  def compile_dlist(f)
    @output.dlist_open
    f.while_match(DLIST_RE) do |line|
      dt, dd = split_dlitem(line.sub(DLIST_RE, ''))
      @output.dlist_item compile_inline(dt), compile_inline(dd)
      skip_comments f
    end
    @output.dlist_close
  end

  # Splits "term:description" at the first ":" that is neither
  # backslash-escaped nor inside a [[bracket link]].
  # Returns [term, description]; description is '' when there is no
  # separator.
  def split_dlitem(line)
    re = /\A((?:[^\\:\[]+|\\.|#{BRACKET_LINK_RE}|\[)*):/o
    if m = re.match(line)
      return m[1], m.post_match
    else
      return line, ''
    end
  end

  # A table row starts with "||".
  TABLE_RE = /\A\|\|/

  # Emits a table from consecutive "||"-prefixed lines. Within a cell a
  # leading "!" marks a header cell, and following "^" / ">" characters
  # request row / column spanning.
  def compile_table(f)
    lines = []
    f.while_match(TABLE_RE) do |line|
      lines.push line
      skip_comments f
    end
    @output.table_open
    lines.each do |line|
      @output.table_record_open
      split_columns(line.sub(TABLE_RE, '')).each do |col|
        mid = col.sub!(/\A!/, '') ? 'table_head' : 'table_data'
        span = col.slice!(/\A[\^>]*/)
        rs = span_count(span, '^')
        cs = span_count(span, '>')
        @output.__send__(mid, compile_inline(col), rs, cs)
      end
      @output.table_record_close
    end
    @output.table_close
  end

  # Splits a table row (without its leading "||") into cell strings at every
  # "||" that is not backslash-escaped. Trailing text without a closing "||"
  # becomes a final cell unless it is empty.
  def split_columns(str)
    cols = []
    while m = /(?:[^\\\|]+|\\.|(?!\|\|)\|)*\|\|/.match(str)
      cols.push strip(m[0].chomp('||'))
      str = m.post_match
    end
    cols.push str.chomp unless str.chomp.empty?
    cols
  end

  # Converts the number of span markers +ch+ in +str+ into a span attribute
  # value: nil for no marker, otherwise count + 1 (one marker spans 2 cells).
  def span_count(str, ch)
    c = str.count(ch)
    c == 0 ? nil : c + 1
  end

  # A block quote line starts with two double quotes and an optional blank.
  BLOCKQUOTE_RE = /\A""[ \t]?/

  # Emits a block quote; the quoted lines are themselves compiled as block
  # level markup, so quotes may contain lists, nested quotes, etc.
  def compile_blockquote(f)
    @output.blockquote_open
    lines = []
    f.while_match(BLOCKQUOTE_RE) do |line|
      lines.push line.sub(BLOCKQUOTE_RE, '')
      skip_comments f
    end
    compile_blocks lines.join('')
    @output.blockquote_close
  end

  # A line starting with a blank or tab is preformatted text.
  INDENTED_PRE_RE = /\A[ \t]/

  # Emits consecutive indented lines as preformatted text, with the first
  # indent character and trailing whitespace of each line removed and plugin
  # placeholders turned back into their source.
  #
  # NOTE(review): the text is handed to the output object without going
  # through @output.text (unlike compile_block_pre) - confirm whether that
  # is intended.
  def compile_indented_pre(f)
    lines = f.span(INDENTED_PRE_RE)\
        .map {|line| rstrip(line.sub(INDENTED_PRE_RE, '')) }
    @output.preformatted restore_plugin_block(lines.join(''))
  end

  # "<<<" opens a preformatted block; an optional word names its syntax.
  BLOCK_PRE_OPEN_RE = /\A<<<\s*(\w+)?/
  # ">>>" closes a preformatted block.
  BLOCK_PRE_CLOSE_RE = /\A>>>/

  # Emits a "<<< ... >>>" block as preformatted text. When a syntax name is
  # given and highlighting succeeds, the highlighted markup is emitted;
  # otherwise the text is emitted escaped.
  def compile_block_pre(f)
    m = BLOCK_PRE_OPEN_RE.match(f.gets) or raise 'must not happen'
    syntax = m[1] ? m[1].downcase : nil
    str = restore_plugin_block(f.break(BLOCK_PRE_CLOSE_RE).join)
    f.gets  # discard the closing ">>>" line
    highlighted = syntax ? highlight_source(syntax, str) : nil
    @output.preformatted(highlighted || @output.text(str))
  end

  # Returns +str+ highlighted as +syntax+ by the optional "syntax" library,
  # or nil when the library is not loaded or conversion fails for any
  # reason (highlighting is best-effort).
  #
  # The second argument asks the convertor not to wrap its result in <pre>,
  # because the caller emits the <pre> element itself.
  # NOTE(review): relies on Syntax::Convertors::HTML#convert(text, pre);
  # should the signature differ, the rescue below degrades to plain text.
  def highlight_source(syntax, str)
    Syntax::Convertors::HTML.for_syntax(syntax).convert(str, false)
  rescue StandardError
    nil
  end

  BLANK = /\A$/
  # Any line that starts another block type (or is blank) ends a paragraph.
  PARAGRAPH_END_RE = Regexp.union(BLANK,
                                  HEADER_RE, HRULE_RE, LIST_RE, DLIST_RE,
                                  BLOCKQUOTE_RE, TABLE_RE,
                                  INDENTED_PRE_RE, BLOCK_PRE_OPEN_RE)

  # Emits a paragraph from the lines up to the next block start. Comment
  # lines inside the paragraph are dropped. A paragraph consisting of
  # nothing but one plugin placeholder is emitted as a block plugin.
  def compile_paragraph(f)
    str = f.break(PARAGRAPH_END_RE)\
        .reject {|line| COMMENT_RE =~ line }\
        .map {|line| compile_inline(strip(line)) }\
        .join("\n")
    if /\A\0(\d+)\0\z/ =~ str
      @output.block_plugin plugin_block($1.to_i)
    else
      @output.paragraph str
    end
  end

  #
  # Inline Level
  #

  # Characters that can be written literally by prefixing a backslash.
  METACHARS = %q[{}:|'"]
  ESCAPED_METACHARS = /\\[#{Regexp.quote(METACHARS)}]/
  # [[...]] link; "]]" ends it unless backslash-escaped.
  BRACKET_LINK_RE = /\[\[(?:[^\\\]]+|\\.|(?!\]\])\])*\]\]/
  # Bare URIs that are linked automatically.
  URI_RE = /(?:https?|ftp|file|mailto):[A-Za-z0-9;\/?:@&=+$,\-_.!~*\'()#%]+/

  # Converts the inline markup in +str+ and returns the generated markup.
  # Text between markup constructs is passed through @output.text.
  def compile_inline(str)
    re = / (#{ESCAPED_METACHARS})
         | (#{BRACKET_LINK_RE})
         | (#{URI_RE})
         | (#{MODIFIER_RE})
         /xo
    buf = ''
    while m = re.match(str)
      buf << @output.text(m.pre_match)
      case
      when meta = m[1]
        buf << meta[1,1]
      when link = m[2]
        buf << compile_bracket_link(link[2...-2])
      when uri = m[3]
        buf << compile_uri_autolink(uri)
      when mod = m[4]
        buf << compile_modifier(mod)
      else
        raise 'must not happen'
      end
      str = m.post_match
    end
    buf << @output.text(str)
    buf
  end

  # Renders the inside of a [[...]] link. "title|uri" yields a link with the
  # given title (which may contain modifiers); otherwise the link text is
  # the target itself.
  def compile_bracket_link(link)
    if m = /\A(?>[^|\\]+|\\.)*\|/.match(link)
      title = m[0].chop
      uri = m.post_match
      @output.hyperlink(fix_uri(uri), compile_modifier(title))
    else
      @output.hyperlink(fix_uri(link), @output.text(link))
    end
  end

  # Renders a bare URI: as an image when it has an image extension,
  # otherwise as a link whose text is the URI itself.
  def compile_uri_autolink(uri)
    if image?(uri)
      @output.image_hyperlink(fix_uri(uri))
    else
      # The link text must be escaped like any other text: URIs may
      # legitimately contain "&".
      @output.hyperlink(fix_uri(uri), @output.text(uri))
    end
  end

  # Strips a leading "scheme:" from +uri+ unless the URI contains "://" or
  # is a mailto: address (so "http:foo.html" becomes the relative
  # "foo.html").
  def fix_uri(uri)
    if %r|://| !~ uri and /\Amailto:/ !~ uri
      uri.sub(/\A\w+:/, '')
    else
      uri
    end
  end

  IMAGE_EXTS = %w( .jpg .jpeg .gif .png )

  # True when +uri+ ends in one of IMAGE_EXTS (case-insensitive).
  def image?(uri)
    IMAGE_EXTS.include?(File.extname(uri).downcase)
  end

  STRONG = "'''"
  EM = "''"
  DEL = '=='
  STRONG_RE = /'''(?:[^\\']+|\\.|(?!''')''?)+'''/
  EM_RE = /''(?:[^\\']+|\\.|(?!'')')+''/
  DEL_RE = /==(?:[^\\=]+|\\.|(?!==)=)+==/
  MODIFIER_RE = Regexp.union(STRONG_RE, EM_RE, DEL_RE)
  # Maps a modifier delimiter to the output method that renders it.
  MODTAG = {
    STRONG => 'strong',
    EM     => 'em',
    DEL    => 'del'
  }

  # Converts text modifiers (strong / em / del, possibly nested) and escaped
  # metacharacters in +str+; everything else is passed through @output.text.
  def compile_modifier(str)
    buf = ''
    while m = / (#{ESCAPED_METACHARS})
              | (#{MODIFIER_RE})
              /xo.match(str)
      buf << @output.text(m.pre_match)
      case
      when esc = m[1]
        buf << @output.text(esc[1, 1])
      when chunk = m[2]
        mod, s = split_mod(chunk)
        mid = MODTAG[mod]
        buf << @output.__send__(mid, compile_modifier(s))
      else
        raise 'must not happen'
      end
      str = m.post_match
    end
    buf << @output.text(str)
    buf
  end

  # Splits a complete modifier chunk into [delimiter, inner text].
  # Raises RuntimeError when +str+ does not start with a known delimiter.
  def split_mod(str)
    case str
    when /\A'''/
      return str[0, 3], str[3...-3]
    when /\A''/
      return str[0, 2], str[2...-2]
    when /\A==/
      return str[0, 2], str[2...-2]
    else
      raise "must not happen: #{str.inspect}"
    end
  end

  # Returns a copy of +str+ without leading and trailing whitespace.
  def strip(str)
    rstrip(lstrip(str))
  end

  # Returns a copy of +str+ without trailing whitespace.
  def rstrip(str)
    str.sub(/[ \t\r\n\v\f]+\z/, '')
  end

  # Returns a copy of +str+ without leading whitespace.
  def lstrip(str)
    str.sub(/\A[ \t\r\n\v\f]+/, '')
  end

  # Default output object: renders the parsed document as HTML or XHTML.
  #
  # "Procedures" write block-level markup to an internal buffer (call #reset
  # first, read the result with #string). "Functions" return inline markup
  # as strings and do not touch the buffer.
  class HTMLOutput
    # suffix:: text that closes a void element, '>' for HTML or ' />' for
    #          XHTML.
    def initialize(suffix = ' />')
      @suffix = suffix
      @f = nil
    end

    # Starts a fresh, empty output buffer.
    def reset
      @f = StringIO.new
    end

    # Returns everything written since the last #reset.
    def string
      @f.string
    end

    #
    # Procedures
    #

    def headline(level, title)
      @f.puts "<h#{level}>#{title}</h#{level}>"
    end

    def hrule
      @f.puts "<hr#{@suffix}"
    end

    def list_begin
    end

    def list_end
      @f.puts
    end

    def list_open(type)
      @f.puts "<#{type}>"
    end

    def list_close(type)
      @f.print "</#{type}>"
    end

    def listitem_open
      @f.print '<li>'
    end

    def listitem_close
      @f.puts '</li>'
    end

    def listitem(item)
      @f.print item
    end

    def dlist_open
      @f.puts "<dl>"
    end

    def dlist_close
      @f.puts "</dl>"
    end

    # Writes a term/description pair; an empty term or description is
    # omitted.
    def dlist_item(dt, dd)
      case
      when dd.empty?
        @f.puts "<dt>#{dt}</dt>"
      when dt.empty?
        @f.puts "<dd>#{dd}</dd>"
      else
        @f.puts "<dt>#{dt}</dt>"
        @f.puts "<dd>#{dd}</dd>"
      end
    end

    def table_open
      @f.puts %Q(<table border="1">)
    end

    def table_close
      @f.puts "</table>"
    end

    def table_record_open
      @f.print "<tr>"
    end

    def table_record_close
      @f.puts "</tr>"
    end

    # rs / cs:: rowspan / colspan value, or nil for none.
    def table_head(item, rs, cs)
      @f.print "<th#{tdattr(rs, cs)}>#{item}</th>"
    end

    # rs / cs:: rowspan / colspan value, or nil for none.
    def table_data(item, rs, cs)
      @f.print "<td#{tdattr(rs, cs)}>#{item}</td>"
    end

    # Builds the span attribute string for a table cell.
    def tdattr(rs, cs)
      buf = ''
      buf << %Q( rowspan="#{rs}") if rs
      buf << %Q( colspan="#{cs}") if cs
      buf
    end
    private :tdattr

    def blockquote_open
      @f.print "<blockquote>"
    end

    def blockquote_close
      @f.puts "</blockquote>"
    end

    # Writes +str+ verbatim inside <pre>; the caller is responsible for
    # escaping.
    def preformatted(str)
      @f.print "<pre>"
      @f.print str
      @f.puts "</pre>"
    end

    def paragraph(str)
      @f.puts "<p>#{str}</p>"
    end

    # Writes a plugin that forms a paragraph of its own; the plugin source
    # is shown escaped.
    def block_plugin(str)
      @f.puts %Q(<div class="plugin">{{#{escape_html(str)}}}</div>)
    end

    #
    # Functions
    #

    # uri is escaped for use in an attribute; title must already be markup.
    def hyperlink(uri, title)
      %Q(<a href="#{escape_html_param(uri)}">#{title}</a>)
    end

    # Renders an image whose alt text is the last path component of +uri+.
    def image_hyperlink(uri)
      alt = escape_html(File.basename(uri))
      %Q(<img src="#{escape_html_param(uri)}" alt="#{alt}"#{@suffix})
    end

    def strong(item)
      "<strong>#{item}</strong>"
    end

    def em(item)
      "<em>#{item}</em>"
    end

    def del(item)
      "<del>#{item}</del>"
    end

    # Returns +str+ escaped for use as HTML text.
    def text(str)
      escape_html(str)
    end

    # Renders a plugin occurring inside other text. The source is escaped,
    # as in #block_plugin, so that plugin text cannot inject markup.
    def inline_plugin(src)
      %Q(<span class="plugin">{{#{escape_html(src)}}}</span>)
    end

    #
    # Utilities
    #

    # Escapes +str+ for use inside a double-quoted attribute value.
    def escape_html_param(str)
      escape_quote(escape_html(str))
    end

    # Escapes &, < and >. "&" is handled first so that the entities
    # produced for "<" and ">" are not escaped a second time.
    def escape_html(text)
      text.gsub(/&/, '&amp;').gsub(/</, '&lt;').gsub(/>/, '&gt;')
    end

    # Reverses #escape_html. "&amp;" is handled last so that "&amp;lt;"
    # becomes "&lt;" rather than "<".
    def unescape_html(text)
      text.gsub(/&gt;/, '>').gsub(/&lt;/, '<').gsub(/&amp;/, '&')
    end

    # Escapes double quotes.
    def escape_quote(text)
      text.gsub(/"/, '&quot;')
    end
  end
end
# Command-line entry point: when this file is run directly (rather than
# required), convert the concatenated input read through ARGF to HTML and
# print it on standard output.
puts HikiDoc.to_html(ARGF.read(nil)) if $PROGRAM_NAME == __FILE__