1 require 'uri'
2
# Helpers for embedding arbitrary text in HTML and in URL path segments.
# The methods are instance methods, so the module is meant to be mixed in
# with +include+ (or +extend+).
module HTMLUtils
  # Maps each HTML-significant character to the entity that replaces it.
  ESC = {
    '&' => '&amp;',
    '"' => '&quot;',
    '<' => '&lt;',
    '>' => '&gt;'
  }.freeze

  # Replaces every &, ", < and > in +str+ with the matching HTML entity.
  #
  # @param str [String] raw text
  # @return [String] a new string; +str+ itself is not modified
  def escape(str)
    # The hash form of gsub looks each matched character up in ESC.
    str.gsub(/[&"<>]/, ESC)
  end

  # Percent-encodes every character of +str+ other than ASCII letters,
  # digits, "_", "." and "-".
  #
  # @param str [String] text to encode
  # @return [String] a new string containing only the characters above
  #   plus %XX escapes (upper-case hex)
  def urlencode(str)
    # A matched character may be several bytes long (e.g. UTF-8), so each
    # of its bytes is emitted as a separate %XX escape.
    str.gsub(/[^A-Za-z0-9_.\-]/) do |ch|
      ch.each_byte.map { |byte| format('%%%02X', byte) }.join
    end
  end
end
20
# Converts PukiWiki-style markup into an HTML fragment.
#
# The block type of a line is decided by its first character(s):
#   ----     horizontal rule
#   *        heading (one to four asterisks select the level)
#   (space)  preformatted text
#   >        block quote
#   -        unordered list item (repeat the marker to nest)
#   +        ordered list item (repeat the marker to nest)
#   :        definition list entry, written as "term|description"
# Blank lines separate blocks; any other line belongs to a paragraph.
class PukiWikiParser
  include HTMLUtils

  # @param logger [#debug] receives a debug message for each heading,
  #   list, definition list, quote and preformatted block that is parsed
  def initialize(logger)
    @logger = logger
    @h_start_level = 2 # a single "*" becomes <h2>
  end

  # Renders wiki source as HTML.
  #
  # @param src [String] wiki markup; LF and CRLF line endings are accepted
  # @param page_names [Array<String>] known page names; occurrences in the
  #   text become page links (names of three characters or fewer are ignored)
  # @param base_uri [String] text placed before the URL-encoded page name
  #   in page links
  # @param suffix [String] text placed after the URL-encoded page name
  #   in page links
  # @return [String] HTML fragments joined with newlines
  def to_html(src, page_names, base_uri = '/', suffix = '/')
    @page_names = page_names
    @base_uri = base_uri
    @pagelist_suffix = suffix
    @inline_re = nil # depends on page_names, so rebuild it on every call

    buf = []
    lines = src.rstrip.split(/\r?\n/).map(&:chomp)
    until lines.empty?
      case lines.first
      when ''
        lines.shift
      when /\A----/ # must be tested before the single "-" list marker
        lines.shift
        buf.push '<hr />'
      when /\A\*/
        buf.push parse_h(lines.shift)
      when /\A\s/
        buf.concat parse_pre(take_block(lines, /\A\s/))
      when /\A>/
        buf.concat parse_quote(take_block(lines, /\A>/))
      when /\A-/
        buf.concat parse_list('ul', take_block(lines, /\A-/))
      when /\A\+/
        buf.concat parse_list('ol', take_block(lines, /\A\+/))
      when /\A:/
        buf.concat parse_dl(take_block(lines, /\A:/))
      else
        buf.push '<p>'
        # The paragraph runs until a blank line or a line that opens one
        # of the block types handled above.
        buf.concat parse_p(take_block(lines, /\A(?![*\s>:\-\+]|----|\z)/))
        buf.push '</p>'
      end
    end
    buf.join("\n")
  end

  private

  # Removes the leading run of lines that match +marker+ from +lines+
  # (mutating it) and returns them with the matched text stripped.
  def take_block(lines, marker)
    block = []
    until lines.empty?
      break unless marker =~ lines.first

      block.push lines.shift.sub(marker, '')
    end
    block
  end

  # Renders a heading line. One to four leading asterisks select the
  # level, counted from @h_start_level; all leading asterisks are removed
  # from the heading text.
  def parse_h(line)
    @logger.debug "h: #{line.inspect}"
    level = @h_start_level + (line.slice(/\A\*{1,4}/).length - 1)
    content = line.sub(/\A\*+/, '')
    "<h#{level}>#{parse_inline(content)}</h#{level}>"
  end

  # Renders a list block. +type+ is 'ul' or 'ol'; +lines+ have already
  # had one marker stripped, so a remaining marker means a nested level.
  def parse_list(type, lines)
    @logger.debug "#{type}: #{lines.inspect}"
    marker = type == 'ul' ? /\A-/ : /\A\+/
    parse_list0(type, lines, marker)
  end

  # Recursive worker for parse_list; consumes +lines+.
  def parse_list0(type, lines, marker)
    buf = ["<#{type}>"]
    item_open = false
    until lines.empty?
      if marker =~ lines.first
        # Deeper level: emitted before the current <li> is closed so the
        # nested list ends up inside that item.
        buf.concat parse_list0(type, take_block(lines, marker), marker)
      else
        buf.push '</li>' if item_open
        buf.push "<li>#{parse_inline(lines.shift)}"
        item_open = true
      end
    end
    buf.push '</li>' if item_open
    buf.push "</#{type}>"
    buf
  end

  # Renders definition list entries of the form "term|description".
  # The description is optional.
  def parse_dl(lines)
    @logger.debug "dl: #{lines.inspect}"
    buf = ['<dl>']
    lines.each do |line|
      dt, dd = line.split('|', 2)
      # A bare ":" line splits to [], leaving dt nil; render an empty term
      # instead of failing inside parse_inline.
      buf.push "<dt>#{parse_inline(dt.to_s)}</dt>"
      buf.push "<dd>#{parse_inline(dd)}</dd>" if dd
    end
    buf.push '</dl>'
    buf
  end

  # Renders a block quote. The quoted lines go through parse_inline so
  # that markup characters in them are escaped rather than emitted raw.
  def parse_quote(lines)
    @logger.debug "quote: #{lines.inspect}"
    body = lines.map { |line| parse_inline(line) }.join("\n")
    ['<blockquote><p>', body, '</p></blockquote>']
  end

  # Renders preformatted text: lines are escaped but not inline-parsed.
  def parse_pre(lines)
    @logger.debug "pre: #{lines.inspect}"
    ["<pre><code>#{lines.map { |line| escape(line) }.join("\n")}",
     '</code></pre>']
  end

  # Renders paragraph lines, one output element per input line.
  def parse_p(lines)
    lines.map { |line| parse_inline(line) }
  end

  # Escapes HTML characters in +str+ and turns bracket links
  # ([[label: http://...]]), known page names and bare http/https URIs
  # into anchors.
  def parse_inline(str)
    @inline_re ||= begin
      page_re = autolink_re
      # make_regexp is what the obsolete URI.regexp delegates to. Both
      # schemes are listed because the pattern only matches a URI whose
      # scheme is exactly one of those given.
      uri_re = URI::DEFAULT_PARSER.make_regexp(%w[http https])
      %r{
          ([&<>"])                             # $1: HTML escape characters
        | \[\[(.+?):\s*(https?://\S+)\s*\]\]   # $2: label, $3: URI
        | (#{page_re})                         # $4: Page name autolink
        | (#{uri_re})                          # $5...: URI autolink
      }x
    end
    str.gsub(@inline_re) do
      case
      when htmlchar = $1 then escape(htmlchar)
      when bracket  = $2 then a_href($3, bracket, 'outlink')
      when pagename = $4 then a_href(page_uri(pagename), pagename, 'pagelink')
      when uri      = $5 then a_href(uri, uri, 'outlink')
      else
        raise 'must not happen'
      end
    end
  end

  # Builds an anchor element; +uri+ and +label+ are HTML-escaped here.
  def a_href(uri, label, cssclass)
    %Q[<a class="#{cssclass}" href="#{escape(uri)}">#{escape(label)}</a>]
  end

  # Regexp matching any known page name longer than three characters.
  def autolink_re
    names = @page_names.reject { |name| name.size <= 3 }
    # Alternation takes the first alternative that matches, so longer
    # names go first; otherwise a name that is a prefix of another would
    # shadow it.
    Regexp.union(*names.sort_by { |name| -name.size })
  end

  # URI of the wiki page +page_name+.
  def page_uri(page_name)
    "#{@base_uri}#{urlencode(page_name)}#{@pagelist_suffix}"
  end
end