tmail/tmail/encode.rb

#
# encode.rb
#
#   Copyright (c) 1999 Minero Aoki <aamine@dp.u-netsurf.ne.jp>
#
#   This program is free software.
#   You can distribute/modify this program under the terms of
#   the GNU Lesser General Public License version 2 or later.
#

require 'nkf'

require 'strscan'
require 'amstd/bug'


module TMail


#
# HFdecoder
#
#   Accumulates header text into an output object, running every
#   piece of header line/body/text through NKF with MIME decoding
#   ('m') switched on.  Names, white space and meta characters are
#   appended untouched.
#
class HFdecoder

  # ret::     object the output is appended to with <<
  #           (a new String when nil).
  # charset:: NKF output flag, one of 'e', 'j' or 's'.
  #           Any other value (including nil) falls back to 'e'.
  def initialize( ret = nil, charset = nil )
    @f     = ret   || ''
    # Exact comparison, same form as HFencoder#initialize.  A pattern
    # such as /ejs/ would only match the literal substring "ejs" and
    # therefore never accept a single-letter flag.
    @opt   = '-' + case charset
                   when 'e', 'j', 's' then charset
                   else                    'e'
                   end
  end


  class << self
  
    # One-shot helper: MIME-decodes +str+ and converts it with the NKF
    # output flag +outcode+ (default 'e').  Returns NKF's result as is.
    def decode( str, outcode = 'e' )
      NKF.nkf( "-#{outcode}m", str )
    end
  
  end


  # Returns the output object everything was appended to.
  def write_in
    @f
  end

  # Appends a whole header line, decoded.
  def header_line( str )
    @f << decode( str )
  end

  # Appends a header name followed by ': ' (not decoded).
  def header_name( nm )
    @f << nm << ': '
  end

  # Appends a header body, decoded.
  def header_body( str )
    @f << decode( str )
  end
    
  # Appends a single space.
  def spc
    @f << ' '
  end

  # Appends linear white space verbatim.
  def lwsp( str )
    @f << str
  end
    
  # Appends meta characters verbatim.
  def meta( str )
    @f << str
  end

  # Appends free text, decoded.
  def text( str )
    @f << decode( str )
  end


  private

  # Runs +str+ through NKF using the flag chosen in #initialize plus
  # 'm' (MIME decode).
  def decode( str )
    ret = NKF.nkf( @opt + 'm', str )
    # On Rubies with encoding-aware strings a result tagged with a
    # dummy (non ASCII-compatible) encoding cannot be appended to a
    # non-empty buffer in another encoding; treat it as raw bytes.
    # Older Rubies have no String#encoding and skip this.
    if ret.respond_to?( :encoding ) and ret.encoding.dummy? then
      ret.force_encoding( 'BINARY' )
    end
    ret
  end

end


#
# HFencoder
#
#   Builds a folded header in which every run of words containing
#   non-ASCII (JIS) text is written as '=?ISO-2022-JP?B?...?='
#   encoded words.  Callers feed pieces through header_name,
#   header_body, text, spc, lwsp and meta; the pieces are collected as
#   tokens and only turned into output by #write_in.
#
class HFencoder

  unless defined? BENCODE_DEBUG then
    BENCODE_DEBUG = false
  end

  # ret::     object the output is appended to with << (new String
  #           when nil).
  # eol::     line terminator used when folding (default "\r\n"); a
  #           single space is always added after it.
  # charset:: 'e', 'j' or 's'; kept in @opt.  Nothing in this class
  #           reads @opt -- scanadd always converts with '-j'.
  # limit::   maximum line length (default 72).
  def initialize( ret = nil, eol = nil, charset = nil, limit = nil )
    @sbuf = []      # token kinds  (:A, :J, :S), parallel to @vbuf
    @vbuf = []      # token strings
    @buf = ''       # line currently being assembled
    @f     = ret   || ''
    @sep   = (eol  || "\r\n") + ' '
    @limit = limit || 72
    @opt   = '-' + case charset
                   when 'e', 'j', 's' then charset
                   else                    'j'
                   end
    @len      = @limit - @sep.size
    @chunkmax = chunk_max( @len )
    # A very small limit would give a zero or negative chunk size and
    # the chunking loop in #scan could never advance.  Two bytes is
    # the smallest even chunk.
    @chunkmax = 2 if @chunkmax < 2

    @firstflush = true
  end

  # Forgets all collected tokens and makes the next flushed line the
  # first one again (no separator in front of it).
  def reset
    @firstflush = true
    @sbuf.clear
    @vbuf.clear
  end


  class << self

    # One-shot helper: returns +str+ encoded as a header body using
    # the default settings.
    def encode( str )
      ret = ''
      e = new( ret )
      e.header_body( str )
      e.write_in
      ret
    end
  
  end


  # Encodes every token collected so far, appends the result to the
  # output object, resets the encoder and returns the output object.
  def write_in
    do_encode
    reset
    @f
  end

  # Adds a whole header line.
  def header_line( line )
    scanadd line
  end

  # Adds a header name followed by ':' and one space.
  def header_name( nm )
    @sbuf.push :A
    @vbuf.push nm
    meta ':'
    spc
  end

  # Adds a header body.
  def header_body( str )
    scanadd str
  end

  # Adds a single space token.
  def spc
    @sbuf.push :S
    @vbuf.push ' '
  end

  # Adds +str+ as a white space token.
  def lwsp( str )
    @sbuf.push :S
    @vbuf.push str
  end

  # Adds +str+ as an ASCII token (never encoded on its own).
  def meta( str )
    @sbuf.push :A
    @vbuf.push str
  end

  # Adds free text.
  def text( str )
    scanadd str
  end


  private


  # Converts +str+ with NKF '-j' and tokenizes the result.
  def scanadd( str )
    jis = NKF.nkf( '-j', str )
    # #scan works on bytes (escape sequences, byte-sized chunks).
    # On Rubies with encoding-aware strings the NKF result carries an
    # encoding tag that the byte-oriented patterns may be refused on,
    # so mark it as raw bytes.  Older Rubies skip this.
    jis.force_encoding( 'BINARY' ) if jis.respond_to?( :force_encoding )
    scan jis
  end

  ESC_ASCII = "\e(B"

  # Splits +str+ into tokens pushed on @sbuf/@vbuf:
  #
  #   :A  run of non-blank bytes outside any escape sequence
  #       (:J instead when +force+ is true)
  #   :S  run of TAB/CR/LF/space
  #   :J  text following a three byte escape sequence other than
  #       ESC_ASCII, cut into pieces of at most @chunkmax bytes, each
  #       wrapped in the introducing escape and ESC_ASCII
  #
  # Calls bug! when the rest of the string matches none of these.
  def scan( str, force = false )
    s = StringScanner.new( str )

    while s.rest? do
      if tmp = s.scan( /\A[^\e\t\r\n ]+/ ) then
        @sbuf.push force ? :J : :A
        @vbuf.push tmp

      elsif tmp = s.scan( /\A[\t\r\n ]+/ ) then
        @sbuf.push :S
        @vbuf.push tmp

      elsif pre = s.scan( /\A\e../ ) then
        next if pre == ESC_ASCII

        if tmp = s.scan( /\A[^\e]+/ ) then
          # Stop strictly before tmp.size: an inclusive upper bound
          # would add an empty chunk (escape pair only) whenever the
          # length is an exact multiple of @chunkmax.
          pos = 0
          while pos < tmp.size do
            @sbuf.push :J
            @vbuf.push pre + tmp[ pos, @chunkmax ] + ESC_ASCII
            pos += @chunkmax
          end
        end

      else
        bug! "HFencoder#scan, not match"
      end
    end
  end


  # Walks the token list and writes the encoded header to @f.
  def do_encode
    #
    # @sbuf = (J|A)(J|A|S)*(J|A)
    #
    #   A: ascii only, no need to encode
    #   J: jis, etc. need to encode
    #   S: LWSP
    #
    # (J|A)*J(J|A)* -> W
    # W(SW)*        -> E
    #
    # result = (A|E)(S(A|E))*
    #
    if BENCODE_DEBUG then
      puts "\n  parse_words ------------\n\n"
      puts @sbuf.collect{|i| i.id2name }.inspect
      puts @vbuf.inspect
    end

    spc = ''      # white space seen before the unit being emitted

    # beg:  first token of the current unit
    # i:    scan position
    # endi: one past the last token belonging to E
    # si:   first token after the white space that follows E
    beg = i = endi = si = 0
    while @sbuf[i] do
      while @sbuf[i] and @sbuf[i] == :A do i += 1 end

      if @sbuf[i] == :J then
        #
        #  E
        #
        # finish the current word ...
        while @sbuf[i] and @sbuf[i] != :S do i += 1 end
        endi = i

        # ... and keep absorbing "S W" as long as the next word
        # contains a J token.
        while true do
          while @sbuf[i] and @sbuf[i] == :S do i += 1 end
          si = i
          while @sbuf[i] and @sbuf[i] == :A do i += 1 end
          break unless @sbuf[i] == :J
          while @sbuf[i] and @sbuf[i] != :S do i += 1 end

          endi = i
        end
        concat_e spc, @vbuf[ beg, endi - beg ]

        # The look-ahead consumed white space and possibly a pure
        # ASCII word after E; emit them as plain text.
        if i > endi then
          concat_a @vbuf[ endi, si - endi ].join(''), @vbuf[ si, i - si ]
        end
      else
        if i == beg then
          #
          #  S
          #
          while @sbuf[i] and @sbuf[i] == :S do i += 1 end
          spc = @vbuf[ beg, i - beg ].join('')
        else
          #
          #  A
          #
          concat_a spc, @vbuf[ beg, i - beg ]
        end
      end

      beg = i
    end

    flush_buf( '' ) unless @buf.empty?
  end


  # Writes the current line to @f, preceded by the fold separator
  # unless it is the first line, then starts a new line holding
  # +replace+.
  def flush_buf( replace )
    # Nothing to write: do not use up the "first line" state, or the
    # next real line would get a fold separator in front of it.
    unless @buf.empty? then
      if @firstflush then
        @firstflush = false
      else
        @f << @sep
      end
      @f << @buf
    end
    @buf = replace
  end

  
  # Appends the plain words +words+, preceded by the white space +sp+,
  # to the current line.  When they do not fit, the line is flushed
  # and the words start the next line; +sp+ is then dropped (the fold
  # separator already ends in a space).
  def concat_a( sp, words )
    str = words.join('')
    if BENCODE_DEBUG then
      puts "concat_a: arg=#{str.inspect}"
    end

    if @buf.size + sp.size + str.size <= @len then
      @buf << sp << str
    else
      flush_buf( str )
    end

    if BENCODE_DEBUG then
      puts "concat_a: ret=#{@f.inspect}"
      puts "concat_a: buf=#{@buf.inspect}"
    end
  end
    
  
  MPREFIX = '=?ISO-2022-JP?B?'
  MSUFFIX = '?='
  PRESIZE = MPREFIX.size + MSUFFIX.size
  
  # Appends the tokens +arr+ as one or more encoded words, preceded by
  # the white space +sp+ when at least the first token still fits on
  # the current line.  Tokens are packed greedily; a new line is
  # started each time the next token would exceed @len.
  def concat_e( sp, arr )
    if BENCODE_DEBUG then
      puts "concat_e: sp=#{sp.inspect}"
      puts "concat_e: arr=#{arr.inspect}"
    end
    
    # check if can concat lwsp

    if @buf.size + sp.size + encsize( arr[0].size ) <= @len then
      @buf << sp
    else
      flush_buf ''
    end

    tmp = ''
    arr.each do |i|
      if @buf.size + encsize( tmp.size + i.size ) <= @len then
        tmp << i
      else
        # tmp is only empty here when the very first token is too
        # long for a whole line; emitting it would produce an empty
        # encoded word, so skip straight to collecting the token.
        unless tmp.empty? then
          e_cat tmp
          flush_buf ''
        end
        # dup: tmp is appended to below and must not modify the
        # token stored in @vbuf.
        tmp = i.dup
      end
    end
    e_cat tmp unless tmp.empty?

    if BENCODE_DEBUG then
      puts "concat_e: ret=#{@f.inspect}"
      puts "concat_e: buf=#{@buf.inspect}"
    end
  end


  # Appends +str+ to the current line as a single encoded word.
  def e_cat( str )
    puts "e_cat: start" if BENCODE_DEBUG

    # pack('m') breaks its output into lines; remove every newline,
    # not only the trailing one, so the encoded word stays intact
    # whatever its length.
    @buf << MPREFIX << [str].pack('m').delete("\n") << MSUFFIX
  end


  # Size of an encoded word carrying +len+ raw bytes:
  # base64 length (4 characters per started group of 3 bytes) plus
  # prefix and suffix.
  def encsize( len )
    amari = (if len % 3 == 0 then 0 else 1 end)
    (len / 3 + amari) * 4 + PRESIZE
  end

  # Largest even number of text bytes per :J chunk for a line of +len+
  # characters: room left after prefix/suffix, converted from base64
  # characters to raw bytes, minus 6 bytes for the two escape
  # sequences wrapped around each chunk.
  def chunk_max( len )
    rest = len - PRESIZE
    rest = rest / 4 * 3 - 6
    rest -= rest % 2
    rest
  end

end


end    # module TMail