tmail/tmail/mails/mails.c
/* vi:set sw=4 ts=4:
mails.c version 0.1.1
Copyright (c) 1998,1999 Minero Aoki <aamine@dp.u-netsurf.ne.jp>
This program is free software.
You can distribute/modify this program under the terms of
the GNU Lesser General Public License version 2 or later.
*/
#include <stdio.h>
#ifdef __STDC__
# include <stdlib.h>
#endif
#include "ruby.h"
/* Ruby class object for MailScanner; assigned in Init_mails(). */
static VALUE MailScanner;
/* Exception class raised on scan errors; resolved from "::ScanError" in Init_mails(). */
static VALUE ScanError;
/* Scanner state wrapped inside every MailScanner object. */
struct mails
{
unsigned char *ptr;     /* first byte of the source string's buffer */
unsigned char *p;       /* current scan position */
unsigned char *pend;    /* one past the last byte of the source (ptr + length) */
unsigned int flags;     /* bit set of MODE_MIME / MODE_RECV / MAILS_DEBUG */
VALUE comments;         /* Array that receives comment texts, or Qnil */
};
/* Scanning modes stored in struct mails.flags (selected by header name in mails_s_new). */
#define MODE_MIME (1 << 0)
#define MODE_RECV (1 << 1)
/* Non-zero when the scanner is in the corresponding mode. */
#define MIME_MODE_P(s) ((s)->flags & MODE_MIME)
#define RECV_MODE_P(s) ((s)->flags & MODE_RECV)
/* Flag bit toggled through MailScanner#debug= ; enables trace output in mails_scan. */
#define MAILS_DEBUG (1 << 4)
/* Size, in bytes, of fixed scratch buffers. */
#define BUFSIZE 256
/* Fetch the struct mails pointer wrapped in Ruby object `val` into `s`. */
#define GET_SCANNER(val, s) Data_Get_Struct(val, struct mails, s)
/*
 * Put a scanner into its pristine state: no source buffer attached,
 * no mode/debug flags set, and no comment array.
 */
static void
real_reset(sc)
    struct mails *sc;
{
    sc->ptr = sc->p = sc->pend = 0;
    sc->flags = 0;
    sc->comments = Qnil;
}
/*
 * Free hook registered with Data_Wrap_Struct in mails_s_new:
 * releases the struct mails itself. The source buffer pointed to by
 * sc->ptr is not freed here.
 */
static void
mails_free(sc)
struct mails *sc;
{
free(sc);
}
/*
 * GC mark hook for a wrapped struct mails: keeps the comments array
 * (if any) alive for as long as the scanner object is alive.
 */
static void
mails_mark(sc)
    struct mails *sc;
{
    if (sc && !NIL_P(sc->comments))
        rb_gc_mark(sc->comments);
}

/*
 * MailScanner.new(str, ident, cmt)
 *
 *   str   - String to scan.
 *   ident - header identifier; "RecvH" selects Received-header mode,
 *           "CTypeH" / "CEncodingH" / "CDispositionH" select MIME mode,
 *           anything else selects the default (atom) mode.
 *   cmt   - Array that will receive the text of skipped comments, or nil.
 *
 * Raises TypeError (via Check_Type / STR2CSTR) on wrongly typed arguments.
 *
 * All arguments are validated BEFORE the scanner struct is allocated, so
 * a raised exception cannot leak it.
 *
 * NOTE(review): the scanner keeps raw pointers into str's buffer and
 * struct mails has no field to hold str itself, so the caller must keep
 * str alive and unmodified while the scanner is in use.
 * NOTE(review): the object is always created as a MailScanner; klass is
 * not used, exactly as before.
 */
static VALUE
mails_s_new(klass, str, ident, cmt)
    VALUE klass, str, ident, cmt;
{
    struct mails *sc;
    char *tmp;
    unsigned int mode = 0;

    Check_Type(str, T_STRING);

    tmp = STR2CSTR(ident);
    if      (strcmp(tmp, "RecvH") == 0)         mode |= MODE_RECV;
    else if (strcmp(tmp, "CTypeH") == 0)        mode |= MODE_MIME;
    else if (strcmp(tmp, "CEncodingH") == 0)    mode |= MODE_MIME;
    else if (strcmp(tmp, "CDispositionH") == 0) mode |= MODE_MIME;

    if (! NIL_P(cmt)) {
        Check_Type(cmt, T_ARRAY);
    }

    /* Nothing below can raise a type error, so sc cannot leak. */
    sc = ALLOC_N(struct mails, 1);
    real_reset(sc);
    sc->ptr = sc->p = (unsigned char *)RSTRING(str)->ptr;
    sc->pend = sc->p + RSTRING(str)->len;
    sc->flags |= mode;
    sc->comments = cmt;     /* Qnil when no comment array was given */

    return Data_Wrap_Struct(MailScanner, mails_mark, mails_free, sc);
}
/*
 * MailScanner#debug
 *
 * Returns true when the MAILS_DEBUG flag of this scanner is set,
 * false otherwise.
 */
static VALUE
mails_debug_get(self)
    VALUE self;
{
    struct mails *scanner;

    GET_SCANNER(self, scanner);
    return (scanner->flags & MAILS_DEBUG) ? Qtrue : Qfalse;
}
/*
 * MailScanner#debug = flag
 *
 * Sets the MAILS_DEBUG flag when `flag` is true in the Ruby sense,
 * clears it otherwise. Always returns nil.
 */
static VALUE
mails_debug_set(self, flag)
    VALUE self, flag;
{
    struct mails *scanner;

    GET_SCANNER(self, scanner);
    scanner->flags = RTEST(flag)
        ? (scanner->flags | MAILS_DEBUG)
        : (scanner->flags & ~MAILS_DEBUG);
    return Qnil;
}
/* --------- scan functions ----------- */
/* This implementation is VERY rough, but it is usually good enough. */
#define ESC '\033'
/* skip until "\e(B" (ascii) */
/*
 * Skip an escape-introduced (JIS) run.
 *
 * Scans forward from sc->p for the three-byte sequence ESC ( B, which
 * switches back to ASCII, and leaves sc->p just after it. If the
 * sequence does not occur, sc->p ends up at sc->pend.
 *
 * The comparison looks at exactly three bytes and never reads beyond
 * sc->pend. (Comparing with strcmp() would test the whole rest of the
 * buffer, matching only when the escape is the final thing in it.)
 */
static void
fwd_jis(sc)
    struct mails *sc;
{
    while (sc->p < sc->pend) {
        if (sc->pend - sc->p >= 3
            && sc->p[0] == ESC && sc->p[1] == '(' && sc->p[2] == 'B') {
            sc->p += 3;
            return;
        }
        sc->p++;
    }
}
/* True for a byte with the high bit set (value above 127). */
#define IS_JCHAR(ch) ((ch) > 127)

/*
 * Skip a run of high-bit characters. Each time a byte above 127 is
 * found, it and the byte after it (when one exists) are consumed as a
 * pair. Stops at the first position that holds a 7-bit byte or at
 * sc->pend.
 */
static void
fwd_jstr(sc)
    struct mails *sc;
{
    while (sc->p < sc->pend && IS_JCHAR(*sc->p)) {
        sc->p++;                    /* lead byte */
        if (sc->p < sc->pend)
            sc->p++;                /* trail byte, if present */
    }
}
/* Non-zero when ch is one of the punctuation bytes that end an atom. */
static int
atom_special_p(ch)
    int ch;
{
    switch (ch) {
      case '(': case ')': case '<': case '>': case '@':
      case ',': case ';': case ':': case '\\': case '\"':
      case '.': case '[': case ']':
        return 1;
      default:
        return 0;
    }
}

/*
 * Scan an atom starting at sc->p and return it as a new String.
 *
 * The atom ends at the first special character (see atom_special_p),
 * at the first byte <= 32 (space and control characters), or at the
 * end of input. Escape-introduced runs and high-bit characters are
 * skipped as units via fwd_jis() / fwd_jstr() and become part of the atom.
 */
static VALUE
scan_atom(sc)
    struct mails *sc;
{
    unsigned char *start = sc->p;
    unsigned char ch;

    while (sc->p < sc->pend) {
        ch = *sc->p;
        if (atom_special_p(ch))
            break;
        if (ch == ESC) {            /* must be tested before the <= 32 check */
            fwd_jis(sc);
        }
        else if (ch > 127) {
            fwd_jstr(sc);
        }
        else if (ch <= 32) {
            break;
        }
        else {
            sc->p++;
        }
    }
    return rb_str_new(start, sc->p - start);
}
/*
 * Scan a MIME token starting at sc->p and return it as a new String.
 *
 * The token ends at the first special character listed below, at the
 * first byte <= 32, or at the end of input. Escape-introduced runs and
 * high-bit characters are skipped as units and become part of the token.
 *
 * After fwd_jstr() the loop restarts with `continue` so that the byte
 * following a high-bit run is examined like any other byte. Falling
 * through to the trailing sc->p++ would swallow that byte unchecked and
 * could advance sc->p beyond sc->pend.
 */
static VALUE
scan_token(sc)
    struct mails *sc;
{
    unsigned char *ret;

    ret = sc->p;
    while (sc->p < sc->pend) {
        switch (*sc->p) {
          case '(':
          case ')':
          case '<':
          case '>':
          case '@':
          case ',':
          case ';':
          case ':':
          case '\\':
          case '\"':
          case '/':
          case '[':
          case ']':
          case '?':
          case '=':
            goto retval;
          case ESC:
            fwd_jis(sc);
            continue;
          default:
            if (*sc->p > 127) {
                fwd_jstr(sc);
                continue;       /* re-examine the byte after the run */
            }
            if (*sc->p <= 32)
                goto retval;
            break;
        }
        sc->p++;
    }
retval:
    return rb_str_new((char *)ret, sc->p - ret);
}
/*
 * Scan a quoted-string starting at sc->p.
 *
 * Returns Qnil (consuming nothing) when sc->p is not at a '"'.
 * Otherwise consumes the opening quote, the body and the closing quote
 * (if present) and returns the body as a new String with backslash
 * escapes resolved: the backslash is dropped and the byte after it is
 * copied literally, so an escaped '"' does not end the string.
 * An unterminated string yields everything up to the end of input.
 *
 * The result is built in a growable Ruby String rather than a fixed
 * stack buffer, so input of any length is safe, and sc->p never moves
 * beyond sc->pend (not even for a trailing backslash).
 */
static VALUE
scan_quoted(sc)
    struct mails *sc;
{
    VALUE result;
    unsigned char *run;     /* start of the pending, not yet copied bytes */

    if (sc->p >= sc->pend || *sc->p != '\"')
        return Qnil;
    sc->p++;

    result = rb_str_new("", 0);
    run = sc->p;
    while (sc->p < sc->pend) {
        if (*sc->p == '\"') {
            rb_str_cat(result, (char *)run, sc->p - run);
            sc->p++;
            return result;
        }
        if (*sc->p == '\\') {
            /* flush what precedes the backslash, then drop the backslash */
            rb_str_cat(result, (char *)run, sc->p - run);
            sc->p++;
            run = sc->p;    /* the escaped byte starts the next run */
            if (sc->p >= sc->pend)
                break;
        }
        sc->p++;
    }
    /* unterminated: return what has been collected so far */
    rb_str_cat(result, (char *)run, sc->p - run);
    return result;
}
/*
 * Scan a domain-literal starting at sc->p.
 *
 * Collects bytes up to (not including) the first unescaped ']', which
 * is consumed, and returns them as a new String. A backslash is dropped
 * and the byte after it is copied literally. Without a closing ']' the
 * rest of the input is returned.
 *
 * NOTE(review): scanning begins AT sc->p; the opening '[' the caller is
 * positioned on is not skipped and therefore is the first byte of the
 * result (unchanged from the previous behaviour) -- confirm that callers
 * rely on this.
 *
 * The result is built in a growable Ruby String rather than a fixed
 * stack buffer, so input of any length is safe, and sc->p never moves
 * beyond sc->pend (not even for a trailing backslash).
 */
static VALUE
scan_domlit(sc)
    struct mails *sc;
{
    VALUE result;
    unsigned char *run;     /* start of the pending, not yet copied bytes */

    result = rb_str_new("", 0);
    run = sc->p;
    while (sc->p < sc->pend) {
        if (*sc->p == ']') {
            rb_str_cat(result, (char *)run, sc->p - run);
            sc->p++;
            return result;
        }
        if (*sc->p == '\\') {
            /* flush what precedes the backslash, then drop the backslash */
            rb_str_cat(result, (char *)run, sc->p - run);
            sc->p++;
            run = sc->p;    /* the escaped byte starts the next run */
            if (sc->p >= sc->pend)
                break;
        }
        sc->p++;
    }
    /* unterminated: return what has been collected so far */
    rb_str_cat(result, (char *)run, sc->p - run);
    return result;
}
/*
static VALUE
scan_digit(sc)
struct mails *sc;
{
unsigned char *ret;
ret = sc->p;
while (sc->p < sc->pend) {
switch (*sc->p) {
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
break;
default:
goto retval;
}
sc->p++;
}
retval:
return rb_str_new(ret, sc->p - ret);
}
*/
/*
static VALUE
scan_boundary(sc)
struct mails *sc;
{
unsigned char *ret;
ret = sc->p;
while (sc->p < sc->pend) {
switch (*sc->p) {
case ' ':
case '\'':
case '(':
case ')':
case '+':
case '_':
case ',':
case '-':
case '.':
case '/':
case ':':
case '=':
case '?':
break;
default:
if (*sc->p >= 'A' && *sc->p <= 'Z') break;
if (*sc->p >= 'a' && *sc->p <= 'z') break;
if (*sc->p >= '0' && *sc->p <= '9') break;
goto retval;
}
}
retval:
return rb_str_new(ret, sc->p - ret);
}
*/
/*
 * Consume one end-of-line sequence at sc->p: "\n", "\r\n" or a lone
 * "\r". Does nothing when sc->p is not at a line terminator or is
 * already at the end of input. Never reads at or beyond sc->pend.
 */
static void
fwd_eol(sc)
    struct mails *sc;
{
    if (sc->p >= sc->pend)
        return;

    if (*sc->p == '\n') {
        sc->p++;
    }
    else if (*sc->p == '\r') {
        sc->p++;
        /* the CR may be the very last byte of the input */
        if (sc->p < sc->pend && *sc->p == '\n')
            sc->p++;
    }
}
static VALUE
scan_comment(sc)
struct mails *sc;
{
int nest = 0;
unsigned char *ret;
ret = sc->p;
while (sc->p < sc->pend) {
if (*sc->p == ESC) {
fwd_jis(sc);
continue;
}
if (IS_JCHAR(*sc->p)) {
fwd_jstr(sc);
continue;
}
switch (*sc->p) {
case '(':
nest++;
break;
case ')':
nest--;
if (nest == 0) {
sc->p++;
goto retval;
}
break;
case '\\':
sc->p++;
break;
case '\r':
case '\n':
fwd_eol(sc);
continue;
}
sc->p++;
}
rb_raise(ScanError, "unterminated comment");
retval:
return rb_str_new(ret + 1, sc->p - ret - 2);
}
/*
 * Return 1 when character ch occurs in the NUL-terminated string str,
 * 0 otherwise. The terminating NUL itself is never matched.
 */
static int
findin(ch, str)
    char ch;
    char *str;
{
    while (*str != '\0') {
        if (*str++ == ch)
            return 1;
    }
    return 0;
}
/*
 * Character-class predicates.
 *
 * Every use of the argument is parenthesised so that an expression
 * argument (e.g. a conditional expression) is classified as a whole.
 * The argument is still evaluated more than once: do not pass
 * expressions with side effects.
 */

/* linear white space: blank, tab, LF or CR */
#define IS_LWSP(ch) \
    ((ch) == ' ' || (ch) == '\t' || (ch) == '\n' || (ch) == '\r')

/* ASCII decimal digit */
#define IS_DIGIT(ch) ((ch) >= '0' && (ch) <= '9')

/* byte that may start an atom (default scanning mode) */
#define IS_ATOMCHAR(ch) \
    (((ch) >= 'a' && (ch) <= 'z') || ((ch) >= 'A' && (ch) <= 'Z') || \
     ((ch) >= '0' && (ch) <= '9') || ((ch) == ESC) || IS_JCHAR(ch) || \
     findin((ch), "#!$%&'`*+-{|}~^/=?"))

/* byte that may start a token (MIME scanning mode) */
#define IS_TOKENCHAR(ch) \
    (((ch) >= 'a' && (ch) <= 'z') || ((ch) >= 'A' && (ch) <= 'Z') || \
     ((ch) >= '0' && (ch) <= '9') || ((ch) == ESC) || IS_JCHAR(ch) || \
     findin((ch), "#!$%&'`*+-{|}~^."))
/*
 * Advance sc->p over any run of linear white space (see IS_LWSP),
 * stopping at the first other byte or at the end of input.
 */
static void
skip_lwsp(sc)
    struct mails *sc;
{
    while (sc->p < sc->pend && IS_LWSP(*sc->p))
        sc->p++;
}
/*
 * ASCII letter helpers. The upper bound of the upper-case range is
 * tested with <= 'Z'; testing >= 'Z' would treat every byte from 'Z'
 * upwards (including all lower-case letters) as upper case and break
 * case folding. Arguments are evaluated more than once.
 */
#define IS_ALPHA(ch) \
    (((ch) >= 'a' && (ch) <= 'z') || ((ch) >= 'A' && (ch) <= 'Z'))
#define IS_UPPER(ch) ((ch) >= 'A' && (ch) <= 'Z')
#define TO_LOWER(ch) (IS_UPPER(ch) ? (ch) + ('a' - 'A') : (ch))

/*
 * Case-insensitive (ASCII only) equality test for two NUL-terminated
 * strings. Returns 1 when a and b have the same length and are equal
 * ignoring the case of ASCII letters, 0 otherwise.
 */
static int
nccmp(a, b)
    char *a, *b;
{
    for (; *a && *b; a++, b++) {
        if (*a != *b && TO_LOWER(*a) != TO_LOWER(*b))
            return 0;
    }
    /* equal only if both strings ended at the same time */
    return (*a == *b) ? 1 : 0;
}
/*
 * Return 1 when every byte of the String vret is an ASCII digit
 * (an empty String therefore also yields 1), 0 otherwise.
 */
static int
digit_p(vret)
    VALUE vret;
{
    char *p;
    char *end;

    p = RSTRING(vret)->ptr;
    end = p + RSTRING(vret)->len;
    while (p < end) {
        if (! IS_DIGIT(*p))
            return 0;
        p++;
    }
    return 1;
}
/*
 * Token-type values returned in slot 0 of the array filled by mails_scan.
 * Each one is created by intn() in Init_mails().
 */
static VALUE tok_atom, tok_digit, tok_token, tok_quoted, tok_domlit;
/* Keyword token types, used only in Received-header mode (RECV_MODE_P). */
static VALUE tok_from, tok_by, tok_via, tok_with, tok_id, tok_for;
/*
 * MailScanner#scan(arr)
 *
 * Scans the next token and stores the pair [type, value] into
 * arr[0] / arr[1]; returns arr.
 *
 *   - end of input:            arr = [false, false]
 *   - MIME mode token:         [tok_token, String]
 *   - atom (non-MIME mode):    [tok_atom, String]; in Received-header
 *                              mode the words from/by/via/with/id/for
 *                              get their own type; an all-digit atom
 *                              gets tok_digit
 *   - quoted string:           [tok_quoted, String]
 *   - domain literal:          [tok_domlit, String]
 *   - any other single byte:   [String, String] (same one-byte String)
 *
 * Leading white space is skipped. Comments are skipped as well; their
 * text is appended to the comments array given to MailScanner.new, if any.
 *
 * Raises ScanError when the scanner has no source buffer or when a
 * comment is unterminated, and TypeError when arr is not an Array.
 *
 * Both parameters are declared as VALUE: an undeclared K&R parameter
 * defaults to int, which truncates object references on LP64 platforms.
 */
static VALUE
mails_scan(self, arr)
    VALUE self, arr;
{
    struct mails *sc;
    VALUE sret, vret;
    char *word;
    int debug;

    GET_SCANNER(self, sc);
    if (!sc->p) {
        rb_raise(ScanError, "Mails#scan called before reset");
    }
    Check_Type(arr, T_ARRAY);
    debug = sc->flags & MAILS_DEBUG;
    sret = Qnil;
    vret = Qnil;

    while (1) {
        if (debug) puts("new loop");
        /* >= rather than ==: never run on if p is at or beyond the end */
        if (sc->p >= sc->pend) {
            sret = vret = Qfalse;
            break;
        }
        if (debug) puts("not void");

        if (IS_LWSP(*sc->p)) {
            skip_lwsp(sc);
            if (sc->p >= sc->pend) {
                sret = vret = Qfalse;
                break;
            }
        }

        if (MIME_MODE_P(sc)) {
            if (IS_TOKENCHAR(*sc->p)) {
                if (debug) puts("TOKEN");
                sret = tok_token;
                vret = scan_token(sc);
                break;
            }
        }
        else {
            if (IS_ATOMCHAR(*sc->p)) {
                if (debug) puts("ATOM");
                sret = tok_atom;
                vret = scan_atom(sc);
                if (RECV_MODE_P(sc)) {
                    /* keywords of the Received: header get their own type */
                    word = RSTRING(vret)->ptr;
                    if      (nccmp(word, "from")) sret = tok_from;
                    else if (nccmp(word, "by"))   sret = tok_by;
                    else if (nccmp(word, "via"))  sret = tok_via;
                    else if (nccmp(word, "with")) sret = tok_with;
                    else if (nccmp(word, "id"))   sret = tok_id;
                    else if (nccmp(word, "for"))  sret = tok_for;
                }
                if (digit_p(vret)) sret = tok_digit;
                break;
            }
        }

        if (*sc->p == '"') {
            if (debug) puts("QUOTED");
            sret = tok_quoted;
            vret = scan_quoted(sc);
        }
        else if (*sc->p == '(') {
            VALUE c;

            if (debug) puts("COMMENT");
            c = scan_comment(sc);
            if (! NIL_P(sc->comments))
                rb_ary_push(sc->comments, c);
            continue;       /* a comment is not a token: scan again */
        }
        else if (*sc->p == '[') {
            if (debug) puts("DOMLIT");
            sret = tok_domlit;
            vret = scan_domlit(sc);
        }
        else {
            /* any other byte is its own token type and value */
            if (debug) puts("CHAR");
            sret = vret = rb_str_new((char *)sc->p, 1);
            sc->p++;
        }
        break;
    }

    rb_ary_store(arr, 0, sret);
    rb_ary_store(arr, 1, vret);
    return arr;
}
/*
 * Intern the C string str and return the resulting ID boxed as a
 * Fixnum (INT2FIX).
 */
static VALUE
intn(str)
    char *str;
{
    ID id = rb_intern(str);

    return INT2FIX(id);
}
void
Init_mails()
{
MailScanner = rb_define_class("MailScanner", rb_cObject);
rb_define_singleton_method(MailScanner, "new", mails_s_new, 3) ;
rb_define_method(MailScanner, "scan", mails_scan, 1);
rb_define_method(MailScanner, "debug", mails_debug_get, 0);
rb_define_method(MailScanner, "debug=", mails_debug_set, 1);
ScanError = rb_eval_string("::ScanError");
tok_atom = intn("ATOM");
tok_digit = intn("DIGIT");
tok_token = intn("TOKEN");
tok_quoted = intn("QUOTED");
tok_domlit = intn("DOMLIT");
tok_from = intn("FROM");
tok_by = intn("BY");
tok_via = intn("VIA");
tok_with = intn("WITH");
tok_id = intn("ID");
tok_for = intn("FOR");
}