tmail/strscan/strscan.c

/* vi:set ts=4 sw=4:

  strscan.c version 0.5.4

    Copyright (c) 1999 Minero Aoki <aamine@dp.u-netsurf.ne.jp>

    This library is free software.
    You can distribute/modify this library under the terms of
    the GNU Lesser General Public License version 2 or later.

*/


#include <stdio.h>
#ifdef __STDC__
#  include <stdlib.h>
#endif

#include "ruby.h"
#include "re.h"


/*
 * Scanner state kept for each StringScanner object.  An instance is
 * allocated in strscan_s_new and released by strscan_free.
 */
struct strscanner
{
    char *pbeg;                 /* first byte of the buffer being scanned */
    char *ptr;                  /* current scan position */
    char *pend;                 /* one past the last byte of the buffer */
    char *pre;                  /* scan position before the last advancing
                                   match; 0 when there is none */
    unsigned int flags;         /* combination of MATCH_FLAG and DUP_FLAG */
    struct re_registers regi;   /* registers passed to re_match */
};

/* Bits stored in strscanner.flags. */
#define MATCH_FLAG (1U << 0)    /* the most recent scan operation matched */
#define DUP_FLAG   (1U << 1)    /* pbeg is a private copy freed by strscan_free */

/* Non-zero when MATCH_FLAG is set.  The argument is parenthesized so that
   any pointer expression, not only a plain identifier, expands correctly. */
#define MATCHED_P(p) ((p)->flags & MATCH_FLAG)

/* Fetch the struct strscanner wrapped by the Ruby object `obj' into `dat'. */
#define GET_SCANNER(obj,dat) Data_Get_Struct(obj, struct strscanner, dat)
/* True once the scan position has reached the end of the buffer. */
#define SCAN_FINISHED(dat) ((dat)->ptr >= (dat)->pend)

/* Ruby class objects; both are assigned in Init_strscan. */
static VALUE StringScanner;
static VALUE ScanError;


/* ------------------------------------------------------------- */


/*
 * Release a scanner: the private buffer copy (only when DUP_FLAG says the
 * scanner owns one), the regexp registers, and the struct itself.
 * Passed to Data_Wrap_Struct as the free function in strscan_s_new.
 */
static void
strscan_free(p)
    struct strscanner *p;
{
    const int owns_buffer = (p->flags & DUP_FLAG) != 0;

    if (owns_buffer) {
        free(p->pbeg);
    }
    ruby_re_free_registers(&p->regi);
    free(p);
}


/*
 * StringScanner.new(str [, dup_p])
 *
 * Build a scanner over `str', which must be a String.  When only one
 * argument is given, dup_p is treated as true.  If dup_p is true (or the
 * safe level is 4 or higher) the scanner works on its own copy of the
 * string bytes; otherwise it points directly at the string's buffer.
 *
 * NOTE(review): in the non-copying case nothing visible here keeps `str'
 * alive or pins its buffer (the mark function passed to Data_Wrap_Struct
 * is 0) -- confirm callers hold on to the string and do not modify it.
 */
static VALUE
strscan_s_new(argc, argv, klass)
    int argc;
    VALUE *argv, klass;
{
    VALUE str, dup_p;
    struct strscanner *p;
    int given;

    given = rb_scan_args(argc, argv, "11", &str, &dup_p);
    if (given == 1) {
        dup_p = Qtrue;
    }
    Check_Type(str, T_STRING);

    p = ALLOC_N(struct strscanner, 1);
    p->flags = 0U;
    p->pre = 0;
    MEMZERO(&p->regi, struct re_registers, 1);

    if (rb_safe_level() >= 4 || RTEST(dup_p)) {
        /* private copy; strscan_free releases it because DUP_FLAG is set */
        p->flags |= DUP_FLAG;
        p->pbeg = ALLOC_N(char, RSTRING(str)->len);
        memcpy(p->pbeg, RSTRING(str)->ptr, sizeof(char) * RSTRING(str)->len);
    }
    else {
        p->pbeg = RSTRING(str)->ptr;
    }
    p->ptr = p->pbeg;
    p->pend = p->pbeg + RSTRING(str)->len;

    return Data_Wrap_Struct(klass, 0, strscan_free, p);
}


/*
 * scanner.reset
 *
 * Move the scan position back to the start of the buffer and forget the
 * previous match.  Returns the receiver.
 */
static VALUE
strscan_reset(self)
    VALUE self;
{
    struct strscanner *scanner;

    GET_SCANNER(self, scanner);
    scanner->flags &= ~MATCH_FLAG;
    scanner->pre = 0;
    scanner->ptr = scanner->pbeg;
    return self;
}


/*
 * scanner.clear
 *
 * Move the scan position to the end of the buffer and forget the previous
 * match.  Returns the receiver.
 */
static VALUE
strscan_clear(self)
    VALUE self;
{
    struct strscanner *scanner;

    GET_SCANNER(self, scanner);
    scanner->flags &= ~MATCH_FLAG;
    scanner->pre = 0;
    scanner->ptr = scanner->pend;
    return self;
}


/*
 * Shared implementation of scan / skip / match? / check / scan_full.
 *
 * Tries to match `regex' at the current scan position.
 *   ptrflag -- non-zero: remember the old position in `pre' and advance
 *              the scan position past the match.
 *   strflag -- non-zero: return the matched text; zero: return its length.
 *
 * Returns nil when re_match reports a negative result other than -2, and
 * raises ScanError when it reports -2.
 */
static VALUE
strscan_do_scan(self, regex, ptrflag, strflag)
    VALUE self, regex;
    int ptrflag, strflag;
{
    struct strscanner *p;
    char *start;
    size_t len;
    int ret;

    Check_Type(regex, T_REGEXP);
    GET_SCANNER(self, p);

    p->flags &= ~MATCH_FLAG;
    start = p->ptr;
    ret = re_match(RREGEXP(regex)->ptr, start, p->pend - start, 0, &(p->regi));
    if (ret == -2)
        rb_raise(ScanError, "regexp buffer overflow");
    if (ret < 0)
        return Qnil;

    /* end[0] is used as the length of the whole match */
    len = p->regi.end[0];
    p->flags |= MATCH_FLAG;
    if (ptrflag) {
        p->pre = start;
        p->ptr = start + len;
    }
    if (strflag)
        return rb_str_new(start, len);
    return INT2FIX(len);
}


/* scanner.scan(reg): match at the scan position, advance past the match
   and return the matched text (see strscan_do_scan). */
static VALUE
strscan_scan(self, reg)
    VALUE self, reg;
{
    const int advance = 1;
    const int want_string = 1;

    return strscan_do_scan(self, reg, advance, want_string);
}


/* scanner.match?(reg): match at the scan position without advancing and
   return the match length (see strscan_do_scan). */
static VALUE
strscan_match_p(self, reg)
    VALUE self, reg;
{
    const int advance = 0;
    const int want_string = 0;

    return strscan_do_scan(self, reg, advance, want_string);
}


/* scanner.skip(reg): match at the scan position, advance past the match
   and return the match length (see strscan_do_scan). */
static VALUE
strscan_skip(self, reg)
    VALUE self, reg;
{
    const int advance = 1;
    const int want_string = 0;

    return strscan_do_scan(self, reg, advance, want_string);
}


/* scanner.check(reg): match at the scan position without advancing and
   return the matched text (see strscan_do_scan). */
static VALUE
strscan_check(self, reg)
    VALUE self, reg;
{
    const int advance = 0;
    const int want_string = 1;

    return strscan_do_scan(self, reg, advance, want_string);
}


/*
 * scanner.scan_full(reg, s, f)
 *
 * General form of the scan methods: `s' (tested with RTEST) selects
 * whether the scan position advances, `f' whether the matched text is
 * returned instead of its length.
 *
 * The parameters are declared as VALUE explicitly; without a declaration
 * an old-style definition gives them type int, which does not match the
 * VALUE arguments this method is called with.
 */
static VALUE
strscan_scan_full(self, reg, s, f)
    VALUE self, reg, s, f;
{
    return strscan_do_scan(self, reg, RTEST(s), RTEST(f));
}


/*
 * scanner.getch
 *
 * Consume one character (its byte length comes from mbclen) and return it
 * as a string, or return nil when the scan position is already at the end.
 * The scan position is never moved past the end of the buffer.
 */
static VALUE
strscan_getch(self)
    VALUE self;
{
    struct strscanner *p;
    char *next;

    GET_SCANNER(self, p);
    if (SCAN_FINISHED(p)) {
        p->flags &= ~MATCH_FLAG;
        return Qnil;
    }

    p->flags |= MATCH_FLAG;
    p->pre = p->ptr;
    next = p->ptr + mbclen(*p->ptr);
    /* clamp in case the character's length runs past the buffer end */
    p->ptr = (next > p->pend) ? p->pend : next;
    return rb_str_new(p->pre, p->ptr - p->pre);
}


/* scanner.empty?: true when the scan position has reached the end. */
static VALUE
strscan_empty_p(self)
    VALUE self;
{
    struct strscanner *scanner;

    GET_SCANNER(self, scanner);
    return SCAN_FINISHED(scanner) ? Qtrue : Qfalse;
}


/* scanner.rest?: true while the scan position is before the end. */
static VALUE
strscan_rest_p(self)
    VALUE self;
{
    struct strscanner *scanner;

    GET_SCANNER(self, scanner);
    return SCAN_FINISHED(scanner) ? Qfalse : Qtrue;
}


/* scanner.matched?: true when MATCH_FLAG is set on the scanner. */
static VALUE
strscan_matched_p(self)
    VALUE self;
{
    struct strscanner *scanner;

    GET_SCANNER(self, scanner);
    return MATCHED_P(scanner) ? Qtrue : Qfalse;
}


/* scanner.restsize: number of bytes between the scan position and the
   end of the buffer. */
static VALUE
strscan_restsize(self)
    VALUE self;
{
    struct strscanner *scanner;

    GET_SCANNER(self, scanner);
    return INT2FIX(scanner->pend - scanner->ptr);
}


static VALUE
strscan_unscan(self)
    VALUE self;
{
    struct strscanner *p;

    GET_SCANNER(self, p);
    if (! MATCHED_P(p))
        rb_raise(ScanError, "cannot unscan: not scanned yet");
    p->ptr = p->pre;
    p->pre = 0;
    return Qtrue;
}


/*
 * scanner.matched
 *
 * Return the text between `pre' and the scan position, or nil when there
 * is no recorded match.  `pre' is checked as well as MATCH_FLAG because
 * the flag can be set while `pre' is still 0 (strscan_do_scan leaves
 * `pre' untouched when it does not advance), and building a string from a
 * null start pointer must be avoided.
 */
static VALUE
strscan_matched(self)
    VALUE self;
{
    struct strscanner *p;

    GET_SCANNER(self, p);
    if (! MATCHED_P(p) || p->pre == 0)
        return Qnil;
    return rb_str_new(p->pre, p->ptr - p->pre);
}


static VALUE
strscan_pre_match(self)
    VALUE self;
{
    struct strscanner *p;

    GET_SCANNER(self, p);
    return rb_str_new(p->pbeg, p->pre - p->pbeg);
}


/* scanner.post_match / scanner.rest: the bytes from the scan position to
   the end of the buffer, as a new string. */
static VALUE
strscan_post_match(self)
    VALUE self;
{
    struct strscanner *scanner;
    long remaining;

    GET_SCANNER(self, scanner);
    remaining = scanner->pend - scanner->ptr;
    return rb_str_new(scanner->ptr, remaining);
}


/*
 * scanner[idx]
 *
 * Return the text of register `idx' from the last match, or nil when
 * nothing is matched, when `idx' is outside 0...num_regs, or when that
 * register is unset (beg == -1).  Negative indexes are rejected here so
 * that the register arrays are never read out of bounds.
 *
 * NOTE(review): the register offsets are added to the current p->ptr.
 * strscan_do_scan advances p->ptr past the match when asked to, so after
 * an advancing scan p->ptr is no longer the position the offsets were
 * measured from -- confirm which base is intended.
 */
static VALUE
strscan_aref(self, idx)
    VALUE self, idx;
{
    struct strscanner *p;
    long i;

    GET_SCANNER(self, p);
    if (! MATCHED_P(p))
        return Qnil;

    i = NUM2LONG(idx);
    if (i < 0 || i >= p->regi.num_regs)
        return Qnil;

    if (p->regi.beg[i] == -1)
        return Qnil;

    return rb_str_new(p->ptr + p->regi.beg[i],
                      p->regi.end[i] - p->regi.beg[i]);
}


/* scanner.matchedsize: distance from `pre' to the scan position, or nil
   when `pre' is 0. */
static VALUE
strscan_matchedsize(self)
    VALUE self;
{
    struct strscanner *scanner;

    GET_SCANNER(self, scanner);
    if (scanner->pre != 0)
        return INT2NUM(scanner->ptr - scanner->pre);
    return Qnil;
}


/* scanner.string: a new string holding the whole scanned buffer, from
   pbeg up to pend. */
static VALUE
strscan_string(self)
    VALUE self;
{
    struct strscanner *scanner;
    long total;

    GET_SCANNER(self, scanner);
    total = scanner->pend - scanner->pbeg;
    return rb_str_new(scanner->pbeg, total);
}


static VALUE
strscan_peep(self, vlen)
    VALUE self, vlen;
{
    struct strscanner *p;
    long len;

    GET_SCANNER(self, p);

    len = NUM2LONG(vlen);
    if (p->ptr + len > p->pend)
        len = p->pend - p->ptr;

    return rb_str_new(p->ptr, len);
}



/*
 * Extension entry point: defines the ScanError and StringScanner classes
 * and registers every StringScanner method with its argument count.
 */
void
Init_strscan()
{
/* Register an instance method on StringScanner. */
#define DEF_METHOD(name, func, argc) \
    rb_define_method(StringScanner, (name), (func), (argc))

    ScanError = rb_define_class("ScanError", rb_eStandardError);
    StringScanner = rb_define_class("StringScanner", rb_cObject);

    /* -1: `new' receives its arguments as (argc, argv, klass) */
    rb_define_singleton_method(StringScanner, "new", strscan_s_new, -1);

    DEF_METHOD("reset",       strscan_reset,       0);
    DEF_METHOD("clear",       strscan_clear,       0);
    DEF_METHOD("scan",        strscan_scan,        1);
    DEF_METHOD("skip",        strscan_skip,        1);
    DEF_METHOD("match?",      strscan_match_p,     1);
    DEF_METHOD("check",       strscan_check,       1);
    DEF_METHOD("scan_full",   strscan_scan_full,   3);
    DEF_METHOD("getch",       strscan_getch,       0);
    DEF_METHOD("empty?",      strscan_empty_p,     0);
    DEF_METHOD("rest?",       strscan_rest_p,      0);
    DEF_METHOD("unscan",      strscan_unscan,      0);
    DEF_METHOD("matched?",    strscan_matched_p,   0);
    DEF_METHOD("matched",     strscan_matched,     0);
    DEF_METHOD("matchedsize", strscan_matchedsize, 0);
    DEF_METHOD("pre_match",   strscan_pre_match,   0);
    DEF_METHOD("post_match",  strscan_post_match,  0);
    /* `rest' shares the post_match implementation */
    DEF_METHOD("rest",        strscan_post_match,  0);
    DEF_METHOD("restsize",    strscan_restsize,    0);
    DEF_METHOD("[]",          strscan_aref,        1);
    DEF_METHOD("string",      strscan_string,      0);
    DEF_METHOD("peep",        strscan_peep,        1);

#undef DEF_METHOD
}