marshal.c


DEFINITIONS

This source file includes following functions.
  1. shortlen
  2. w_byte
  3. w_bytes
  4. w_short
  5. w_long
  6. w_float
  7. w_symbol
  8. w_unique
  9. hash_each
  10. obj_each
  11. w_extended
  12. w_class
  13. w_uclass
  14. w_ivar
  15. w_object
  16. dump
  17. dump_ensure
  18. marshal_dump
  19. r_byte
  20. r_short
  21. long_toobig
  22. SIGN_EXTEND_CHAR
  23. SIGN_EXTEND_CHAR
  24. r_long
  25. r_bytes0
  26. r_symlink
  27. r_symreal
  28. r_symbol
  29. r_unique
  30. r_string
  31. r_regist
  32. r_ivar
  33. path2class
  34. path2module
  35. r_object0
  36. r_object
  37. load
  38. load_ensure
  39. marshal_load
  40. Init_marshal
  41. rb_marshal_dump
  42. rb_marshal_load


   1  /**********************************************************************
   2  
   3    marshal.c -
   4  
   5    $Author: matz $
   6    $Date: 2002/09/06 08:59:38 $
   7    created at: Thu Apr 27 16:30:01 JST 1995
   8  
   9    Copyright (C) 1993-2002 Yukihiro Matsumoto
  10  
  11  **********************************************************************/
  12  
  13  #include <math.h>
  14  
  15  #include "ruby.h"
  16  #include "rubyio.h"
  17  #include "st.h"
  18  #include "util.h"
  19  
  20  #define BITSPERSHORT (2*CHAR_BIT)
  21  #define SHORTMASK ((1<<BITSPERSHORT)-1)
  22  #define SHORTDN(x) RSHIFT(x,BITSPERSHORT)
  23  
  24  #if SIZEOF_SHORT == SIZEOF_BDIGITS
  25  #define SHORTLEN(x) (x)
  26  #else
  27  static int
  28  shortlen(len, ds)
  29      long len;
  30      BDIGIT *ds;
  31  {
  32      BDIGIT num;
  33      int offset = 0;
  34  
  35      num = ds[len-1];
  36      while (num) {
  37          num = SHORTDN(num);
  38          offset++;
  39      }
  40      return (len - 1)*sizeof(BDIGIT)/2 + offset;
  41  }
  42  #define SHORTLEN(x) shortlen((x),d)
  43  #endif
  44  
  45  #define MARSHAL_MAJOR   4
  46  #define MARSHAL_MINOR   7
  47  
  48  #define TYPE_NIL        '0'
  49  #define TYPE_TRUE       'T'
  50  #define TYPE_FALSE      'F'
  51  #define TYPE_FIXNUM     'i'
  52  
  53  #define TYPE_EXTENDED   'e'
  54  #define TYPE_UCLASS     'C'
  55  #define TYPE_OBJECT     'o'
  56  #define TYPE_DATA       'd'
  57  #define TYPE_USERDEF    'u'
  58  #define TYPE_USRMARHAL  'U'
  59  #define TYPE_FLOAT      'f'
  60  #define TYPE_BIGNUM     'l'
  61  #define TYPE_STRING     '"'
  62  #define TYPE_REGEXP     '/'
  63  #define TYPE_ARRAY      '['
  64  #define TYPE_HASH       '{'
  65  #define TYPE_HASH_DEF   '}'
  66  #define TYPE_STRUCT     'S'
  67  #define TYPE_MODULE_OLD 'M'
  68  #define TYPE_CLASS      'c'
  69  #define TYPE_MODULE     'm'
  70  
  71  #define TYPE_SYMBOL     ':'
  72  #define TYPE_SYMLINK    ';'
  73  
  74  #define TYPE_IVAR       'I'
  75  #define TYPE_LINK       '@'
  76  
  77  static ID s_dump, s_load;
  78  static ID s_dump_data, s_load_data, s_alloc;
  79  
  80  struct dump_arg {
  81      VALUE obj;
  82      FILE *fp;
  83      VALUE str;
  84      st_table *symbol;
  85      st_table *data;
  86      int taint;
  87  };
  88  
  89  struct dump_call_arg {
  90      VALUE obj;
  91      struct dump_arg *arg;
  92      int limit;
  93  };
  94  
  95  static void w_long _((long, struct dump_arg*));
  96  
  97  static void
  98  w_byte(c, arg)
  99      char c;
 100      struct dump_arg *arg;
 101  {
 102      if (arg->fp) putc(c, arg->fp);
 103      else rb_str_buf_cat(arg->str, &c, 1);
 104  }
 105  
 106  static void
 107  w_bytes(s, n, arg)
 108      char *s;
 109      int n;
 110      struct dump_arg *arg;
 111  {
 112      w_long(n, arg);
 113      if (arg->fp) {
 114          fwrite(s, 1, n, arg->fp);
 115      }
 116      else {
 117          rb_str_buf_cat(arg->str, s, n);
 118      }
 119  }
 120  
 121  static void
 122  w_short(x, arg)
 123      int x;
 124      struct dump_arg *arg;
 125  {
 126      w_byte((x >> 0) & 0xff, arg);
 127      w_byte((x >> 8) & 0xff, arg);
 128  }
 129  
 130  static void
 131  w_long(x, arg)
 132      long x;
 133      struct dump_arg *arg;
 134  {
 135      char buf[sizeof(long)+1];
 136      int i, len = 0;
 137  
 138  #if SIZEOF_LONG > 4
 139      if (!(RSHIFT(x, 31) == 0 || RSHIFT(x, 31) == -1)) {
 140          /* big long does not fit in 4 bytes */
 141          rb_raise(rb_eTypeError, "long too big to dump");
 142      }
 143  #endif
 144  
 145      if (x == 0) {
 146          w_byte(0, arg);
 147          return;
 148      }
 149      if (0 < x && x < 123) {
 150          w_byte(x + 5, arg);
 151          return;
 152      }
 153      if (-124 < x && x < 0) {
 154          w_byte((x - 5)&0xff, arg);
 155          return;
 156      }
 157      for (i=1;i<sizeof(long)+1;i++) {
 158          buf[i] = x & 0xff;
 159          x = RSHIFT(x,8);
 160          if (x == 0) {
 161              buf[0] = i;
 162              break;
 163          }
 164          if (x == -1) {
 165              buf[0] = -i;
 166              break;
 167          }
 168      }
 169      len = i;
 170      for (i=0;i<=len;i++) {
 171          w_byte(buf[i], arg);
 172      }
 173  }
 174  
 175  static void
 176  w_float(d, arg)
 177      double d;
 178      struct dump_arg *arg;
 179  {
 180      char buf[100];
 181  
 182      if (isinf(d)) {
 183          if (d < 0) strcpy(buf, "-inf");
 184          else       strcpy(buf, "inf");
 185      }
 186      else if (isnan(d)) {
 187          strcpy(buf, "nan");
 188      }
 189      else if (d == 0.0) {
 190          if (1.0/d < 0) strcpy(buf, "-0");
 191          else           strcpy(buf, "0");
 192      }
 193      else {
 194          /* xxx: should not use system's sprintf(3) */
 195          sprintf(buf, "%.16g", d);
 196      }
 197      w_bytes(buf, strlen(buf), arg);
 198  }
 199  
 200  static void
 201  w_symbol(id, arg)
 202      ID id;
 203      struct dump_arg *arg;
 204  {
 205      char *sym = rb_id2name(id);
 206      long num;
 207  
 208      if (st_lookup(arg->symbol, id, &num)) {
 209          w_byte(TYPE_SYMLINK, arg);
 210          w_long(num, arg);
 211      }
 212      else {
 213          w_byte(TYPE_SYMBOL, arg);
 214          w_bytes(sym, strlen(sym), arg);
 215          st_add_direct(arg->symbol, id, arg->symbol->num_entries);
 216      }
 217  }
 218  
 219  static void
 220  w_unique(s, arg)
 221      char *s;
 222      struct dump_arg *arg;
 223  {
 224      if (s[0] == '#') {
 225          rb_raise(rb_eArgError, "can't dump anonymous class %s", s);
 226      }
 227      w_symbol(rb_intern(s), arg);
 228  }
 229  
 230  static void w_object _((VALUE,struct dump_arg*,int));
 231  
 232  static int
 233  hash_each(key, value, arg)
 234      VALUE key, value;
 235      struct dump_call_arg *arg;
 236  {
 237      w_object(key, arg->arg, arg->limit);
 238      w_object(value, arg->arg, arg->limit);
 239      return ST_CONTINUE;
 240  }
 241  
 242  static int
 243  obj_each(id, value, arg)
 244      ID id;
 245      VALUE value;
 246      struct dump_call_arg *arg;
 247  {
 248      w_symbol(id, arg->arg);
 249      w_object(value, arg->arg, arg->limit);
 250      return ST_CONTINUE;
 251  }
 252  
 253  static void
 254  w_extended(klass, arg)
 255      VALUE klass;
 256      struct dump_arg *arg;
 257  {
 258      char *path;
 259  
 260      if (FL_TEST(klass, FL_SINGLETON)) {
 261          if (RCLASS(klass)->m_tbl->num_entries ||
 262              (RCLASS(klass)->iv_tbl && RCLASS(klass)->iv_tbl->num_entries > 1)) {
 263              rb_raise(rb_eTypeError, "singleton can't be dumped");
 264          }
 265          klass = RCLASS(klass)->super;
 266      }
 267      while (BUILTIN_TYPE(klass) == T_ICLASS) {
 268          path = rb_class2name(RBASIC(klass)->klass);
 269          w_byte(TYPE_EXTENDED, arg);
 270          w_unique(path, arg);
 271          klass = RCLASS(klass)->super;
 272      }
 273  }
 274  
 275  static void
 276  w_class(type, obj, arg)
 277      int type;
 278      VALUE obj;
 279      struct dump_arg *arg;
 280  {
 281      char *path;
 282  
 283      VALUE klass = CLASS_OF(obj);
 284      w_extended(klass, arg);
 285      w_byte(type, arg);
 286      path = rb_class2name(klass);
 287      w_unique(path, arg);
 288  }
 289  
 290  static void
 291  w_uclass(obj, base_klass, arg)
 292      VALUE obj, base_klass;
 293      struct dump_arg *arg;
 294  {
 295      VALUE klass = CLASS_OF(obj);
 296      char *path;
 297  
 298      w_extended(klass, arg);
 299      if (klass != base_klass) {
 300          w_byte(TYPE_UCLASS, arg);
 301          w_unique(rb_class2name(CLASS_OF(obj)), arg);
 302      }
 303  }
 304  
 305  static void
 306  w_ivar(tbl, arg)
 307      st_table *tbl;
 308      struct dump_call_arg *arg;
 309  {
 310      if (tbl) {
 311          w_long(tbl->num_entries, arg->arg);
 312          st_foreach(tbl, obj_each, arg);
 313      }
 314      else {
 315          w_long(0, arg->arg);
 316      }
 317  }
 318  
 319  static void
 320  w_object(obj, arg, limit)
 321      VALUE obj;
 322      struct dump_arg *arg;
 323      int limit;
 324  {
 325      struct dump_call_arg c_arg;
 326      st_table *ivtbl = 0;
 327  
 328      if (limit == 0) {
 329          rb_raise(rb_eArgError, "exceed depth limit");
 330      }
 331      if (obj == Qnil) {
 332          w_byte(TYPE_NIL, arg);
 333      }
 334      else if (obj == Qtrue) {
 335          w_byte(TYPE_TRUE, arg);
 336      }
 337      else if (obj == Qfalse) {
 338          w_byte(TYPE_FALSE, arg);
 339      }
 340      else if (FIXNUM_P(obj)) {
 341  #if SIZEOF_LONG <= 4
 342          w_byte(TYPE_FIXNUM, arg);
 343          w_long(FIX2INT(obj), arg);
 344  #else
 345          if (RSHIFT((long)obj, 31) == 0 || RSHIFT((long)obj, 31) == -1) {
 346              w_byte(TYPE_FIXNUM, arg);
 347              w_long(FIX2LONG(obj), arg);
 348          }
 349          else {
 350              w_object(rb_int2big(FIX2LONG(obj)), arg, limit);
 351              return;
 352          }
 353  #endif
 354      }
 355      else if (SYMBOL_P(obj)) {
 356          w_symbol(SYM2ID(obj), arg);
 357          return;
 358      }
 359      else {
 360          long num;
 361  
 362          limit--;
 363          c_arg.limit = limit;
 364          c_arg.arg = arg;
 365  
 366          if (st_lookup(arg->data, obj, &num)) {
 367              w_byte(TYPE_LINK, arg);
 368              w_long(num, arg);
 369              return;
 370          }
 371  
 372          if (OBJ_TAINTED(obj)) arg->taint = Qtrue;
 373  
 374          st_add_direct(arg->data, obj, arg->data->num_entries);
 375          if (rb_respond_to(obj, s_dump)) {
 376              VALUE v;
 377  
 378              w_byte(TYPE_USERDEF, arg);
 379              w_unique(rb_class2name(CLASS_OF(obj)), arg);
 380              v = rb_funcall(obj, s_dump, 1, INT2NUM(limit));
 381              if (TYPE(v) != T_STRING) {
 382                  rb_raise(rb_eTypeError, "_dump() must return String");
 383              }
 384              w_bytes(RSTRING(v)->ptr, RSTRING(v)->len, arg);
 385              return;
 386          }
 387  
 388          if (ivtbl = rb_generic_ivar_table(obj)) {
 389              w_byte(TYPE_IVAR, arg);
 390          }
 391  
 392          switch (BUILTIN_TYPE(obj)) {
 393            case T_CLASS:
 394              if (FL_TEST(obj, FL_SINGLETON)) {
 395                  rb_raise(rb_eTypeError, "singleton class can't be dumped");
 396              }
 397              w_byte(TYPE_CLASS, arg);
 398              {
 399                  VALUE path = rb_class_path(obj);
 400                  if (RSTRING(path)->ptr[0] == '#') {
 401                      rb_raise(rb_eArgError, "can't dump anonymous class %s",
 402                               RSTRING(path)->ptr);
 403                  }
 404                  w_bytes(RSTRING(path)->ptr, RSTRING(path)->len, arg);
 405              }
 406              break;
 407  
 408            case T_MODULE:
 409              w_byte(TYPE_MODULE, arg);
 410              {
 411                  VALUE path = rb_class_path(obj);
 412                  if (RSTRING(path)->ptr[0] == '#') {
 413                      rb_raise(rb_eArgError, "can't dump anonymous module %s",
 414                               RSTRING(path)->ptr);
 415                  }
 416                  w_bytes(RSTRING(path)->ptr, RSTRING(path)->len, arg);
 417              }
 418              break;
 419  
 420            case T_FLOAT:
 421              w_byte(TYPE_FLOAT, arg);
 422              w_float(RFLOAT(obj)->value, arg);
 423              break;
 424  
 425            case T_BIGNUM:
 426              w_byte(TYPE_BIGNUM, arg);
 427              {
 428                  char sign = RBIGNUM(obj)->sign ? '+' : '-';
 429                  long len = RBIGNUM(obj)->len;
 430                  BDIGIT *d = RBIGNUM(obj)->digits;
 431  
 432                  w_byte(sign, arg);
 433                  w_long(SHORTLEN(len), arg); /* w_short? */
 434                  while (len--) {
 435  #if SIZEOF_BDIGITS > SIZEOF_SHORT
 436                      BDIGIT num = *d;
 437                      int i;
 438  
 439                      for (i=0; i<SIZEOF_BDIGITS; i+=SIZEOF_SHORT) {
 440                          w_short(num & SHORTMASK, arg);
 441                          num = SHORTDN(num);
 442                          if (len == 0 && num == 0) break;
 443                      }
 444  #else
 445                      w_short(*d, arg);
 446  #endif
 447                      d++;
 448                  }
 449              }
 450              break;
 451  
 452            case T_STRING:
 453              w_uclass(obj, rb_cString, arg);
 454              w_byte(TYPE_STRING, arg);
 455              w_bytes(RSTRING(obj)->ptr, RSTRING(obj)->len, arg);
 456              break;
 457  
 458            case T_REGEXP:
 459              w_uclass(obj, rb_cRegexp, arg);
 460              w_byte(TYPE_REGEXP, arg);
 461              w_bytes(RREGEXP(obj)->str, RREGEXP(obj)->len, arg);
 462              w_byte(rb_reg_options(obj), arg);
 463              break;
 464  
 465            case T_ARRAY:
 466              w_uclass(obj, rb_cArray, arg);
 467              w_byte(TYPE_ARRAY, arg);
 468              {
 469                  long len = RARRAY(obj)->len;
 470                  VALUE *ptr = RARRAY(obj)->ptr;
 471  
 472                  w_long(len, arg);
 473                  while (len--) {
 474                      w_object(*ptr, arg, limit);
 475                      ptr++;
 476                  }
 477              }
 478              break;
 479  
 480            case T_HASH:
 481              w_uclass(obj, rb_cHash, arg);
 482              if (NIL_P(RHASH(obj)->ifnone)) {
 483                  w_byte(TYPE_HASH, arg);
 484              }
 485              else if (FL_TEST(obj, FL_USER2)) {
 486                  /* FL_USER2 means HASH_PROC_DEFAULT (see hash.c) */
 487                  rb_raise(rb_eArgError, "cannot dump hash with default proc");
 488              }
 489              else {
 490                  w_byte(TYPE_HASH_DEF, arg);
 491              }
 492              w_long(RHASH(obj)->tbl->num_entries, arg);
 493              st_foreach(RHASH(obj)->tbl, hash_each, &c_arg);
 494              if (!NIL_P(RHASH(obj)->ifnone)) {
 495                  w_object(RHASH(obj)->ifnone, arg, limit);
 496              }
 497              break;
 498  
 499            case T_STRUCT:
 500              w_byte(TYPE_STRUCT, arg);
 501              {
 502                  long len = RSTRUCT(obj)->len;
 503                  VALUE mem;
 504                  long i;
 505  
 506                  w_unique(rb_class2name(CLASS_OF(obj)), arg);
 507                  w_long(len, arg);
 508                  mem = rb_struct_iv_get(rb_obj_class(obj), "__member__");
 509                  if (mem == Qnil) {
 510                      rb_raise(rb_eTypeError, "uninitialized struct");
 511                  }
 512                  for (i=0; i<len; i++) {
 513                      w_symbol(SYM2ID(RARRAY(mem)->ptr[i]), arg);
 514                      w_object(RSTRUCT(obj)->ptr[i], arg, limit);
 515                  }
 516              }
 517              break;
 518  
 519            case T_OBJECT:
 520              w_class(TYPE_OBJECT, obj, arg);
 521              w_ivar(ROBJECT(obj)->iv_tbl, &c_arg);
 522              break;
 523  
 524           case T_DATA:
 525             {
 526                 VALUE v;
 527  
 528                 w_class(TYPE_DATA, obj, arg);
 529                 if (!rb_respond_to(obj, s_dump_data)) {
 530                     rb_raise(rb_eTypeError,
 531                              "class %s needs to have instance method `_dump_data'",
 532                              rb_class2name(CLASS_OF(obj)));
 533                 }
 534                 v = rb_funcall(obj, s_dump_data, 0);
 535                 w_object(v, arg, limit);
 536             }
 537             break;
 538  
 539            default:
 540              rb_raise(rb_eTypeError, "can't dump %s",
 541                       rb_class2name(CLASS_OF(obj)));
 542              break;
 543          }
 544      }
 545      if (ivtbl) {
 546          w_ivar(ivtbl, &c_arg);
 547      }
 548  }
 549  
 550  static VALUE
 551  dump(arg)
 552      struct dump_call_arg *arg;
 553  {
 554      w_object(arg->obj, arg->arg, arg->limit);
 555      return 0;
 556  }
 557  
 558  static VALUE
 559  dump_ensure(arg)
 560      struct dump_arg *arg;
 561  {
 562      st_free_table(arg->symbol);
 563      st_free_table(arg->data);
 564      if (!arg->fp && arg->taint) {
 565          OBJ_TAINT(arg->str);
 566      }
 567      return 0;
 568  }
 569  
 570  static VALUE
 571  marshal_dump(argc, argv)
 572      int argc;
 573      VALUE* argv;
 574  {
 575      VALUE obj, port, a1, a2;
 576      int limit = -1;
 577      struct dump_arg arg;
 578      struct dump_call_arg c_arg;
 579  
 580      port = 0;
 581      rb_scan_args(argc, argv, "12", &obj, &a1, &a2);
 582      if (argc == 3) {
 583          if (!NIL_P(a2)) limit = NUM2INT(a2);
 584          port = a1;
 585      }
 586      else if (argc == 2) {
 587          if (FIXNUM_P(a1)) limit = FIX2INT(a1);
 588          else port = a1;
 589      }
 590      if (port) {
 591          if (rb_obj_is_kind_of(port, rb_cIO)) {
 592              OpenFile *fptr;
 593  
 594              rb_io_binmode(port);
 595              GetOpenFile(port, fptr);
 596              rb_io_check_writable(fptr);
 597              arg.fp = (fptr->f2) ? fptr->f2 : fptr->f;
 598          }
 599          else {
 600              rb_raise(rb_eTypeError, "instance of IO needed");
 601          }
 602      }
 603      else {
 604          arg.fp = 0;
 605          port = rb_str_buf_new(0);
 606          arg.str = port;
 607      }
 608  
 609      arg.symbol = st_init_numtable();
 610      arg.data   = st_init_numtable();
 611      arg.taint  = Qfalse;
 612      c_arg.obj = obj;
 613      c_arg.arg = &arg;
 614      c_arg.limit = limit;
 615  
 616      w_byte(MARSHAL_MAJOR, &arg);
 617      w_byte(MARSHAL_MINOR, &arg);
 618  
 619      rb_ensure(dump, (VALUE)&c_arg, dump_ensure, (VALUE)&arg);
 620  
 621      return port;
 622  }
 623  
 624  struct load_arg {
 625      FILE *fp;
 626      char *ptr, *end;
 627      st_table *symbol;
 628      VALUE data;
 629      VALUE proc;
 630      int taint;
 631  };
 632  
 633  static VALUE r_object _((struct load_arg *arg));
 634  
 635  static int
 636  r_byte(arg)
 637      struct load_arg *arg;
 638  {
 639      int c;
 640  
 641      if (arg->fp) {
 642          c = rb_getc(arg->fp);
 643          if (c == EOF) rb_eof_error();
 644      }
 645      else if (arg->ptr < arg->end) {
 646          c = *(unsigned char*)arg->ptr++;
 647      }
 648      else {
 649          rb_raise(rb_eArgError, "marshal data too short");
 650      }
 651      return c;
 652  }
 653  
 654  static unsigned short
 655  r_short(arg)
 656      struct load_arg *arg;
 657  {
 658      unsigned short x;
 659  
 660      x =  r_byte(arg);
 661      x |= r_byte(arg)<<8;
 662  
 663      return x;
 664  }
 665  
 666  static void
 667  long_toobig(size)
 668      int size;
 669  {
 670      rb_raise(rb_eTypeError, "long too big for this architecture (size %d, given %d)",
 671               sizeof(long), size);
 672  }
 673  
 674  #undef SIGN_EXTEND_CHAR
 675  #if __STDC__
 676  # define SIGN_EXTEND_CHAR(c) ((signed char)(c))
 677  #else  /* not __STDC__ */
 678  /* As in Harbison and Steele.  */
 679  # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
 680  #endif
 681  
 682  static long
 683  r_long(arg)
 684      struct load_arg *arg;
 685  {
 686      register long x;
 687      int c = SIGN_EXTEND_CHAR(r_byte(arg));
 688      long i;
 689  
 690      if (c == 0) return 0;
 691      if (c > 0) {
 692          if (4 < c && c < 128) {
 693              return c - 5;
 694          }
 695          if (c > sizeof(long)) long_toobig(c);
 696          x = 0;
 697          for (i=0;i<c;i++) {
 698              x |= (long)r_byte(arg) << (8*i);
 699          }
 700      }
 701      else {
 702          if (-129 < c && c < -4) {
 703              return c + 5;
 704          }
 705          c = -c;
 706          if (c > sizeof(long)) long_toobig(c);
 707          x = -1;
 708          for (i=0;i<c;i++) {
 709              x &= ~((long)0xff << (8*i));
 710              x |= (long)r_byte(arg) << (8*i);
 711          }
 712      }
 713      return x;
 714  }
 715  
 716  #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
 717  
 718  static VALUE
 719  r_bytes0(len, arg)
 720      long len;
 721      struct load_arg *arg;
 722  {
 723      VALUE str;
 724  
 725      if (arg->fp) {
 726          str = rb_str_new(0, len);
 727          if (rb_io_fread(RSTRING(str)->ptr, len, arg->fp) != len) {
 728            too_short:
 729              rb_raise(rb_eArgError, "marshal data too short");
 730          }
 731      }
 732      else {
 733          if (arg->ptr + len > arg->end) {
 734              goto too_short;
 735          }
 736          str = rb_str_new(arg->ptr, len);
 737          arg->ptr += len;
 738      }
 739      return str;
 740  }
 741  
 742  static ID
 743  r_symlink(arg)
 744      struct load_arg *arg;
 745  {
 746      ID id;
 747      long num = r_long(arg);
 748  
 749      if (st_lookup(arg->symbol, num, &id)) {
 750          return id;
 751      }
 752      rb_raise(rb_eTypeError, "bad symbol");
 753  }
 754  
 755  static ID
 756  r_symreal(arg)
 757      struct load_arg *arg;
 758  {
 759      ID id;
 760  
 761      id = rb_intern(RSTRING(r_bytes(arg))->ptr);
 762      st_insert(arg->symbol, arg->symbol->num_entries, id);
 763  
 764      return id;
 765  }
 766  
 767  static ID
 768  r_symbol(arg)
 769      struct load_arg *arg;
 770  {
 771      if (r_byte(arg) == TYPE_SYMLINK) {
 772          return r_symlink(arg);
 773      }
 774      return r_symreal(arg);
 775  }
 776  
 777  static char*
 778  r_unique(arg)
 779      struct load_arg *arg;
 780  {
 781      return rb_id2name(r_symbol(arg));
 782  }
 783  
 784  static VALUE
 785  r_string(arg)
 786      struct load_arg *arg;
 787  {
 788      return r_bytes(arg);
 789  }
 790  
 791  static VALUE
 792  r_regist(v, arg)
 793      VALUE v;
 794      struct load_arg *arg;
 795  {
 796      rb_hash_aset(arg->data, INT2FIX(RHASH(arg->data)->tbl->num_entries), v);
 797      if (arg->taint) OBJ_TAINT(v);
 798      return v;
 799  }
 800  
 801  static void
 802  r_ivar(obj, arg)
 803      VALUE obj;
 804      struct load_arg *arg;
 805  {
 806      long len;
 807  
 808      len = r_long(arg);
 809      if (len > 0) {
 810          while (len--) {
 811              ID id = r_symbol(arg);
 812              VALUE val = r_object(arg);
 813              rb_ivar_set(obj, id, val);
 814          }
 815      }
 816  }
 817  
 818  static VALUE
 819  path2class(path)
 820      char *path;
 821  {
 822      VALUE v = rb_path2class(path);
 823  
 824      if (TYPE(v) != T_CLASS) {
 825          rb_raise(rb_eTypeError, "%s does not refer class", path);
 826      }
 827      return v;
 828  }
 829  
 830  static VALUE
 831  path2module(path)
 832      char *path;
 833  {
 834      VALUE v = rb_path2class(path);
 835  
 836      if (TYPE(v) != T_MODULE) {
 837          rb_raise(rb_eTypeError, "%s does not refer module", path);
 838      }
 839      return v;
 840  }
 841  
 842  static VALUE
 843  r_object0(arg, proc)
 844      struct load_arg *arg;
 845      VALUE proc;
 846  {
 847      VALUE v = Qnil;
 848      int type = r_byte(arg);
 849      long id;
 850  
 851      switch (type) {
 852        case TYPE_LINK:
 853          id = r_long(arg);
 854          v = rb_hash_aref(arg->data, LONG2FIX(id));
 855          if (NIL_P(v)) {
 856              rb_raise(rb_eArgError, "dump format error (unlinked)");
 857          }
 858          return v;
 859  
 860        case TYPE_IVAR:
 861          v = r_object0(arg, 0);
 862          r_ivar(v, arg);
 863          break;
 864  
 865        case TYPE_EXTENDED:
 866          {
 867              VALUE m = path2module(r_unique(arg));
 868  
 869              v = r_object0(arg, 0);
 870              rb_extend_object(v, m);
 871          }
 872          break;
 873  
 874        case TYPE_UCLASS:
 875          {
 876              VALUE c = path2class(r_unique(arg));
 877  
 878              v = r_object0(arg, 0);
 879              if (rb_special_const_p(v) || TYPE(v) == T_OBJECT || TYPE(v) == T_CLASS) {
 880                format_error:
 881                  rb_raise(rb_eArgError, "dump format error (user class)");
 882              }
 883              if (TYPE(v) == T_MODULE || !RTEST(rb_funcall(c, '<', 1, RBASIC(v)->klass))) {
 884                  VALUE tmp = rb_obj_alloc(c);
 885  
 886                  if (TYPE(v) != TYPE(tmp)) goto format_error;
 887              }
 888              RBASIC(v)->klass = c;
 889          }
 890          break;
 891  
 892        case TYPE_NIL:
 893          v = Qnil;
 894          break;
 895  
 896        case TYPE_TRUE:
 897          v = Qtrue;
 898          break;
 899  
 900        case TYPE_FALSE:
 901          v = Qfalse;
 902          break;
 903  
 904        case TYPE_FIXNUM:
 905          {
 906              long i = r_long(arg);
 907              v = LONG2FIX(i);
 908          }
 909          break;
 910  
 911        case TYPE_FLOAT:
 912          {
 913              double d, t = 0.0;
 914              VALUE str = r_bytes(arg);
 915  
 916              if (strcmp(RSTRING(str)->ptr, "nan") == 0) {
 917                  d = t / t;
 918              }
 919              else if (strcmp(RSTRING(str)->ptr, "inf") == 0) {
 920                  d = 1.0 / t;
 921              }
 922              else if (strcmp(RSTRING(str)->ptr, "-inf") == 0) {
 923                  d = -1.0 / t;
 924              }
 925              else {
 926                  d = strtod(RSTRING(str)->ptr, 0);
 927              }
 928              v = rb_float_new(d);
 929              r_regist(v, arg);
 930          }
 931          break;
 932  
 933        case TYPE_BIGNUM:
 934          {
 935              long len;
 936              BDIGIT *digits;
 937  
 938              NEWOBJ(big, struct RBignum);
 939              OBJSETUP(big, rb_cBignum, T_BIGNUM);
 940              big->sign = (r_byte(arg) == '+');
 941              len = r_long(arg);
 942  #if SIZEOF_BDIGITS == SIZEOF_SHORT
 943              big->len = len;
 944  #else
 945              big->len = (len + 1) * 2 / sizeof(BDIGIT);
 946  #endif
 947              big->digits = digits = ALLOC_N(BDIGIT, big->len);
 948              while (len > 0) {
 949  #if SIZEOF_BDIGITS > SIZEOF_SHORT
 950                  BDIGIT num = 0;
 951                  int shift = 0;
 952                  int i;
 953  
 954                  for (i=0; i<SIZEOF_BDIGITS; i+=2) {
 955                      int j = r_short(arg);
 956                      num |= j << shift;
 957                      shift += BITSPERSHORT;
 958                      if (--len == 0) break;
 959                  }
 960                  *digits++ = num;
 961  #else
 962                  *digits++ = r_short(arg);
 963                  len--;
 964  #endif
 965              }
 966              v = rb_big_norm((VALUE)big);
 967              r_regist(v, arg);
 968          }
 969          break;
 970  
 971        case TYPE_STRING:
 972          v = r_regist(r_string(arg), arg);
 973          break;
 974  
 975        case TYPE_REGEXP:
 976          {
 977              volatile VALUE str = r_bytes(arg);
 978              int options = r_byte(arg);
 979              v = r_regist(rb_reg_new(RSTRING(str)->ptr, RSTRING(str)->len, options), arg);
 980          }
 981          break;
 982  
 983        case TYPE_ARRAY:
 984          {
 985              volatile long len = r_long(arg); /* gcc 2.7.2.3 -O2 bug?? */
 986  
 987              v = rb_ary_new2(len);
 988              r_regist(v, arg);
 989              while (len--) {
 990                  rb_ary_push(v, r_object(arg));
 991              }
 992          }
 993          break;
 994  
 995        case TYPE_HASH:
 996        case TYPE_HASH_DEF:
 997          {
 998              long len = r_long(arg);
 999  
1000              v = rb_hash_new();
1001              r_regist(v, arg);
1002              while (len--) {
1003                  VALUE key = r_object(arg);
1004                  VALUE value = r_object(arg);
1005                  rb_hash_aset(v, key, value);
1006              }
1007              if (type == TYPE_HASH_DEF) {
1008                  RHASH(v)->ifnone = r_object(arg);
1009              }
1010          }
1011          break;
1012  
1013        case TYPE_STRUCT:
1014          {
1015              VALUE klass, mem, values;
1016              volatile long i;    /* gcc 2.7.2.3 -O2 bug?? */
1017              long len;
1018              ID slot;
1019  
1020              klass = path2class(r_unique(arg));
1021              mem = rb_struct_iv_get(klass, "__member__");
1022              if (mem == Qnil) {
1023                  rb_raise(rb_eTypeError, "uninitialized struct");
1024              }
1025              len = r_long(arg);
1026  
1027              values = rb_ary_new2(len);
1028              for (i=0; i<len; i++) {
1029                  rb_ary_push(values, Qnil);
1030              }
1031              v = rb_struct_alloc(klass, values);
1032              r_regist(v, arg);
1033              for (i=0; i<len; i++) {
1034                  slot = r_symbol(arg);
1035  
1036                  if (RARRAY(mem)->ptr[i] != ID2SYM(slot)) {
1037                      rb_raise(rb_eTypeError, "struct %s not compatible (:%s for :%s)",
1038                               rb_class2name(klass),
1039                               rb_id2name(slot),
1040                               rb_id2name(SYM2ID(RARRAY(mem)->ptr[i])));
1041                  }
1042                  rb_struct_aset(v, LONG2FIX(i), r_object(arg));
1043              }
1044          }
1045          break;
1046  
1047        case TYPE_USERDEF:
1048          {
1049              VALUE klass = path2class(r_unique(arg));
1050  
1051              if (!rb_respond_to(klass, s_load)) {
1052                  rb_raise(rb_eTypeError, "class %s needs to have method `_load'",
1053                           rb_class2name(klass));
1054              }
1055              v = rb_funcall(klass, s_load, 1, r_string(arg));
1056              r_regist(v, arg);
1057          }
1058          break;
1059  
1060        case TYPE_OBJECT:
1061          {
1062              VALUE klass = path2class(r_unique(arg));
1063  
1064              v = rb_obj_alloc(klass);
1065              if (TYPE(v) != T_OBJECT) {
1066                  rb_raise(rb_eArgError, "dump format error");
1067              }
1068              r_regist(v, arg);
1069              r_ivar(v, arg);
1070          }
1071          break;
1072  
1073        case TYPE_DATA:
1074         {
1075             VALUE klass = path2class(r_unique(arg));
1076             if (rb_respond_to(klass, s_alloc)) {
1077                 static int warn = Qtrue;
1078                 if (warn) {
1079                     rb_warn("define `allocate' instead of `_alloc'");
1080                     warn = Qfalse;
1081                 }
1082                 v = rb_funcall(klass, s_alloc, 0);
1083             }
1084             else {
1085                 v = rb_obj_alloc(klass);
1086             }
1087             if (TYPE(v) != T_DATA) {
1088                 rb_raise(rb_eArgError, "dump format error");
1089             }
1090             r_regist(v, arg);
1091             if (!rb_respond_to(v, s_load_data)) {
1092                 rb_raise(rb_eTypeError,
1093                          "class %s needs to have instance method `_load_data'",
1094                          rb_class2name(klass));
1095             }
1096             rb_funcall(v, s_load_data, 1, r_object0(arg, 0));
1097         }
1098         break;
1099  
1100        case TYPE_MODULE_OLD:
1101          {
1102              VALUE str = r_bytes(arg);
1103  
1104              v = path2module(RSTRING(str)->ptr);
1105              r_regist(v, arg);
1106          }
1107          break;
1108  
1109        case TYPE_CLASS:
1110          {
1111              VALUE str = r_bytes(arg);
1112  
1113              v = path2class(RSTRING(str)->ptr);
1114              r_regist(v, arg);
1115          }
1116          break;
1117  
1118        case TYPE_MODULE:
1119          {
1120              VALUE str = r_bytes(arg);
1121  
1122              v = path2module(RSTRING(str)->ptr);
1123              r_regist(v, arg);
1124          }
1125          break;
1126  
1127        case TYPE_SYMBOL:
1128          v = ID2SYM(r_symreal(arg));
1129          break;
1130  
1131        case TYPE_SYMLINK:
1132          return ID2SYM(r_symlink(arg));
1133  
1134        default:
1135          rb_raise(rb_eArgError, "dump format error(0x%x)", type);
1136          break;
1137      }
1138      if (proc) {
1139          rb_funcall(proc, rb_intern("yield"), 1, v);
1140      }
1141      return v;
1142  }
1143  
1144  static VALUE
1145  r_object(arg)
1146      struct load_arg *arg;
1147  {
1148      return r_object0(arg, arg->proc);
1149  }
1150  
1151  static VALUE
1152  load(arg)
1153      struct load_arg *arg;
1154  {
1155      return r_object(arg);
1156  }
1157  
1158  static VALUE
1159  load_ensure(arg)
1160      struct load_arg *arg;
1161  {
1162      st_free_table(arg->symbol);
1163      return 0;
1164  }
1165  
1166  static VALUE
1167  marshal_load(argc, argv)
1168      int argc;
1169      VALUE *argv;
1170  {
1171      VALUE port, proc;
1172      int major, minor;
1173      VALUE v;
1174      OpenFile *fptr;
1175      struct load_arg arg;
1176      volatile VALUE hash;        /* protect from GC */
1177  
1178      rb_scan_args(argc, argv, "11", &port, &proc);
1179      if (rb_obj_is_kind_of(port, rb_cIO)) {
1180          rb_io_binmode(port);
1181          GetOpenFile(port, fptr);
1182          rb_io_check_readable(fptr);
1183          arg.fp = fptr->f;
1184          arg.taint = Qtrue;
1185      }
1186      else if (rb_respond_to(port, rb_intern("to_str"))) {
1187          arg.taint = OBJ_TAINTED(port); /* original taintedness */
1188          StringValue(port);             /* possible conversion */
1189          arg.fp = 0;
1190          arg.ptr = RSTRING(port)->ptr;
1191          arg.end = arg.ptr + RSTRING(port)->len;
1192      }
1193      else {
1194          rb_raise(rb_eTypeError, "instance of IO needed");
1195      }
1196  
1197      major = r_byte(&arg);
1198      minor = r_byte(&arg);
1199      if (major != MARSHAL_MAJOR || minor > MARSHAL_MINOR) {
1200          rb_raise(rb_eTypeError, "incompatible marshal file format (can't be read)\n\
1201  \tformat version %d.%d required; %d.%d given",
1202                   MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
1203      }
1204      if (RTEST(ruby_verbose) && minor != MARSHAL_MINOR) {
1205          rb_warn("incompatible marshal file format (can be read)\n\
1206  \tformat version %d.%d required; %d.%d given",
1207                  MARSHAL_MAJOR, MARSHAL_MINOR, major, minor);
1208      }
1209  
1210      arg.symbol = st_init_numtable();
1211      arg.data   = hash = rb_hash_new();
1212      if (NIL_P(proc)) arg.proc = 0;
1213      else             arg.proc = proc;
1214      v = rb_ensure(load, (VALUE)&arg, load_ensure, (VALUE)&arg);
1215  
1216      return v;
1217  }
1218  
1219  void
1220  Init_marshal()
1221  {
1222      VALUE rb_mMarshal = rb_define_module("Marshal");
1223  
1224      s_dump = rb_intern("_dump");
1225      s_load = rb_intern("_load");
1226      s_dump_data = rb_intern("_dump_data");
1227      s_load_data = rb_intern("_load_data");
1228      s_alloc = rb_intern("_alloc");
1229      rb_define_module_function(rb_mMarshal, "dump", marshal_dump, -1);
1230      rb_define_module_function(rb_mMarshal, "load", marshal_load, -1);
1231      rb_define_module_function(rb_mMarshal, "restore", marshal_load, -1);
1232  
1233      rb_define_const(rb_mMarshal, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR));
1234      rb_define_const(rb_mMarshal, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR));
1235  }
1236  
1237  VALUE
1238  rb_marshal_dump(obj, port)
1239      VALUE obj, port;
1240  {
1241      int argc = 1;
1242      VALUE argv[2];
1243  
1244      argv[0] = obj;
1245      argv[1] = port;
1246      if (!NIL_P(port)) argc = 2;
1247      return marshal_dump(argc, argv);
1248  }
1249  
1250  VALUE
1251  rb_marshal_load(port)
1252      VALUE port;
1253  {
1254      return marshal_load(1, &port);
1255  }