sprintf.c


DEFINITIONS

This source file includes the following functions.
  1. remove_sign_bits
  2. rb_f_sprintf
  3. fmt_setup


   1  /**********************************************************************
   2  
   3    sprintf.c -
   4  
   5    $Author: michal $
   6    $Date: 2002/08/28 08:05:23 $
   7    created at: Fri Oct 15 10:39:26 JST 1993
   8  
   9    Copyright (C) 1993-2002 Yukihiro Matsumoto
  10    Copyright (C) 2000  Network Applied Communication Laboratory, Inc.
  11    Copyright (C) 2000  Information-technology Promotion Agency, Japan
  12  
  13  **********************************************************************/
  14  
  15  #include "ruby.h"
  16  #include <ctype.h>
  17  #include <math.h>
  18  
  /*
   * BIT_DIGITS(N): generous estimate of the number of decimal digits needed
   * for an N-bit binary magnitude: N * log10(2), truncated, plus one.
   * Used further down to size the output buffer for %f, %g and %G.
   */
  19  #define BIT_DIGITS(N)   (((N)*146)/485 + 1)  /* log10(2) =~ 146/485 */
  20  
      /* Forward declaration; fmt_setup is defined further below. */
  21  static void fmt_setup _((char*,int,int,int,int));
  22  
  /*
   * Strip, in place, the leading run of "all bits set" digits from a digit
   * string: 'f' for base 16, '7' for base 8, '1' for base 2.  For any other
   * base nothing is stripped.  The remaining characters are shifted to the
   * start of str and NUL-terminated; the result is empty when every digit
   * was such a sign digit.
   *
   *   str  - NUL-terminated, writable digit string (modified in place)
   *   base - radix the digits are written in
   *
   * Returns str.
   */
  23  static char*
  24  remove_sign_bits(str, base)
  25      char *str;
  26      int base;
  27  {
          /* s: write cursor, t: read cursor, end: the terminating NUL */
  28      char *s, *t, *end;
  29      unsigned long len;
  30      
  31      s = t = str;
  32      len = strlen(str);
  33      end = str + len;
  34  
          /* advance t past the leading sign digits of the given base */
  35      if (base == 16) {
  36          while (t<end && *t == 'f') {
  37              t++;
  38          }
  39      }
  40      else if (base == 8) {
  41          while (t<end && *t == '7') {
  42              t++;
  43          }
  44      }
  45      else if (base == 2) {
  46          while (t<end && *t == '1') {
  47              t++;
  48          }
  49      }
          /* move what is left down to the front of the buffer */
  50      while (*t) *s++ = *t++;
  51      *s = '\0';
  52  
  53      return str;
  54  }
  55  
  /*
   * Bits of the `flags' variable in rb_f_sprintf: which modifiers have been
   * seen so far for the conversion specification being parsed.  FWIDTH and
   * FPREC record that a width / precision was given.
   */
  56  #define FNONE  0
  57  #define FSHARP 1
  58  #define FMINUS 2
  59  #define FPLUS  4
  60  #define FZERO  8
  61  #define FSPACE 16
  62  #define FWIDTH 32
  63  #define FPREC  64
  64  
      /*
       * CHECK(l): grow buf by doubling bsiz until blen + l is strictly less
       * than bsiz.  Uses the locals buf, blen and bsiz of the enclosing
       * function.  A negative l never triggers growth.
       */
  65  #define CHECK(l) \
  66      while (blen + (l) >= bsiz) {\
  67          REALLOC_N(buf, char, bsiz*2);\
  68          bsiz*=2;\
  69      }
  70  
      /*
       * PUSH(s, l): append l bytes starting at s to buf, growing it first if
       * needed.  Note that l is evaluated more than once.
       */
  71  #define PUSH(s, l) do { \
  72      CHECK(l);\
  73      memcpy(&buf[blen], s, l);\
  74      blen += (l);\
  75  } while (0)
  76  
      /*
       * GETARG(): yield argv[nextarg] and advance nextarg; raises ArgumentError
       * when nextarg has reached argc.
       */
  77  #define GETARG() \
  78      ((nextarg >= argc) ? (rb_raise(rb_eArgError, "too few argument."), 0) : argv[nextarg++])
  79  
      /*
       * GETASTER(val): fetch the int for a `*' width or precision into val.
       * On entry p points at the '*'.  Digits after the '*' are scanned into n:
       *   - if they are followed by '$' (the `*n$' form), argv[n] is used,
       *     nextarg is restored afterwards, and p is left on the '$';
       *   - otherwise the next sequential argument is used and p is put back
       *     on the '*'.
       * Either way the caller performs the final p++.  Raises ArgumentError if
       * the format ends while scanning.  Uses the locals t, n, p, end, tmp and
       * nextarg of the enclosing function; the argument goes through NUM2INT.
       */
  80  #define GETASTER(val) do { \
  81      t = p++; \
  82      n = 0; \
  83      for (; p < end && ISDIGIT(*p); p++) { \
  84          n = 10 * n + (*p - '0'); \
  85      } \
  86      if (p >= end) { \
  87          rb_raise(rb_eArgError, "malformed format string - %%*[0-9]"); \
  88      } \
  89      if (*p == '$') { \
  90          int curarg = nextarg; \
  91          nextarg = n; \
  92          tmp = GETARG(); \
  93          nextarg = curarg; \
  94      } \
  95      else { \
  96          tmp = GETARG(); \
  97          p = t; \
  98      } \
  99      val = NUM2INT(tmp); \
 100  } while (0)
 101  
 /*
  * rb_f_sprintf(argc, argv)
  *
  * sprintf-style formatting.  argv[0] is the format (passed through
  * StringValue); the remaining elements of argv are consumed by the
  * conversion specifications, either sequentially or by absolute index
  * with the `n$' form.
  *
  * Supported conversions: %c %s %d %i %u %o %x %X %b %B %f %g %G %e %E and
  * a literal %%.  Flags: space, '#', '+', '-', '0'; width and precision may
  * be literal digits or `*' (taken from an argument).
  *
  * Without a '+' or space flag, a negative argument to %o %x %X %b %B is
  * shown in two's complement notation with a leading "..", e.g. "..f85".
  *
  * Returns a new String built from the formatted bytes.  The result is
  * tainted when the format, or the string form of any %s argument, is
  * tainted.
  *
  * Raises ArgumentError for a malformed specification, for a width or
  * precision given twice, and when a specification needs an argument that
  * was not supplied.
  *
  * NOTE(review): buf is released only at sprint_exit.  If rb_raise leaves
  * this function non-locally (presumably it does), the buffer is not freed
  * on the error paths -- confirm and consider a String-backed buffer.
  */
 102  VALUE
 103  rb_f_sprintf(argc, argv)
 104      int argc;
 105      VALUE *argv;
 106  {
 107      VALUE fmt;
 108      char *buf, *p, *end;
 109      int blen, bsiz;
 110      VALUE result;
 111  
 112      int width, prec, flags = FNONE;
 113      int nextarg = 0;
 114      int tainted = 0;
 115      VALUE tmp;
 116      VALUE str;
 117  
          /* the first argument is the format string itself */
 118      fmt = GETARG();
 119      if (OBJ_TAINTED(fmt)) tainted = 1;
 120      StringValue(fmt);
 121      p = RSTRING(fmt)->ptr;
 122      end = p + RSTRING(fmt)->len;
          /* output accumulates in buf: blen bytes used out of bsiz */
 123      blen = 0;
 124      bsiz = 120;
 125      buf = ALLOC_N(char, bsiz);
 126  
          /*
           * Each iteration copies the literal text up to the next '%' and
           * then handles exactly one conversion specification.
           */
 127      for (; p < end; p++) {
 128          char *t;
 129          int n;
 130  
 131          for (t = p; t < end && *t != '%'; t++) ;
 132          PUSH(p, t - p);
 133          if (t >= end) {
 134              /* end of fmt string */
 135              goto sprint_exit;
 136          }
 137          p = t + 1;              /* skip `%' */
 138  
              /* -1 means "not given" */
 139          width = prec = -1;
              /*
               * Flags, width and precision are consumed one item at a time;
               * each of those cases advances p and jumps back here until a
               * conversion character is reached.
               */
 140        retry:
 141          switch (*p) {
 142            default:
 143              if (ISPRINT(*p))
 144                  rb_raise(rb_eArgError, "malformed format string - %%%c", *p);
 145              else
 146                  rb_raise(rb_eArgError, "malformed format string");
 147              break;
 148  
 149            case ' ':
 150              flags |= FSPACE;
 151              p++;
 152              goto retry;
 153  
 154            case '#':
 155              flags |= FSHARP;
 156              p++;
 157              goto retry;
 158  
 159            case '+':
 160              flags |= FPLUS;
 161              p++;
 162              goto retry;
 163  
 164            case '-':
 165              flags |= FMINUS;
 166              p++;
 167              goto retry;
 168  
 169            case '0':
 170              flags |= FZERO;
 171              p++;
 172              goto retry;
 173  
                /*
                 * A run of digits is either the field width or, when it is
                 * followed by '$', the index of the argument to use next
                 * (an index into argv, where argv[0] is the format).
                 */
 174            case '1': case '2': case '3': case '4':
 175            case '5': case '6': case '7': case '8': case '9':
 176              n = 0;
 177              for (; p < end && ISDIGIT(*p); p++) {
 178                  n = 10 * n + (*p - '0');
 179              }
 180              if (p >= end) {
 181                  rb_raise(rb_eArgError, "malformed format string - %%[0-9]");
 182              }
 183              if (*p == '$') {
 184                  nextarg = n;
 185                  p++;
 186                  goto retry;
 187              }
 188              width = n;
 189              flags |= FWIDTH;
 190              goto retry;
 191  
                /* width taken from an argument; negative means left-justify */
 192            case '*':
 193              if (flags & FWIDTH) {
 194                  rb_raise(rb_eArgError, "width given twice");
 195              }
 196  
 197              flags |= FWIDTH;
 198              GETASTER(width);
 199              if (width < 0) {
 200                  flags |= FMINUS;
 201                  width = -width;
 202              }
 203              p++;
 204              goto retry;
 205  
                /* precision: literal digits or `*' */
 206            case '.':
 207              if (flags & FPREC) {
 208                  rb_raise(rb_eArgError, "precision given twice");
 209              }
 210              flags |= FPREC;
 211  
 212              prec = 0;
 213              p++;
 214              if (*p == '*') {
 215                  GETASTER(prec);
 216                  if (prec < 0) { /* ignore negative precision */
 217                      flags &= ~FPREC;
 218                  }
 219                  p++;
 220                  goto retry;
 221              }
 222  
 223              for (; p < end && ISDIGIT(*p); p++) {
 224                  prec = 10 * prec + (*p - '0');
 225              }
 226              if (p >= end) {
 227                  rb_raise(rb_eArgError, "malformed format string - %%.[0-9]");
 228              }
 229              goto retry;
 230  
                /*
                 * '%' followed by a newline, a NUL or another '%' produces a
                 * literal '%'.  For the newline, p is backed up so that the
                 * newline itself is copied by the next loop iteration.
                 */
 231            case '\n':
 232              p--;
 233            case '\0':
 234            case '%':
 235              if (flags != FNONE) {
 236                  rb_raise(rb_eArgError, "illegal format character - %%");
 237              }
 238              PUSH("%", 1);
 239              break;
 240  
                /* %c: one byte (low 8 bits of the integer argument) */
 241            case 'c':
 242              {
 243                  VALUE val = GETARG();
 244                  char c;
 245  
                      /* right-justified unless '-' was given */
 246                  if (!(flags & FMINUS))
 247                      while (--width > 0)
 248                          PUSH(" ", 1);
 249                  c = NUM2INT(val) & 0xff;
 250                  PUSH(&c, 1);
                      /* only pads when the loop above was skipped */
 251                  while (--width > 0)
 252                      PUSH(" ", 1);
 253              }
 254              break;
 255  
                /* %s: string form of the argument; precision truncates it */
 256            case 's':
 257              {
 258                  VALUE arg = GETARG();
 259                  long len;
 260  
 261                  str = rb_obj_as_string(arg);
 262                  if (OBJ_TAINTED(str)) tainted = 1;
 263                  len = RSTRING(str)->len;
 264                  if (flags&FPREC) {
 265                      if (prec < len) {
 266                          len = prec;
 267                      }
 268                  }
 269                  if (flags&FWIDTH) {
 270                      if (width > len) {
                              /* the padded field occupies exactly `width' bytes */
 271                          CHECK(width);
 272                          width -= len;
 273                          if (!(flags&FMINUS)) {
 274                              while (width--) {
 275                                  buf[blen++] = ' ';
 276                              }
 277                          }
 278                          memcpy(&buf[blen], RSTRING(str)->ptr, len);
 279                          blen += len;
 280                          if (flags&FMINUS) {
 281                              while (width--) {
 282                                  buf[blen++] = ' ';
 283                              }
 284                          }
 285                          break;
 286                      }
 287                  }
 288                  PUSH(RSTRING(str)->ptr, len);
 289              }
 290              break;
 291  
                /* integer conversions */
 292            case 'd':
 293            case 'i':
 294            case 'o':
 295            case 'x':
 296            case 'X':
 297            case 'b':
 298            case 'B':
 299            case 'u':
 300              {
 301                  volatile VALUE val = GETARG();
 302                  char fbuf[32], nbuf[64], *s, *t;
 303                  char *prefix = 0;
 304                  int sign = 0;
 305                  char sc = 0;
 306                  long v;
 307                  int base, bignum = 0;
 308                  int len, pos;
 309  
                      /*
                       * sign != 0: print as sign character plus magnitude.
                       * Always so for %d/%i; for the others only when '+' or
                       * space was given.  Otherwise negatives are shown in
                       * two's complement form with a leading "..".
                       */
 310                  switch (*p) {
 311                    case 'd':
 312                    case 'i':
 313                      sign = 1; break;
 314                    case 'o':
 315                    case 'x':
 316                    case 'X':
 317                    case 'b':
 318                    case 'B':
 319                    case 'u':
 320                    default:
 321                      if (flags&(FPLUS|FSPACE)) sign = 1;
 322                      break;
 323                  }
                      /* '#': radix prefix, charged against the field width */
 324                  if (flags & FSHARP) {
 325                      if (*p == 'o') prefix = "0";
 326                      else if (*p == 'x') prefix = "0x";
 327                      else if (*p == 'X') prefix = "0X";
 328                      else if (*p == 'b') prefix = "0b";
 329                      else if (*p == 'B') prefix = "0B";
 330                      if (prefix) {
 331                          width -= strlen(prefix);
 332                      }
 333                  }
 334  
                      /*
                       * Normalise the argument: afterwards it is either a
                       * Fixnum (value in v) or a Bignum (bignum != 0).
                       */
 335                bin_retry:
 336                  switch (TYPE(val)) {
 337                    case T_FLOAT:
 338                      val = rb_dbl2big(RFLOAT(val)->value);
 339                      if (FIXNUM_P(val)) goto bin_retry;
 340                      bignum = 1;
 341                      break;
 342                    case T_STRING:
 343                      val = rb_str_to_inum(val, 0, Qtrue);
 344                      goto bin_retry;
 345                    case T_BIGNUM:
 346                      bignum = 1;
 347                      break;
 348                    case T_FIXNUM:
 349                      v = FIX2LONG(val);
 350                      break;
 351                    default:
 352                      val = rb_Integer(val);
 353                      goto bin_retry;
 354                  }
 355  
 356                  if (*p == 'u' || *p == 'd' || *p == 'i') base = 10;
 357                  else if (*p == 'x' || *p == 'X') base = 16;
 358                  else if (*p == 'o') base = 8;
 359                  else if (*p == 'b' || *p == 'B') base = 2;
 360                  if (!bignum) {
                          /* binary output always goes through the Bignum path */
 361                      if (base == 2) {
 362                          val = rb_int2big(v);
 363                          goto bin_retry;
 364                      }
                          /* Fixnum, sign + magnitude: let C's sprintf do it */
 365                      if (sign) {
 366                          char c = *p;
 367                          if (c == 'i') c = 'd'; /* %d and %i are identical */
 368                          if (v < 0) {
 369                              v = -v;
 370                              sc = '-';
 371                              width--;
 372                          }
 373                          else if (flags & FPLUS) {
 374                              sc = '+';
 375                              width--;
 376                          }
 377                          else if (flags & FSPACE) {
 378                              sc = ' ';
 379                              width--;
 380                          }
 381                          sprintf(fbuf, "%%l%c", c);
 382                          sprintf(nbuf, fbuf, v);
 383                          s = nbuf;
 384                          goto format_integer;
 385                      }
                          /* Fixnum, unsigned / two's complement presentation */
 386                      s = nbuf;
 387                      if (v < 0) {
 388                          if (base == 10) {
 389                              rb_warning("negative number for %%u specifier");
 390                          }
 391                          else {
 392                              strcpy(s, "..");
 393                              s += 2;
 394                          }
 395                      }
                          /*
                           * Always print lowercase here ('X' -> 'x'):
                           * remove_sign_bits() and the check below only
                           * recognise a lowercase 'f'.  Upper-casing for %X
                           * is done once, in format_integer.
                           */
 396                      sprintf(fbuf, "%%l%c", *p == 'X' ? 'x' : *p);
 397                      sprintf(s, fbuf, v);
 398                      if (v < 0) {
 399                          char d = 0;
 400  
                              /* collapse the leading sign digits to exactly one */
 401                          remove_sign_bits(s, base);
 402                          switch (base) {
 403                            case 16:
 404                              d = 'f'; break;
 405                            case 8:
 406                              d = '7'; break;
 407                          }
 408                          if (d && *s != d) {
 409                              memmove(s+1, s, strlen(s)+1);
 410                              *s = d;
 411                          }
 412                      }
 413                      s = nbuf;
 414                      goto format_integer;
 415                  }
 416  
                      /* Bignum, sign + magnitude */
 417                  if (sign) {
 418                      val = rb_big2str(val, base);
 419                      s = RSTRING(val)->ptr;
 420                      if (s[0] == '-') {
 421                          s++;
 422                          sc = '-';
 423                          width--;
 424                      }
 425                      else if (flags & FPLUS) {
 426                          sc = '+';
 427                          width--;
 428                      }
 429                      else if (flags & FSPACE) {
 430                          sc = ' ';
 431                          width--;
 432                      }
 433                      goto format_integer;
 434                  }
                      /* Bignum, two's complement presentation for negatives */
 435                  if (!RBIGNUM(val)->sign) {
 436                      val = rb_big_clone(val);
 437                      rb_big_2comp(val);
 438                  }
 439                  val = rb_big2str(val, base);
 440                  s = RSTRING(val)->ptr;
 441                  if (*s == '-') {
 442                      if (base == 10) {
 443                          rb_warning("negative number for %%u specifier");
 444                          s++;
 445                      }
 446                      else {
                              /*
                               * Build ".." + one sign digit (unless the
                               * stripped string already starts with it) +
                               * the remaining digits, in a fresh String.
                               */
 447                          remove_sign_bits(++s, base);
 448                          val = rb_str_new(0, 3+strlen(s));
 449                          t = RSTRING(val)->ptr;
 450                          strcpy(t, "..");
 451                          t += 2;
 452                          switch (base) {
 453                            case 16:
 454                              if (s[0] != 'f') strcpy(t++, "f"); break;
 455                            case 8:
 456                              if (s[0] != '7') strcpy(t++, "7"); break;
 457                            case 2:
 458                              if (s[0] != '1') strcpy(t++, "1"); break;
 459                          }
 460                          strcpy(t, s);
 461                          bignum = 2;
 462                      }
 463                  }
 464                  s  = RSTRING(val)->ptr;
 465  
                      /*
                       * Common layout.  s holds the digits, sc the sign
                       * character (or 0), prefix the '#' prefix (or NULL).
                       * Emitted in order: leading spaces, sign, prefix,
                       * '0' or '.' fill, precision fill, digits, trailing
                       * spaces.
                       *
                       * pos is the absolute index in buf from which a run of
                       * '.' filler is rewritten as sign digits at the very
                       * end, or -1 when there is nothing to rewrite.
                       */
 466                format_integer:
 467                  pos = -1;
 468                  len = strlen(s);
 469  
 470                  if (*p == 'X') {
 471                      char *pp = s;
 472                      while (*pp) {
 473                          *pp = toupper(*pp);
 474                          pp++;
 475                      }
 476                  }
 477                  if (prec < len) prec = len;
 478                  width -= prec;
 479                  if (!(flags&(FZERO|FMINUS)) && s[0] != '.') {
 480                      CHECK(width);
 481                      while (width-->0) {
 482                          buf[blen++] = ' ';
 483                      }
 484                  }
 485                  if (sc) PUSH(&sc, 1);
 486                  if (prefix) {
 487                      int plen = strlen(prefix);
 488                      PUSH(prefix, plen);
                          /*
                           * pos must stay -1 here.  It is an absolute index
                           * into buf and is only set by the ".." branch
                           * below; shifting it by the prefix length would
                           * make the final pass rewrite unrelated '.'
                           * characters already in the output.
                           */
 490                  }
 491                  if (!(flags & FMINUS)) {
 492                      char c = ' ';
 493  
 494                      if (s[0] == '.') {
                              /*
                               * Two's complement number: fill with '.' for
                               * now.  With an effective precision the whole
                               * run (including the "..") is rewritten later;
                               * otherwise the first two dots are kept.
                               */
 495                          c = '.';
 496                          if ((flags & FPREC) && prec > len) {
 497                              pos = blen;
 498                          }
 499                          else {
 500                              pos = blen + 2;
 501                          }
 502                      }
 503                      else if (flags & FZERO) c = '0';
 504                      CHECK(width);
 505                      while (width-->0) {
 506                          buf[blen++] = c;
 507                      }
 508                  }
 509                  CHECK(prec - len);
 510                  while (len < prec--) {
 511                      buf[blen++] = s[0]=='.'?'.':'0';
 512                  }
 513                  PUSH(s, len);
 514                  CHECK(width);
 515                  while (width-->0) {
 516                      buf[blen++] = ' ';
 517                  }
                      /* rewrite the '.' filler starting at pos as sign digits */
 518                  if (pos >= 0 && buf[pos] == '.') {
 519                      char c = '.';
 520  
 521                      switch (base) {
 522                        case 16:
 523                          if (*p == 'X') c = 'F';
 524                          else c = 'f';
 525                          break;
 526                        case 8:
 527                          c = '7'; break;
 528                        case 2:
 529                          c = '1'; break;
 530                      }
 531                      s = &buf[pos];
                          /* buf is not NUL-terminated: stop at the end of output */
 532                      while (s < &buf[blen] && *s == '.') {
 533                          *s++ = c;
 534                      }
 535                  }
 536              }
 537              break;
 538  
                /* floating point: delegated to C's sprintf via fmt_setup() */
 539            case 'f':
 540            case 'g':
 541            case 'G':
 542            case 'e':
 543            case 'E':
 544              {
 545                  VALUE val = GETARG();
 546                  double fval;
 547                  int i, need = 6;
 548                  char fbuf[32];
 549  
 550                  fval = RFLOAT(rb_Float(val))->value;
 551                  fmt_setup(fbuf, *p, flags, width, prec);
                      /*
                       * Estimate the space needed: decimal digits of the
                       * integer part (not for %e/%E), plus the precision
                       * (6 when none was given), at least `width', plus a
                       * margin of 20.
                       */
 552                  need = 0;
 553                  if (*p != 'e' && *p != 'E') {
 554                      i = INT_MIN;
 555                      frexp(fval, &i);
 556                      if (i > 0)
 557                          need = BIT_DIGITS(i);
 558                  }
 559                  need += (flags&FPREC) ? prec : 6;
 560                  if ((flags&FWIDTH) && need < width)
 561                      need = width;
 562                  need += 20;
 563  
 564                  CHECK(need);
 565                  sprintf(&buf[blen], fbuf, fval);
 566                  blen += strlen(&buf[blen]);
 567              }
 568              break;
 569          }
              /* flags apply to a single conversion only */
 570          flags = FNONE;
 571      }
 572  
 573    sprint_exit:
 574  #if 0
 575      /* XXX - We cannot validiate the number of arguments because
 576       *       the format string may contain `n$'-style argument selector.
 577       */
 578      if (RTEST(ruby_verbose) && nextarg < argc) {
 579          rb_raise(rb_eArgError, "too many argument for format string");
 580      }
 581  #endif
          /* copy the accumulated bytes into a String and drop the work buffer */
 582      result = rb_str_new(buf, blen);
 583      free(buf);
 584  
 585      if (tainted) OBJ_TAINT(result);
 586      return result;
 587  }
 588  
 /*
  * Assemble a C printf conversion specification in buf from the parsed
  * pieces of a format directive: '%', the flag characters, the width (when
  * FWIDTH is set), ".prec" (when FPREC is set) and finally the conversion
  * character c.  The result is NUL-terminated.
  *
  *   buf   - destination; no size is passed, so the caller must supply
  *           enough room (rb_f_sprintf passes a 32-byte array)
  *   c     - conversion character, e.g. 'f' or 'g'
  *   flags - bit set of FSHARP/FPLUS/FMINUS/FZERO/FSPACE/FWIDTH/FPREC
  *   width - field width, used only when FWIDTH is set
  *   prec  - precision, used only when FPREC is set
  */
 589  static void
 590  fmt_setup(buf, c, flags, width, prec)
 591      char *buf;
 592      int c;
 593      int flags, width, prec;
 594  {
 595      *buf++ = '%';
          /* flag characters, always emitted in this fixed order */
 596      if (flags & FSHARP) *buf++ = '#';
 597      if (flags & FPLUS)  *buf++ = '+';
 598      if (flags & FMINUS) *buf++ = '-';
 599      if (flags & FZERO)  *buf++ = '0';
 600      if (flags & FSPACE) *buf++ = ' ';
 601  
 602      if (flags & FWIDTH) {
 603          sprintf(buf, "%d", width);
 604          buf += strlen(buf);
 605      }
 606  
 607      if (flags & FPREC) {
 608          sprintf(buf, ".%d", prec);
 609          buf += strlen(buf);
 610      }
 611  
 612      *buf++ = c;
 613      *buf = '\0';
 614  }