Ruby  1.9.3p392(2013-02-22revision39386)
sprintf.c
Go to the documentation of this file.
1 /**********************************************************************
2 
3  sprintf.c -
4 
5  $Author: nagachika $
6  created at: Fri Oct 15 10:39:26 JST 1993
7 
8  Copyright (C) 1993-2007 Yukihiro Matsumoto
9  Copyright (C) 2000 Network Applied Communication Laboratory, Inc.
10  Copyright (C) 2000 Information-technology Promotion Agency, Japan
11 
12 **********************************************************************/
13 
14 #include "ruby/ruby.h"
15 #include "ruby/re.h"
16 #include "ruby/encoding.h"
17 #include <math.h>
18 #include <stdarg.h>
19 
20 #ifdef HAVE_IEEEFP_H
21 #include <ieeefp.h>
22 #endif
23 
/* BIT_DIGITS(N): N*146/485 + 1, used below as an upper bound on the decimal
 * digits needed for a value with binary exponent N (146/485 ~= 0.30103). */
24 #define BIT_DIGITS(N) (((N)*146)/485 + 1) /* log10(2) =~ 146/485 */
/* Number of bits in SIZEOF_BDIGITS bytes (SIZEOF_BDIGITS comes from outside
 * this file). */
25 #define BITSPERDIG (SIZEOF_BDIGITS*CHAR_BIT)
/* EXTENDSIGN(n, l): mask of the low n bits, used by remove_sign_bits() to OR
 * sign bits into the leading octal digit of an l-digit string.
 * NOTE(review): this left-shifts the negative value ~0 and right-shifts a
 * negative int, which is undefined / implementation-defined in ISO C --
 * presumably relies on arithmetic shifts; confirm before touching. */
26 #define EXTENDSIGN(n, l) (((~0 << (n)) >> (((n)*(l)) % BITSPERDIG)) & ~(~0 << (n)))
27 
28 static void fmt_setup(char*,size_t,int,int,int,int);
29 
/*
 * Skips the leading run of "sign" digits in a two's-complement digit string
 * and returns a pointer to the first digit that is not one.
 *
 * The sign digit is 'f' for base 16, '7' for base 8 and '1' for base 2.
 * For base 8 the first digit is first OR-ed with EXTENDSIGN(3, strlen(str))
 * (this modifies str in place).  Any other base returns str untouched.
 */
static char*
remove_sign_bits(char *str, int base)
{
    char sign_digit;

    switch (base) {
      case 16:
        sign_digit = 'f';
        break;
      case 8:
        *str |= EXTENDSIGN(3, strlen(str));
        sign_digit = '7';
        break;
      case 2:
        sign_digit = '1';
        break;
      default:
        return str;
    }

    while (*str == sign_digit) {
        str++;
    }
    return str;
}
54 
/*
 * Returns the digit used to pad a negative number shown in complement form:
 * 'F' or 'f' for base 16 (upper case when the conversion character *p is
 * 'X'), '7' for base 8, '1' for base 2, and '.' for any other base.
 */
static char
sign_bits(int base, const char *p)
{
    if (base == 16) {
        return (*p == 'X') ? 'F' : 'f';
    }
    if (base == 8) {
        return '7';
    }
    if (base == 2) {
        return '1';
    }
    return '.';
}
72 
/* Bit flags collected in `flags' while rb_str_format() parses one format
 * sequence; the set is reset to FNONE after each conversion. */
73 #define FNONE 0
/* `#' flag seen */
74 #define FSHARP 1
/* `-' flag seen, or a negative `*' width was given */
75 #define FMINUS 2
/* `+' flag seen */
76 #define FPLUS 4
/* `0' flag seen */
77 #define FZERO 8
/* ` ' (space) flag seen */
78 #define FSPACE 16
/* a field width was given (digits or `*') */
79 #define FWIDTH 32
/* a usable precision is in effect (cleared again for a negative `*' precision) */
80 #define FPREC 64
/* a `.' was seen; stays set even when FPREC is cleared, so that a second
 * precision, or a flag/width after the precision, can be rejected */
81 #define FPREC0 128
82 
/* CHECK(l): make room for l more bytes in the output string.
 * Expects the locals `result', `buf', `blen' and `bsiz' at the expansion
 * site.  Doubles bsiz until blen + l < bsiz, resizes `result' to bsiz
 * (unconditionally), restores the code range that was read before the
 * resize, and reloads `buf' from the string.  `l' is evaluated more than
 * once. */
83 #define CHECK(l) do {\
84  int cr = ENC_CODERANGE(result);\
85  while (blen + (l) >= bsiz) {\
86  bsiz*=2;\
87  }\
88  rb_str_resize(result, bsiz);\
89  ENC_CODERANGE_SET(result, cr);\
90  buf = RSTRING_PTR(result);\
91 } while (0)
92 
/* PUSH(s, l): append l bytes starting at s to the output and advance blen. */
93 #define PUSH(s, l) do { \
94  CHECK(l);\
95  memcpy(&buf[blen], (s), (l));\
96  blen += (l);\
97 } while (0)
98 
/* FILL(c, l): append l copies of byte c to the output and advance blen.
 * l is handed to memset() as is, so it must not be negative. */
99 #define FILL(c, l) do { \
100  CHECK(l);\
101  memset(&buf[blen], (c), (l));\
102  blen += (l);\
103 } while (0)
104 
/* Argument fetchers for rb_str_format().  They rely on its locals
 * `nextvalue', `nextarg', `posarg', `argc', `argv' and `hash'.
 * `posarg' records which addressing style has been used so far:
 *   0  none yet,  > 0  unnumbered (relative),  -1  numbered (N$),
 *  -2  named (<name> or {name}).
 * Mixing styles raises ArgumentError. */

/* GETARG(): the value for the current conversion.  Yields `nextvalue' when
 * one was already selected; otherwise takes the argument at `nextarg',
 * stores that index in `posarg' and increments `nextarg'. */
105 #define GETARG() (nextvalue != Qundef ? nextvalue : \
106  posarg == -1 ? \
107  (rb_raise(rb_eArgError, "unnumbered(%d) mixed with numbered", nextarg), 0) : \
108  posarg == -2 ? \
109  (rb_raise(rb_eArgError, "unnumbered(%d) mixed with named", nextarg), 0) : \
110  (posarg = nextarg++, GETNTHARG(posarg)))
111 
/* GETPOSARG(n): the n-th argument for an `n$' reference.  Raises when n < 1
 * or when unnumbered/named arguments were used before; otherwise marks the
 * numbered style (posarg = -1). */
112 #define GETPOSARG(n) (posarg > 0 ? \
113  (rb_raise(rb_eArgError, "numbered(%d) after unnumbered(%d)", (n), posarg), 0) : \
114  posarg == -2 ? \
115  (rb_raise(rb_eArgError, "numbered(%d) after named", (n)), 0) : \
116  (((n) < 1) ? (rb_raise(rb_eArgError, "invalid index - %d$", (n)), 0) : \
117  (posarg = -1, GETNTHARG(n))))
118 
/* GETNTHARG(nth): argv[nth], raising "too few arguments" when nth >= argc. */
119 #define GETNTHARG(nth) \
120  (((nth) >= argc) ? (rb_raise(rb_eArgError, "too few arguments"), 0) : argv[(nth)])
121 
/* GETNAMEARG(id, name, len): look `id' up in the hash argument (obtained
 * through get_hash()); yields Qundef when the key is absent.  `name'/`len'
 * are only used for the error messages.  Marks the named style
 * (posarg = -2). */
122 #define GETNAMEARG(id, name, len) ( \
123  posarg > 0 ? \
124  (rb_raise(rb_eArgError, "named%.*s after unnumbered(%d)", (len), (name), posarg), 0) : \
125  posarg == -1 ? \
126  (rb_raise(rb_eArgError, "named%.*s after numbered", (len), (name)), 0) : \
127  (posarg = -2, rb_hash_lookup2(get_hash(&hash, argc, argv), (id), Qundef)))
128 
/*
 * GETNUM(n, val): accumulate the run of decimal digits at `p' into `n'.
 *
 * Expects the locals `p', `end' and `enc' at the expansion site.  `n' must
 * be a non-negative int lvalue; `val' is only stringified for the
 * "<val> too big" message.  Raises ArgumentError when the number does not
 * fit in an int, or when the digits run up to the end of the format string.
 *
 * The range test is done BEFORE multiplying: computing 10*n + digit first
 * and checking afterwards would rely on signed overflow, which is undefined
 * behaviour and may be optimized away by the compiler.
 */
#define GETNUM(n, val) \
    for (; p < end && rb_enc_isdigit(*p, enc); p++) { \
        int digit = *p - '0'; \
        if ((n) > (INT_MAX - digit) / 10) { \
            rb_raise(rb_eArgError, #val " too big"); \
        } \
        (n) = 10 * (n) + digit; \
    } \
    if (p >= end) { \
        rb_raise(rb_eArgError, "malformed format string - %%*[0-9]"); \
    }
140 
/* GETASTER(val): handle a `*' width or precision; expects `p' to point at
 * the `*' and uses the locals `t', `n' and `tmp'.
 * Parses optional digits after the `*'.  If they are followed by `$', the
 * numbered argument is fetched and p is left on the `$'; otherwise the next
 * argument is fetched with GETARG() and p is rewound to the `*'.
 * The fetched value is converted with NUM2INT and stored in `val'. */
141 #define GETASTER(val) do { \
142  t = p++; \
143  n = 0; \
144  GETNUM(n, (val)); \
145  if (*p == '$') { \
146  tmp = GETPOSARG(n); \
147  } \
148  else { \
149  tmp = GETARG(); \
150  p = t; \
151  } \
152  (val) = NUM2INT(tmp); \
153 } while (0)
154 
155 static VALUE
156 get_hash(volatile VALUE *hash, int argc, const VALUE *argv)
157 {
158  VALUE tmp;
159 
160  if (*hash != Qundef) return *hash;
161  if (argc != 2) {
162  rb_raise(rb_eArgError, "one hash required");
163  }
164  tmp = rb_check_convert_type(argv[1], T_HASH, "Hash", "to_hash");
165  if (NIL_P(tmp)) {
166  rb_raise(rb_eArgError, "one hash required");
167  }
168  return (*hash = tmp);
169 }
170 
171 /*
172  * call-seq:
173  * format(format_string [, arguments...] ) -> string
174  * sprintf(format_string [, arguments...] ) -> string
175  *
176  * Returns the string resulting from applying <i>format_string</i> to
177  * any additional arguments. Within the format string, any characters
178  * other than format sequences are copied to the result.
179  *
180  * The syntax of a format sequence is as follows.
181  *
182  * %[flags][width][.precision]type
183  *
184  * A format
185  * sequence consists of a percent sign, followed by optional flags,
186  * width, and precision indicators, then terminated with a field type
187  * character. The field type controls how the corresponding
188  * <code>sprintf</code> argument is to be interpreted, while the flags
189  * modify that interpretation.
190  *
191  * The field type characters are:
192  *
193  * Field | Integer Format
194  * ------+--------------------------------------------------------------
195  * b | Convert argument as a binary number.
196  * | Negative numbers will be displayed as a two's complement
197  * | prefixed with `..1'.
198  * B | Equivalent to `b', but uses an uppercase 0B for prefix
199  * | in the alternative format by #.
200  * d | Convert argument as a decimal number.
201  * i | Identical to `d'.
202  * o | Convert argument as an octal number.
203  * | Negative numbers will be displayed as a two's complement
204  * | prefixed with `..7'.
205  * u | Identical to `d'.
206  * x | Convert argument as a hexadecimal number.
207  * | Negative numbers will be displayed as a two's complement
208  * | prefixed with `..f' (representing an infinite string of
209  * | leading 'ff's).
210  * X | Equivalent to `x', but uses uppercase letters.
211  *
212  * Field | Float Format
213  * ------+--------------------------------------------------------------
214  * e | Convert floating point argument into exponential notation
215  * | with one digit before the decimal point as [-]d.dddddde[+-]dd.
216  * | The precision specifies the number of digits after the decimal
217  * | point (defaulting to six).
218  * E | Equivalent to `e', but uses an uppercase E to indicate
219  * | the exponent.
220  * f | Convert floating point argument as [-]ddd.dddddd,
221  * | where the precision specifies the number of digits after
222  * | the decimal point.
223  * g | Convert a floating point number using exponential form
224  * | if the exponent is less than -4 or greater than or
225  * | equal to the precision, or in dd.dddd form otherwise.
226  * | The precision specifies the number of significant digits.
227  * G | Equivalent to `g', but use an uppercase `E' in exponent form.
228  * a | Convert floating point argument as [-]0xh.hhhhp[+-]dd,
229  * | which consists of an optional sign, "0x", fraction part
230  * | as hexadecimal, "p", and exponential part as decimal.
231  * A | Equivalent to `a', but use uppercase `X' and `P'.
232  *
233  * Field | Other Format
234  * ------+--------------------------------------------------------------
235  * c | Argument is the numeric code for a single character or
236  * | a single character string itself.
237  * p | The value of argument.inspect.
238  * s | Argument is a string to be substituted. If the format
239  * | sequence contains a precision, at most that many characters
240  * | will be copied.
241  * % | A percent sign itself will be displayed. No argument taken.
242  *
243  * The flags modify the behavior of the formats.
244  * The flag characters are:
245  *
246  * Flag | Applies to | Meaning
247  * ---------+---------------+-----------------------------------------
248  * space | bBdiouxX | Leave a space at the start of
249  * | aAeEfgG | non-negative numbers.
250  * | (numeric fmt) | For `o', `x', `X', `b' and `B', use
251  * | | a minus sign with absolute value for
252  * | | negative values.
253  * ---------+---------------+-----------------------------------------
254  * (digit)$ | all | Specifies the absolute argument number
255  * | | for this field. Absolute and relative
256  * | | argument numbers cannot be mixed in a
257  * | | sprintf string.
258  * ---------+---------------+-----------------------------------------
259  * # | bBoxX | Use an alternative format.
260  * | aAeEfgG | For the conversions `o', increase the precision
261  * | | until the first digit will be `0' if
262  * | | it is not formatted as complements.
263  * | | For the conversions `x', `X', `b' and `B'
264  * | | on non-zero, prefix the result with ``0x'',
265  * | | ``0X'', ``0b'' and ``0B'', respectively.
266  * | | For `a', `A', `e', `E', `f', `g', and 'G',
267  * | | force a decimal point to be added,
268  * | | even if no digits follow.
269  * | | For `g' and 'G', do not remove trailing zeros.
270  * ---------+---------------+-----------------------------------------
271  * + | bBdiouxX | Add a leading plus sign to non-negative
272  * | aAeEfgG | numbers.
273  * | (numeric fmt) | For `o', `x', `X', `b' and `B', use
274  * | | a minus sign with absolute value for
275  * | | negative values.
276  * ---------+---------------+-----------------------------------------
277  * - | all | Left-justify the result of this conversion.
278  * ---------+---------------+-----------------------------------------
279  * 0 (zero) | bBdiouxX | Pad with zeros, not spaces.
280  * | aAeEfgG | For `o', `x', `X', `b' and `B', radix-1
281  * | (numeric fmt) | is used for negative numbers formatted as
282  * | | complements.
283  * ---------+---------------+-----------------------------------------
284  * * | all | Use the next argument as the field width.
285  * | | If negative, left-justify the result. If the
286  * | | asterisk is followed by a number and a dollar
287  * | | sign, use the indicated argument as the width.
288  *
289  * Examples of flags:
290  *
291  * # `+' and space flag specifies the sign of non-negative numbers.
292  * sprintf("%d", 123) #=> "123"
293  * sprintf("%+d", 123) #=> "+123"
294  * sprintf("% d", 123) #=> " 123"
295  *
296  * # `#' flag for `o' increases number of digits to show `0'.
297  * # `+' and space flag changes format of negative numbers.
298  * sprintf("%o", 123) #=> "173"
299  * sprintf("%#o", 123) #=> "0173"
300  * sprintf("%+o", -123) #=> "-173"
301  * sprintf("%o", -123) #=> "..7605"
302  * sprintf("%#o", -123) #=> "..7605"
303  *
304  * # `#' flag for `x' add a prefix `0x' for non-zero numbers.
305  * # `+' and space flag disables complements for negative numbers.
306  * sprintf("%x", 123) #=> "7b"
307  * sprintf("%#x", 123) #=> "0x7b"
308  * sprintf("%+x", -123) #=> "-7b"
309  * sprintf("%x", -123) #=> "..f85"
310  * sprintf("%#x", -123) #=> "0x..f85"
311  * sprintf("%#x", 0) #=> "0"
312  *
313  * # `#' for `X' uses the prefix `0X'.
314  * sprintf("%X", 123) #=> "7B"
315  * sprintf("%#X", 123) #=> "0X7B"
316  *
317  * # `#' flag for `b' add a prefix `0b' for non-zero numbers.
318  * # `+' and space flag disables complements for negative numbers.
319  * sprintf("%b", 123) #=> "1111011"
320  * sprintf("%#b", 123) #=> "0b1111011"
321  * sprintf("%+b", -123) #=> "-1111011"
322  * sprintf("%b", -123) #=> "..10000101"
323  * sprintf("%#b", -123) #=> "0b..10000101"
324  * sprintf("%#b", 0) #=> "0"
325  *
326  * # `#' for `B' uses the prefix `0B'.
327  * sprintf("%B", 123) #=> "1111011"
328  * sprintf("%#B", 123) #=> "0B1111011"
329  *
330  * # `#' for `e' forces to show the decimal point.
331  * sprintf("%.0e", 1) #=> "1e+00"
332  * sprintf("%#.0e", 1) #=> "1.e+00"
333  *
334  * # `#' for `f' forces to show the decimal point.
335  * sprintf("%.0f", 1234) #=> "1234"
336  * sprintf("%#.0f", 1234) #=> "1234."
337  *
338  * # `#' for `g' forces to show the decimal point.
339  * # It also disables stripping lowest zeros.
340  * sprintf("%g", 123.4) #=> "123.4"
341  * sprintf("%#g", 123.4) #=> "123.400"
342  * sprintf("%g", 123456) #=> "123456"
343  * sprintf("%#g", 123456) #=> "123456."
344  *
345  * The field width is an optional integer, followed optionally by a
346  * period and a precision. The width specifies the minimum number of
347  * characters that will be written to the result for this field.
348  *
349  * Examples of width:
350  *
351  *   # padding is done by spaces,       width=20
352  *   # 0 or radix-1.             <------------------>
353  *   sprintf("%20d", 123)   #=> "                 123"
354  *   sprintf("%+20d", 123)  #=> "                +123"
355  *   sprintf("%020d", 123)  #=> "00000000000000000123"
356  *   sprintf("%+020d", 123) #=> "+0000000000000000123"
357  *   sprintf("% 020d", 123) #=> " 0000000000000000123"
358  *   sprintf("%-20d", 123)  #=> "123                 "
359  *   sprintf("%-+20d", 123) #=> "+123                "
360  *   sprintf("%- 20d", 123) #=> " 123                "
361  *   sprintf("%020x", -123) #=> "..ffffffffffffffff85"
362  *
363  * For
364  * numeric fields, the precision controls the number of decimal places
365  * displayed. For string fields, the precision determines the maximum
366  * number of characters to be copied from the string. (Thus, the format
367  * sequence <code>%10.10s</code> will always contribute exactly ten
368  * characters to the result.)
369  *
370  * Examples of precisions:
371  *
372  * # precision for `d', 'o', 'x' and 'b' is
373  * # minimum number of digits <------>
374  * sprintf("%20.8d", 123) #=> " 00000123"
375  * sprintf("%20.8o", 123) #=> " 00000173"
376  * sprintf("%20.8x", 123) #=> " 0000007b"
377  * sprintf("%20.8b", 123) #=> " 01111011"
378  * sprintf("%20.8d", -123) #=> " -00000123"
379  * sprintf("%20.8o", -123) #=> " ..777605"
380  * sprintf("%20.8x", -123) #=> " ..ffff85"
381  * sprintf("%20.8b", -11) #=> " ..110101"
382  *
383  * # "0x" and "0b" for `#x' and `#b' is not counted for
384  * # precision but "0" for `#o' is counted. <------>
385  * sprintf("%#20.8d", 123) #=> " 00000123"
386  * sprintf("%#20.8o", 123) #=> " 00000173"
387  * sprintf("%#20.8x", 123) #=> " 0x0000007b"
388  * sprintf("%#20.8b", 123) #=> " 0b01111011"
389  * sprintf("%#20.8d", -123) #=> " -00000123"
390  * sprintf("%#20.8o", -123) #=> " ..777605"
391  * sprintf("%#20.8x", -123) #=> " 0x..ffff85"
392  * sprintf("%#20.8b", -11) #=> " 0b..110101"
393  *
394  * # precision for `e' is number of
395  * # digits after the decimal point <------>
396  * sprintf("%20.8e", 1234.56789) #=> " 1.23456789e+03"
397  *
398  * # precision for `f' is number of
399  * # digits after the decimal point <------>
400  * sprintf("%20.8f", 1234.56789) #=> " 1234.56789000"
401  *
402  * # precision for `g' is number of
403  * # significant digits <------->
404  * sprintf("%20.8g", 1234.56789) #=> " 1234.5679"
405  *
406  * # <------->
407  * sprintf("%20.8g", 123456789) #=> " 1.2345679e+08"
408  *
409  * # precision for `s' is
410  * # maximum number of characters <------>
411  * sprintf("%20.8s", "string test") #=> " string t"
412  *
413  * Examples:
414  *
415  * sprintf("%d %04x", 123, 123) #=> "123 007b"
416  * sprintf("%08b '%4s'", 123, 123) #=> "01111011 ' 123'"
417  * sprintf("%1$*2$s %2$d %1$s", "hello", 8) #=> "   hello 8 hello"
418  * sprintf("%1$*2$s %2$d", "hello", -8) #=> "hello    -8"
419  * sprintf("%+g:% g:%-g", 1.23, 1.23, 1.23) #=> "+1.23: 1.23:1.23"
420  * sprintf("%u", -123) #=> "-123"
421  *
422  * For more complex formatting, Ruby supports a reference by name.
423  * %<name>s style uses format style, but %{name} style doesn't.
424  *
425  * Examples:
426  * sprintf("%<foo>d : %<bar>f", { :foo => 1, :bar => 2 })
427  * #=> 1 : 2.000000
428  * sprintf("%{foo}f", { :foo => 1 })
429  * # => "1f"
430  */
431 
432 VALUE
434 {
435  return rb_str_format(argc - 1, argv + 1, GETNTHARG(0));
436 }
437 
438 VALUE
439 rb_str_format(int argc, const VALUE *argv, VALUE fmt)
440 {
441  rb_encoding *enc;
442  const char *p, *end;
443  char *buf;
444  long blen, bsiz;
445  VALUE result;
446 
447  long scanned = 0;
448  int coderange = ENC_CODERANGE_7BIT;
449  int width, prec, flags = FNONE;
450  int nextarg = 1;
451  int posarg = 0;
452  int tainted = 0;
453  VALUE nextvalue;
454  VALUE tmp;
455  VALUE str;
456  volatile VALUE hash = Qundef;
457 
458 #define CHECK_FOR_WIDTH(f) \
459  if ((f) & FWIDTH) { \
460  rb_raise(rb_eArgError, "width given twice"); \
461  } \
462  if ((f) & FPREC0) { \
463  rb_raise(rb_eArgError, "width after precision"); \
464  }
465 #define CHECK_FOR_FLAGS(f) \
466  if ((f) & FWIDTH) { \
467  rb_raise(rb_eArgError, "flag after width"); \
468  } \
469  if ((f) & FPREC0) { \
470  rb_raise(rb_eArgError, "flag after precision"); \
471  }
472 
473  ++argc;
474  --argv;
475  if (OBJ_TAINTED(fmt)) tainted = 1;
476  StringValue(fmt);
477  enc = rb_enc_get(fmt);
478  fmt = rb_str_new4(fmt);
479  p = RSTRING_PTR(fmt);
480  end = p + RSTRING_LEN(fmt);
481  blen = 0;
482  bsiz = 120;
483  result = rb_str_buf_new(bsiz);
484  rb_enc_copy(result, fmt);
485  buf = RSTRING_PTR(result);
486  memset(buf, 0, bsiz);
487  ENC_CODERANGE_SET(result, coderange);
488 
489  for (; p < end; p++) {
490  const char *t;
491  int n;
492  ID id = 0;
493 
494  for (t = p; t < end && *t != '%'; t++) ;
495  PUSH(p, t - p);
496  if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
497  scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &coderange);
498  ENC_CODERANGE_SET(result, coderange);
499  }
500  if (t >= end) {
501  /* end of fmt string */
502  goto sprint_exit;
503  }
504  p = t + 1; /* skip `%' */
505 
506  width = prec = -1;
507  nextvalue = Qundef;
508  retry:
509  switch (*p) {
510  default:
511  if (rb_enc_isprint(*p, enc))
512  rb_raise(rb_eArgError, "malformed format string - %%%c", *p);
513  else
514  rb_raise(rb_eArgError, "malformed format string");
515  break;
516 
517  case ' ':
518  CHECK_FOR_FLAGS(flags);
519  flags |= FSPACE;
520  p++;
521  goto retry;
522 
523  case '#':
524  CHECK_FOR_FLAGS(flags);
525  flags |= FSHARP;
526  p++;
527  goto retry;
528 
529  case '+':
530  CHECK_FOR_FLAGS(flags);
531  flags |= FPLUS;
532  p++;
533  goto retry;
534 
535  case '-':
536  CHECK_FOR_FLAGS(flags);
537  flags |= FMINUS;
538  p++;
539  goto retry;
540 
541  case '0':
542  CHECK_FOR_FLAGS(flags);
543  flags |= FZERO;
544  p++;
545  goto retry;
546 
547  case '1': case '2': case '3': case '4':
548  case '5': case '6': case '7': case '8': case '9':
549  n = 0;
550  GETNUM(n, width);
551  if (*p == '$') {
552  if (nextvalue != Qundef) {
553  rb_raise(rb_eArgError, "value given twice - %d$", n);
554  }
555  nextvalue = GETPOSARG(n);
556  p++;
557  goto retry;
558  }
559  CHECK_FOR_WIDTH(flags);
560  width = n;
561  flags |= FWIDTH;
562  goto retry;
563 
564  case '<':
565  case '{':
566  {
567  const char *start = p;
568  char term = (*p == '<') ? '>' : '}';
569 
570  for (; p < end && *p != term; ) {
571  p += rb_enc_mbclen(p, end, enc);
572  }
573  if (p >= end) {
574  rb_raise(rb_eArgError, "malformed name - unmatched parenthesis");
575  }
576  if (id) {
577  rb_raise(rb_eArgError, "name%.*s after <%s>",
578  (int)(p - start + 1), start, rb_id2name(id));
579  }
580  id = rb_intern3(start + 1, p - start - 1, enc);
581  nextvalue = GETNAMEARG(ID2SYM(id), start, (int)(p - start + 1));
582  if (nextvalue == Qundef) {
583  rb_raise(rb_eKeyError, "key%.*s not found", (int)(p - start + 1), start);
584  }
585  if (term == '}') goto format_s;
586  p++;
587  goto retry;
588  }
589 
590  case '*':
591  CHECK_FOR_WIDTH(flags);
592  flags |= FWIDTH;
593  GETASTER(width);
594  if (width < 0) {
595  flags |= FMINUS;
596  width = -width;
597  }
598  p++;
599  goto retry;
600 
601  case '.':
602  if (flags & FPREC0) {
603  rb_raise(rb_eArgError, "precision given twice");
604  }
605  flags |= FPREC|FPREC0;
606 
607  prec = 0;
608  p++;
609  if (*p == '*') {
610  GETASTER(prec);
611  if (prec < 0) { /* ignore negative precision */
612  flags &= ~FPREC;
613  }
614  p++;
615  goto retry;
616  }
617 
618  GETNUM(prec, precision);
619  goto retry;
620 
621  case '\n':
622  case '\0':
623  p--;
624  case '%':
625  if (flags != FNONE) {
626  rb_raise(rb_eArgError, "invalid format character - %%");
627  }
628  PUSH("%", 1);
629  break;
630 
631  case 'c':
632  {
633  VALUE val = GETARG();
634  VALUE tmp;
635  unsigned int c;
636  int n;
637 
638  tmp = rb_check_string_type(val);
639  if (!NIL_P(tmp)) {
640  if (rb_enc_strlen(RSTRING_PTR(tmp),RSTRING_END(tmp),enc) != 1) {
641  rb_raise(rb_eArgError, "%%c requires a character");
642  }
643  c = rb_enc_codepoint_len(RSTRING_PTR(tmp), RSTRING_END(tmp), &n, enc);
644  RB_GC_GUARD(tmp);
645  }
646  else {
647  c = NUM2INT(val);
648  n = rb_enc_codelen(c, enc);
649  }
650  if (n <= 0) {
651  rb_raise(rb_eArgError, "invalid character");
652  }
653  if (!(flags & FWIDTH)) {
654  CHECK(n);
655  rb_enc_mbcput(c, &buf[blen], enc);
656  blen += n;
657  }
658  else if ((flags & FMINUS)) {
659  CHECK(n);
660  rb_enc_mbcput(c, &buf[blen], enc);
661  blen += n;
662  FILL(' ', width-1);
663  }
664  else {
665  FILL(' ', width-1);
666  CHECK(n);
667  rb_enc_mbcput(c, &buf[blen], enc);
668  blen += n;
669  }
670  }
671  break;
672 
673  case 's':
674  case 'p':
675  format_s:
676  {
677  VALUE arg = GETARG();
678  long len, slen;
679 
680  if (*p == 'p') arg = rb_inspect(arg);
681  str = rb_obj_as_string(arg);
682  if (OBJ_TAINTED(str)) tainted = 1;
683  len = RSTRING_LEN(str);
684  rb_str_set_len(result, blen);
685  if (coderange != ENC_CODERANGE_BROKEN && scanned < blen) {
686  int cr = coderange;
687  scanned += rb_str_coderange_scan_restartable(buf+scanned, buf+blen, enc, &cr);
688  ENC_CODERANGE_SET(result,
689  (cr == ENC_CODERANGE_UNKNOWN ?
690  ENC_CODERANGE_BROKEN : (coderange = cr)));
691  }
692  enc = rb_enc_check(result, str);
693  if (flags&(FPREC|FWIDTH)) {
694  slen = rb_enc_strlen(RSTRING_PTR(str),RSTRING_END(str),enc);
695  if (slen < 0) {
696  rb_raise(rb_eArgError, "invalid mbstring sequence");
697  }
698  if ((flags&FPREC) && (prec < slen)) {
699  char *p = rb_enc_nth(RSTRING_PTR(str), RSTRING_END(str),
700  prec, enc);
701  slen = prec;
702  len = p - RSTRING_PTR(str);
703  }
704  /* need to adjust multi-byte string pos */
705  if ((flags&FWIDTH) && (width > slen)) {
706  width -= (int)slen;
707  if (!(flags&FMINUS)) {
708  CHECK(width);
709  while (width--) {
710  buf[blen++] = ' ';
711  }
712  }
713  CHECK(len);
714  memcpy(&buf[blen], RSTRING_PTR(str), len);
715  RB_GC_GUARD(str);
716  blen += len;
717  if (flags&FMINUS) {
718  CHECK(width);
719  while (width--) {
720  buf[blen++] = ' ';
721  }
722  }
723  rb_enc_associate(result, enc);
724  break;
725  }
726  }
727  PUSH(RSTRING_PTR(str), len);
728  RB_GC_GUARD(str);
729  rb_enc_associate(result, enc);
730  }
731  break;
732 
733  case 'd':
734  case 'i':
735  case 'o':
736  case 'x':
737  case 'X':
738  case 'b':
739  case 'B':
740  case 'u':
741  {
742  volatile VALUE val = GETARG();
743  char fbuf[32], nbuf[64], *s;
744  const char *prefix = 0;
745  int sign = 0, dots = 0;
746  char sc = 0;
747  long v = 0;
748  int base, bignum = 0;
749  int len;
750 
751  switch (*p) {
752  case 'd':
753  case 'i':
754  case 'u':
755  sign = 1; break;
756  case 'o':
757  case 'x':
758  case 'X':
759  case 'b':
760  case 'B':
761  if (flags&(FPLUS|FSPACE)) sign = 1;
762  break;
763  }
764  if (flags & FSHARP) {
765  switch (*p) {
766  case 'o':
767  prefix = "0"; break;
768  case 'x':
769  prefix = "0x"; break;
770  case 'X':
771  prefix = "0X"; break;
772  case 'b':
773  prefix = "0b"; break;
774  case 'B':
775  prefix = "0B"; break;
776  }
777  }
778 
779  bin_retry:
780  switch (TYPE(val)) {
781  case T_FLOAT:
782  if (FIXABLE(RFLOAT_VALUE(val))) {
783  val = LONG2FIX((long)RFLOAT_VALUE(val));
784  goto bin_retry;
785  }
786  val = rb_dbl2big(RFLOAT_VALUE(val));
787  if (FIXNUM_P(val)) goto bin_retry;
788  bignum = 1;
789  break;
790  case T_STRING:
791  val = rb_str_to_inum(val, 0, TRUE);
792  goto bin_retry;
793  case T_BIGNUM:
794  bignum = 1;
795  break;
796  case T_FIXNUM:
797  v = FIX2LONG(val);
798  break;
799  default:
800  val = rb_Integer(val);
801  goto bin_retry;
802  }
803 
804  switch (*p) {
805  case 'o':
806  base = 8; break;
807  case 'x':
808  case 'X':
809  base = 16; break;
810  case 'b':
811  case 'B':
812  base = 2; break;
813  case 'u':
814  case 'd':
815  case 'i':
816  default:
817  base = 10; break;
818  }
819 
820  if (!bignum) {
821  if (base == 2) {
822  val = rb_int2big(v);
823  goto bin_retry;
824  }
825  if (sign) {
826  char c = *p;
827  if (c == 'i') c = 'd'; /* %d and %i are identical */
828  if (v < 0) {
829  v = -v;
830  sc = '-';
831  width--;
832  }
833  else if (flags & FPLUS) {
834  sc = '+';
835  width--;
836  }
837  else if (flags & FSPACE) {
838  sc = ' ';
839  width--;
840  }
841  snprintf(fbuf, sizeof(fbuf), "%%l%c", c);
842  snprintf(nbuf, sizeof(nbuf), fbuf, v);
843  s = nbuf;
844  }
845  else {
846  s = nbuf;
847  if (v < 0) {
848  dots = 1;
849  }
850  snprintf(fbuf, sizeof(fbuf), "%%l%c", *p == 'X' ? 'x' : *p);
851  snprintf(++s, sizeof(nbuf) - 1, fbuf, v);
852  if (v < 0) {
853  char d = 0;
854 
855  s = remove_sign_bits(s, base);
856  switch (base) {
857  case 16:
858  d = 'f'; break;
859  case 8:
860  d = '7'; break;
861  }
862  if (d && *s != d) {
863  *--s = d;
864  }
865  }
866  }
867  len = (int)strlen(s);
868  }
869  else {
870  if (sign) {
871  tmp = rb_big2str(val, base);
872  s = RSTRING_PTR(tmp);
873  if (s[0] == '-') {
874  s++;
875  sc = '-';
876  width--;
877  }
878  else if (flags & FPLUS) {
879  sc = '+';
880  width--;
881  }
882  else if (flags & FSPACE) {
883  sc = ' ';
884  width--;
885  }
886  }
887  else {
888  if (!RBIGNUM_SIGN(val)) {
889  val = rb_big_clone(val);
890  rb_big_2comp(val);
891  }
892  tmp = rb_big2str0(val, base, RBIGNUM_SIGN(val));
893  s = RSTRING_PTR(tmp);
894  if (*s == '-') {
895  dots = 1;
896  if (base == 10) {
897  rb_warning("negative number for %%u specifier");
898  }
899  s = remove_sign_bits(++s, base);
900  switch (base) {
901  case 16:
902  if (s[0] != 'f') *--s = 'f'; break;
903  case 8:
904  if (s[0] != '7') *--s = '7'; break;
905  case 2:
906  if (s[0] != '1') *--s = '1'; break;
907  }
908  }
909  }
910  len = rb_long2int(RSTRING_END(tmp) - s);
911  }
912 
913  if (dots) {
914  prec -= 2;
915  width -= 2;
916  }
917 
918  if (*p == 'X') {
919  char *pp = s;
920  int c;
921  while ((c = (int)(unsigned char)*pp) != 0) {
922  *pp = rb_enc_toupper(c, enc);
923  pp++;
924  }
925  }
926  if (prefix && !prefix[1]) { /* octal */
927  if (dots) {
928  prefix = 0;
929  }
930  else if (len == 1 && *s == '0') {
931  len = 0;
932  if (flags & FPREC) prec--;
933  }
934  else if ((flags & FPREC) && (prec > len)) {
935  prefix = 0;
936  }
937  }
938  else if (len == 1 && *s == '0') {
939  prefix = 0;
940  }
941  if (prefix) {
942  width -= (int)strlen(prefix);
943  }
944  if ((flags & (FZERO|FMINUS|FPREC)) == FZERO) {
945  prec = width;
946  width = 0;
947  }
948  else {
949  if (prec < len) {
950  if (!prefix && prec == 0 && len == 1 && *s == '0') len = 0;
951  prec = len;
952  }
953  width -= prec;
954  }
955  if (!(flags&FMINUS)) {
956  CHECK(width);
957  while (width-- > 0) {
958  buf[blen++] = ' ';
959  }
960  }
961  if (sc) PUSH(&sc, 1);
962  if (prefix) {
963  int plen = (int)strlen(prefix);
964  PUSH(prefix, plen);
965  }
966  CHECK(prec - len);
967  if (dots) PUSH("..", 2);
968  if (!bignum && v < 0) {
969  char c = sign_bits(base, p);
970  while (len < prec--) {
971  buf[blen++] = c;
972  }
973  }
974  else if ((flags & (FMINUS|FPREC)) != FMINUS) {
975  char c;
976 
977  if (!sign && bignum && !RBIGNUM_SIGN(val))
978  c = sign_bits(base, p);
979  else
980  c = '0';
981  while (len < prec--) {
982  buf[blen++] = c;
983  }
984  }
985  PUSH(s, len);
986  RB_GC_GUARD(tmp);
987  CHECK(width);
988  while (width-- > 0) {
989  buf[blen++] = ' ';
990  }
991  }
992  break;
993 
994  case 'f':
995  case 'g':
996  case 'G':
997  case 'e':
998  case 'E':
999  case 'a':
1000  case 'A':
1001  {
1002  VALUE val = GETARG();
1003  double fval;
1004  int i, need = 6;
1005  char fbuf[32];
1006 
1007  fval = RFLOAT_VALUE(rb_Float(val));
1008  if (isnan(fval) || isinf(fval)) {
1009  const char *expr;
1010 
1011  if (isnan(fval)) {
1012  expr = "NaN";
1013  }
1014  else {
1015  expr = "Inf";
1016  }
1017  need = (int)strlen(expr);
1018  if ((!isnan(fval) && fval < 0.0) || (flags & FPLUS))
1019  need++;
1020  if ((flags & FWIDTH) && need < width)
1021  need = width;
1022 
1023  CHECK(need + 1);
1024  snprintf(&buf[blen], need + 1, "%*s", need, "");
1025  if (flags & FMINUS) {
1026  if (!isnan(fval) && fval < 0.0)
1027  buf[blen++] = '-';
1028  else if (flags & FPLUS)
1029  buf[blen++] = '+';
1030  else if (flags & FSPACE)
1031  blen++;
1032  memcpy(&buf[blen], expr, strlen(expr));
1033  }
1034  else {
1035  if (!isnan(fval) && fval < 0.0)
1036  buf[blen + need - strlen(expr) - 1] = '-';
1037  else if (flags & FPLUS)
1038  buf[blen + need - strlen(expr) - 1] = '+';
1039  else if ((flags & FSPACE) && need > width)
1040  blen++;
1041  memcpy(&buf[blen + need - strlen(expr)], expr,
1042  strlen(expr));
1043  }
1044  blen += strlen(&buf[blen]);
1045  break;
1046  }
1047 
1048  fmt_setup(fbuf, sizeof(fbuf), *p, flags, width, prec);
1049  need = 0;
1050  if (*p != 'e' && *p != 'E') {
1051  i = INT_MIN;
1052  frexp(fval, &i);
1053  if (i > 0)
1054  need = BIT_DIGITS(i);
1055  }
1056  need += (flags&FPREC) ? prec : 6;
1057  if ((flags&FWIDTH) && need < width)
1058  need = width;
1059  need += 20;
1060 
1061  CHECK(need);
1062  snprintf(&buf[blen], need, fbuf, fval);
1063  blen += strlen(&buf[blen]);
1064  }
1065  break;
1066  }
1067  flags = FNONE;
1068  }
1069 
1070  sprint_exit:
1071  RB_GC_GUARD(fmt);
1072  /* XXX - We cannot validate the number of arguments if (digit)$ style used.
1073  */
1074  if (posarg >= 0 && nextarg < argc) {
1075  const char *mesg = "too many arguments for format string";
1076  if (RTEST(ruby_debug)) rb_raise(rb_eArgError, "%s", mesg);
1077  if (RTEST(ruby_verbose)) rb_warn("%s", mesg);
1078  }
1079  rb_str_resize(result, blen);
1080 
1081  if (tainted) OBJ_TAINT(result);
1082  return result;
1083 }
1084 
1085 static void
1086 fmt_setup(char *buf, size_t size, int c, int flags, int width, int prec)
1087 {
1088  char *end = buf + size;
1089  *buf++ = '%';
1090  if (flags & FSHARP) *buf++ = '#';
1091  if (flags & FPLUS) *buf++ = '+';
1092  if (flags & FMINUS) *buf++ = '-';
1093  if (flags & FZERO) *buf++ = '0';
1094  if (flags & FSPACE) *buf++ = ' ';
1095 
1096  if (flags & FWIDTH) {
1097  snprintf(buf, end - buf, "%d", width);
1098  buf += strlen(buf);
1099  }
1100 
1101  if (flags & FPREC) {
1102  snprintf(buf, end - buf, ".%d", prec);
1103  buf += strlen(buf);
1104  }
1105 
1106  *buf++ = c;
1107  *buf = '\0';
1108 }
1109 
/* Configuration for the textual inclusion of vsnprintf.c below: the stdio
 * names it uses are redirected to rb_printf_* names so that they do not
 * clash with the C library's own definitions. */
1110 #undef FILE
1111 #define FILE rb_printf_buffer
1112 #define __sbuf rb_printf_sbuf
1113 #define __sFILE rb_printf_sfile
1114 #undef feof
1115 #undef ferror
1116 #undef clearerr
1117 #undef fileno
/* Choose a 64-bit "quad" type: when long is narrower than a pointer and
 * LONG_LONG is pointer sized (also flagged as LLP64), or when LONG_LONG is
 * 8 bytes and differs in size from long. */
1118 #if SIZEOF_LONG < SIZEOF_VOIDP
1119 # if SIZEOF_LONG_LONG == SIZEOF_VOIDP
1120 # define _HAVE_SANE_QUAD_
1121 # define _HAVE_LLP64_
1122 # define quad_t LONG_LONG
1123 # define u_quad_t unsigned LONG_LONG
1124 # endif
1125 #elif SIZEOF_LONG != SIZEOF_LONG_LONG && SIZEOF_LONG_LONG == 8
1126 # define _HAVE_SANE_QUAD_
1127 # define quad_t LONG_LONG
1128 # define u_quad_t unsigned LONG_LONG
1129 #endif
/* NOTE(review): presumably enables the floating point conversions inside
 * vsnprintf.c and maps its dtoa helpers onto Ruby's -- confirm there. */
1130 #define FLOATING_POINT 1
1131 #define BSD__dtoa ruby_dtoa
1132 #define BSD__hdtoa ruby_hdtoa
1133 #include "vsnprintf.c"
1134 
1135 static int
1136 ruby__sfvwrite(register rb_printf_buffer *fp, register struct __suio *uio)
1137 {
1138  struct __siov *iov;
1139  VALUE result = (VALUE)fp->_bf._base;
1140  char *buf = (char*)fp->_p;
1141  size_t len, n;
1142  size_t blen = buf - RSTRING_PTR(result), bsiz = fp->_w;
1143 
1144  if (RBASIC(result)->klass) {
1145  rb_raise(rb_eRuntimeError, "rb_vsprintf reentered");
1146  }
1147  if ((len = uio->uio_resid) == 0)
1148  return 0;
1149  CHECK(len);
1150  buf += blen;
1151  fp->_w = bsiz;
1152  for (iov = uio->uio_iov; len > 0; ++iov) {
1153  MEMCPY(buf, iov->iov_base, char, n = iov->iov_len);
1154  buf += n;
1155  len -= n;
1156  }
1157  fp->_p = (unsigned char *)buf;
1158  return 0;
1159 }
1160 
1161 VALUE
1162 rb_enc_vsprintf(rb_encoding *enc, const char *fmt, va_list ap)
1163 {
1164  rb_printf_buffer f;
1165  VALUE result;
1166 
1167  f._flags = __SWR | __SSTR;
1168  f._bf._size = 0;
1169  f._w = 120;
1170  result = rb_str_buf_new(f._w);
1171  if (enc) rb_enc_associate(result, enc);
1172  f._bf._base = (unsigned char *)result;
1173  f._p = (unsigned char *)RSTRING_PTR(result);
1174  RBASIC(result)->klass = 0;
1175  f.vwrite = ruby__sfvwrite;
1176  BSD_vfprintf(&f, fmt, ap);
1177  RBASIC(result)->klass = rb_cString;
1178  rb_str_resize(result, (char *)f._p - RSTRING_PTR(result));
1179 
1180  return result;
1181 }
1182 
1183 VALUE
1184 rb_enc_sprintf(rb_encoding *enc, const char *format, ...)
1185 {
1186  VALUE result;
1187  va_list ap;
1188 
1189  va_start(ap, format);
1190  result = rb_enc_vsprintf(enc, format, ap);
1191  va_end(ap);
1192 
1193  return result;
1194 }
1195 
1196 VALUE
1197 rb_vsprintf(const char *fmt, va_list ap)
1198 {
1199  return rb_enc_vsprintf(NULL, fmt, ap);
1200 }
1201 
1202 VALUE
1203 rb_sprintf(const char *format, ...)
1204 {
1205  VALUE result;
1206  va_list ap;
1207 
1208  va_start(ap, format);
1209  result = rb_vsprintf(format, ap);
1210  va_end(ap);
1211 
1212  return result;
1213 }
1214 
1215 VALUE
1216 rb_str_vcatf(VALUE str, const char *fmt, va_list ap)
1217 {
1218  rb_printf_buffer f;
1219  VALUE klass;
1220 
1221  StringValue(str);
1222  rb_str_modify(str);
1223  f._flags = __SWR | __SSTR;
1224  f._bf._size = 0;
1225  f._w = rb_str_capacity(str);
1226  f._bf._base = (unsigned char *)str;
1227  f._p = (unsigned char *)RSTRING_END(str);
1228  klass = RBASIC(str)->klass;
1229  RBASIC(str)->klass = 0;
1230  f.vwrite = ruby__sfvwrite;
1231  BSD_vfprintf(&f, fmt, ap);
1232  RBASIC(str)->klass = klass;
1233  rb_str_resize(str, (char *)f._p - RSTRING_PTR(str));
1234 
1235  return str;
1236 }
1237 
1238 VALUE
1239 rb_str_catf(VALUE str, const char *format, ...)
1240 {
1241  va_list ap;
1242 
1243  va_start(ap, format);
1244  str = rb_str_vcatf(str, format, ap);
1245  va_end(ap);
1246 
1247  return str;
1248 }
1249