Ruby 1.9.3p392 (2013-02-22 revision 39386)
file.c
Go to the documentation of this file.
1 #include "ruby/ruby.h"
2 #include "ruby/encoding.h"
3 #include <winbase.h>
4 #include <wchar.h>
5 #include <shlwapi.h>
6 
7 #ifndef INVALID_FILE_ATTRIBUTES
8 # define INVALID_FILE_ATTRIBUTES ((DWORD)-1)
9 #endif
10 
11 /* cache 'encoding name' => 'code page' into a hash */
13 
/*
 Directory separator tests on wide characters.
 IS_DIR_SEPARATOR_P(c): true when c is a backslash or a forward slash.
 IS_DIR_UNC_P(c): true when the wide string c starts with two separators.
 Arguments are parenthesized so compound expressions expand correctly; note
 that IS_DIR_SEPARATOR_P still evaluates its argument twice.
*/
#define IS_DIR_SEPARATOR_P(c) ((c) == L'\\' || (c) == L'/')
#define IS_DIR_UNC_P(c) (IS_DIR_SEPARATOR_P((c)[0]) && IS_DIR_SEPARATOR_P((c)[1]))

/* MultiByteToWideChar() doesn't work with code page 51932 */
#define INVALID_CODE_PAGE 51932
/* element count of the on-stack full path buffer; parenthesized so that it
   can be used inside larger arithmetic expressions */
#define PATH_BUFFER_SIZE (MAX_PATH * 2)

#define insecure_obj_p(obj, level) ((level) >= 4 || ((level) > 0 && OBJ_TAINTED(obj)))
22 
/*
 Overwrite, in place, every occurrence of `find` in the NUL-terminated wide
 string `s` with `replace`.
*/
static inline void
replace_wchar(wchar_t *s, int find, int replace)
{
    wchar_t *cursor;

    for (cursor = s; *cursor != 0; cursor++) {
        if (*cursor == find) {
            *cursor = replace;
        }
    }
}
32 
33 /* Convert str from multibyte char to wchar with specified code page */
34 static inline void
35 convert_mb_to_wchar(VALUE str, wchar_t **wstr, wchar_t **wstr_pos, size_t *wstr_len, UINT code_page)
36 {
37  size_t len;
38 
39  if (NIL_P(str))
40  return;
41 
42  len = MultiByteToWideChar(code_page, 0, RSTRING_PTR(str), -1, NULL, 0) + 1;
43  *wstr = (wchar_t *)xmalloc(len * sizeof(wchar_t));
44  if (wstr_pos)
45  *wstr_pos = *wstr;
46 
47  MultiByteToWideChar(code_page, 0, RSTRING_PTR(str), -1, *wstr, len);
48  *wstr_len = len - 2;
49 }
50 
51 static inline void
52 convert_wchar_to_mb(const wchar_t *wstr, char **str, size_t *str_len, UINT code_page)
53 {
54  size_t len;
55 
56  len = WideCharToMultiByte(code_page, 0, wstr, -1, NULL, 0, NULL, NULL);
57  *str = (char *)xmalloc(len * sizeof(char));
58  WideCharToMultiByte(code_page, 0, wstr, -1, *str, len, NULL, NULL);
59 
60  /* do not count terminator as part of the string length */
61  *str_len = len - 1;
62 }
63 
64 /*
65  Return user's home directory using environment variables combinations.
66  Memory allocated by this function should be manually freed afterwards.
67 
68  Try:
69  HOME, HOMEDRIVE + HOMEPATH and USERPROFILE environment variables
70  TODO: Special Folders - Profile and Personal
71 */
72 static wchar_t *
73 home_dir(void)
74 {
75  wchar_t *buffer = NULL;
76  size_t buffer_len = 0, len = 0;
77  size_t home_env = 0;
78 
79  /*
80  GetEnvironmentVariableW when used with NULL will return the required
81  buffer size and its terminating character.
82  http://msdn.microsoft.com/en-us/library/windows/desktop/ms683188(v=vs.85).aspx
83  */
84 
85  if (len = GetEnvironmentVariableW(L"HOME", NULL, 0)) {
86  buffer_len = len;
87  home_env = 1;
88  }
89  else if (len = GetEnvironmentVariableW(L"HOMEDRIVE", NULL, 0)) {
90  buffer_len = len;
91  if (len = GetEnvironmentVariableW(L"HOMEPATH", NULL, 0)) {
92  buffer_len += len;
93  home_env = 2;
94  }
95  else {
96  buffer_len = 0;
97  }
98  }
99  else if (len = GetEnvironmentVariableW(L"USERPROFILE", NULL, 0)) {
100  buffer_len = len;
101  home_env = 3;
102  }
103 
104  /* allocate buffer */
105  if (home_env)
106  buffer = (wchar_t *)xmalloc(buffer_len * sizeof(wchar_t));
107 
108  switch (home_env) {
109  case 1:
110  /* HOME */
111  GetEnvironmentVariableW(L"HOME", buffer, buffer_len);
112  break;
113  case 2:
114  /* HOMEDRIVE + HOMEPATH */
115  len = GetEnvironmentVariableW(L"HOMEDRIVE", buffer, buffer_len);
116  GetEnvironmentVariableW(L"HOMEPATH", buffer + len, buffer_len - len);
117  break;
118  case 3:
119  /* USERPROFILE */
120  GetEnvironmentVariableW(L"USERPROFILE", buffer, buffer_len);
121  break;
122  default:
123  break;
124  }
125 
126  if (home_env) {
127  /* sanitize backslashes with forwardslashes */
128  replace_wchar(buffer, L'\\', L'/');
129 
130  return buffer;
131  }
132 
133  return NULL;
134 }
135 
/*
 Remove trailing invalid ':$DATA' of the path.

 wfullpath - wide path of `size` characters, modified in place.
 Returns the (possibly shortened) length of the path.
*/
static inline size_t
remove_invalid_alternative_data(wchar_t *wfullpath, size_t size)
{
    static const wchar_t prime[] = L":$DATA";
    enum { prime_len = (sizeof(prime) / sizeof(wchar_t)) -1 };
    wchar_t *scan;

    /* only paths longer than the suffix can carry it */
    if (size <= prime_len)
        return size;

    /* the suffix is matched case-insensitively */
    if (_wcsnicmp(wfullpath + size - prime_len, prime, prime_len) != 0)
        return size;

    /* alias of stream */
    /* get rid of a bug of x64 VC++ */
    if (wfullpath[size - (prime_len + 1)] == ':') {
        /* remove trailing '::$DATA' */
        size -= prime_len + 1; /* prime */
        wfullpath[size] = L'\0';
        return size;
    }

    /*
     remove trailing ':$DATA' of paths like '/aa:a:$DATA': the suffix is only
     dropped when the last path component holds another ':' before it
    */
    for (scan = wfullpath + size - (prime_len + 1);
         !IS_DIR_SEPARATOR_P(*scan) && scan != wfullpath;
         scan--) {
        if (*scan == L':') {
            size -= prime_len; /* alternative */
            wfullpath[size] = L'\0';
            break;
        }
    }

    return size;
}
167 
168 /* Return system code page. */
169 static inline UINT
171 {
172  return AreFileApisANSI() ? CP_ACP : CP_OEMCP;
173 }
174 
175 /*
176  Return code page number of the encoding.
177  Cache code page into a hash for performance since finding the code page in
178  Encoding#names is slow.
179 */
180 static UINT
182 {
183  VALUE code_page_value, name_key;
184  VALUE encoding, names_ary = Qundef, name;
185  char *enc_name;
186  struct RString fake_str;
187  ID names;
188  long i;
189 
190  if (!enc)
191  return system_code_page();
192 
193  enc_name = (char *)rb_enc_name(enc);
194 
195  fake_str.basic.flags = T_STRING|RSTRING_NOEMBED;
196  fake_str.basic.klass = rb_cString;
197  fake_str.as.heap.len = strlen(enc_name);
198  fake_str.as.heap.ptr = enc_name;
199  fake_str.as.heap.aux.capa = fake_str.as.heap.len;
200  name_key = (VALUE)&fake_str;
202 
203  code_page_value = rb_hash_lookup(rb_code_page, name_key);
204  if (code_page_value != Qnil)
205  return (UINT)FIX2INT(code_page_value);
206 
207  name_key = rb_usascii_str_new2(enc_name);
208 
209  encoding = rb_enc_from_encoding(enc);
210  if (!NIL_P(encoding)) {
211  CONST_ID(names, "names");
212  names_ary = rb_funcall(encoding, names, 0);
213  }
214 
215  /* map US-ASCII and ASCII-8bit as code page 20127 (us-ascii) */
216  if (enc == rb_usascii_encoding() || enc == rb_ascii8bit_encoding()) {
217  UINT code_page = 20127;
218  rb_hash_aset(rb_code_page, name_key, INT2FIX(code_page));
219  return code_page;
220  }
221 
222  if (names_ary != Qundef) {
223  for (i = 0; i < RARRAY_LEN(names_ary); i++) {
224  name = RARRAY_PTR(names_ary)[i];
225  if (strncmp("CP", RSTRING_PTR(name), 2) == 0) {
226  int code_page = atoi(RSTRING_PTR(name) + 2);
227  if (code_page != 0) {
228  rb_hash_aset(rb_code_page, name_key, INT2FIX(code_page));
229  return (UINT)code_page;
230  }
231  }
232  }
233  }
234 
236  return INVALID_CODE_PAGE;
237 }
238 
239 static inline VALUE
241 {
242  VALUE result, tmp;
243 
244  tmp = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), encoding);
246 
247  return result;
248 }
249 
250 /*
251  Replace the last part of the path to long name.
252  We try to avoid to call FindFirstFileW() since it takes long time.
253 */
254 static inline size_t
255 replace_to_long_name(wchar_t **wfullpath, size_t size, int heap)
256 {
257  WIN32_FIND_DATAW find_data;
258  HANDLE find_handle;
259 
260  /*
261  Skip long name conversion if the path is already long name.
262  Short name is 8.3 format.
263  http://en.wikipedia.org/wiki/8.3_filename
264  This check can be skipped for directory components that have file
265  extensions longer than 3 characters, or total lengths longer than
266  12 characters.
267  http://msdn.microsoft.com/en-us/library/windows/desktop/aa364980(v=vs.85).aspx
268  */
269  size_t const max_short_name_size = 8 + 1 + 3;
270  size_t const max_extension_size = 3;
271  size_t path_len = 1, extension_len = 0;
272  wchar_t *pos = *wfullpath;
273 
274  if (size == 3 && pos[1] == L':' && pos[2] == L'\\' && pos[3] == L'\0') {
275  /* root path doesn't need short name expansion */
276  return size;
277  }
278 
279  /* skip long name conversion if path contains wildcard characters */
280  if (wcspbrk(pos, L"*?")) {
281  return size;
282  }
283 
284  pos = *wfullpath + size - 1;
285  while (!IS_DIR_SEPARATOR_P(*pos) && pos != *wfullpath) {
286  if (!extension_len && *pos == L'.') {
287  extension_len = path_len - 1;
288  }
289  if (path_len > max_short_name_size || extension_len > max_extension_size) {
290  return size;
291  }
292  path_len++;
293  pos--;
294  }
295 
296  find_handle = FindFirstFileW(*wfullpath, &find_data);
297  if (find_handle != INVALID_HANDLE_VALUE) {
298  size_t trail_pos = wcslen(*wfullpath);
299  size_t file_len = wcslen(find_data.cFileName);
300 
301  FindClose(find_handle);
302  while (trail_pos > 0) {
303  if (IS_DIR_SEPARATOR_P((*wfullpath)[trail_pos]))
304  break;
305  trail_pos--;
306  }
307  size = trail_pos + 1 + file_len;
308  if ((size + 1) > sizeof(*wfullpath) / sizeof((*wfullpath)[0])) {
309  wchar_t *buf = (wchar_t *)xmalloc((size + 1) * sizeof(wchar_t));
310  wcsncpy(buf, *wfullpath, trail_pos + 1);
311  if (heap)
312  xfree(*wfullpath);
313  *wfullpath = buf;
314  }
315  wcsncpy(*wfullpath + trail_pos + 1, find_data.cFileName, file_len + 1);
316  }
317  return size;
318 }
319 
320 VALUE
321 rb_file_expand_path_internal(VALUE fname, VALUE dname, int abs_mode, int long_name, VALUE result)
322 {
323  size_t size = 0, wpath_len = 0, wdir_len = 0, whome_len = 0;
324  size_t buffer_len = 0;
325  char *fullpath = NULL;
326  wchar_t *wfullpath = NULL, *wpath = NULL, *wpath_pos = NULL, *wdir = NULL;
327  wchar_t *whome = NULL, *buffer = NULL, *buffer_pos = NULL;
328  UINT path_cp, cp;
329  VALUE path = fname, dir = dname;
330  wchar_t wfullpath_buffer[PATH_BUFFER_SIZE];
331  wchar_t path_drive = L'\0', dir_drive = L'\0';
332  int ignore_dir = 0;
333  rb_encoding *path_encoding;
334  int tainted = 0;
335 
336  /* tainted if path is tainted */
337  tainted = OBJ_TAINTED(path);
338 
339  /* get path encoding */
340  if (NIL_P(dir)) {
341  path_encoding = rb_enc_get(path);
342  }
343  else {
344  path_encoding = rb_enc_check(path, dir);
345  }
346 
347  cp = path_cp = code_page(path_encoding);
348 
349  /* workaround invalid codepage */
350  if (path_cp == INVALID_CODE_PAGE) {
351  cp = CP_UTF8;
352  if (!NIL_P(path)) {
353  path = fix_string_encoding(path, path_encoding);
354  }
355  }
356 
357  /* convert char * to wchar_t */
358  convert_mb_to_wchar(path, &wpath, &wpath_pos, &wpath_len, cp);
359 
360  /* determine if we need the user's home directory */
361  /* expand '~' only if NOT rb_file_absolute_path() where `abs_mode` is 1 */
362  if (abs_mode == 0 && wpath_len > 0 && wpath_pos[0] == L'~' &&
363  (wpath_len == 1 || IS_DIR_SEPARATOR_P(wpath_pos[1]))) {
364  /* tainted if expanding '~' */
365  tainted = 1;
366 
367  whome = home_dir();
368  if (whome == NULL) {
369  xfree(wpath);
370  rb_raise(rb_eArgError, "couldn't find HOME environment -- expanding `~'");
371  }
372  whome_len = wcslen(whome);
373 
374  if (PathIsRelativeW(whome) && !(whome_len >= 2 && IS_DIR_UNC_P(whome))) {
375  xfree(wpath);
376  rb_raise(rb_eArgError, "non-absolute home");
377  }
378 
379  /* use filesystem encoding if expanding home dir */
380  path_encoding = rb_filesystem_encoding();
381  cp = path_cp = system_code_page();
382 
383  /* ignores dir since we are expading home */
384  ignore_dir = 1;
385 
386  /* exclude ~ from the result */
387  wpath_pos++;
388  wpath_len--;
389 
390  /* exclude separator if present */
391  if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
392  wpath_pos++;
393  wpath_len--;
394  }
395  }
396  else if (wpath_len >= 2 && wpath_pos[1] == L':') {
397  if (wpath_len >= 3 && IS_DIR_SEPARATOR_P(wpath_pos[2])) {
398  /* ignore dir since path contains a drive letter and a root slash */
399  ignore_dir = 1;
400  }
401  else {
402  /* determine if we ignore dir or not later */
403  path_drive = wpath_pos[0];
404  }
405  }
406  else if (abs_mode == 0 && wpath_len >= 2 && wpath_pos[0] == L'~') {
407  wchar_t *wuser = wpath_pos + 1;
408  wchar_t *pos = wuser;
409  char *user;
410 
411  /* tainted if expanding '~' */
412  tainted = 1;
413 
414  while (!IS_DIR_SEPARATOR_P(*pos) && *pos != '\0')
415  pos++;
416 
417  *pos = '\0';
418  convert_wchar_to_mb(wuser, &user, &size, cp);
419 
420  /* convert to VALUE and set the path encoding */
421  if (path_cp == INVALID_CODE_PAGE) {
422  VALUE tmp = rb_enc_str_new(user, size, rb_utf8_encoding());
423  result = rb_str_encode(tmp, rb_enc_from_encoding(path_encoding), 0, Qnil);
424  rb_str_resize(tmp, 0);
425  }
426  else {
427  result = rb_enc_str_new(user, size, path_encoding);
428  }
429 
430  xfree(wpath);
431  if (user)
432  xfree(user);
433 
434  rb_raise(rb_eArgError, "can't find user %s", StringValuePtr(result));
435  }
436 
437  /* convert dir */
438  if (!ignore_dir && !NIL_P(dir)) {
439  /* fix string encoding */
440  if (path_cp == INVALID_CODE_PAGE) {
441  dir = fix_string_encoding(dir, path_encoding);
442  }
443 
444  /* convert char * to wchar_t */
445  convert_mb_to_wchar(dir, &wdir, NULL, &wdir_len, cp);
446 
447  if (wdir_len >= 2 && wdir[1] == L':') {
448  dir_drive = wdir[0];
449  if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
450  wdir_len = 2;
451  }
452  }
453  else if (wdir_len >= 2 && IS_DIR_UNC_P(wdir)) {
454  /* UNC path */
455  if (wpath_len && IS_DIR_SEPARATOR_P(wpath_pos[0])) {
456  /* cut the UNC path tail to '//host/share' */
457  size_t separators = 0;
458  size_t pos = 2;
459  while (pos < wdir_len && separators < 2) {
460  if (IS_DIR_SEPARATOR_P(wdir[pos])) {
461  separators++;
462  }
463  pos++;
464  }
465  if (separators == 2)
466  wdir_len = pos - 1;
467  }
468  }
469  }
470 
471  /* determine if we ignore dir or not */
472  if (!ignore_dir && path_drive && dir_drive) {
473  if (towupper(path_drive) == towupper(dir_drive)) {
474  /* exclude path drive letter to use dir */
475  wpath_pos += 2;
476  wpath_len -= 2;
477  }
478  else {
479  /* ignore dir since path drive is different from dir drive */
480  ignore_dir = 1;
481  wdir_len = 0;
482  }
483  }
484 
485  if (!ignore_dir && wpath_len >= 2 && IS_DIR_UNC_P(wpath)) {
486  /* ignore dir since path has UNC root */
487  ignore_dir = 1;
488  wdir_len = 0;
489  }
490  else if (!ignore_dir && wpath_len >= 1 && IS_DIR_SEPARATOR_P(wpath[0]) &&
491  !dir_drive && !(wdir_len >= 2 && IS_DIR_UNC_P(wdir))) {
492  /* ignore dir since path has root slash and dir doesn't have drive or UNC root */
493  ignore_dir = 1;
494  wdir_len = 0;
495  }
496 
497  buffer_len = wpath_len + 1 + wdir_len + 1 + whome_len + 1;
498 
499  buffer = buffer_pos = (wchar_t *)xmalloc((buffer_len + 1) * sizeof(wchar_t));
500 
501  /* add home */
502  if (whome_len) {
503  wcsncpy(buffer_pos, whome, whome_len);
504  buffer_pos += whome_len;
505  }
506 
507  /* Add separator if required */
508  if (whome_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
509  buffer_pos[0] = L'\\';
510  buffer_pos++;
511  }
512 
513  if (wdir_len) {
514  /* tainted if dir is used and dir is tainted */
515  if (!tainted && OBJ_TAINTED(dir))
516  tainted = 1;
517 
518  wcsncpy(buffer_pos, wdir, wdir_len);
519  buffer_pos += wdir_len;
520  }
521 
522  /* add separator if required */
523  if (wdir_len && wcsrchr(L"\\/:", buffer_pos[-1]) == NULL) {
524  buffer_pos[0] = L'\\';
525  buffer_pos++;
526  }
527 
528  /* now deal with path */
529  if (wpath_len) {
530  wcsncpy(buffer_pos, wpath_pos, wpath_len);
531  buffer_pos += wpath_len;
532  }
533 
534  /* GetFullPathNameW requires at least "." to determine current directory */
535  if (wpath_len == 0) {
536  buffer_pos[0] = L'.';
537  buffer_pos++;
538  }
539 
540  /* Ensure buffer is NULL terminated */
541  buffer_pos[0] = L'\0';
542 
543  /* tainted if path is relative */
544  if (!tainted && PathIsRelativeW(buffer) && !(buffer_len >= 2 && IS_DIR_UNC_P(buffer)))
545  tainted = 1;
546 
547  /* FIXME: Make this more robust */
548  /* Determine require buffer size */
549  size = GetFullPathNameW(buffer, PATH_BUFFER_SIZE, wfullpath_buffer, NULL);
550  if (size > PATH_BUFFER_SIZE) {
551  /* allocate more memory than alloted originally by PATH_BUFFER_SIZE */
552  wfullpath = (wchar_t *)xmalloc(size * sizeof(wchar_t));
553  size = GetFullPathNameW(buffer, size, wfullpath, NULL);
554  }
555  else {
556  wfullpath = wfullpath_buffer;
557  }
558 
559  /* Remove any trailing slashes */
560  if (IS_DIR_SEPARATOR_P(wfullpath[size - 1]) &&
561  wfullpath[size - 2] != L':' &&
562  !(size == 2 && IS_DIR_UNC_P(wfullpath))) {
563  size -= 1;
564  wfullpath[size] = L'\0';
565  }
566 
567  /* Remove any trailing dot */
568  if (wfullpath[size - 1] == L'.') {
569  size -= 1;
570  wfullpath[size] = L'\0';
571  }
572 
573  /* removes trailing invalid ':$DATA' */
574  size = remove_invalid_alternative_data(wfullpath, size);
575 
576  /* Replace the trailing path to long name */
577  if (long_name)
578  size = replace_to_long_name(&wfullpath, size, (wfullpath != wfullpath_buffer));
579 
580  /* sanitize backslashes with forwardslashes */
581  replace_wchar(wfullpath, L'\\', L'/');
582 
583  /* convert to char * */
584  size = WideCharToMultiByte(cp, 0, wfullpath, size, NULL, 0, NULL, NULL);
585  if (size > (size_t)RSTRING_LEN(result)) {
586  rb_str_modify(result);
587  rb_str_resize(result, size);
588  }
589 
590  WideCharToMultiByte(cp, 0, wfullpath, size, RSTRING_PTR(result), size, NULL, NULL);
591  rb_str_set_len(result, size);
592 
593  /* convert to VALUE and set the path encoding */
594  if (path_cp == INVALID_CODE_PAGE) {
595  VALUE tmp;
596  size_t len;
597 
599  ENC_CODERANGE_CLEAR(result);
600  tmp = rb_str_encode(result, rb_enc_from_encoding(path_encoding), 0, Qnil);
601  len = RSTRING_LEN(tmp);
602  rb_str_modify(result);
603  rb_str_resize(result, len);
604  memcpy(RSTRING_PTR(result), RSTRING_PTR(tmp), len);
605  rb_str_resize(tmp, 0);
606  }
607  rb_enc_associate(result, path_encoding);
608  ENC_CODERANGE_CLEAR(result);
609 
610  /* makes the result object tainted if expanding tainted strings or returning modified path */
611  if (tainted)
612  OBJ_TAINT(result);
613 
614  /* TODO: better cleanup */
615  if (buffer)
616  xfree(buffer);
617 
618  if (wpath)
619  xfree(wpath);
620 
621  if (wdir)
622  xfree(wdir);
623 
624  if (whome)
625  xfree(whome);
626 
627  if (wfullpath && wfullpath != wfullpath_buffer)
628  xfree(wfullpath);
629 
630  if (fullpath)
631  xfree(fullpath);
632 
633  return result;
634 }
635 
636 int
637 rb_file_load_ok(const char *path)
638 {
639  int ret = 1;
640  DWORD attr = GetFileAttributes(path);
641  if (attr == INVALID_FILE_ATTRIBUTES ||
642  attr & FILE_ATTRIBUTE_DIRECTORY) {
643  ret = 0;
644  }
645  else {
646  HANDLE h = CreateFile(path, GENERIC_READ,
647  FILE_SHARE_READ | FILE_SHARE_WRITE,
648  NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
649  if (h != INVALID_HANDLE_VALUE) {
650  CloseHandle(h);
651  }
652  else {
653  ret = 0;
654  }
655  }
656  return ret;
657 }
658 
/*
 NOTE(review): this definition is incomplete in this listing -- the embedded
 line numbers skip 660, 662 and 665, so the function name and the statements
 of the body are not visible. Judging by the surviving comment, it presumably
 sets up the rb_code_page cache and protects it from the garbage collector;
 confirm against the original file before relying on this.
*/
659 void
661 {
663 
664  /* prevent GC removing rb_code_page */
666 }
667