GDB (API)
|
00001 /* Character set conversion support for GDB. 00002 00003 Copyright (C) 2001-2013 Free Software Foundation, Inc. 00004 00005 This file is part of GDB. 00006 00007 This program is free software; you can redistribute it and/or modify 00008 it under the terms of the GNU General Public License as published by 00009 the Free Software Foundation; either version 3 of the License, or 00010 (at your option) any later version. 00011 00012 This program is distributed in the hope that it will be useful, 00013 but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 GNU General Public License for more details. 00016 00017 You should have received a copy of the GNU General Public License 00018 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 00019 00020 #include "defs.h" 00021 #include "charset.h" 00022 #include "gdbcmd.h" 00023 #include "gdb_assert.h" 00024 #include "gdb_obstack.h" 00025 #include "gdb_wait.h" 00026 #include "charset-list.h" 00027 #include "vec.h" 00028 #include "environ.h" 00029 #include "arch-utils.h" 00030 #include "gdb_vecs.h" 00031 00032 #include <stddef.h> 00033 #include "gdb_string.h" 00034 #include <ctype.h> 00035 00036 #ifdef USE_WIN32API 00037 #include <windows.h> 00038 #endif 00039 00040 /* How GDB's character set support works 00041 00042 GDB has three global settings: 00043 00044 - The `current host character set' is the character set GDB should 00045 use in talking to the user, and which (hopefully) the user's 00046 terminal knows how to display properly. Most users should not 00047 change this. 00048 00049 - The `current target character set' is the character set the 00050 program being debugged uses. 00051 00052 - The `current target wide character set' is the wide character set 00053 the program being debugged uses, that is, the encoding used for 00054 wchar_t. 00055 00056 There are commands to set each of these, and mechanisms for 00057 choosing reasonable default values. GDB has a global list of 00058 character sets that it can use as its host or target character 00059 sets. 00060 00061 The header file `charset.h' declares various functions that 00062 different pieces of GDB need to perform tasks like: 00063 00064 - printing target strings and characters to the user's terminal 00065 (mostly target->host conversions), 00066 00067 - building target-appropriate representations of strings and 00068 characters the user enters in expressions (mostly host->target 00069 conversions), 00070 00071 and so on. 00072 00073 To avoid excessive code duplication and maintenance efforts, 00074 GDB simply requires a capable iconv function. Users on platforms 00075 without a suitable iconv can use the GNU iconv library. */ 00076 00077 00078 #ifdef PHONY_ICONV 00079 00080 /* Provide a phony iconv that does as little as possible. Also, 00081 arrange for there to be a single available character set. */ 00082 00083 #undef GDB_DEFAULT_HOST_CHARSET 00084 #define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1" 00085 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" 00086 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1" 00087 #undef DEFAULT_CHARSET_NAMES 00088 #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET , 00089 00090 #undef iconv_t 00091 #define iconv_t int 00092 #undef iconv_open 00093 #define iconv_open phony_iconv_open 00094 #undef iconv 00095 #define iconv phony_iconv 00096 #undef iconv_close 00097 #define iconv_close phony_iconv_close 00098 00099 #undef ICONV_CONST 00100 #define ICONV_CONST const 00101 00102 /* Some systems don't have EILSEQ, so we define it here, but not as 00103 EINVAL, because callers of `iconv' want to distinguish EINVAL and 00104 EILSEQ. This is what iconv.h from libiconv does as well. Note 00105 that wchar.h may also define EILSEQ, so this needs to be after we 00106 include wchar.h, which happens in defs.h through gdb_wchar.h. */ 00107 #ifndef EILSEQ 00108 #define EILSEQ ENOENT 00109 #endif 00110 00111 static iconv_t 00112 phony_iconv_open (const char *to, const char *from) 00113 { 00114 /* We allow conversions from UTF-32BE, wchar_t, and the host charset. 00115 We allow conversions to wchar_t and the host charset. */ 00116 if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t") 00117 && strcmp (from, GDB_DEFAULT_HOST_CHARSET)) 00118 return -1; 00119 if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET)) 00120 return -1; 00121 00122 /* Return 1 if we are converting from UTF-32BE, 0 otherwise. This is 00123 used as a flag in calls to iconv. */ 00124 return !strcmp (from, "UTF-32BE"); 00125 } 00126 00127 static int 00128 phony_iconv_close (iconv_t arg) 00129 { 00130 return 0; 00131 } 00132 00133 static size_t 00134 phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft, 00135 char **outbuf, size_t *outbytesleft) 00136 { 00137 if (utf_flag) 00138 { 00139 while (*inbytesleft >= 4) 00140 { 00141 size_t j; 00142 unsigned long c = 0; 00143 00144 for (j = 0; j < 4; ++j) 00145 { 00146 c <<= 8; 00147 c += (*inbuf)[j] & 0xff; 00148 } 00149 00150 if (c >= 256) 00151 { 00152 errno = EILSEQ; 00153 return -1; 00154 } 00155 **outbuf = c & 0xff; 00156 ++*outbuf; 00157 --*outbytesleft; 00158 00159 ++*inbuf; 00160 *inbytesleft -= 4; 00161 } 00162 if (*inbytesleft < 4) 00163 { 00164 errno = EINVAL; 00165 return -1; 00166 } 00167 } 00168 else 00169 { 00170 /* In all other cases we simply copy input bytes to the 00171 output. */ 00172 size_t amt = *inbytesleft; 00173 00174 if (amt > *outbytesleft) 00175 amt = *outbytesleft; 00176 memcpy (*outbuf, *inbuf, amt); 00177 *inbuf += amt; 00178 *outbuf += amt; 00179 *inbytesleft -= amt; 00180 *outbytesleft -= amt; 00181 } 00182 00183 if (*inbytesleft) 00184 { 00185 errno = E2BIG; 00186 return -1; 00187 } 00188 00189 /* The number of non-reversible conversions -- but they were all 00190 reversible. */ 00191 return 0; 00192 } 00193 00194 #endif 00195 00196 00197 00198 /* The global lists of character sets and translations. */ 00199 00200 00201 #ifndef GDB_DEFAULT_TARGET_CHARSET 00202 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" 00203 #endif 00204 00205 #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET 00206 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32" 00207 #endif 00208 00209 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET; 00210 static const char *host_charset_name = "auto"; 00211 static void 00212 show_host_charset_name (struct ui_file *file, int from_tty, 00213 struct cmd_list_element *c, 00214 const char *value) 00215 { 00216 if (!strcmp (value, "auto")) 00217 fprintf_filtered (file, 00218 _("The host character set is \"auto; currently %s\".\n"), 00219 auto_host_charset_name); 00220 else 00221 fprintf_filtered (file, _("The host character set is \"%s\".\n"), value); 00222 } 00223 00224 static const char *target_charset_name = "auto"; 00225 static void 00226 show_target_charset_name (struct ui_file *file, int from_tty, 00227 struct cmd_list_element *c, const char *value) 00228 { 00229 if (!strcmp (value, "auto")) 00230 fprintf_filtered (file, 00231 _("The target character set is \"auto; " 00232 "currently %s\".\n"), 00233 gdbarch_auto_charset (get_current_arch ())); 00234 else 00235 fprintf_filtered (file, _("The target character set is \"%s\".\n"), 00236 value); 00237 } 00238 00239 static const char *target_wide_charset_name = "auto"; 00240 static void 00241 show_target_wide_charset_name (struct ui_file *file, 00242 int from_tty, 00243 struct cmd_list_element *c, 00244 const char *value) 00245 { 00246 if (!strcmp (value, "auto")) 00247 fprintf_filtered (file, 00248 _("The target wide character set is \"auto; " 00249 "currently %s\".\n"), 00250 gdbarch_auto_wide_charset (get_current_arch ())); 00251 else 00252 fprintf_filtered (file, _("The target wide character set is \"%s\".\n"), 00253 value); 00254 } 00255 00256 static const char *default_charset_names[] = 00257 { 00258 DEFAULT_CHARSET_NAMES 00259 0 00260 }; 00261 00262 static const char **charset_enum; 00263 00264 00265 /* If the target wide character set has big- or little-endian 00266 variants, these are the corresponding names. */ 00267 static const char *target_wide_charset_be_name; 00268 static const char *target_wide_charset_le_name; 00269 00270 /* The architecture for which the BE- and LE-names are valid. */ 00271 static struct gdbarch *be_le_arch; 00272 00273 /* A helper function which sets the target wide big- and little-endian 00274 character set names, if possible. */ 00275 00276 static void 00277 set_be_le_names (struct gdbarch *gdbarch) 00278 { 00279 int i, len; 00280 const char *target_wide; 00281 00282 if (be_le_arch == gdbarch) 00283 return; 00284 be_le_arch = gdbarch; 00285 00286 target_wide_charset_le_name = NULL; 00287 target_wide_charset_be_name = NULL; 00288 00289 target_wide = target_wide_charset_name; 00290 if (!strcmp (target_wide, "auto")) 00291 target_wide = gdbarch_auto_wide_charset (gdbarch); 00292 00293 len = strlen (target_wide); 00294 for (i = 0; charset_enum[i]; ++i) 00295 { 00296 if (strncmp (target_wide, charset_enum[i], len)) 00297 continue; 00298 if ((charset_enum[i][len] == 'B' 00299 || charset_enum[i][len] == 'L') 00300 && charset_enum[i][len + 1] == 'E' 00301 && charset_enum[i][len + 2] == '\0') 00302 { 00303 if (charset_enum[i][len] == 'B') 00304 target_wide_charset_be_name = charset_enum[i]; 00305 else 00306 target_wide_charset_le_name = charset_enum[i]; 00307 } 00308 } 00309 } 00310 00311 /* 'Set charset', 'set host-charset', 'set target-charset', 'set 00312 target-wide-charset', 'set charset' sfunc's. */ 00313 00314 static void 00315 validate (struct gdbarch *gdbarch) 00316 { 00317 iconv_t desc; 00318 const char *host_cset = host_charset (); 00319 const char *target_cset = target_charset (gdbarch); 00320 const char *target_wide_cset = target_wide_charset_name; 00321 00322 if (!strcmp (target_wide_cset, "auto")) 00323 target_wide_cset = gdbarch_auto_wide_charset (gdbarch); 00324 00325 desc = iconv_open (target_wide_cset, host_cset); 00326 if (desc == (iconv_t) -1) 00327 error (_("Cannot convert between character sets `%s' and `%s'"), 00328 target_wide_cset, host_cset); 00329 iconv_close (desc); 00330 00331 desc = iconv_open (target_cset, host_cset); 00332 if (desc == (iconv_t) -1) 00333 error (_("Cannot convert between character sets `%s' and `%s'"), 00334 target_cset, host_cset); 00335 iconv_close (desc); 00336 00337 /* Clear the cache. */ 00338 be_le_arch = NULL; 00339 } 00340 00341 /* This is the sfunc for the 'set charset' command. */ 00342 static void 00343 set_charset_sfunc (char *charset, int from_tty, 00344 struct cmd_list_element *c) 00345 { 00346 /* CAREFUL: set the target charset here as well. */ 00347 target_charset_name = host_charset_name; 00348 validate (get_current_arch ()); 00349 } 00350 00351 /* 'set host-charset' command sfunc. We need a wrapper here because 00352 the function needs to have a specific signature. */ 00353 static void 00354 set_host_charset_sfunc (char *charset, int from_tty, 00355 struct cmd_list_element *c) 00356 { 00357 validate (get_current_arch ()); 00358 } 00359 00360 /* Wrapper for the 'set target-charset' command. */ 00361 static void 00362 set_target_charset_sfunc (char *charset, int from_tty, 00363 struct cmd_list_element *c) 00364 { 00365 validate (get_current_arch ()); 00366 } 00367 00368 /* Wrapper for the 'set target-wide-charset' command. */ 00369 static void 00370 set_target_wide_charset_sfunc (char *charset, int from_tty, 00371 struct cmd_list_element *c) 00372 { 00373 validate (get_current_arch ()); 00374 } 00375 00376 /* sfunc for the 'show charset' command. */ 00377 static void 00378 show_charset (struct ui_file *file, int from_tty, 00379 struct cmd_list_element *c, 00380 const char *name) 00381 { 00382 show_host_charset_name (file, from_tty, c, host_charset_name); 00383 show_target_charset_name (file, from_tty, c, target_charset_name); 00384 show_target_wide_charset_name (file, from_tty, c, 00385 target_wide_charset_name); 00386 } 00387 00388 00389 /* Accessor functions. */ 00390 00391 const char * 00392 host_charset (void) 00393 { 00394 if (!strcmp (host_charset_name, "auto")) 00395 return auto_host_charset_name; 00396 return host_charset_name; 00397 } 00398 00399 const char * 00400 target_charset (struct gdbarch *gdbarch) 00401 { 00402 if (!strcmp (target_charset_name, "auto")) 00403 return gdbarch_auto_charset (gdbarch); 00404 return target_charset_name; 00405 } 00406 00407 const char * 00408 target_wide_charset (struct gdbarch *gdbarch) 00409 { 00410 enum bfd_endian byte_order = gdbarch_byte_order (gdbarch); 00411 00412 set_be_le_names (gdbarch); 00413 if (byte_order == BFD_ENDIAN_BIG) 00414 { 00415 if (target_wide_charset_be_name) 00416 return target_wide_charset_be_name; 00417 } 00418 else 00419 { 00420 if (target_wide_charset_le_name) 00421 return target_wide_charset_le_name; 00422 } 00423 00424 if (!strcmp (target_wide_charset_name, "auto")) 00425 return gdbarch_auto_wide_charset (gdbarch); 00426 00427 return target_wide_charset_name; 00428 } 00429 00430 00431 /* Host character set management. For the time being, we assume that 00432 the host character set is some superset of ASCII. */ 00433 00434 char 00435 host_letter_to_control_character (char c) 00436 { 00437 if (c == '?') 00438 return 0177; 00439 return c & 0237; 00440 } 00441 00442 /* Convert a host character, C, to its hex value. C must already have 00443 been validated using isxdigit. */ 00444 00445 int 00446 host_hex_value (char c) 00447 { 00448 if (isdigit (c)) 00449 return c - '0'; 00450 if (c >= 'a' && c <= 'f') 00451 return 10 + c - 'a'; 00452 gdb_assert (c >= 'A' && c <= 'F'); 00453 return 10 + c - 'A'; 00454 } 00455 00456 00457 /* Public character management functions. */ 00458 00459 /* A cleanup function which is run to close an iconv descriptor. */ 00460 00461 static void 00462 cleanup_iconv (void *p) 00463 { 00464 iconv_t *descp = p; 00465 iconv_close (*descp); 00466 } 00467 00468 void 00469 convert_between_encodings (const char *from, const char *to, 00470 const gdb_byte *bytes, unsigned int num_bytes, 00471 int width, struct obstack *output, 00472 enum transliterations translit) 00473 { 00474 iconv_t desc; 00475 struct cleanup *cleanups; 00476 size_t inleft; 00477 ICONV_CONST char *inp; 00478 unsigned int space_request; 00479 00480 /* Often, the host and target charsets will be the same. */ 00481 if (!strcmp (from, to)) 00482 { 00483 obstack_grow (output, bytes, num_bytes); 00484 return; 00485 } 00486 00487 desc = iconv_open (to, from); 00488 if (desc == (iconv_t) -1) 00489 perror_with_name (_("Converting character sets")); 00490 cleanups = make_cleanup (cleanup_iconv, &desc); 00491 00492 inleft = num_bytes; 00493 inp = (ICONV_CONST char *) bytes; 00494 00495 space_request = num_bytes; 00496 00497 while (inleft > 0) 00498 { 00499 char *outp; 00500 size_t outleft, r; 00501 int old_size; 00502 00503 old_size = obstack_object_size (output); 00504 obstack_blank (output, space_request); 00505 00506 outp = obstack_base (output) + old_size; 00507 outleft = space_request; 00508 00509 r = iconv (desc, &inp, &inleft, &outp, &outleft); 00510 00511 /* Now make sure that the object on the obstack only includes 00512 bytes we have converted. */ 00513 obstack_blank (output, - (int) outleft); 00514 00515 if (r == (size_t) -1) 00516 { 00517 switch (errno) 00518 { 00519 case EILSEQ: 00520 { 00521 int i; 00522 00523 /* Invalid input sequence. */ 00524 if (translit == translit_none) 00525 error (_("Could not convert character " 00526 "to `%s' character set"), to); 00527 00528 /* We emit escape sequence for the bytes, skip them, 00529 and try again. */ 00530 for (i = 0; i < width; ++i) 00531 { 00532 char octal[5]; 00533 00534 xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff); 00535 obstack_grow_str (output, octal); 00536 00537 ++inp; 00538 --inleft; 00539 } 00540 } 00541 break; 00542 00543 case E2BIG: 00544 /* We ran out of space in the output buffer. Make it 00545 bigger next time around. */ 00546 space_request *= 2; 00547 break; 00548 00549 case EINVAL: 00550 /* Incomplete input sequence. FIXME: ought to report this 00551 to the caller somehow. */ 00552 inleft = 0; 00553 break; 00554 00555 default: 00556 perror_with_name (_("Internal error while " 00557 "converting character sets")); 00558 } 00559 } 00560 } 00561 00562 do_cleanups (cleanups); 00563 } 00564 00565 00566 00567 /* An iterator that returns host wchar_t's from a target string. */ 00568 struct wchar_iterator 00569 { 00570 /* The underlying iconv descriptor. */ 00571 iconv_t desc; 00572 00573 /* The input string. This is updated as convert characters. */ 00574 const gdb_byte *input; 00575 /* The number of bytes remaining in the input. */ 00576 size_t bytes; 00577 00578 /* The width of an input character. */ 00579 size_t width; 00580 00581 /* The output buffer and its size. */ 00582 gdb_wchar_t *out; 00583 size_t out_size; 00584 }; 00585 00586 /* Create a new iterator. */ 00587 struct wchar_iterator * 00588 make_wchar_iterator (const gdb_byte *input, size_t bytes, 00589 const char *charset, size_t width) 00590 { 00591 struct wchar_iterator *result; 00592 iconv_t desc; 00593 00594 desc = iconv_open (INTERMEDIATE_ENCODING, charset); 00595 if (desc == (iconv_t) -1) 00596 perror_with_name (_("Converting character sets")); 00597 00598 result = XNEW (struct wchar_iterator); 00599 result->desc = desc; 00600 result->input = input; 00601 result->bytes = bytes; 00602 result->width = width; 00603 00604 result->out = XNEW (gdb_wchar_t); 00605 result->out_size = 1; 00606 00607 return result; 00608 } 00609 00610 static void 00611 do_cleanup_iterator (void *p) 00612 { 00613 struct wchar_iterator *iter = p; 00614 00615 iconv_close (iter->desc); 00616 xfree (iter->out); 00617 xfree (iter); 00618 } 00619 00620 struct cleanup * 00621 make_cleanup_wchar_iterator (struct wchar_iterator *iter) 00622 { 00623 return make_cleanup (do_cleanup_iterator, iter); 00624 } 00625 00626 int 00627 wchar_iterate (struct wchar_iterator *iter, 00628 enum wchar_iterate_result *out_result, 00629 gdb_wchar_t **out_chars, 00630 const gdb_byte **ptr, 00631 size_t *len) 00632 { 00633 size_t out_request; 00634 00635 /* Try to convert some characters. At first we try to convert just 00636 a single character. The reason for this is that iconv does not 00637 necessarily update its outgoing arguments when it encounters an 00638 invalid input sequence -- but we want to reliably report this to 00639 our caller so it can emit an escape sequence. */ 00640 out_request = 1; 00641 while (iter->bytes > 0) 00642 { 00643 ICONV_CONST char *inptr = (ICONV_CONST char *) iter->input; 00644 char *outptr = (char *) &iter->out[0]; 00645 const gdb_byte *orig_inptr = iter->input; 00646 size_t orig_in = iter->bytes; 00647 size_t out_avail = out_request * sizeof (gdb_wchar_t); 00648 size_t num; 00649 size_t r = iconv (iter->desc, &inptr, &iter->bytes, &outptr, &out_avail); 00650 00651 iter->input = (gdb_byte *) inptr; 00652 00653 if (r == (size_t) -1) 00654 { 00655 switch (errno) 00656 { 00657 case EILSEQ: 00658 /* Invalid input sequence. We still might have 00659 converted a character; if so, return it. */ 00660 if (out_avail < out_request * sizeof (gdb_wchar_t)) 00661 break; 00662 00663 /* Otherwise skip the first invalid character, and let 00664 the caller know about it. */ 00665 *out_result = wchar_iterate_invalid; 00666 *ptr = iter->input; 00667 *len = iter->width; 00668 iter->input += iter->width; 00669 iter->bytes -= iter->width; 00670 return 0; 00671 00672 case E2BIG: 00673 /* We ran out of space. We still might have converted a 00674 character; if so, return it. Otherwise, grow the 00675 buffer and try again. */ 00676 if (out_avail < out_request * sizeof (gdb_wchar_t)) 00677 break; 00678 00679 ++out_request; 00680 if (out_request > iter->out_size) 00681 { 00682 iter->out_size = out_request; 00683 iter->out = xrealloc (iter->out, 00684 out_request * sizeof (gdb_wchar_t)); 00685 } 00686 continue; 00687 00688 case EINVAL: 00689 /* Incomplete input sequence. Let the caller know, and 00690 arrange for future calls to see EOF. */ 00691 *out_result = wchar_iterate_incomplete; 00692 *ptr = iter->input; 00693 *len = iter->bytes; 00694 iter->bytes = 0; 00695 return 0; 00696 00697 default: 00698 perror_with_name (_("Internal error while " 00699 "converting character sets")); 00700 } 00701 } 00702 00703 /* We converted something. */ 00704 num = out_request - out_avail / sizeof (gdb_wchar_t); 00705 *out_result = wchar_iterate_ok; 00706 *out_chars = iter->out; 00707 *ptr = orig_inptr; 00708 *len = orig_in - iter->bytes; 00709 return num; 00710 } 00711 00712 /* Really done. */ 00713 *out_result = wchar_iterate_eof; 00714 return -1; 00715 } 00716 00717 00718 /* The charset.c module initialization function. */ 00719 00720 extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */ 00721 00722 static VEC (char_ptr) *charsets; 00723 00724 #ifdef PHONY_ICONV 00725 00726 static void 00727 find_charset_names (void) 00728 { 00729 VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET); 00730 VEC_safe_push (char_ptr, charsets, NULL); 00731 } 00732 00733 #else /* PHONY_ICONV */ 00734 00735 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but 00736 provides different symbols in the static and dynamic libraries. 00737 So, configure may see libiconvlist but not iconvlist. But, calling 00738 iconvlist is the right thing to do and will work. Hence we do a 00739 check here but unconditionally call iconvlist below. */ 00740 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST) 00741 00742 /* A helper function that adds some character sets to the vector of 00743 all character sets. This is a callback function for iconvlist. */ 00744 00745 static int 00746 add_one (unsigned int count, const char *const *names, void *data) 00747 { 00748 unsigned int i; 00749 00750 for (i = 0; i < count; ++i) 00751 VEC_safe_push (char_ptr, charsets, xstrdup (names[i])); 00752 00753 return 0; 00754 } 00755 00756 static void 00757 find_charset_names (void) 00758 { 00759 iconvlist (add_one, NULL); 00760 VEC_safe_push (char_ptr, charsets, NULL); 00761 } 00762 00763 #else 00764 00765 /* Return non-zero if LINE (output from iconv) should be ignored. 00766 Older iconv programs (e.g. 2.2.2) include the human readable 00767 introduction even when stdout is not a tty. Newer versions omit 00768 the intro if stdout is not a tty. */ 00769 00770 static int 00771 ignore_line_p (const char *line) 00772 { 00773 /* This table is used to filter the output. If this text appears 00774 anywhere in the line, it is ignored (strstr is used). */ 00775 static const char * const ignore_lines[] = 00776 { 00777 "The following", 00778 "not necessarily", 00779 "the FROM and TO", 00780 "listed with several", 00781 NULL 00782 }; 00783 int i; 00784 00785 for (i = 0; ignore_lines[i] != NULL; ++i) 00786 { 00787 if (strstr (line, ignore_lines[i]) != NULL) 00788 return 1; 00789 } 00790 00791 return 0; 00792 } 00793 00794 static void 00795 find_charset_names (void) 00796 { 00797 struct pex_obj *child; 00798 char *args[3]; 00799 int err, status; 00800 int fail = 1; 00801 int flags; 00802 struct gdb_environ *iconv_env; 00803 char *iconv_program; 00804 00805 /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is 00806 not a tty. We need to recognize it and ignore it. This text is 00807 subject to translation, so force LANGUAGE=C. */ 00808 iconv_env = make_environ (); 00809 init_environ (iconv_env); 00810 set_in_environ (iconv_env, "LANGUAGE", "C"); 00811 set_in_environ (iconv_env, "LC_ALL", "C"); 00812 00813 child = pex_init (PEX_USE_PIPES, "iconv", NULL); 00814 00815 #ifdef ICONV_BIN 00816 { 00817 char *iconv_dir = relocate_gdb_directory (ICONV_BIN, 00818 ICONV_BIN_RELOCATABLE); 00819 iconv_program = concat (iconv_dir, SLASH_STRING, "iconv", NULL); 00820 xfree (iconv_dir); 00821 } 00822 #else 00823 iconv_program = xstrdup ("iconv"); 00824 #endif 00825 args[0] = iconv_program; 00826 args[1] = "-l"; 00827 args[2] = NULL; 00828 flags = PEX_STDERR_TO_STDOUT; 00829 #ifndef ICONV_BIN 00830 flags |= PEX_SEARCH; 00831 #endif 00832 /* Note that we simply ignore errors here. */ 00833 if (!pex_run_in_environment (child, flags, 00834 args[0], args, environ_vector (iconv_env), 00835 NULL, NULL, &err)) 00836 { 00837 FILE *in = pex_read_output (child, 0); 00838 00839 /* POSIX says that iconv -l uses an unspecified format. We 00840 parse the glibc and libiconv formats; feel free to add others 00841 as needed. */ 00842 00843 while (in != NULL && !feof (in)) 00844 { 00845 /* The size of buf is chosen arbitrarily. */ 00846 char buf[1024]; 00847 char *start, *r; 00848 int len; 00849 00850 r = fgets (buf, sizeof (buf), in); 00851 if (!r) 00852 break; 00853 len = strlen (r); 00854 if (len <= 3) 00855 continue; 00856 if (ignore_line_p (r)) 00857 continue; 00858 00859 /* Strip off the newline. */ 00860 --len; 00861 /* Strip off one or two '/'s. glibc will print lines like 00862 "8859_7//", but also "10646-1:1993/UCS4/". */ 00863 if (buf[len - 1] == '/') 00864 --len; 00865 if (buf[len - 1] == '/') 00866 --len; 00867 buf[len] = '\0'; 00868 00869 /* libiconv will print multiple entries per line, separated 00870 by spaces. Older iconvs will print multiple entries per 00871 line, indented by two spaces, and separated by ", " 00872 (i.e. the human readable form). */ 00873 start = buf; 00874 while (1) 00875 { 00876 int keep_going; 00877 char *p; 00878 00879 /* Skip leading blanks. */ 00880 for (p = start; *p && *p == ' '; ++p) 00881 ; 00882 start = p; 00883 /* Find the next space, comma, or end-of-line. */ 00884 for ( ; *p && *p != ' ' && *p != ','; ++p) 00885 ; 00886 /* Ignore an empty result. */ 00887 if (p == start) 00888 break; 00889 keep_going = *p; 00890 *p = '\0'; 00891 VEC_safe_push (char_ptr, charsets, xstrdup (start)); 00892 if (!keep_going) 00893 break; 00894 /* Skip any extra spaces. */ 00895 for (start = p + 1; *start && *start == ' '; ++start) 00896 ; 00897 } 00898 } 00899 00900 if (pex_get_status (child, 1, &status) 00901 && WIFEXITED (status) && !WEXITSTATUS (status)) 00902 fail = 0; 00903 00904 } 00905 00906 xfree (iconv_program); 00907 pex_free (child); 00908 free_environ (iconv_env); 00909 00910 if (fail) 00911 { 00912 /* Some error occurred, so drop the vector. */ 00913 free_char_ptr_vec (charsets); 00914 charsets = NULL; 00915 } 00916 else 00917 VEC_safe_push (char_ptr, charsets, NULL); 00918 } 00919 00920 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */ 00921 #endif /* PHONY_ICONV */ 00922 00923 /* The "auto" target charset used by default_auto_charset. */ 00924 static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET; 00925 00926 const char * 00927 default_auto_charset (void) 00928 { 00929 return auto_target_charset_name; 00930 } 00931 00932 const char * 00933 default_auto_wide_charset (void) 00934 { 00935 return GDB_DEFAULT_TARGET_WIDE_CHARSET; 00936 } 00937 00938 00939 #ifdef USE_INTERMEDIATE_ENCODING_FUNCTION 00940 /* Macro used for UTF or UCS endianness suffix. */ 00941 #if WORDS_BIGENDIAN 00942 #define ENDIAN_SUFFIX "BE" 00943 #else 00944 #define ENDIAN_SUFFIX "LE" 00945 #endif 00946 00947 /* The code below serves to generate a compile time error if 00948 gdb_wchar_t type is not of size 2 nor 4, despite the fact that 00949 macro __STDC_ISO_10646__ is defined. 00950 This is better than a gdb_assert call, because GDB cannot handle 00951 strings correctly if this size is different. */ 00952 00953 extern char your_gdb_wchar_t_is_bogus[(sizeof (gdb_wchar_t) == 2 00954 || sizeof (gdb_wchar_t) == 4) 00955 ? 1 : -1]; 00956 00957 /* intermediate_encoding returns the charset unsed internally by 00958 GDB to convert between target and host encodings. As the test above 00959 compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes. 00960 UTF-16/32 is tested first, UCS-2/4 is tested as a second option, 00961 otherwise an error is generated. */ 00962 00963 const char * 00964 intermediate_encoding (void) 00965 { 00966 iconv_t desc; 00967 static const char *stored_result = NULL; 00968 char *result; 00969 00970 if (stored_result) 00971 return stored_result; 00972 result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8), 00973 ENDIAN_SUFFIX); 00974 /* Check that the name is supported by iconv_open. */ 00975 desc = iconv_open (result, host_charset ()); 00976 if (desc != (iconv_t) -1) 00977 { 00978 iconv_close (desc); 00979 stored_result = result; 00980 return result; 00981 } 00982 /* Not valid, free the allocated memory. */ 00983 xfree (result); 00984 /* Second try, with UCS-2 type. */ 00985 result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t), 00986 ENDIAN_SUFFIX); 00987 /* Check that the name is supported by iconv_open. */ 00988 desc = iconv_open (result, host_charset ()); 00989 if (desc != (iconv_t) -1) 00990 { 00991 iconv_close (desc); 00992 stored_result = result; 00993 return result; 00994 } 00995 /* Not valid, free the allocated memory. */ 00996 xfree (result); 00997 /* No valid charset found, generate error here. */ 00998 error (_("Unable to find a vaild charset for string conversions")); 00999 } 01000 01001 #endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */ 01002 01003 void 01004 _initialize_charset (void) 01005 { 01006 /* The first element is always "auto". */ 01007 VEC_safe_push (char_ptr, charsets, xstrdup ("auto")); 01008 find_charset_names (); 01009 01010 if (VEC_length (char_ptr, charsets) > 1) 01011 charset_enum = (const char **) VEC_address (char_ptr, charsets); 01012 else 01013 charset_enum = default_charset_names; 01014 01015 #ifndef PHONY_ICONV 01016 #ifdef HAVE_LANGINFO_CODESET 01017 /* The result of nl_langinfo may be overwritten later. This may 01018 leak a little memory, if the user later changes the host charset, 01019 but that doesn't matter much. */ 01020 auto_host_charset_name = xstrdup (nl_langinfo (CODESET)); 01021 /* Solaris will return `646' here -- but the Solaris iconv then does 01022 not accept this. Darwin (and maybe FreeBSD) may return "" here, 01023 which GNU libiconv doesn't like (infinite loop). */ 01024 if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name) 01025 auto_host_charset_name = "ASCII"; 01026 auto_target_charset_name = auto_host_charset_name; 01027 #elif defined (USE_WIN32API) 01028 { 01029 /* "CP" + x<=5 digits + paranoia. */ 01030 static char w32_host_default_charset[16]; 01031 01032 snprintf (w32_host_default_charset, sizeof w32_host_default_charset, 01033 "CP%d", GetACP()); 01034 auto_host_charset_name = w32_host_default_charset; 01035 auto_target_charset_name = auto_host_charset_name; 01036 } 01037 #endif 01038 #endif 01039 01040 add_setshow_enum_cmd ("charset", class_support, 01041 charset_enum, &host_charset_name, _("\ 01042 Set the host and target character sets."), _("\ 01043 Show the host and target character sets."), _("\ 01044 The `host character set' is the one used by the system GDB is running on.\n\ 01045 The `target character set' is the one used by the program being debugged.\n\ 01046 You may only use supersets of ASCII for your host character set; GDB does\n\ 01047 not support any others.\n\ 01048 To see a list of the character sets GDB supports, type `set charset <TAB>'."), 01049 /* Note that the sfunc below needs to set 01050 target_charset_name, because the 'set 01051 charset' command sets two variables. */ 01052 set_charset_sfunc, 01053 show_charset, 01054 &setlist, &showlist); 01055 01056 add_setshow_enum_cmd ("host-charset", class_support, 01057 charset_enum, &host_charset_name, _("\ 01058 Set the host character set."), _("\ 01059 Show the host character set."), _("\ 01060 The `host character set' is the one used by the system GDB is running on.\n\ 01061 You may only use supersets of ASCII for your host character set; GDB does\n\ 01062 not support any others.\n\ 01063 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."), 01064 set_host_charset_sfunc, 01065 show_host_charset_name, 01066 &setlist, &showlist); 01067 01068 add_setshow_enum_cmd ("target-charset", class_support, 01069 charset_enum, &target_charset_name, _("\ 01070 Set the target character set."), _("\ 01071 Show the target character set."), _("\ 01072 The `target character set' is the one used by the program being debugged.\n\ 01073 GDB translates characters and strings between the host and target\n\ 01074 character sets as needed.\n\ 01075 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"), 01076 set_target_charset_sfunc, 01077 show_target_charset_name, 01078 &setlist, &showlist); 01079 01080 add_setshow_enum_cmd ("target-wide-charset", class_support, 01081 charset_enum, &target_wide_charset_name, 01082 _("\ 01083 Set the target wide character set."), _("\ 01084 Show the target wide character set."), _("\ 01085 The `target wide character set' is the one used by the program being debugged.\ 01086 \nIn particular it is the encoding used by `wchar_t'.\n\ 01087 GDB translates characters and strings between the host and target\n\ 01088 character sets as needed.\n\ 01089 To see a list of the character sets GDB supports, type\n\ 01090 `set target-wide-charset'<TAB>"), 01091 set_target_wide_charset_sfunc, 01092 show_target_wide_charset_name, 01093 &setlist, &showlist); 01094 }