GDB (API): /home/stan/gdb/src/gdb/charset.c Source File

Go to the documentation of this file.
00001 /* Character set conversion support for GDB.
00002 
00003    Copyright (C) 2001-2013 Free Software Foundation, Inc.
00004 
00005    This file is part of GDB.
00006 
00007    This program is free software; you can redistribute it and/or modify
00008    it under the terms of the GNU General Public License as published by
00009    the Free Software Foundation; either version 3 of the License, or
00010    (at your option) any later version.
00011 
00012    This program is distributed in the hope that it will be useful,
00013    but WITHOUT ANY WARRANTY; without even the implied warranty of
00014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015    GNU General Public License for more details.
00016 
00017    You should have received a copy of the GNU General Public License
00018    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
00019 
00020 #include "defs.h"
00021 #include "charset.h"
00022 #include "gdbcmd.h"
00023 #include "gdb_assert.h"
00024 #include "gdb_obstack.h"
00025 #include "gdb_wait.h"
00026 #include "charset-list.h"
00027 #include "vec.h"
00028 #include "environ.h"
00029 #include "arch-utils.h"
00030 #include "gdb_vecs.h"
00031 
00032 #include <stddef.h>
00033 #include "gdb_string.h"
00034 #include <ctype.h>
00035 
00036 #ifdef USE_WIN32API
00037 #include <windows.h>
00038 #endif
00039 
00040 /* How GDB's character set support works
00041 
00042    GDB has three global settings:
00043 
00044    - The `current host character set' is the character set GDB should
00045      use in talking to the user, and which (hopefully) the user's
00046      terminal knows how to display properly.  Most users should not
00047      change this.
00048 
00049    - The `current target character set' is the character set the
00050      program being debugged uses.
00051 
00052    - The `current target wide character set' is the wide character set
00053      the program being debugged uses, that is, the encoding used for
00054      wchar_t.
00055 
00056    There are commands to set each of these, and mechanisms for
00057    choosing reasonable default values.  GDB has a global list of
00058    character sets that it can use as its host or target character
00059    sets.
00060 
00061    The header file `charset.h' declares various functions that
00062    different pieces of GDB need to perform tasks like:
00063 
00064    - printing target strings and characters to the user's terminal
00065      (mostly target->host conversions),
00066 
00067    - building target-appropriate representations of strings and
00068      characters the user enters in expressions (mostly host->target
00069      conversions),
00070 
00071      and so on.
00072      
00073    To avoid excessive code duplication and maintenance efforts,
00074    GDB simply requires a capable iconv function.  Users on platforms
00075    without a suitable iconv can use the GNU iconv library.  */
00076 
00077 
00078 #ifdef PHONY_ICONV
00079 
00080 /* Provide a phony iconv that does as little as possible.  Also,
00081    arrange for there to be a single available character set.  */
00082 
00083 #undef GDB_DEFAULT_HOST_CHARSET
00084 #define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
00085 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
00086 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "ISO-8859-1"
00087 #undef DEFAULT_CHARSET_NAMES
00088 #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,
00089 
00090 #undef iconv_t
00091 #define iconv_t int
00092 #undef iconv_open
00093 #define iconv_open phony_iconv_open
00094 #undef iconv
00095 #define iconv phony_iconv
00096 #undef iconv_close
00097 #define iconv_close phony_iconv_close
00098 
00099 #undef ICONV_CONST
00100 #define ICONV_CONST const
00101 
00102 /* Some systems don't have EILSEQ, so we define it here, but not as
00103    EINVAL, because callers of `iconv' want to distinguish EINVAL and
00104    EILSEQ.  This is what iconv.h from libiconv does as well.  Note
00105    that wchar.h may also define EILSEQ, so this needs to be after we
00106    include wchar.h, which happens in defs.h through gdb_wchar.h.  */
00107 #ifndef EILSEQ
00108 #define EILSEQ ENOENT
00109 #endif
00110 
00111 static iconv_t
00112 phony_iconv_open (const char *to, const char *from)
00113 {
00114   /* We allow conversions from UTF-32BE, wchar_t, and the host charset.
00115      We allow conversions to wchar_t and the host charset.  */
00116   if (strcmp (from, "UTF-32BE") && strcmp (from, "wchar_t")
00117       && strcmp (from, GDB_DEFAULT_HOST_CHARSET))
00118     return -1;
00119   if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET))
00120     return -1;
00121 
00122   /* Return 1 if we are converting from UTF-32BE, 0 otherwise.  This is
00123      used as a flag in calls to iconv.  */
00124   return !strcmp (from, "UTF-32BE");
00125 }
00126 
/* Close a phony conversion descriptor.  There is nothing to release,
   so ARG is ignored and 0 is always returned.  */

static int
phony_iconv_close (iconv_t arg)
{
  (void) arg;

  return 0;
}
00132 
/* A phony iconv.  UTF_FLAG is the descriptor returned by
   phony_iconv_open: non-zero means the input is UTF-32BE, zero means
   input bytes are copied to the output unchanged.

   *INBUF / *INBYTESLEFT describe the unconsumed input and *OUTBUF /
   *OUTBYTESLEFT the free output space; all four are advanced past
   whatever was successfully converted, even when an error is
   reported.

   Returns 0 when all the input was converted.  Otherwise returns
   (size_t) -1 with errno set to:
     EILSEQ  a UTF-32BE character is >= 256 and has no single-byte
             representation;
     EINVAL  the UTF-32BE input ends with an incomplete (fewer than 4
             byte) character;
     E2BIG   the output buffer is full but input remains.  */

static size_t
phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft,
             char **outbuf, size_t *outbytesleft)
{
  if (utf_flag)
    {
      while (*inbytesleft >= 4)
        {
          size_t j;
          unsigned long c = 0;

          /* Assemble one big-endian 32-bit code point.  */
          for (j = 0; j < 4; ++j)
            {
              c <<= 8;
              c += (*inbuf)[j] & 0xff;
            }

          if (c >= 256)
            {
              errno = EILSEQ;
              return (size_t) -1;
            }

          /* Never write past the end of the caller's buffer.  */
          if (*outbytesleft < 1)
            {
              errno = E2BIG;
              return (size_t) -1;
            }

          **outbuf = c & 0xff;
          ++*outbuf;
          --*outbytesleft;

          /* One input character is four bytes wide.  */
          *inbuf += 4;
          *inbytesleft -= 4;
        }

      /* Only leftover bytes constitute an incomplete sequence; fully
         consumed input is a success.  */
      if (*inbytesleft)
        {
          errno = EINVAL;
          return (size_t) -1;
        }
    }
  else
    {
      /* In all other cases we simply copy input bytes to the
         output.  */
      size_t amt = *inbytesleft;

      if (amt > *outbytesleft)
        amt = *outbytesleft;
      memcpy (*outbuf, *inbuf, amt);
      *inbuf += amt;
      *outbuf += amt;
      *inbytesleft -= amt;
      *outbytesleft -= amt;
    }

  if (*inbytesleft)
    {
      errno = E2BIG;
      return (size_t) -1;
    }

  /* The number of non-reversible conversions -- but they were all
     reversible.  */
  return 0;
}
00193 
00194 #endif
00195 
00196 
00197 
00198 /* The global lists of character sets and translations.  */
00199 
00200 
00201 #ifndef GDB_DEFAULT_TARGET_CHARSET
00202 #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
00203 #endif
00204 
00205 #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET
00206 #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
00207 #endif
00208 
00209 static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET;
00210 static const char *host_charset_name = "auto";
00211 static void
00212 show_host_charset_name (struct ui_file *file, int from_tty,
00213                         struct cmd_list_element *c,
00214                         const char *value)
00215 {
00216   if (!strcmp (value, "auto"))
00217     fprintf_filtered (file,
00218                       _("The host character set is \"auto; currently %s\".\n"),
00219                       auto_host_charset_name);
00220   else
00221     fprintf_filtered (file, _("The host character set is \"%s\".\n"), value);
00222 }
00223 
/* The target charset setting; "auto" defers to the architecture.  */
static const char *target_charset_name = "auto";

/* Print the target charset setting VALUE to FILE.  When VALUE is
   "auto", the charset chosen by the current architecture is shown
   too.  */
static void
show_target_charset_name (struct ui_file *file, int from_tty,
                          struct cmd_list_element *c, const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file, _("The target character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_charset (get_current_arch ()));
}
00238 
/* The target wide charset setting; "auto" defers to the
   architecture.  */
static const char *target_wide_charset_name = "auto";

/* Print the target wide charset setting VALUE to FILE.  When VALUE
   is "auto", the wide charset chosen by the current architecture is
   shown too.  */
static void
show_target_wide_charset_name (struct ui_file *file,
                               int from_tty,
                               struct cmd_list_element *c,
                               const char *value)
{
  if (strcmp (value, "auto") != 0)
    {
      fprintf_filtered (file,
                        _("The target wide character set is \"%s\".\n"),
                        value);
      return;
    }

  fprintf_filtered (file,
                    _("The target wide character set is \"auto; "
                      "currently %s\".\n"),
                    gdbarch_auto_wide_charset (get_current_arch ()));
}
00255 
00256 static const char *default_charset_names[] =
00257 {
00258   DEFAULT_CHARSET_NAMES
00259   0
00260 };
00261 
00262 static const char **charset_enum;
00263 
00264 
00265 /* If the target wide character set has big- or little-endian
00266    variants, these are the corresponding names.  */
00267 static const char *target_wide_charset_be_name;
00268 static const char *target_wide_charset_le_name;
00269 
00270 /* The architecture for which the BE- and LE-names are valid.  */
00271 static struct gdbarch *be_le_arch;
00272 
00273 /* A helper function which sets the target wide big- and little-endian
00274    character set names, if possible.  */
00275 
00276 static void
00277 set_be_le_names (struct gdbarch *gdbarch)
00278 {
00279   int i, len;
00280   const char *target_wide;
00281 
00282   if (be_le_arch == gdbarch)
00283     return;
00284   be_le_arch = gdbarch;
00285 
00286   target_wide_charset_le_name = NULL;
00287   target_wide_charset_be_name = NULL;
00288 
00289   target_wide = target_wide_charset_name;
00290   if (!strcmp (target_wide, "auto"))
00291     target_wide = gdbarch_auto_wide_charset (gdbarch);
00292 
00293   len = strlen (target_wide);
00294   for (i = 0; charset_enum[i]; ++i)
00295     {
00296       if (strncmp (target_wide, charset_enum[i], len))
00297         continue;
00298       if ((charset_enum[i][len] == 'B'
00299            || charset_enum[i][len] == 'L')
00300           && charset_enum[i][len + 1] == 'E'
00301           && charset_enum[i][len + 2] == '\0')
00302         {
00303           if (charset_enum[i][len] == 'B')
00304             target_wide_charset_be_name = charset_enum[i];
00305           else
00306             target_wide_charset_le_name = charset_enum[i];
00307         }
00308     }
00309 }
00310 
00311 /* Support for the 'set charset', 'set host-charset', 'set
00312    target-charset' and 'set target-wide-charset' commands.  */
00313 
00314 static void
00315 validate (struct gdbarch *gdbarch)
00316 {
00317   iconv_t desc;
00318   const char *host_cset = host_charset ();
00319   const char *target_cset = target_charset (gdbarch);
00320   const char *target_wide_cset = target_wide_charset_name;
00321 
00322   if (!strcmp (target_wide_cset, "auto"))
00323     target_wide_cset = gdbarch_auto_wide_charset (gdbarch);
00324 
00325   desc = iconv_open (target_wide_cset, host_cset);
00326   if (desc == (iconv_t) -1)
00327     error (_("Cannot convert between character sets `%s' and `%s'"),
00328            target_wide_cset, host_cset);
00329   iconv_close (desc);
00330 
00331   desc = iconv_open (target_cset, host_cset);
00332   if (desc == (iconv_t) -1)
00333     error (_("Cannot convert between character sets `%s' and `%s'"),
00334            target_cset, host_cset);
00335   iconv_close (desc);
00336 
00337   /* Clear the cache.  */
00338   be_le_arch = NULL;
00339 }
00340 
00341 /* This is the sfunc for the 'set charset' command.  */
00342 static void
00343 set_charset_sfunc (char *charset, int from_tty, 
00344                    struct cmd_list_element *c)
00345 {
00346   /* CAREFUL: set the target charset here as well.  */
00347   target_charset_name = host_charset_name;
00348   validate (get_current_arch ());
00349 }
00350 
/* The sfunc for the 'set host-charset' command.  It exists only to
   give validate the signature an sfunc must have; CHARSET, FROM_TTY
   and C are unused.  */
static void
set_host_charset_sfunc (char *charset, int from_tty,
                        struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
00359 
/* The sfunc for the 'set target-charset' command: validates the
   charset settings against the current architecture.  CHARSET,
   FROM_TTY and C are unused.  */
static void
set_target_charset_sfunc (char *charset, int from_tty,
                          struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
00367 
/* The sfunc for the 'set target-wide-charset' command: validates
   the charset settings against the current architecture.  CHARSET,
   FROM_TTY and C are unused.  */
static void
set_target_wide_charset_sfunc (char *charset, int from_tty,
                               struct cmd_list_element *c)
{
  struct gdbarch *arch = get_current_arch ();

  validate (arch);
}
00375 
00376 /* sfunc for the 'show charset' command.  */
00377 static void
00378 show_charset (struct ui_file *file, int from_tty, 
00379               struct cmd_list_element *c,
00380               const char *name)
00381 {
00382   show_host_charset_name (file, from_tty, c, host_charset_name);
00383   show_target_charset_name (file, from_tty, c, target_charset_name);
00384   show_target_wide_charset_name (file, from_tty, c, 
00385                                  target_wide_charset_name);
00386 }
00387 
00388 
00389 /* Accessor functions.  */
00390 
00391 const char *
00392 host_charset (void)
00393 {
00394   if (!strcmp (host_charset_name, "auto"))
00395     return auto_host_charset_name;
00396   return host_charset_name;
00397 }
00398 
00399 const char *
00400 target_charset (struct gdbarch *gdbarch)
00401 {
00402   if (!strcmp (target_charset_name, "auto"))
00403     return gdbarch_auto_charset (gdbarch);
00404   return target_charset_name;
00405 }
00406 
00407 const char *
00408 target_wide_charset (struct gdbarch *gdbarch)
00409 {
00410   enum bfd_endian byte_order = gdbarch_byte_order (gdbarch);
00411 
00412   set_be_le_names (gdbarch);
00413   if (byte_order == BFD_ENDIAN_BIG)
00414     {
00415       if (target_wide_charset_be_name)
00416         return target_wide_charset_be_name;
00417     }
00418   else
00419     {
00420       if (target_wide_charset_le_name)
00421         return target_wide_charset_le_name;
00422     }
00423 
00424   if (!strcmp (target_wide_charset_name, "auto"))
00425     return gdbarch_auto_wide_charset (gdbarch);
00426 
00427   return target_wide_charset_name;
00428 }
00429 
00430 
00431 /* Host character set management.  For the time being, we assume that
00432    the host character set is some superset of ASCII.  */
00433 
/* Map the host character C to a control character: '?' becomes
   0177, anything else is C masked with 0237.  */
char
host_letter_to_control_character (char c)
{
  return c == '?' ? 0177 : (c & 0237);
}
00441 
/* Convert a host character, C, to its hex value (0 through 15).  C
   must already have been validated using isxdigit; anything that is
   not a decimal digit or a letter in a-f / A-F trips the
   assertion.  */

int
host_hex_value (char c)
{
  /* The <ctype.h> functions are only defined for values
     representable as unsigned char (or EOF), and plain char may be
     negative, so convert before classifying.  */
  if (isdigit ((unsigned char) c))
    return c - '0';
  if (c >= 'a' && c <= 'f')
    return 10 + c - 'a';
  gdb_assert (c >= 'A' && c <= 'F');
  return 10 + c - 'A';
}
00455 
00456 
00457 /* Public character management functions.  */
00458 
00459 /* A cleanup function which is run to close an iconv descriptor.  */
00460 
00461 static void
00462 cleanup_iconv (void *p)
00463 {
00464   iconv_t *descp = p;
00465   iconv_close (*descp);
00466 }
00467 
00468 void
00469 convert_between_encodings (const char *from, const char *to,
00470                            const gdb_byte *bytes, unsigned int num_bytes,
00471                            int width, struct obstack *output,
00472                            enum transliterations translit)
00473 {
00474   iconv_t desc;
00475   struct cleanup *cleanups;
00476   size_t inleft;
00477   ICONV_CONST char *inp;
00478   unsigned int space_request;
00479 
00480   /* Often, the host and target charsets will be the same.  */
00481   if (!strcmp (from, to))
00482     {
00483       obstack_grow (output, bytes, num_bytes);
00484       return;
00485     }
00486 
00487   desc = iconv_open (to, from);
00488   if (desc == (iconv_t) -1)
00489     perror_with_name (_("Converting character sets"));
00490   cleanups = make_cleanup (cleanup_iconv, &desc);
00491 
00492   inleft = num_bytes;
00493   inp = (ICONV_CONST char *) bytes;
00494 
00495   space_request = num_bytes;
00496 
00497   while (inleft > 0)
00498     {
00499       char *outp;
00500       size_t outleft, r;
00501       int old_size;
00502 
00503       old_size = obstack_object_size (output);
00504       obstack_blank (output, space_request);
00505 
00506       outp = obstack_base (output) + old_size;
00507       outleft = space_request;
00508 
00509       r = iconv (desc, &inp, &inleft, &outp, &outleft);
00510 
00511       /* Now make sure that the object on the obstack only includes
00512          bytes we have converted.  */
00513       obstack_blank (output, - (int) outleft);
00514 
00515       if (r == (size_t) -1)
00516         {
00517           switch (errno)
00518             {
00519             case EILSEQ:
00520               {
00521                 int i;
00522 
00523                 /* Invalid input sequence.  */
00524                 if (translit == translit_none)
00525                   error (_("Could not convert character "
00526                            "to `%s' character set"), to);
00527 
00528                 /* We emit escape sequence for the bytes, skip them,
00529                    and try again.  */
00530                 for (i = 0; i < width; ++i)
00531                   {
00532                     char octal[5];
00533 
00534                     xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff);
00535                     obstack_grow_str (output, octal);
00536 
00537                     ++inp;
00538                     --inleft;
00539                   }
00540               }
00541               break;
00542 
00543             case E2BIG:
00544               /* We ran out of space in the output buffer.  Make it
00545                  bigger next time around.  */
00546               space_request *= 2;
00547               break;
00548 
00549             case EINVAL:
00550               /* Incomplete input sequence.  FIXME: ought to report this
00551                  to the caller somehow.  */
00552               inleft = 0;
00553               break;
00554 
00555             default:
00556               perror_with_name (_("Internal error while "
00557                                   "converting character sets"));
00558             }
00559         }
00560     }
00561 
00562   do_cleanups (cleanups);
00563 }
00564 
00565 
00566 
00567 /* An iterator that returns host wchar_t's from a target string.  */
00568 struct wchar_iterator
00569 {
00570   /* The underlying iconv descriptor.  */
00571   iconv_t desc;
00572 
00573   /* The input string.  This is updated as convert characters.  */
00574   const gdb_byte *input;
00575   /* The number of bytes remaining in the input.  */
00576   size_t bytes;
00577 
00578   /* The width of an input character.  */
00579   size_t width;
00580 
00581   /* The output buffer and its size.  */
00582   gdb_wchar_t *out;
00583   size_t out_size;
00584 };
00585 
00586 /* Create a new iterator.  */
00587 struct wchar_iterator *
00588 make_wchar_iterator (const gdb_byte *input, size_t bytes, 
00589                      const char *charset, size_t width)
00590 {
00591   struct wchar_iterator *result;
00592   iconv_t desc;
00593 
00594   desc = iconv_open (INTERMEDIATE_ENCODING, charset);
00595   if (desc == (iconv_t) -1)
00596     perror_with_name (_("Converting character sets"));
00597 
00598   result = XNEW (struct wchar_iterator);
00599   result->desc = desc;
00600   result->input = input;
00601   result->bytes = bytes;
00602   result->width = width;
00603 
00604   result->out = XNEW (gdb_wchar_t);
00605   result->out_size = 1;
00606 
00607   return result;
00608 }
00609 
00610 static void
00611 do_cleanup_iterator (void *p)
00612 {
00613   struct wchar_iterator *iter = p;
00614 
00615   iconv_close (iter->desc);
00616   xfree (iter->out);
00617   xfree (iter);
00618 }
00619 
00620 struct cleanup *
00621 make_cleanup_wchar_iterator (struct wchar_iterator *iter)
00622 {
00623   return make_cleanup (do_cleanup_iterator, iter);
00624 }
00625 
00626 int
00627 wchar_iterate (struct wchar_iterator *iter,
00628                enum wchar_iterate_result *out_result,
00629                gdb_wchar_t **out_chars,
00630                const gdb_byte **ptr,
00631                size_t *len)
00632 {
00633   size_t out_request;
00634 
00635   /* Try to convert some characters.  At first we try to convert just
00636      a single character.  The reason for this is that iconv does not
00637      necessarily update its outgoing arguments when it encounters an
00638      invalid input sequence -- but we want to reliably report this to
00639      our caller so it can emit an escape sequence.  */
00640   out_request = 1;
00641   while (iter->bytes > 0)
00642     {
00643       ICONV_CONST char *inptr = (ICONV_CONST char *) iter->input;
00644       char *outptr = (char *) &iter->out[0];
00645       const gdb_byte *orig_inptr = iter->input;
00646       size_t orig_in = iter->bytes;
00647       size_t out_avail = out_request * sizeof (gdb_wchar_t);
00648       size_t num;
00649       size_t r = iconv (iter->desc, &inptr, &iter->bytes, &outptr, &out_avail);
00650 
00651       iter->input = (gdb_byte *) inptr;
00652 
00653       if (r == (size_t) -1)
00654         {
00655           switch (errno)
00656             {
00657             case EILSEQ:
00658               /* Invalid input sequence.  We still might have
00659                  converted a character; if so, return it.  */
00660               if (out_avail < out_request * sizeof (gdb_wchar_t))
00661                 break;
00662               
00663               /* Otherwise skip the first invalid character, and let
00664                  the caller know about it.  */
00665               *out_result = wchar_iterate_invalid;
00666               *ptr = iter->input;
00667               *len = iter->width;
00668               iter->input += iter->width;
00669               iter->bytes -= iter->width;
00670               return 0;
00671 
00672             case E2BIG:
00673               /* We ran out of space.  We still might have converted a
00674                  character; if so, return it.  Otherwise, grow the
00675                  buffer and try again.  */
00676               if (out_avail < out_request * sizeof (gdb_wchar_t))
00677                 break;
00678 
00679               ++out_request;
00680               if (out_request > iter->out_size)
00681                 {
00682                   iter->out_size = out_request;
00683                   iter->out = xrealloc (iter->out,
00684                                         out_request * sizeof (gdb_wchar_t));
00685                 }
00686               continue;
00687 
00688             case EINVAL:
00689               /* Incomplete input sequence.  Let the caller know, and
00690                  arrange for future calls to see EOF.  */
00691               *out_result = wchar_iterate_incomplete;
00692               *ptr = iter->input;
00693               *len = iter->bytes;
00694               iter->bytes = 0;
00695               return 0;
00696 
00697             default:
00698               perror_with_name (_("Internal error while "
00699                                   "converting character sets"));
00700             }
00701         }
00702 
00703       /* We converted something.  */
00704       num = out_request - out_avail / sizeof (gdb_wchar_t);
00705       *out_result = wchar_iterate_ok;
00706       *out_chars = iter->out;
00707       *ptr = orig_inptr;
00708       *len = orig_in - iter->bytes;
00709       return num;
00710     }
00711 
00712   /* Really done.  */
00713   *out_result = wchar_iterate_eof;
00714   return -1;
00715 }
00716 
00717 
00718 /* The charset.c module initialization function.  */
00719 
00720 extern initialize_file_ftype _initialize_charset; /* -Wmissing-prototype */
00721 
00722 static VEC (char_ptr) *charsets;
00723 
00724 #ifdef PHONY_ICONV
00725 
00726 static void
00727 find_charset_names (void)
00728 {
00729   VEC_safe_push (char_ptr, charsets, GDB_DEFAULT_HOST_CHARSET);
00730   VEC_safe_push (char_ptr, charsets, NULL);
00731 }
00732 
00733 #else /* PHONY_ICONV */
00734 
00735 /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but
00736    provides different symbols in the static and dynamic libraries.
00737    So, configure may see libiconvlist but not iconvlist.  But, calling
00738    iconvlist is the right thing to do and will work.  Hence we do a
00739    check here but unconditionally call iconvlist below.  */
00740 #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST)
00741 
00742 /* A helper function that adds some character sets to the vector of
00743    all character sets.  This is a callback function for iconvlist.  */
00744 
00745 static int
00746 add_one (unsigned int count, const char *const *names, void *data)
00747 {
00748   unsigned int i;
00749 
00750   for (i = 0; i < count; ++i)
00751     VEC_safe_push (char_ptr, charsets, xstrdup (names[i]));
00752 
00753   return 0;
00754 }
00755 
00756 static void
00757 find_charset_names (void)
00758 {
00759   iconvlist (add_one, NULL);
00760   VEC_safe_push (char_ptr, charsets, NULL);
00761 }
00762 
00763 #else
00764 
/* Return 1 if LINE, a line of output from the iconv program, belongs
   to the human readable introduction and must be skipped; return 0
   otherwise.  Older iconv programs (e.g. 2.2.2) print that
   introduction even when stdout is not a tty, whereas newer versions
   leave it out.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction.  A line containing any of them,
     at any position, is ignored.  */
  static const char * const intro_fragments[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *fragment;

  for (fragment = intro_fragments; *fragment != NULL; ++fragment)
    if (strstr (line, *fragment) != NULL)
      return 1;

  return 0;
}
00793 
00794 static void
00795 find_charset_names (void)
00796 {
00797   struct pex_obj *child;
00798   char *args[3];
00799   int err, status;
00800   int fail = 1;
00801   int flags;
00802   struct gdb_environ *iconv_env;
00803   char *iconv_program;
00804 
00805   /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is
00806      not a tty.  We need to recognize it and ignore it.  This text is
00807      subject to translation, so force LANGUAGE=C.  */
00808   iconv_env = make_environ ();
00809   init_environ (iconv_env);
00810   set_in_environ (iconv_env, "LANGUAGE", "C");
00811   set_in_environ (iconv_env, "LC_ALL", "C");
00812 
00813   child = pex_init (PEX_USE_PIPES, "iconv", NULL);
00814 
00815 #ifdef ICONV_BIN
00816   {
00817     char *iconv_dir = relocate_gdb_directory (ICONV_BIN,
00818                                               ICONV_BIN_RELOCATABLE);
00819     iconv_program = concat (iconv_dir, SLASH_STRING, "iconv", NULL);
00820     xfree (iconv_dir);
00821   }
00822 #else
00823   iconv_program = xstrdup ("iconv");
00824 #endif
00825   args[0] = iconv_program;
00826   args[1] = "-l";
00827   args[2] = NULL;
00828   flags = PEX_STDERR_TO_STDOUT;
00829 #ifndef ICONV_BIN
00830   flags |= PEX_SEARCH;
00831 #endif
00832   /* Note that we simply ignore errors here.  */
00833   if (!pex_run_in_environment (child, flags,
00834                                args[0], args, environ_vector (iconv_env),
00835                                NULL, NULL, &err))
00836     {
00837       FILE *in = pex_read_output (child, 0);
00838 
00839       /* POSIX says that iconv -l uses an unspecified format.  We
00840          parse the glibc and libiconv formats; feel free to add others
00841          as needed.  */
00842 
00843       while (in != NULL && !feof (in))
00844         {
00845           /* The size of buf is chosen arbitrarily.  */
00846           char buf[1024];
00847           char *start, *r;
00848           int len;
00849 
00850           r = fgets (buf, sizeof (buf), in);
00851           if (!r)
00852             break;
00853           len = strlen (r);
00854           if (len <= 3)
00855             continue;
00856           if (ignore_line_p (r))
00857             continue;
00858 
00859           /* Strip off the newline.  */
00860           --len;
00861           /* Strip off one or two '/'s.  glibc will print lines like
00862              "8859_7//", but also "10646-1:1993/UCS4/".  */
00863           if (buf[len - 1] == '/')
00864             --len;
00865           if (buf[len - 1] == '/')
00866             --len;
00867           buf[len] = '\0';
00868 
00869           /* libiconv will print multiple entries per line, separated
00870              by spaces.  Older iconvs will print multiple entries per
00871              line, indented by two spaces, and separated by ", "
00872              (i.e. the human readable form).  */
00873           start = buf;
00874           while (1)
00875             {
00876               int keep_going;
00877               char *p;
00878 
00879               /* Skip leading blanks.  */
00880               for (p = start; *p && *p == ' '; ++p)
00881                 ;
00882               start = p;
00883               /* Find the next space, comma, or end-of-line.  */
00884               for ( ; *p && *p != ' ' && *p != ','; ++p)
00885                 ;
00886               /* Ignore an empty result.  */
00887               if (p == start)
00888                 break;
00889               keep_going = *p;
00890               *p = '\0';
00891               VEC_safe_push (char_ptr, charsets, xstrdup (start));
00892               if (!keep_going)
00893                 break;
00894               /* Skip any extra spaces.  */
00895               for (start = p + 1; *start && *start == ' '; ++start)
00896                 ;
00897             }
00898         }
00899 
00900       if (pex_get_status (child, 1, &status)
00901           && WIFEXITED (status) && !WEXITSTATUS (status))
00902         fail = 0;
00903 
00904     }
00905 
00906   xfree (iconv_program);
00907   pex_free (child);
00908   free_environ (iconv_env);
00909 
00910   if (fail)
00911     {
00912       /* Some error occurred, so drop the vector.  */
00913       free_char_ptr_vec (charsets);
00914       charsets = NULL;
00915     }
00916   else
00917     VEC_safe_push (char_ptr, charsets, NULL);
00918 }
00919 
00920 #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */
00921 #endif /* PHONY_ICONV */
00922 
00923 /* The "auto" target charset used by default_auto_charset.  */
00924 static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET;
00925 
00926 const char *
00927 default_auto_charset (void)
00928 {
00929   return auto_target_charset_name;
00930 }
00931 
00932 const char *
00933 default_auto_wide_charset (void)
00934 {
00935   return GDB_DEFAULT_TARGET_WIDE_CHARSET;
00936 }
00937 
00938 
00939 #ifdef USE_INTERMEDIATE_ENCODING_FUNCTION
00940 /* Macro used for UTF or UCS endianness suffix.  */
00941 #if WORDS_BIGENDIAN
00942 #define ENDIAN_SUFFIX "BE"
00943 #else
00944 #define ENDIAN_SUFFIX "LE"
00945 #endif
00946 
00947 /* The code below serves to generate a compile time error if
00948    gdb_wchar_t type is not of size 2 nor 4, despite the fact that
00949    macro __STDC_ISO_10646__ is defined.
00950    This is better than a gdb_assert call, because GDB cannot handle
00951    strings correctly if this size is different.  */
00952 
00953 extern char your_gdb_wchar_t_is_bogus[(sizeof (gdb_wchar_t) == 2
00954                                        || sizeof (gdb_wchar_t) == 4)
00955                                       ? 1 : -1];
00956 
00957 /* intermediate_encoding returns the charset unsed internally by
00958    GDB to convert between target and host encodings. As the test above
00959    compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes.
00960    UTF-16/32 is tested first, UCS-2/4 is tested as a second option,
00961    otherwise an error is generated.  */
00962 
00963 const char *
00964 intermediate_encoding (void)
00965 {
00966   iconv_t desc;
00967   static const char *stored_result = NULL;
00968   char *result;
00969 
00970   if (stored_result)
00971     return stored_result;
00972   result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8),
00973                        ENDIAN_SUFFIX);
00974   /* Check that the name is supported by iconv_open.  */
00975   desc = iconv_open (result, host_charset ());
00976   if (desc != (iconv_t) -1)
00977     {
00978       iconv_close (desc);
00979       stored_result = result;
00980       return result;
00981     }
00982   /* Not valid, free the allocated memory.  */
00983   xfree (result);
00984   /* Second try, with UCS-2 type.  */
00985   result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t),
00986                        ENDIAN_SUFFIX);
00987   /* Check that the name is supported by iconv_open.  */
00988   desc = iconv_open (result, host_charset ());
00989   if (desc != (iconv_t) -1)
00990     {
00991       iconv_close (desc);
00992       stored_result = result;
00993       return result;
00994     }
00995   /* Not valid, free the allocated memory.  */
00996   xfree (result);
00997   /* No valid charset found, generate error here.  */
00998   error (_("Unable to find a vaild charset for string conversions"));
00999 }
01000 
01001 #endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */
01002 
01003 void
01004 _initialize_charset (void)
01005 {
01006   /* The first element is always "auto".  */
01007   VEC_safe_push (char_ptr, charsets, xstrdup ("auto"));
01008   find_charset_names ();
01009 
01010   if (VEC_length (char_ptr, charsets) > 1)
01011     charset_enum = (const char **) VEC_address (char_ptr, charsets);
01012   else
01013     charset_enum = default_charset_names;
01014 
01015 #ifndef PHONY_ICONV
01016 #ifdef HAVE_LANGINFO_CODESET
01017   /* The result of nl_langinfo may be overwritten later.  This may
01018      leak a little memory, if the user later changes the host charset,
01019      but that doesn't matter much.  */
01020   auto_host_charset_name = xstrdup (nl_langinfo (CODESET));
01021   /* Solaris will return `646' here -- but the Solaris iconv then does
01022      not accept this.  Darwin (and maybe FreeBSD) may return "" here,
01023      which GNU libiconv doesn't like (infinite loop).  */
01024   if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name)
01025     auto_host_charset_name = "ASCII";
01026   auto_target_charset_name = auto_host_charset_name;
01027 #elif defined (USE_WIN32API)
01028   {
01029     /* "CP" + x<=5 digits + paranoia.  */
01030     static char w32_host_default_charset[16];
01031 
01032     snprintf (w32_host_default_charset, sizeof w32_host_default_charset,
01033               "CP%d", GetACP());
01034     auto_host_charset_name = w32_host_default_charset;
01035     auto_target_charset_name = auto_host_charset_name;
01036   }
01037 #endif
01038 #endif
01039 
01040   add_setshow_enum_cmd ("charset", class_support,
01041                         charset_enum, &host_charset_name, _("\
01042 Set the host and target character sets."), _("\
01043 Show the host and target character sets."), _("\
01044 The `host character set' is the one used by the system GDB is running on.\n\
01045 The `target character set' is the one used by the program being debugged.\n\
01046 You may only use supersets of ASCII for your host character set; GDB does\n\
01047 not support any others.\n\
01048 To see a list of the character sets GDB supports, type `set charset <TAB>'."),
01049                         /* Note that the sfunc below needs to set
01050                            target_charset_name, because the 'set
01051                            charset' command sets two variables.  */
01052                         set_charset_sfunc,
01053                         show_charset,
01054                         &setlist, &showlist);
01055 
01056   add_setshow_enum_cmd ("host-charset", class_support,
01057                         charset_enum, &host_charset_name, _("\
01058 Set the host character set."), _("\
01059 Show the host character set."), _("\
01060 The `host character set' is the one used by the system GDB is running on.\n\
01061 You may only use supersets of ASCII for your host character set; GDB does\n\
01062 not support any others.\n\
01063 To see a list of the character sets GDB supports, type `set host-charset <TAB>'."),
01064                         set_host_charset_sfunc,
01065                         show_host_charset_name,
01066                         &setlist, &showlist);
01067 
01068   add_setshow_enum_cmd ("target-charset", class_support,
01069                         charset_enum, &target_charset_name, _("\
01070 Set the target character set."), _("\
01071 Show the target character set."), _("\
01072 The `target character set' is the one used by the program being debugged.\n\
01073 GDB translates characters and strings between the host and target\n\
01074 character sets as needed.\n\
01075 To see a list of the character sets GDB supports, type `set target-charset'<TAB>"),
01076                         set_target_charset_sfunc,
01077                         show_target_charset_name,
01078                         &setlist, &showlist);
01079 
01080   add_setshow_enum_cmd ("target-wide-charset", class_support,
01081                         charset_enum, &target_wide_charset_name,
01082                         _("\
01083 Set the target wide character set."), _("\
01084 Show the target wide character set."), _("\
01085 The `target wide character set' is the one used by the program being debugged.\
01086 \nIn particular it is the encoding used by `wchar_t'.\n\
01087 GDB translates characters and strings between the host and target\n\
01088 character sets as needed.\n\
01089 To see a list of the character sets GDB supports, type\n\
01090 `set target-wide-charset'<TAB>"),
01091                         set_target_wide_charset_sfunc,
01092                         show_target_wide_charset_name,
01093                         &setlist, &showlist);
01094 }