GDB (API)
|
00001 /* C preprocessor macro expansion for GDB. 00002 Copyright (C) 2002-2013 Free Software Foundation, Inc. 00003 Contributed by Red Hat, Inc. 00004 00005 This file is part of GDB. 00006 00007 This program is free software; you can redistribute it and/or modify 00008 it under the terms of the GNU General Public License as published by 00009 the Free Software Foundation; either version 3 of the License, or 00010 (at your option) any later version. 00011 00012 This program is distributed in the hope that it will be useful, 00013 but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 GNU General Public License for more details. 00016 00017 You should have received a copy of the GNU General Public License 00018 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 00019 00020 #include "defs.h" 00021 #include "gdb_obstack.h" 00022 #include "bcache.h" 00023 #include "macrotab.h" 00024 #include "macroexp.h" 00025 #include "gdb_assert.h" 00026 #include "c-lang.h" 00027 00028 00029 00030 /* A resizeable, substringable string type. */ 00031 00032 00033 /* A string type that we can resize, quickly append to, and use to 00034 refer to substrings of other strings. */ 00035 struct macro_buffer 00036 { 00037 /* An array of characters. The first LEN bytes are the real text, 00038 but there are SIZE bytes allocated to the array. If SIZE is 00039 zero, then this doesn't point to a malloc'ed block. If SHARED is 00040 non-zero, then this buffer is actually a pointer into some larger 00041 string, and we shouldn't append characters to it, etc. Because 00042 of sharing, we can't assume in general that the text is 00043 null-terminated. */ 00044 char *text; 00045 00046 /* The number of characters in the string. */ 00047 int len; 00048 00049 /* The number of characters allocated to the string. 
If SHARED is 00050 non-zero, this is meaningless; in this case, we set it to zero so 00051 that any "do we have room to append something?" tests will fail, 00052 so we don't always have to check SHARED before using this field. */ 00053 int size; 00054 00055 /* Zero if TEXT can be safely realloc'ed (i.e., it's its own malloc 00056 block). Non-zero if TEXT is actually pointing into the middle of 00057 some other block, and we shouldn't reallocate it. */ 00058 int shared; 00059 00060 /* For detecting token splicing. 00061 00062 This is the index in TEXT of the first character of the token 00063 that abuts the end of TEXT. If TEXT contains no tokens, then we 00064 set this equal to LEN. If TEXT ends in whitespace, then there is 00065 no token abutting the end of TEXT (it's just whitespace), and 00066 again, we set this equal to LEN. We set this to -1 if we don't 00067 know the nature of TEXT. */ 00068 int last_token; 00069 00070 /* If this buffer is holding the result from get_token, then this 00071 is non-zero if it is an identifier token, zero otherwise. */ 00072 int is_identifier; 00073 }; 00074 00075 00076 /* Set the macro buffer *B to the empty string, guessing that its 00077 final contents will fit in N bytes. (It'll get resized if it 00078 doesn't, so the guess doesn't have to be right.) Allocate the 00079 initial storage with xmalloc. */ 00080 static void 00081 init_buffer (struct macro_buffer *b, int n) 00082 { 00083 b->size = n; 00084 if (n > 0) 00085 b->text = (char *) xmalloc (n); 00086 else 00087 b->text = NULL; 00088 b->len = 0; 00089 b->shared = 0; 00090 b->last_token = -1; 00091 } 00092 00093 00094 /* Set the macro buffer *BUF to refer to the LEN bytes at ADDR, as a 00095 shared substring. 
*/ 00096 static void 00097 init_shared_buffer (struct macro_buffer *buf, char *addr, int len) 00098 { 00099 buf->text = addr; 00100 buf->len = len; 00101 buf->shared = 1; 00102 buf->size = 0; 00103 buf->last_token = -1; 00104 } 00105 00106 00107 /* Free the text of the buffer B. Raise an error if B is shared. */ 00108 static void 00109 free_buffer (struct macro_buffer *b) 00110 { 00111 gdb_assert (! b->shared); 00112 if (b->size) 00113 xfree (b->text); 00114 } 00115 00116 /* Like free_buffer, but return the text as an xstrdup()d string. 00117 This only exists to try to make the API relatively clean. */ 00118 00119 static char * 00120 free_buffer_return_text (struct macro_buffer *b) 00121 { 00122 gdb_assert (! b->shared); 00123 gdb_assert (b->size); 00124 /* Nothing to do. */ 00125 return b->text; 00126 } 00127 00128 /* A cleanup function for macro buffers. */ 00129 static void 00130 cleanup_macro_buffer (void *untyped_buf) 00131 { 00132 free_buffer ((struct macro_buffer *) untyped_buf); 00133 } 00134 00135 00136 /* Resize the buffer B to be at least N bytes long. Raise an error if 00137 B shouldn't be resized. */ 00138 static void 00139 resize_buffer (struct macro_buffer *b, int n) 00140 { 00141 /* We shouldn't be trying to resize shared strings. */ 00142 gdb_assert (! b->shared); 00143 00144 if (b->size == 0) 00145 b->size = n; 00146 else 00147 while (b->size <= n) 00148 b->size *= 2; 00149 00150 b->text = xrealloc (b->text, b->size); 00151 } 00152 00153 00154 /* Append the character C to the buffer B. */ 00155 static void 00156 appendc (struct macro_buffer *b, int c) 00157 { 00158 int new_len = b->len + 1; 00159 00160 if (new_len > b->size) 00161 resize_buffer (b, new_len); 00162 00163 b->text[b->len] = c; 00164 b->len = new_len; 00165 } 00166 00167 00168 /* Append the LEN bytes at ADDR to the buffer B. 
*/ 00169 static void 00170 appendmem (struct macro_buffer *b, char *addr, int len) 00171 { 00172 int new_len = b->len + len; 00173 00174 if (new_len > b->size) 00175 resize_buffer (b, new_len); 00176 00177 memcpy (b->text + b->len, addr, len); 00178 b->len = new_len; 00179 } 00180 00181 00182 00183 /* Recognizing preprocessor tokens. */ 00184 00185 00186 int 00187 macro_is_whitespace (int c) 00188 { 00189 return (c == ' ' 00190 || c == '\t' 00191 || c == '\n' 00192 || c == '\v' 00193 || c == '\f'); 00194 } 00195 00196 00197 int 00198 macro_is_digit (int c) 00199 { 00200 return ('0' <= c && c <= '9'); 00201 } 00202 00203 00204 int 00205 macro_is_identifier_nondigit (int c) 00206 { 00207 return (c == '_' 00208 || ('a' <= c && c <= 'z') 00209 || ('A' <= c && c <= 'Z')); 00210 } 00211 00212 00213 static void 00214 set_token (struct macro_buffer *tok, char *start, char *end) 00215 { 00216 init_shared_buffer (tok, start, end - start); 00217 tok->last_token = 0; 00218 00219 /* Presumed; get_identifier may overwrite this. 
*/ 00220 tok->is_identifier = 0; 00221 } 00222 00223 00224 static int 00225 get_comment (struct macro_buffer *tok, char *p, char *end) 00226 { 00227 if (p + 2 > end) 00228 return 0; 00229 else if (p[0] == '/' 00230 && p[1] == '*') 00231 { 00232 char *tok_start = p; 00233 00234 p += 2; 00235 00236 for (; p < end; p++) 00237 if (p + 2 <= end 00238 && p[0] == '*' 00239 && p[1] == '/') 00240 { 00241 p += 2; 00242 set_token (tok, tok_start, p); 00243 return 1; 00244 } 00245 00246 error (_("Unterminated comment in macro expansion.")); 00247 } 00248 else if (p[0] == '/' 00249 && p[1] == '/') 00250 { 00251 char *tok_start = p; 00252 00253 p += 2; 00254 for (; p < end; p++) 00255 if (*p == '\n') 00256 break; 00257 00258 set_token (tok, tok_start, p); 00259 return 1; 00260 } 00261 else 00262 return 0; 00263 } 00264 00265 00266 static int 00267 get_identifier (struct macro_buffer *tok, char *p, char *end) 00268 { 00269 if (p < end 00270 && macro_is_identifier_nondigit (*p)) 00271 { 00272 char *tok_start = p; 00273 00274 while (p < end 00275 && (macro_is_identifier_nondigit (*p) 00276 || macro_is_digit (*p))) 00277 p++; 00278 00279 set_token (tok, tok_start, p); 00280 tok->is_identifier = 1; 00281 return 1; 00282 } 00283 else 00284 return 0; 00285 } 00286 00287 00288 static int 00289 get_pp_number (struct macro_buffer *tok, char *p, char *end) 00290 { 00291 if (p < end 00292 && (macro_is_digit (*p) 00293 || (*p == '.' 
00294 && p + 2 <= end 00295 && macro_is_digit (p[1])))) 00296 { 00297 char *tok_start = p; 00298 00299 while (p < end) 00300 { 00301 if (p + 2 <= end 00302 && strchr ("eEpP", *p) 00303 && (p[1] == '+' || p[1] == '-')) 00304 p += 2; 00305 else if (macro_is_digit (*p) 00306 || macro_is_identifier_nondigit (*p) 00307 || *p == '.') 00308 p++; 00309 else 00310 break; 00311 } 00312 00313 set_token (tok, tok_start, p); 00314 return 1; 00315 } 00316 else 00317 return 0; 00318 } 00319 00320 00321 00322 /* If the text starting at P going up to (but not including) END 00323 starts with a character constant, set *TOK to point to that 00324 character constant, and return 1. Otherwise, return zero. 00325 Signal an error if it contains a malformed or incomplete character 00326 constant. */ 00327 static int 00328 get_character_constant (struct macro_buffer *tok, char *p, char *end) 00329 { 00330 /* ISO/IEC 9899:1999 (E) Section 6.4.4.4 paragraph 1 00331 But of course, what really matters is that we handle it the same 00332 way GDB's C/C++ lexer does. So we call parse_escape in utils.c 00333 to handle escape sequences. 
*/ 00334 if ((p + 1 <= end && *p == '\'') 00335 || (p + 2 <= end 00336 && (p[0] == 'L' || p[0] == 'u' || p[0] == 'U') 00337 && p[1] == '\'')) 00338 { 00339 char *tok_start = p; 00340 int char_count = 0; 00341 00342 if (*p == '\'') 00343 p++; 00344 else if (*p == 'L' || *p == 'u' || *p == 'U') 00345 p += 2; 00346 else 00347 gdb_assert_not_reached ("unexpected character constant"); 00348 00349 for (;;) 00350 { 00351 if (p >= end) 00352 error (_("Unmatched single quote.")); 00353 else if (*p == '\'') 00354 { 00355 if (!char_count) 00356 error (_("A character constant must contain at least one " 00357 "character.")); 00358 p++; 00359 break; 00360 } 00361 else if (*p == '\\') 00362 { 00363 const char *s, *o; 00364 00365 s = o = ++p; 00366 char_count += c_parse_escape (&s, NULL); 00367 p += s - o; 00368 } 00369 else 00370 { 00371 p++; 00372 char_count++; 00373 } 00374 } 00375 00376 set_token (tok, tok_start, p); 00377 return 1; 00378 } 00379 else 00380 return 0; 00381 } 00382 00383 00384 /* If the text starting at P going up to (but not including) END 00385 starts with a string literal, set *TOK to point to that string 00386 literal, and return 1. Otherwise, return zero. Signal an error if 00387 it contains a malformed or incomplete string literal. 
*/ 00388 static int 00389 get_string_literal (struct macro_buffer *tok, char *p, char *end) 00390 { 00391 if ((p + 1 <= end 00392 && *p == '"') 00393 || (p + 2 <= end 00394 && (p[0] == 'L' || p[0] == 'u' || p[0] == 'U') 00395 && p[1] == '"')) 00396 { 00397 char *tok_start = p; 00398 00399 if (*p == '"') 00400 p++; 00401 else if (*p == 'L' || *p == 'u' || *p == 'U') 00402 p += 2; 00403 else 00404 gdb_assert_not_reached ("unexpected string literal"); 00405 00406 for (;;) 00407 { 00408 if (p >= end) 00409 error (_("Unterminated string in expression.")); 00410 else if (*p == '"') 00411 { 00412 p++; 00413 break; 00414 } 00415 else if (*p == '\n') 00416 error (_("Newline characters may not appear in string " 00417 "constants.")); 00418 else if (*p == '\\') 00419 { 00420 const char *s, *o; 00421 00422 s = o = ++p; 00423 c_parse_escape (&s, NULL); 00424 p += s - o; 00425 } 00426 else 00427 p++; 00428 } 00429 00430 set_token (tok, tok_start, p); 00431 return 1; 00432 } 00433 else 00434 return 0; 00435 } 00436 00437 00438 static int 00439 get_punctuator (struct macro_buffer *tok, char *p, char *end) 00440 { 00441 /* Here, speed is much less important than correctness and clarity. */ 00442 00443 /* ISO/IEC 9899:1999 (E) Section 6.4.6 Paragraph 1. 00444 Note that this table is ordered in a special way. A punctuator 00445 which is a prefix of another punctuator must appear after its 00446 "extension". Otherwise, the wrong token will be returned. 
*/ 00447 static const char * const punctuators[] = { 00448 "[", "]", "(", ")", "{", "}", "?", ";", ",", "~", 00449 "...", ".", 00450 "->", "--", "-=", "-", 00451 "++", "+=", "+", 00452 "*=", "*", 00453 "!=", "!", 00454 "&&", "&=", "&", 00455 "/=", "/", 00456 "%>", "%:%:", "%:", "%=", "%", 00457 "^=", "^", 00458 "##", "#", 00459 ":>", ":", 00460 "||", "|=", "|", 00461 "<<=", "<<", "<=", "<:", "<%", "<", 00462 ">>=", ">>", ">=", ">", 00463 "==", "=", 00464 0 00465 }; 00466 00467 int i; 00468 00469 if (p + 1 <= end) 00470 { 00471 for (i = 0; punctuators[i]; i++) 00472 { 00473 const char *punctuator = punctuators[i]; 00474 00475 if (p[0] == punctuator[0]) 00476 { 00477 int len = strlen (punctuator); 00478 00479 if (p + len <= end 00480 && ! memcmp (p, punctuator, len)) 00481 { 00482 set_token (tok, p, p + len); 00483 return 1; 00484 } 00485 } 00486 } 00487 } 00488 00489 return 0; 00490 } 00491 00492 00493 /* Peel the next preprocessor token off of SRC, and put it in TOK. 00494 Mutate TOK to refer to the first token in SRC, and mutate SRC to 00495 refer to the text after that token. SRC must be a shared buffer; 00496 the resulting TOK will be shared, pointing into the same string SRC 00497 does. Initialize TOK's last_token field. Return non-zero if we 00498 succeed, or 0 if we didn't find any more tokens in SRC. */ 00499 static int 00500 get_token (struct macro_buffer *tok, 00501 struct macro_buffer *src) 00502 { 00503 char *p = src->text; 00504 char *end = p + src->len; 00505 00506 gdb_assert (src->shared); 00507 00508 /* From the ISO C standard, ISO/IEC 9899:1999 (E), section 6.4: 00509 00510 preprocessing-token: 00511 header-name 00512 identifier 00513 pp-number 00514 character-constant 00515 string-literal 00516 punctuator 00517 each non-white-space character that cannot be one of the above 00518 00519 We don't have to deal with header-name tokens, since those can 00520 only occur after a #include, which we will never see. 
*/ 00521 00522 while (p < end) 00523 if (macro_is_whitespace (*p)) 00524 p++; 00525 else if (get_comment (tok, p, end)) 00526 p += tok->len; 00527 else if (get_pp_number (tok, p, end) 00528 || get_character_constant (tok, p, end) 00529 || get_string_literal (tok, p, end) 00530 /* Note: the grammar in the standard seems to be 00531 ambiguous: L'x' can be either a wide character 00532 constant, or an identifier followed by a normal 00533 character constant. By trying `get_identifier' after 00534 we try get_character_constant and get_string_literal, 00535 we give the wide character syntax precedence. Now, 00536 since GDB doesn't handle wide character constants 00537 anyway, is this the right thing to do? */ 00538 || get_identifier (tok, p, end) 00539 || get_punctuator (tok, p, end)) 00540 { 00541 /* How many characters did we consume, including whitespace? */ 00542 int consumed = p - src->text + tok->len; 00543 00544 src->text += consumed; 00545 src->len -= consumed; 00546 return 1; 00547 } 00548 else 00549 { 00550 /* We have found a "non-whitespace character that cannot be 00551 one of the above." Make a token out of it. */ 00552 int consumed; 00553 00554 set_token (tok, p, p + 1); 00555 consumed = p - src->text + tok->len; 00556 src->text += consumed; 00557 src->len -= consumed; 00558 return 1; 00559 } 00560 00561 return 0; 00562 } 00563 00564 00565 00566 /* Appending token strings, with and without splicing */ 00567 00568 00569 /* Append the macro buffer SRC to the end of DEST, and ensure that 00570 doing so doesn't splice the token at the end of SRC with the token 00571 at the beginning of DEST. SRC and DEST must have their last_token 00572 fields set. Upon return, DEST's last_token field is set correctly. 00573 00574 For example: 00575 00576 If DEST is "(" and SRC is "y", then we can return with 00577 DEST set to "(y" --- we've simply appended the two buffers. 
00578 00579 However, if DEST is "x" and SRC is "y", then we must not return 00580 with DEST set to "xy" --- that would splice the two tokens "x" and 00581 "y" together to make a single token "xy". However, it would be 00582 fine to return with DEST set to "x y". Similarly, "<" and "<" must 00583 yield "< <", not "<<", etc. */ 00584 static void 00585 append_tokens_without_splicing (struct macro_buffer *dest, 00586 struct macro_buffer *src) 00587 { 00588 int original_dest_len = dest->len; 00589 struct macro_buffer dest_tail, new_token; 00590 00591 gdb_assert (src->last_token != -1); 00592 gdb_assert (dest->last_token != -1); 00593 00594 /* First, just try appending the two, and call get_token to see if 00595 we got a splice. */ 00596 appendmem (dest, src->text, src->len); 00597 00598 /* If DEST originally had no token abutting its end, then we can't 00599 have spliced anything, so we're done. */ 00600 if (dest->last_token == original_dest_len) 00601 { 00602 dest->last_token = original_dest_len + src->last_token; 00603 return; 00604 } 00605 00606 /* Set DEST_TAIL to point to the last token in DEST, followed by 00607 all the stuff we just appended. */ 00608 init_shared_buffer (&dest_tail, 00609 dest->text + dest->last_token, 00610 dest->len - dest->last_token); 00611 00612 /* Re-parse DEST's last token. We know that DEST used to contain 00613 at least one token, so if it doesn't contain any after the 00614 append, then we must have spliced "/" and "*" or "/" and "/" to 00615 make a comment start. (Just for the record, I got this right 00616 the first time. This is not a bug fix.) */ 00617 if (get_token (&new_token, &dest_tail) 00618 && (new_token.text + new_token.len 00619 == dest->text + original_dest_len)) 00620 { 00621 /* No splice, so we're done. */ 00622 dest->last_token = original_dest_len + src->last_token; 00623 return; 00624 } 00625 00626 /* Okay, a simple append caused a splice. 
Let's chop dest back to 00627 its original length and try again, but separate the texts with a 00628 space. */ 00629 dest->len = original_dest_len; 00630 appendc (dest, ' '); 00631 appendmem (dest, src->text, src->len); 00632 00633 init_shared_buffer (&dest_tail, 00634 dest->text + dest->last_token, 00635 dest->len - dest->last_token); 00636 00637 /* Try to re-parse DEST's last token, as above. */ 00638 if (get_token (&new_token, &dest_tail) 00639 && (new_token.text + new_token.len 00640 == dest->text + original_dest_len)) 00641 { 00642 /* No splice, so we're done. */ 00643 dest->last_token = original_dest_len + 1 + src->last_token; 00644 return; 00645 } 00646 00647 /* As far as I know, there's no case where inserting a space isn't 00648 enough to prevent a splice. */ 00649 internal_error (__FILE__, __LINE__, 00650 _("unable to avoid splicing tokens during macro expansion")); 00651 } 00652 00653 /* Stringify an argument, and insert it into DEST. ARG is the text to 00654 stringify; it is LEN bytes long. */ 00655 00656 static void 00657 stringify (struct macro_buffer *dest, const char *arg, int len) 00658 { 00659 /* Trim initial whitespace from ARG. */ 00660 while (len > 0 && macro_is_whitespace (*arg)) 00661 { 00662 ++arg; 00663 --len; 00664 } 00665 00666 /* Trim trailing whitespace from ARG. */ 00667 while (len > 0 && macro_is_whitespace (arg[len - 1])) 00668 --len; 00669 00670 /* Insert the string. */ 00671 appendc (dest, '"'); 00672 while (len > 0) 00673 { 00674 /* We could try to handle strange cases here, like control 00675 characters, but there doesn't seem to be much point. */ 00676 if (macro_is_whitespace (*arg)) 00677 { 00678 /* Replace a sequence of whitespace with a single space. 
*/ 00679 appendc (dest, ' '); 00680 while (len > 1 && macro_is_whitespace (arg[1])) 00681 { 00682 ++arg; 00683 --len; 00684 } 00685 } 00686 else if (*arg == '\\' || *arg == '"') 00687 { 00688 appendc (dest, '\\'); 00689 appendc (dest, *arg); 00690 } 00691 else 00692 appendc (dest, *arg); 00693 ++arg; 00694 --len; 00695 } 00696 appendc (dest, '"'); 00697 dest->last_token = dest->len; 00698 } 00699 00700 /* See macroexp.h. */ 00701 00702 char * 00703 macro_stringify (const char *str) 00704 { 00705 struct macro_buffer buffer; 00706 int len = strlen (str); 00707 00708 init_buffer (&buffer, len); 00709 stringify (&buffer, str, len); 00710 appendc (&buffer, '\0'); 00711 00712 return free_buffer_return_text (&buffer); 00713 } 00714 00715 00716 /* Expanding macros! */ 00717 00718 00719 /* A singly-linked list of the names of the macros we are currently 00720 expanding --- for detecting expansion loops. */ 00721 struct macro_name_list { 00722 const char *name; 00723 struct macro_name_list *next; 00724 }; 00725 00726 00727 /* Return non-zero if we are currently expanding the macro named NAME, 00728 according to LIST; otherwise, return zero. 00729 00730 You know, it would be possible to get rid of all the NO_LOOP 00731 arguments to these functions by simply generating a new lookup 00732 function and baton which refuses to find the definition for a 00733 particular macro, and otherwise delegates the decision to another 00734 function/baton pair. But that makes the linked list of excluded 00735 macros chained through untyped baton pointers, which will make it 00736 harder to debug. :( */ 00737 static int 00738 currently_rescanning (struct macro_name_list *list, const char *name) 00739 { 00740 for (; list; list = list->next) 00741 if (strcmp (name, list->name) == 0) 00742 return 1; 00743 00744 return 0; 00745 } 00746 00747 00748 /* Gather the arguments to a macro expansion. 00749 00750 NAME is the name of the macro being invoked. 
(It's only used for
   printing error messages.)

   Assume that SRC is the text of the macro invocation immediately
   following the macro name.  For example, if we're processing the
   text foo(bar, baz), then NAME would be foo and SRC will be (bar,
   baz).

   If SRC doesn't start with an open paren ( token at all, return
   zero, leave SRC unchanged, and don't set *ARGC_P to anything.

   If SRC doesn't contain a properly terminated argument list, then
   raise an error.

   For a variadic macro, NARGS holds the number of formal arguments to
   the macro.  For a GNU-style variadic macro, this should be the
   number of named arguments.  For a non-variadic macro, NARGS should
   be -1.

   Otherwise, return a pointer to the first element of an array of
   macro buffers referring to the argument texts, and set *ARGC_P to
   the number of arguments we found --- the number of elements in the
   array.  The macro buffers share their text with SRC, and their
   last_token fields are initialized.  The array is allocated with
   xmalloc, and the caller is responsible for freeing it.

   NOTE WELL: if SRC starts with an open paren ( token followed
   immediately by a close paren ) token (e.g., the invocation looks
   like "foo()"), we treat that as one argument, which happens to be
   the empty list of tokens.  The caller should keep in mind that such
   a sequence of tokens is a valid way to invoke one-parameter
   function-like macros, but also a valid way to invoke zero-parameter
   function-like macros.  Eeew.

   Consume the tokens from SRC; after this call, SRC contains the text
   following the invocation.  
*/

static struct macro_buffer *
gather_arguments (const char *name, struct macro_buffer *src,
                  int nargs, int *argc_p)
{
  struct macro_buffer tok;
  int args_len, args_size;
  struct macro_buffer *args = NULL;
  /* Registered on the address of ARGS, not its current value, so the
     cleanup sees whatever array ARGS points to when an error is
     raised below.  (Presumably free_current_contents frees the
     pointer stored at that address --- confirm against its
     definition.)  */
  struct cleanup *back_to = make_cleanup (free_current_contents, &args);

  /* Does SRC start with an opening paren token?  Read from a copy of
     SRC, so SRC itself is unaffected if we don't find an opening
     paren.  */
  {
    struct macro_buffer temp;

    init_shared_buffer (&temp, src->text, src->len);

    if (! get_token (&tok, &temp)
        || tok.len != 1
        || tok.text[0] != '(')
      {
        /* No argument list: return a null pointer, with SRC and
           *ARGC_P untouched.  */
        discard_cleanups (back_to);
        return 0;
      }
  }

  /* Consume SRC's opening paren.  */
  get_token (&tok, src);

  args_len = 0;
  args_size = 6;
  args = (struct macro_buffer *) xmalloc (sizeof (*args) * args_size);

  /* Each iteration of this loop gathers one argument.  */
  for (;;)
    {
      struct macro_buffer *arg;
      int depth;

      /* Make sure we have room for the next argument.  */
      if (args_len >= args_size)
        {
          args_size *= 2;
          args = xrealloc (args, sizeof (*args) * args_size);
        }

      /* Initialize the next argument.  It starts out as an empty
         token positioned at the current start of SRC.  */
      arg = &args[args_len++];
      set_token (arg, src->text, src->text);

      /* Gather the argument's tokens.  DEPTH counts the parentheses
         opened inside this argument that are not yet closed.  */
      depth = 0;
      for (;;)
        {
          if (! get_token (&tok, src))
            error (_("Malformed argument list for macro `%s'."), name);

          /* Is tok an opening paren?  */
          if (tok.len == 1 && tok.text[0] == '(')
            depth++;

          /* Is tok a closing paren?  */
          else if (tok.len == 1 && tok.text[0] == ')')
            {
              /* If it's a closing paren at the top level, then that's
                 the end of the argument list.  */
              if (depth == 0)
                {
                  /* In the varargs case, the last argument may be
                     missing.  Add an empty argument in this case.  */
                  if (nargs != -1 && args_len == nargs - 1)
                    {
                      /* Make sure we have room for the argument.  */
                      if (args_len >= args_size)
                        {
                          args_size++;
                          args = xrealloc (args, sizeof (*args) * args_size);
                        }
                      arg = &args[args_len++];
                      set_token (arg, src->text, src->text);
                    }

                  /* Success: the caller now owns ARGS, so the
                     cleanup must not run.  */
                  discard_cleanups (back_to);
                  *argc_p = args_len;
                  return args;
                }

              depth--;
            }

          /* If tok is a comma at top level, then that's the end of
             the current argument.  However, if we are handling a
             variadic macro and we are computing the last argument, we
             want to include the comma and remaining tokens.  */
          else if (tok.len == 1 && tok.text[0] == ',' && depth == 0
                   && (nargs == -1 || args_len < nargs))
            break;

          /* Extend the current argument to enclose this token.  If
             this is the current argument's first token, leave out any
             leading whitespace, just for aesthetics.  */
          if (arg->len == 0)
            {
              arg->text = tok.text;
              arg->len = tok.len;
              arg->last_token = 0;
            }
          else
            {
              arg->len = (tok.text + tok.len) - arg->text;
              arg->last_token = tok.text - arg->text;
            }
        }
    }
}


/* The `expand' and `substitute_args' functions both invoke `scan'
   recursively, so we need a forward declaration somewhere.  */
static void scan (struct macro_buffer *dest,
                  struct macro_buffer *src,
                  struct macro_name_list *no_loop,
                  macro_lookup_ftype *lookup_func,
                  void *lookup_baton);


/* A helper function for substitute_args.

   ARGV is a vector of all the arguments; ARGC is the number of
   arguments.  IS_VARARGS is true if the macro being substituted is a
   varargs macro; in this case VA_ARG_NAME is the name of the
   "variable" argument.  VA_ARG_NAME is ignored if IS_VARARGS is
   false.  

   If the token TOK is the name of a parameter, return the parameter's
   index.  If TOK is not an argument, return -1.  */

static int
find_parameter (const struct macro_buffer *tok,
                int is_varargs, const struct macro_buffer *va_arg_name,
                int argc, const char * const *argv)
{
  int i;

  /* Only identifier tokens can name a parameter.  */
  if (! tok->is_identifier)
    return -1;

  /* TOK's text is not null-terminated, so compare the length first
     and then the bytes.  */
  for (i = 0; i < argc; ++i)
    if (tok->len == strlen (argv[i])
        && !memcmp (tok->text, argv[i], tok->len))
      return i;

  /* The "variable" argument is reported as the last parameter.  */
  if (is_varargs && tok->len == va_arg_name->len
      && ! memcmp (tok->text, va_arg_name->text, tok->len))
    return argc - 1;

  return -1;
}

/* Given the macro definition DEF, being invoked with the actual
   arguments given by ARGC and ARGV, substitute the arguments into the
   replacement list, and store the result in DEST.

   IS_VARARGS should be true if DEF is a varargs macro.  In this case,
   VA_ARG_NAME should be the name of the "variable" argument -- either
   __VA_ARGS__ for c99-style varargs, or the final argument name, for
   GNU-style varargs.  If IS_VARARGS is false, this parameter is
   ignored.

   If it is necessary to expand macro invocations in one of the
   arguments, use LOOKUP_FUNC and LOOKUP_BATON to find the macro
   definitions, and don't expand invocations of the macros listed in
   NO_LOOP.

   DEST must be empty on entry.  Parameter names are looked up among
   DEF's formal parameters (DEF->argc and DEF->argv); the index found
   is then used to select the actual argument from ARGV.  */

static void
substitute_args (struct macro_buffer *dest,
                 struct macro_definition *def,
                 int is_varargs, const struct macro_buffer *va_arg_name,
                 int argc, struct macro_buffer *argv,
                 struct macro_name_list *no_loop,
                 macro_lookup_ftype *lookup_func,
                 void *lookup_baton)
{
  /* A macro buffer for the macro's replacement list.  */
  struct macro_buffer replacement_list;
  /* The token we are currently considering.  */
  struct macro_buffer tok;
  /* The replacement list's pointer from just before TOK was lexed.  */
  char *original_rl_start;
  /* We have a single lookahead token to handle token splicing.  */
  struct macro_buffer lookahead;
  /* The lookahead token might not be valid.  */
  int lookahead_valid;
  /* The replacement list's pointer from just before LOOKAHEAD was
     lexed.  */
  char *lookahead_rl_start;

  init_shared_buffer (&replacement_list, (char *) def->replacement,
                      strlen (def->replacement));

  gdb_assert (dest->len == 0);
  dest->last_token = 0;

  /* Prime TOK and LOOKAHEAD.  An empty replacement list leaves DEST
     empty.  */
  original_rl_start = replacement_list.text;
  if (! get_token (&tok, &replacement_list))
    return;
  lookahead_rl_start = replacement_list.text;
  lookahead_valid = get_token (&lookahead, &replacement_list);

  for (;;)
    {
      /* Just for aesthetics.  If we skipped some whitespace, copy
         that to DEST.  */
      if (tok.text > original_rl_start)
        {
          appendmem (dest, original_rl_start, tok.text - original_rl_start);
          dest->last_token = dest->len;
        }

      /* Is this token the stringification operator?  */
      if (tok.len == 1
          && tok.text[0] == '#')
        {
          int arg;

          if (!lookahead_valid)
            error (_("Stringification operator requires an argument."));

          arg = find_parameter (&lookahead, is_varargs, va_arg_name,
                                def->argc, def->argv);
          if (arg == -1)
            error (_("Argument to stringification operator must name "
                     "a macro parameter."));

          stringify (dest, argv[arg].text, argv[arg].len);

          /* Read one token and let the loop iteration code handle the
             rest.  (LOOKAHEAD, the parameter name, has just been used
             up, so it is replaced here; the end of the loop then
             promotes the new LOOKAHEAD to TOK.)  */
          lookahead_rl_start = replacement_list.text;
          lookahead_valid = get_token (&lookahead, &replacement_list);
        }
      /* Is this token the splicing operator?  */
      else if (tok.len == 2
               && tok.text[0] == '#'
               && tok.text[1] == '#')
        error (_("Stray splicing operator"));
      /* Is the next token the splicing operator?  */
      else if (lookahead_valid
               && lookahead.len == 2
               && lookahead.text[0] == '#'
               && lookahead.text[1] == '#')
        {
          int finished = 0;
          int prev_was_comma = 0;

          /* Note that GCC warns if the result of splicing is not a
             token.  In the debugger there doesn't seem to be much
             benefit from doing this.  */

          /* Insert the first token.  A comma is held back rather than
             inserted, since whether it appears depends on the operand
             that follows.  Arguments are inserted as written, without
             being scanned for macro invocations.  */
          if (tok.len == 1 && tok.text[0] == ',')
            prev_was_comma = 1;
          else
            {
              int arg = find_parameter (&tok, is_varargs, va_arg_name,
                                        def->argc, def->argv);

              if (arg != -1)
                appendmem (dest, argv[arg].text, argv[arg].len);
              else
                appendmem (dest, tok.text, tok.len);
            }

          /* Apply a possible sequence of ## operators.  */
          for (;;)
            {
              /* The ## itself has already been read (as LOOKAHEAD, or
                 as TOK at the bottom of this loop), so this fetches
                 its right-hand operand.  */
              if (! get_token (&tok, &replacement_list))
                error (_("Splicing operator at end of macro"));

              /* Handle a comma before a ##.  If we are handling
                 varargs, and the token on the right hand side is the
                 varargs marker, and the final argument is empty or
                 missing, then drop the comma.  This is a GNU
                 extension.  There is one ambiguous case here,
                 involving pedantic behavior with an empty argument,
                 but we settle that in favor of GNU-style (GCC uses an
                 option).  If we aren't dealing with varargs, we
                 simply insert the comma.  */
              if (prev_was_comma)
                {
                  if (! (is_varargs
                         && tok.len == va_arg_name->len
                         && !memcmp (tok.text, va_arg_name->text, tok.len)
                         && argv[argc - 1].len == 0))
                    appendmem (dest, ",", 1);
                  prev_was_comma = 0;
                }

              /* Insert the token.  If it is a parameter, insert the
                 argument.  If it is a comma, treat it specially.  */
              if (tok.len == 1 && tok.text[0] == ',')
                prev_was_comma = 1;
              else
                {
                  int arg = find_parameter (&tok, is_varargs, va_arg_name,
                                            def->argc, def->argv);

                  if (arg != -1)
                    appendmem (dest, argv[arg].text, argv[arg].len);
                  else
                    appendmem (dest, tok.text, tok.len);
                }

              /* Now read another token.  If it is another splice, we
                 loop.  */
              original_rl_start = replacement_list.text;
              if (! get_token (&tok, &replacement_list))
                {
                  finished = 1;
                  break;
                }

              if (! (tok.len == 2
                     && tok.text[0] == '#'
                     && tok.text[1] == '#'))
                break;
            }

          if (prev_was_comma)
            {
              /* We saw a comma.  Insert it now.  */
              appendmem (dest, ",", 1);
            }

          dest->last_token = dest->len;
          if (finished)
            lookahead_valid = 0;
          else
            {
              /* Set up for the loop iterator.  TOK holds the first
                 token after the spliced sequence; hand it over as
                 LOOKAHEAD so the end of the loop makes it TOK
                 again.  */
              lookahead = tok;
              lookahead_rl_start = original_rl_start;
              lookahead_valid = 1;
            }
        }
      else
        {
          /* Is this token an identifier?  */
          int substituted = 0;
          int arg = find_parameter (&tok, is_varargs, va_arg_name,
                                    def->argc, def->argv);

          if (arg != -1)
            {
              struct macro_buffer arg_src;

              /* Expand any macro invocations in the argument text,
                 and append the result to dest.  Remember that scan
                 mutates its source, so we need to scan a new buffer
                 referring to the argument's text, not the argument
                 itself.  */
              init_shared_buffer (&arg_src, argv[arg].text, argv[arg].len);
              scan (dest, &arg_src, no_loop, lookup_func, lookup_baton);
              substituted = 1;
            }

          /* If it wasn't a parameter, then just copy it across.  */
          if (! substituted)
            append_tokens_without_splicing (dest, &tok);
        }

      if (! lookahead_valid)
        break;

      /* Advance: LOOKAHEAD becomes the current token, and a new
         LOOKAHEAD is read.  */
      tok = lookahead;
      original_rl_start = lookahead_rl_start;

      lookahead_rl_start = replacement_list.text;
      lookahead_valid = get_token (&lookahead, &replacement_list);
    }
}


/* Expand a call to a macro named ID, whose definition is DEF.  Append
   its expansion to DEST.  SRC is the input text following the ID
   token.  We are currently rescanning the expansions of the macros
   named in NO_LOOP; don't re-expand them.  Use LOOKUP_FUNC and
   LOOKUP_BATON to find definitions for any nested macro references.

   Return 1 if we decided to expand it, zero otherwise.  (If it's a
   function-like macro name that isn't followed by an argument list,
   we don't expand it.)  If we return zero, leave SRC unchanged.  */
static int
expand (const char *id,
        struct macro_definition *def,
        struct macro_buffer *dest,
        struct macro_buffer *src,
        struct macro_name_list *no_loop,
        macro_lookup_ftype *lookup_func,
        void *lookup_baton)
{
  struct macro_name_list new_no_loop;

  /* Create a new node to be added to the front of the no-expand list.
     This list is appropriate for re-scanning replacement lists, but
     it is *not* appropriate for scanning macro arguments; invocations
     of the macro whose arguments we are gathering *do* get expanded
     there.  */
  new_no_loop.name = id;
  new_no_loop.next = no_loop;

  /* What kind of macro are we expanding?  
*/ 01198 if (def->kind == macro_object_like) 01199 { 01200 struct macro_buffer replacement_list; 01201 01202 init_shared_buffer (&replacement_list, (char *) def->replacement, 01203 strlen (def->replacement)); 01204 01205 scan (dest, &replacement_list, &new_no_loop, lookup_func, lookup_baton); 01206 return 1; 01207 } 01208 else if (def->kind == macro_function_like) 01209 { 01210 struct cleanup *back_to = make_cleanup (null_cleanup, 0); 01211 int argc = 0; 01212 struct macro_buffer *argv = NULL; 01213 struct macro_buffer substituted; 01214 struct macro_buffer substituted_src; 01215 struct macro_buffer va_arg_name = {0}; 01216 int is_varargs = 0; 01217 01218 if (def->argc >= 1) 01219 { 01220 if (strcmp (def->argv[def->argc - 1], "...") == 0) 01221 { 01222 /* In C99-style varargs, substitution is done using 01223 __VA_ARGS__. */ 01224 init_shared_buffer (&va_arg_name, "__VA_ARGS__", 01225 strlen ("__VA_ARGS__")); 01226 is_varargs = 1; 01227 } 01228 else 01229 { 01230 int len = strlen (def->argv[def->argc - 1]); 01231 01232 if (len > 3 01233 && strcmp (def->argv[def->argc - 1] + len - 3, "...") == 0) 01234 { 01235 /* In GNU-style varargs, the name of the 01236 substitution parameter is the name of the formal 01237 argument without the "...". */ 01238 init_shared_buffer (&va_arg_name, 01239 (char *) def->argv[def->argc - 1], 01240 len - 3); 01241 is_varargs = 1; 01242 } 01243 } 01244 } 01245 01246 make_cleanup (free_current_contents, &argv); 01247 argv = gather_arguments (id, src, is_varargs ? def->argc : -1, 01248 &argc); 01249 01250 /* If we couldn't find any argument list, then we don't expand 01251 this macro. */ 01252 if (! argv) 01253 { 01254 do_cleanups (back_to); 01255 return 0; 01256 } 01257 01258 /* Check that we're passing an acceptable number of arguments for 01259 this macro. */ 01260 if (argc != def->argc) 01261 { 01262 if (is_varargs && argc >= def->argc - 1) 01263 { 01264 /* Ok. 
*/ 01265 } 01266 /* Remember that a sequence of tokens like "foo()" is a 01267 valid invocation of a macro expecting either zero or one 01268 arguments. */ 01269 else if (! (argc == 1 01270 && argv[0].len == 0 01271 && def->argc == 0)) 01272 error (_("Wrong number of arguments to macro `%s' " 01273 "(expected %d, got %d)."), 01274 id, def->argc, argc); 01275 } 01276 01277 /* Note that we don't expand macro invocations in the arguments 01278 yet --- we let subst_args take care of that. Parameters that 01279 appear as operands of the stringifying operator "#" or the 01280 splicing operator "##" don't get macro references expanded, 01281 so we can't really tell whether it's appropriate to macro- 01282 expand an argument until we see how it's being used. */ 01283 init_buffer (&substituted, 0); 01284 make_cleanup (cleanup_macro_buffer, &substituted); 01285 substitute_args (&substituted, def, is_varargs, &va_arg_name, 01286 argc, argv, no_loop, lookup_func, lookup_baton); 01287 01288 /* Now `substituted' is the macro's replacement list, with all 01289 argument values substituted into it properly. Re-scan it for 01290 macro references, but don't expand invocations of this macro. 01291 01292 We create a new buffer, `substituted_src', which points into 01293 `substituted', and scan that. We can't scan `substituted' 01294 itself, since the tokenization process moves the buffer's 01295 text pointer around, and we still need to be able to find 01296 `substituted's original text buffer after scanning it so we 01297 can free it. 
*/ 01298 init_shared_buffer (&substituted_src, substituted.text, substituted.len); 01299 scan (dest, &substituted_src, &new_no_loop, lookup_func, lookup_baton); 01300 01301 do_cleanups (back_to); 01302 01303 return 1; 01304 } 01305 else 01306 internal_error (__FILE__, __LINE__, _("bad macro definition kind")); 01307 } 01308 01309 01310 /* If the single token in SRC_FIRST followed by the tokens in SRC_REST 01311 constitute a macro invokation not forbidden in NO_LOOP, append its 01312 expansion to DEST and return non-zero. Otherwise, return zero, and 01313 leave DEST unchanged. 01314 01315 SRC_FIRST and SRC_REST must be shared buffers; DEST must not be one. 01316 SRC_FIRST must be a string built by get_token. */ 01317 static int 01318 maybe_expand (struct macro_buffer *dest, 01319 struct macro_buffer *src_first, 01320 struct macro_buffer *src_rest, 01321 struct macro_name_list *no_loop, 01322 macro_lookup_ftype *lookup_func, 01323 void *lookup_baton) 01324 { 01325 gdb_assert (src_first->shared); 01326 gdb_assert (src_rest->shared); 01327 gdb_assert (! dest->shared); 01328 01329 /* Is this token an identifier? */ 01330 if (src_first->is_identifier) 01331 { 01332 /* Make a null-terminated copy of it, since that's what our 01333 lookup function expects. */ 01334 char *id = xmalloc (src_first->len + 1); 01335 struct cleanup *back_to = make_cleanup (xfree, id); 01336 01337 memcpy (id, src_first->text, src_first->len); 01338 id[src_first->len] = 0; 01339 01340 /* If we're currently re-scanning the result of expanding 01341 this macro, don't expand it again. */ 01342 if (! currently_rescanning (no_loop, id)) 01343 { 01344 /* Does this identifier have a macro definition in scope? 
*/ 01345 struct macro_definition *def = lookup_func (id, lookup_baton); 01346 01347 if (def && expand (id, def, dest, src_rest, no_loop, 01348 lookup_func, lookup_baton)) 01349 { 01350 do_cleanups (back_to); 01351 return 1; 01352 } 01353 } 01354 01355 do_cleanups (back_to); 01356 } 01357 01358 return 0; 01359 } 01360 01361 01362 /* Expand macro references in SRC, appending the results to DEST. 01363 Assume we are re-scanning the result of expanding the macros named 01364 in NO_LOOP, and don't try to re-expand references to them. 01365 01366 SRC must be a shared buffer; DEST must not be one. */ 01367 static void 01368 scan (struct macro_buffer *dest, 01369 struct macro_buffer *src, 01370 struct macro_name_list *no_loop, 01371 macro_lookup_ftype *lookup_func, 01372 void *lookup_baton) 01373 { 01374 gdb_assert (src->shared); 01375 gdb_assert (! dest->shared); 01376 01377 for (;;) 01378 { 01379 struct macro_buffer tok; 01380 char *original_src_start = src->text; 01381 01382 /* Find the next token in SRC. */ 01383 if (! get_token (&tok, src)) 01384 break; 01385 01386 /* Just for aesthetics. If we skipped some whitespace, copy 01387 that to DEST. */ 01388 if (tok.text > original_src_start) 01389 { 01390 appendmem (dest, original_src_start, tok.text - original_src_start); 01391 dest->last_token = dest->len; 01392 } 01393 01394 if (! maybe_expand (dest, &tok, src, no_loop, lookup_func, lookup_baton)) 01395 /* We didn't end up expanding tok as a macro reference, so 01396 simply append it to dest. */ 01397 append_tokens_without_splicing (dest, &tok); 01398 } 01399 01400 /* Just for aesthetics. If there was any trailing whitespace in 01401 src, copy it to dest. 
*/ 01402 if (src->len) 01403 { 01404 appendmem (dest, src->text, src->len); 01405 dest->last_token = dest->len; 01406 } 01407 } 01408 01409 01410 char * 01411 macro_expand (const char *source, 01412 macro_lookup_ftype *lookup_func, 01413 void *lookup_func_baton) 01414 { 01415 struct macro_buffer src, dest; 01416 struct cleanup *back_to; 01417 01418 init_shared_buffer (&src, (char *) source, strlen (source)); 01419 01420 init_buffer (&dest, 0); 01421 dest.last_token = 0; 01422 back_to = make_cleanup (cleanup_macro_buffer, &dest); 01423 01424 scan (&dest, &src, 0, lookup_func, lookup_func_baton); 01425 01426 appendc (&dest, '\0'); 01427 01428 discard_cleanups (back_to); 01429 return dest.text; 01430 } 01431 01432 01433 char * 01434 macro_expand_once (const char *source, 01435 macro_lookup_ftype *lookup_func, 01436 void *lookup_func_baton) 01437 { 01438 error (_("Expand-once not implemented yet.")); 01439 } 01440 01441 01442 char * 01443 macro_expand_next (const char **lexptr, 01444 macro_lookup_ftype *lookup_func, 01445 void *lookup_baton) 01446 { 01447 struct macro_buffer src, dest, tok; 01448 struct cleanup *back_to; 01449 01450 /* Set up SRC to refer to the input text, pointed to by *lexptr. */ 01451 init_shared_buffer (&src, (char *) *lexptr, strlen (*lexptr)); 01452 01453 /* Set up DEST to receive the expansion, if there is one. */ 01454 init_buffer (&dest, 0); 01455 dest.last_token = 0; 01456 back_to = make_cleanup (cleanup_macro_buffer, &dest); 01457 01458 /* Get the text's first preprocessing token. */ 01459 if (! get_token (&tok, &src)) 01460 { 01461 do_cleanups (back_to); 01462 return 0; 01463 } 01464 01465 /* If it's a macro invocation, expand it. */ 01466 if (maybe_expand (&dest, &tok, &src, 0, lookup_func, lookup_baton)) 01467 { 01468 /* It was a macro invocation! Package up the expansion as a 01469 null-terminated string and return it. Set *lexptr to the 01470 start of the next token in the input. 
*/ 01471 appendc (&dest, '\0'); 01472 discard_cleanups (back_to); 01473 *lexptr = src.text; 01474 return dest.text; 01475 } 01476 else 01477 { 01478 /* It wasn't a macro invocation. */ 01479 do_cleanups (back_to); 01480 return 0; 01481 } 01482 }