From e68c3d59acd09748a65233879033b947a9d1ad51 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 7 Jun 2021 12:05:02 +0200 Subject: [PATCH] x86: better respect quotes in parse_operands() When d02603dc201f ("Allow symbol and label names to be enclosed in double quotes") added the check for a double quote to the loop body there, it didn't go quite far enough: Parentheses inside quotes shouldn't be counted, and character restrictions also shouldn't apply inside quoted regions. In i386_att_operand(), which needs adjustment to remain in sync, besides respecting double quotes now, also change the logic such that we don't count parentheses anymore: Finding any opening or closing parenthesis or any double quote means we're done, because the subsequent parsing code wouldn't accept (extra) instances of these anyway. Note that in parse_operands() this mimics get_symbol_name()'s questionable behavior of treating \ specially only when ahead of ". (The behavior is suspicious because the meaning of \\ then is ambiguous. It is in particular impossible to have a (quoted) symbol name end in a single \.) I would have used get_symbol_name() here, if that didn't require fiddling with input_line_pointer. --- gas/ChangeLog | 10 +++++++++ gas/config/tc-i386.c | 37 ++++++++++++++++++++------------- gas/testsuite/gas/i386/quoted.d | 3 +++ gas/testsuite/gas/i386/quoted.s | 4 ++++ 4 files changed, 40 insertions(+), 14 deletions(-) diff --git a/gas/ChangeLog b/gas/ChangeLog index cc6f332d832..f490eeff4b2 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,13 @@ +2021-06-07 Jan Beulich + + * config/tc-i386.c (parse_operands): Reduce scope of + paren_not_balanced, to match the new in_quotes. Skip over quoted + regions of operands. + (i386_att_operand): Remove (mis-named) parens_balanced. Respect + double quote. + * testsuite/gas/i386/quoted.s: Add more cases. + * testsuite/gas/i386/quoted.d: Adjust expectations. 
+ 2021-06-07 Jan Beulich * config/tc-i386.c (digit_chars, is_digit_char): Delete. diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c index bbdb5bd4c57..c838823e088 100644 --- a/gas/config/tc-i386.c +++ b/gas/config/tc-i386.c @@ -5523,11 +5523,13 @@ parse_operands (char *l, const char *mnemonic) /* 1 if operand is pending after ','. */ unsigned int expecting_operand = 0; - /* Non-zero if operand parens not balanced. */ - unsigned int paren_not_balanced; - while (*l != END_OF_INSN) { + /* Non-zero if operand parens not balanced. */ + unsigned int paren_not_balanced = 0; + /* True if inside double quotes. */ + bool in_quotes = false; + /* Skip optional white space before operand. */ if (is_space_char (*l)) ++l; @@ -5539,11 +5541,16 @@ parse_operands (char *l, const char *mnemonic) return NULL; } token_start = l; /* After white space. */ - paren_not_balanced = 0; - while (paren_not_balanced || *l != ',') + while (in_quotes || paren_not_balanced || *l != ',') { if (*l == END_OF_INSN) { + if (in_quotes) + { + as_bad (_("unbalanced double quotes in operand %d."), + i.operands + 1); + return NULL; + } if (paren_not_balanced) { know (!intel_syntax); @@ -5554,14 +5561,18 @@ parse_operands (char *l, const char *mnemonic) else break; /* we are done */ } - else if (!is_operand_char (*l) && !is_space_char (*l) && *l != '"') + else if (*l == '\\' && l[1] == '"') + ++l; + else if (*l == '"') + in_quotes = !in_quotes; + else if (!in_quotes && !is_operand_char (*l) && !is_space_char (*l)) { as_bad (_("invalid character %s in operand %d"), output_invalid (*l), i.operands + 1); return NULL; } - if (!intel_syntax) + if (!intel_syntax && !in_quotes) { if (*l == '(') ++paren_not_balanced; @@ -11517,23 +11528,21 @@ i386_att_operand (char *operand_string) if (*base_string == ')') { char *temp_string; - unsigned int parens_balanced = 1; + /* We've already checked that the number of left & right ()'s are equal, so this loop will not be infinite. 
*/ do { base_string--; - if (*base_string == ')') - parens_balanced++; - if (*base_string == '(') - parens_balanced--; } - while (parens_balanced); + while (*base_string != '(' && *base_string != ')' + && *base_string != '"'); temp_string = base_string; /* Skip past '(' and whitespace. */ - ++base_string; + if (*base_string == '(') + ++base_string; if (is_space_char (*base_string)) ++base_string; diff --git a/gas/testsuite/gas/i386/quoted.d b/gas/testsuite/gas/i386/quoted.d index 704329db1f9..11b28c03fc3 100644 --- a/gas/testsuite/gas/i386/quoted.d +++ b/gas/testsuite/gas/i386/quoted.d @@ -10,6 +10,9 @@ Disassembly of section .text: [ ]*[a-f0-9]+:[ ]*8b 80 00 00 00 00[ ]+mov 0x0\(%eax\),%eax [a-f0-9]+: (R_386_|dir)32 x\(y\) [ ]*[a-f0-9]+:[ ]*26 a1 00 00 00 00[ ]+mov %es:0x0,%eax [a-f0-9]+: (R_386_|dir)32 x\(y\) [ ]*[a-f0-9]+:[ ]*26 8b 80 00 00 00 00[ ]+mov %es:0x0\(%eax\),%eax [a-f0-9]+: (R_386_|dir)32 x\(y\) +[ ]*[a-f0-9]+:[ ]*a1 00 00 00 00[ ]+mov 0x0,%eax [a-f0-9]+: (R_386_|dir)32 x\(y +[ ]*[a-f0-9]+:[ ]*a1 00 00 00 00[ ]+mov 0x0,%eax [a-f0-9]+: (R_386_|dir)32 x\)y +[ ]*[a-f0-9]+:[ ]*a1 00 00 00 00[ ]+mov 0x0,%eax [a-f0-9]+: (R_386_|dir)32 x\?y [ ]*[a-f0-9]+:[ ]*ff 15 00 00 00 00[ ]+call \*0x0 [a-f0-9]+: (R_386_|dir)32 x\(y\) [ ]*[a-f0-9]+:[ ]*26 ff 15 00 00 00 00[ ]+call \*%es:0x0 [a-f0-9]+: (R_386_|dir)32 x\(y\) [ ]*[a-f0-9]+:[ ]*26 ff 15 00 00 00 00[ ]+call \*%es:0x0 [a-f0-9]+: (R_386_|dir)32 x\(y\) diff --git a/gas/testsuite/gas/i386/quoted.s b/gas/testsuite/gas/i386/quoted.s index 3bbeedfa0bc..1e57c86007f 100644 --- a/gas/testsuite/gas/i386/quoted.s +++ b/gas/testsuite/gas/i386/quoted.s @@ -5,6 +5,10 @@ quoted: mov %es:"x(y)", %eax mov %es:"x(y)"(%eax), %eax + mov "x(y", %eax + mov "x)y", %eax + mov "x?y", %eax + call *"x(y)" call *%es:"x(y)" call %es:*"x(y)" -- 2.30.2