From 7a29ee290307087e1749ce610207e93a15d0b78d Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 25 Apr 2023 11:19:18 +0200 Subject: [PATCH] RISC-V: adjust logic to avoid register name symbols Special casing GPR names in my_getSmallExpression() leads to a number of inconsistencies. Generalize this by utilizing the md_parse_name() hook, limited to when instruction operands are being parsed (really: probed). Then both the GPR lookup there and the yet more ad hoc workaround for PR/gas 29940 can be removed (including its extension needed for making the compressed form JAL work again). --- gas/config/tc-riscv.c | 121 ++++++++++++++++++++++++++++++++---------- gas/config/tc-riscv.h | 4 ++ 2 files changed, 98 insertions(+), 27 deletions(-) diff --git a/gas/config/tc-riscv.c b/gas/config/tc-riscv.c index c99bab9b1c4..a94d4f69df6 100644 --- a/gas/config/tc-riscv.c +++ b/gas/config/tc-riscv.c @@ -171,6 +171,8 @@ static enum float_abi float_abi = FLOAT_ABI_DEFAULT; static unsigned elf_flags = 0; +static bool probing_insn_operands; + /* Set the default_isa_spec. Return 0 if the spec isn't supported. Otherwise, return 1. */ @@ -2242,21 +2244,10 @@ my_getSmallExpression (expressionS *ep, bfd_reloc_code_real_type *reloc, char *str, const struct percent_op_match *percent_op) { size_t reloc_index; - unsigned crux_depth, str_depth, regno; + unsigned crux_depth, str_depth; + bool orig_probing = probing_insn_operands; char *crux; - /* First, check for integer registers. No callers can accept a reg, but - we need to avoid accidentally creating a useless undefined symbol below, - if this is an instruction pattern that can't match. A glibc build fails - if this is removed. */ - if (reg_lookup (&str, RCLASS_GPR, &regno)) - { - ep->X_op = O_register; - ep->X_add_number = regno; - expr_parse_end = str; - return 0; - } - /* Search for the start of the main expression. 
End the loop with CRUX pointing to the start of the main expression and @@ -2288,9 +2279,17 @@ my_getSmallExpression (expressionS *ep, bfd_reloc_code_real_type *reloc, return 0; } + /* Anything inside parentheses or subject to a relocation operator cannot + be a register and hence can be treated the same as operands to + directives (other than .insn). */ + if (str_depth || reloc_index) + probing_insn_operands = false; + my_getExpression (ep, crux); str = expr_parse_end; + probing_insn_operands = orig_probing; + /* Match every open bracket. */ while (crux_depth > 0 && (*str == ')' || *str == ' ' || *str == '\t')) if (*str++ == ')') @@ -2476,6 +2475,13 @@ riscv_is_priv_insn (insn_t insn) || ((insn ^ MATCH_SFENCE_VM) & MASK_SFENCE_VM) == 0); } +static symbolS *deferred_sym_rootP; +static symbolS *deferred_sym_lastP; +/* Since symbols can't easily be freed, try to recycle ones which weren't + committed. */ +static symbolS *orphan_sym_rootP; +static symbolS *orphan_sym_lastP; + /* This routine assembles an instruction into its binary format. As a side effect, it sets the global variable imm_reloc to the type of relocation to do if one of the operands is an address expression. */ @@ -2511,6 +2517,8 @@ riscv_ip (char *str, struct riscv_cl_insn *ip, expressionS *imm_expr, insn = (struct riscv_opcode *) str_hash_find (hash, str); + probing_insn_operands = true; + asargStart = asarg; for ( ; insn && insn->name && strcmp (insn->name, str) == 0; insn++) { @@ -2527,6 +2535,17 @@ riscv_ip (char *str, struct riscv_cl_insn *ip, expressionS *imm_expr, /* Reset error message of the previous round. */ error.msg = _("illegal operands"); error.missing_ext = NULL; + + /* Purge deferred symbols from the previous round, if any. 
*/ + while (deferred_sym_rootP) + { + symbolS *sym = deferred_sym_rootP; + + symbol_remove (sym, &deferred_sym_rootP, &deferred_sym_lastP); + symbol_append (sym, orphan_sym_lastP, &orphan_sym_rootP, + &orphan_sym_lastP); + } + create_insn (ip, insn); imm_expr->X_op = O_absent; @@ -2581,9 +2600,22 @@ riscv_ip (char *str, struct riscv_cl_insn *ip, expressionS *imm_expr, } if (*asarg != '\0') break; + /* Successful assembly. */ error.msg = NULL; insn_with_csr = false; + + /* Commit deferred symbols, if any. */ + while (deferred_sym_rootP) + { + symbolS *sym = deferred_sym_rootP; + + symbol_remove (sym, &deferred_sym_rootP, + &deferred_sym_lastP); + symbol_append (sym, symbol_lastP, &symbol_rootP, + &symbol_lastP); + symbol_table_insert (sym); + } goto out; case 'C': /* RVC */ @@ -2787,8 +2819,6 @@ riscv_ip (char *str, struct riscv_cl_insn *ip, expressionS *imm_expr, case 'p': goto branch; case 'a': - if (oparg == insn->args + 1) - goto jump_check_gpr; goto jump; case 'S': /* Floating-point RS1 x8-x15. */ if (!reg_lookup (&asarg, RCLASS_FPR, &regno) @@ -3292,18 +3322,6 @@ riscv_ip (char *str, struct riscv_cl_insn *ip, expressionS *imm_expr, continue; case 'a': /* 20-bit PC-relative offset. */ - /* Like in my_getSmallExpression() we need to avoid emitting - a stray undefined symbol if the 1st JAL entry doesn't match, - but the 2nd (with 2 operands) might. 
*/ - if (oparg == insn->args) - { - jump_check_gpr: - asargStart = asarg; - if (reg_lookup (&asarg, RCLASS_GPR, NULL) - && (*asarg == ',' || (ISSPACE (*asarg) && asarg[1] == ','))) - break; - asarg = asargStart; - } jump: my_getExpression (imm_expr, asarg); asarg = expr_parse_end; @@ -3542,6 +3560,8 @@ riscv_ip (char *str, struct riscv_cl_insn *ip, expressionS *imm_expr, if (save_c) *(asargStart - 1) = save_c; + probing_insn_operands = false; + return error; } @@ -3838,6 +3858,53 @@ riscv_after_parse_args (void) flag_dwarf_cie_version = 3; } +bool riscv_parse_name (const char *name, struct expressionS *ep, + enum expr_mode mode) +{ /* Name-parsing hook (tc-riscv.h maps md_parse_name onto it). While riscv_ip() is probing instruction operands, a NAME that spells a GPR and is not already a known symbol is bound to an undefined symbol kept on the deferred list instead of the symbol table. Returns true with *EP filled in as an O_symbol expression in that case, false otherwise. */ + unsigned int regno; + symbolS *sym; + + if (!probing_insn_operands) /* Flag is raised by riscv_ip() only for the duration of operand probing. */ + return false; + + gas_assert (mode == expr_normal); + + regno = reg_lookup_internal (name, RCLASS_GPR); + if (regno == (unsigned int)-1) /* Presumably the lookup's "not a GPR name" result - confirm against reg_lookup_internal(). */ + return false; + + if (symbol_find (name) != NULL) /* A symbol of this name is already known; nothing to defer. */ + return false; + + /* Create a symbol without adding it to the symbol table yet. + Insertion will happen only once we commit to using the insn + we're probing operands for. */ + for (sym = deferred_sym_rootP; sym; sym = symbol_next (sym)) /* Reuse a symbol already deferred under this name. */ + if (strcmp (name, S_GET_NAME (sym)) == 0) + break; + if (!sym) + { + for (sym = orphan_sym_rootP; sym; sym = symbol_next (sym)) /* Otherwise recycle a same-named symbol from the orphan list. */ + if (strcmp (name, S_GET_NAME (sym)) == 0) + { + symbol_remove (sym, &orphan_sym_rootP, &orphan_sym_lastP); + break; + } + if (!sym) /* Nothing to recycle: create a fresh undefined symbol. */ + sym = symbol_create (name, undefined_section, + &zero_address_frag, 0); + + symbol_append (sym, deferred_sym_lastP, &deferred_sym_rootP, + &deferred_sym_lastP); + } + + ep->X_op = O_symbol; + ep->X_add_symbol = sym; + ep->X_add_number = 0; + + return true; +} + long md_pcrel_from (fixS *fixP) { diff --git a/gas/config/tc-riscv.h b/gas/config/tc-riscv.h index 111492a330b..0c70c7d4739 100644 --- a/gas/config/tc-riscv.h +++ b/gas/config/tc-riscv.h @@ -123,6 +123,10 @@ extern void riscv_elf_final_processing (void); /* Adjust debug_line after relaxation. 
*/ #define DWARF2_USE_FIXED_ADVANCE_PC 1 +#define md_parse_name(name, exp, mode, c) \ + riscv_parse_name (name, exp, mode) +bool riscv_parse_name (const char *, struct expressionS *, enum expr_mode); + #define md_finish riscv_md_finish #define CONVERT_SYMBOLIC_ATTRIBUTE riscv_convert_symbolic_attribute -- 2.30.2