From ff5a51b377212532a0cf2acea37c6a5672893d8f Mon Sep 17 00:00:00 2001 From: "Guillermo E. Martinez" Date: Thu, 20 Apr 2023 16:37:01 +0200 Subject: [PATCH] gas: support for the BPF pseudo-c assembly syntax This patch adds support to the GNU assembler for an alternative assembly syntax used in BPF. This syntax is C-like and very unconventional for an assembly language, but it is generated by clang/llvm and is also used in inline asm templates in kernel code, so we ought to support it. After this patch, the assembler is able to parse instructions in both supported syntaxes: the normal assembly-like syntax and the pseudo-C syntax. Instruction formats can be mixed in the source program: the assembler recognizes the right syntax to use. gas/ChangeLog: 2023-04-20 Guillermo E. Martinez PR gas/29728 * config/tc-bpf.h (TC_EQUAL_IN_INSN): Define. * config/tc-bpf.c (LEX_IS_SYMBOL_COMPONENT): Define. (LEX_IS_WHITESPACE): Likewise. (LEX_IS_NEWLINE): Likewise. (LEX_IS_ARITHM_OP): Likewise. (LEX_IS_STAR): Likewise. (LEX_IS_CLSE_BR): Likewise. (LEX_IS_OPEN_BR): Likewise. (LEX_IS_EQUAL): Likewise. (LEX_IS_EXCLA): Likewise. (ST_EOI): Likewise. (MAX_TOKEN_SZ): Likewise. (init_pseudoc_lex): New function. (md_begin): Call init_pseudoc_lex. (valid_expr): New function. (build_bpf_non_generic_load): Likewise. (build_bpf_atomic_insn): Likewise. (build_bpf_jmp_insn): Likewise. (build_bpf_arithm_insn): Likewise. (build_bpf_endianness): Likewise. (build_bpf_load_store_insn): Likewise. (look_for_reserved_word): Likewise. (is_register): Likewise. (is_cast): Likewise. (get_token): Likewise. (bpf_pseudoc_to_normal_syntax): Likewise. (md_assemble): Try pseudo-C syntax if an instruction cannot be parsed. 
--- gas/ChangeLog | 32 + gas/config/tc-bpf.c | 1521 ++++++++++++++++++++++++++++++++++++++++++- gas/config/tc-bpf.h | 2 + 3 files changed, 1551 insertions(+), 4 deletions(-) diff --git a/gas/ChangeLog b/gas/ChangeLog index 80548d24ba6..e1cfcec4abf 100644 --- a/gas/ChangeLog +++ b/gas/ChangeLog @@ -1,3 +1,35 @@ +2023-04-20 Guillermo E. Martinez + + PR gas/29728 + * config/tc-bpf.h (TC_EQUAL_IN_INSN): Define. + * config/tc-bpf.c (LEX_IS_SYMBOL_COMPONENT): Define. + (LEX_IS_WHITESPACE): Likewise. + (LEX_IS_NEWLINE): Likewise. + (LEX_IS_ARITHM_OP): Likewise. + (LEX_IS_STAR): Likewise. + (LEX_IS_CLSE_BR): Likewise. + (LEX_IS_OPEN_BR): Likewise. + (LEX_IS_EQUAL): Likewise. + (LEX_IS_EXCLA): Likewise. + (ST_EOI): Likewise. + (MAX_TOKEN_SZ): Likewise. + (init_pseudoc_lex): New function. + (md_begin): Call init_pseudoc_lex. + (valid_expr): New function. + (build_bpf_non_generic_load): Likewise. + (build_bpf_atomic_insn): Likewise. + (build_bpf_jmp_insn): Likewise. + (build_bpf_arithm_insn): Likewise. + (build_bpf_endianness): Likewise. + (build_bpf_load_store_insn): Likewise. + (look_for_reserved_word): Likewise. + (is_register): Likewise. + (is_cast): Likewise. + (get_token): Likewise. + (bpf_pseudoc_to_normal_syntax): Likewise. + (md_assemble): Try pseudo-C syntax if an instruction cannot be + parsed. 
+ 2023-04-18 mengqinggang * config/tc-loongarch.c (loongarch_fix_adjustable): Symbols with diff --git a/gas/config/tc-bpf.c b/gas/config/tc-bpf.c index 1f8b0cc2ede..171fc682806 100644 --- a/gas/config/tc-bpf.c +++ b/gas/config/tc-bpf.c @@ -28,13 +28,36 @@ #include "elf/common.h" #include "elf/bpf.h" #include "dwarf2dbg.h" +#include const char comment_chars[] = ";"; -const char line_comment_chars[] = "#"; +const char line_comment_chars[] = "#"; const char line_separator_chars[] = "`"; const char EXP_CHARS[] = "eE"; const char FLT_CHARS[] = "fFdD"; +static const char *invalid_expression; +static char pseudoc_lex[256]; +static const char symbol_chars[] = +"_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; + +static const char arithm_op[] = "+-/<>%&|^"; + +static void init_pseudoc_lex (void); + +#define LEX_IS_SYMBOL_COMPONENT 1 +#define LEX_IS_WHITESPACE 2 +#define LEX_IS_NEWLINE 3 +#define LEX_IS_ARITHM_OP 4 +#define LEX_IS_STAR 6 +#define LEX_IS_CLSE_BR 7 +#define LEX_IS_OPEN_BR 8 +#define LEX_IS_EQUAL 9 +#define LEX_IS_EXCLA 10 + +#define ST_EOI 100 +#define MAX_TOKEN_SZ 100 + /* Like s_lcomm_internal in gas/read.c but the alignment string is allowed to be optional. 
*/ @@ -158,6 +181,32 @@ md_show_usage (FILE * stream) } + +static void +init_pseudoc_lex (void) +{ + const char *p; + + for (p = symbol_chars; *p; ++p) + pseudoc_lex[(unsigned char) *p] = LEX_IS_SYMBOL_COMPONENT; + + pseudoc_lex[' '] = LEX_IS_WHITESPACE; + pseudoc_lex['\t'] = LEX_IS_WHITESPACE; + pseudoc_lex['\r'] = LEX_IS_WHITESPACE; + pseudoc_lex['\n'] = LEX_IS_NEWLINE; + pseudoc_lex['*'] = LEX_IS_STAR; + pseudoc_lex[')'] = LEX_IS_CLSE_BR; + pseudoc_lex['('] = LEX_IS_OPEN_BR; + pseudoc_lex[']'] = LEX_IS_CLSE_BR; + pseudoc_lex['['] = LEX_IS_OPEN_BR; + + for (p = arithm_op; *p; ++p) + pseudoc_lex[(unsigned char) *p] = LEX_IS_ARITHM_OP; + + pseudoc_lex['='] = LEX_IS_EQUAL; + pseudoc_lex['!'] = LEX_IS_EXCLA; +} + void md_begin (void) { @@ -196,6 +245,9 @@ md_begin (void) cgen_bitset_set (bpf_isa, ISA_EBPFLE); } + /* Ensure that lines can begin with '*' in BPF store pseudoc instruction. */ + lex_type['*'] |= LEX_BEGIN_NAME; + /* Set the machine number and endian. */ gas_cgen_cpu_desc = bpf_cgen_cpu_open (CGEN_CPU_OPEN_ENDIAN, target_big_endian ? @@ -212,6 +264,7 @@ md_begin (void) /* Set the machine type. 
*/ bfd_default_set_arch_mach (stdoutput, bfd_arch_bpf, bfd_mach_bpf); + init_pseudoc_lex(); } valueT @@ -362,12 +415,1456 @@ md_apply_fix (fixS *fixP, valueT *valP, segT seg) gas_cgen_md_apply_fix (fixP, valP, seg); } +/* + The BPF pseudo grammar: + + instruction : bpf_alu_insn + | bpf_alu32_insn + | bpf_jump_insn + | bpf_load_store_insn + | bpf_load_store32_insn + | bpf_non_generic_load + | bpf_endianness_conv_insn + | bpf_64_imm_load_insn + | bpf_atomic_insn + ; + + bpf_alu_insn : BPF_REG bpf_alu_operator register_or_imm32 + ; + + bpf_alu32_insn : BPF_REG32 bpf_alu_operator register32_or_imm32 + ; + + bpf_jump_insn : BPF_JA offset + | IF BPF_REG bpf_jump_operator register_or_imm32 BPF_JA offset + | IF BPF_REG32 bpf_jump_operator register_or_imm32 BPF_JA offset + | BPF_CALL offset + | BPF_EXIT + ; + + bpf_load_store_insn : BPF_REG CHR_EQUAL bpf_size_cast BPF_CHR_OPEN_BR \ + register_and_offset BPF_CHR_CLSE_BR + | bpf_size_cast register_and_offset CHR_EQUAL BPF_REG + ; + + bpf_load_store32_insn : BPF_REG CHR_EQUAL bpf_size_cast BPF_CHR_OPEN_BR \ + register32_and_offset BPF_CHR_CLSE_BR + | bpf_size_cast register_and_offset CHR_EQUAL BPF_REG32 + ; + + bpf_non_generic_load : BPF_REG_R0 CHR_EQUAL bpf_size_cast BPF_LD BPF_CHR_OPEN_BR \ + imm32 BPF_CHR_CLSE_BR + ; + + bpf_endianness_conv_insn : BPF_REG_N bpf_endianness_mnem BPF_REG_N + ; + + bpf_64_imm_load_insn : BPF_REG imm64 BPF_LL + ; + + bpf_atomic_insn : BPF_LOCK bpf_size_cast_32_64 register_and_offset BPF_ADD BPF_REG + + register_and_offset : BPF_CHR_OPEN_BR BPF_REG offset BPF_CHR_CLSE_BR + ; + + register32_and_offset : BPF_CHR_OPEN_BR BPF_REG32 offset BPF_CHR_CLSE_BR + ; + + bpf_size_cast : CHR_START BPF_CHR_OPEN_BR bpf_size CHR_START BPF_CHR_CLSE_BR + ; + + bpf_size_cast_32_64 : CHR_START BPF_CHR_OPEN_BR bpf_size_cast_32_64 CHR_STAR BPF_CHR_CLSE_BR + ; + + bpf_size_32_64 : BPF_CAST_U32 + | BPF_CAST_U64 + ; + + bpf_size : BPF_CAST_U8 + | BPF_CAST_U16 + | BPF_CAST_U32 + | BPF_CAST_U64 + ; + + bpf_jump_operator : 
BPF_JEQ + | BPF_JGT + | BPF_JGE + | BPF_JNE + | BPF_JSGT + | BPF_JSGE + | BPF_JLT + | BPF_JLE + | BPF_JSLT + | BPF_JSLE + ; + + bpf_alu_operator : BPF_ADD + | BPF_SUB + | BPF_MUL + | BPF_DIV + | BPF_OR + | BPF_AND + | BPF_LSH + | BPF_RSH + | BPF_NEG + | BPF_MOD + | BPF_XOR + | BPF_ARSH + | CHR_EQUAL + ; + + bpf_endianness_mnem : BPF_LE16 + | BPF_LE32 + | BPF_LE64 + | BPF_BE16 + | BPF_BE32 + | BPF_BE64 + ; + + offset : BPF_EXPR + | BPF_SYMBOL + ; + + register_or_imm32 : BPF_REG + | expression + ; + + register32_or_imm32 : BPF_REG32 + | expression + ; + + imm32 : BPF_EXPR + | BPF_SYMBOL + ; + + imm64 : BPF_EXPR + | BPF_SYMBOL + ; + + register_or_expression : BPF_EXPR + | BPF_REG + ; + + BPF_EXPR : GAS_EXPR + +*/ + +enum bpf_token_type + { + /* Keep grouped to quickly access. */ + BPF_ADD, + BPF_SUB, + BPF_MUL, + BPF_DIV, + BPF_OR, + BPF_AND, + BPF_LSH, + BPF_RSH, + BPF_MOD, + BPF_XOR, + BPF_MOV, + BPF_ARSH, + BPF_NEG, + + BPF_REG, + + BPF_IF, + BPF_GOTO, + + /* Keep grouped to quickly access. */ + BPF_JEQ, + BPF_JGT, + BPF_JGE, + BPF_JLT, + BPF_JLE, + BPF_JSET, + BPF_JNE, + BPF_JSGT, + BPF_JSGE, + BPF_JSLT, + BPF_JSLE, + + BPF_SYMBOL, + BPF_CHR_CLSE_BR, + BPF_CHR_OPEN_BR, + + /* Keep grouped to quickly access. */ + BPF_CAST_U8, + BPF_CAST_U16, + BPF_CAST_U32, + BPF_CAST_U64, + + /* Keep grouped to quickly access. 
*/ + BPF_LE16, + BPF_LE32, + BPF_LE64, + BPF_BE16, + BPF_BE32, + BPF_BE64, + + BPF_LOCK, + + BPF_IND_CALL, + BPF_LD, + BPF_LL, + BPF_EXPR, + BPF_UNKNOWN, + }; + +static int +valid_expr (const char *e, const char **end_expr) +{ + invalid_expression = NULL; + char *hold = input_line_pointer; + expressionS exp; + + input_line_pointer = (char *) e; + deferred_expression (&exp); + *end_expr = input_line_pointer; + input_line_pointer = hold; + + return invalid_expression == NULL; +} + +static char * +build_bpf_non_generic_load (char *src, enum bpf_token_type cast, + const char *imm32) +{ + char *bpf_insn; + static const char *cast_rw[] = {"b", "h", "w", "dw"}; + + bpf_insn = xasprintf ("%s%s%s %s%s%s%s", + "ld", + src ? "ind" : "abs", + cast_rw[cast - BPF_CAST_U8], + src ? "%" : "", + src ? src : "", + src ? "," : "", + imm32); + return bpf_insn; +} + +static char * +build_bpf_atomic_insn (char *dst, char *src, + enum bpf_token_type atomic_insn, + enum bpf_token_type cast, + const char *offset) +{ + char *bpf_insn; + static const char *cast_rw[] = {"w", "dw"}; + static const char *mnem[] = {"xadd"}; + + bpf_insn = xasprintf ("%s%s [%%%s%s%s],%%%s", mnem[atomic_insn - BPF_ADD], + cast_rw[cast - BPF_CAST_U32], dst, + *offset != '+' ? "+" : "", + offset, src); + return bpf_insn; +} + +static char * +build_bpf_jmp_insn (char *dst, char *src, + char *imm32, enum bpf_token_type op, + const char *sym, const char *offset) +{ + char *bpf_insn; + static const char *mnem[] = + { + "jeq", "jgt", "jge", "jlt", + "jle", "jset", "jne", "jsgt", + "jsge", "jslt", "jsle" + }; + + const char *in32 = (*dst == 'w' ? "32" : ""); + + *dst = 'r'; + if (src) + *src = 'r'; + + bpf_insn = xasprintf ("%s%s %%%s,%s%s,%s", + mnem[op - BPF_JEQ], in32, dst, + src ? "%" : "", + src ? src : imm32, + offset ? 
offset : sym); + return bpf_insn; +} + +static char * +build_bpf_arithm_insn (char *dst, char *src, + int load64, const char *imm32, + enum bpf_token_type type) +{ + char *bpf_insn; + static const char *mnem[] = + { + "add", "sub", "mul", "div", + "or", "and", "lsh", "rsh", + "mod", "xor", "mov", "arsh", + "neg", + }; + const char *in32 = (*dst == 'w' ? "32" : ""); + + *dst = 'r'; + if (src) + *src = 'r'; + + if (type == BPF_NEG) + bpf_insn = xasprintf ("%s%s %%%s", mnem[type - BPF_ADD], in32, dst); + else if (load64) + bpf_insn = xasprintf ("%s %%%s,%s", "lddw", dst, imm32); + else + bpf_insn = xasprintf ("%s%s %%%s,%s%s", mnem[type - BPF_ADD], + in32, dst, + src ? "%" : "", + src ? src: imm32); + return bpf_insn; +} + +static char * +build_bpf_endianness (char *dst, enum bpf_token_type endianness) +{ + char *bpf_insn; + static const char *size[] = {"16", "32", "64"}; + int be = 1; + + if (endianness == BPF_LE16 + || endianness == BPF_LE32 + || endianness == BPF_LE64) + be = 0; + + bpf_insn = xasprintf ("%s %%%s,%s", be ? "endbe" : "endle", + dst, be ? size[endianness - BPF_BE16] : size[endianness - BPF_LE16]); + return bpf_insn; +} + +static char * +build_bpf_load_store_insn (char *dst, char *src, + enum bpf_token_type cast, + const char *offset, int isload) +{ + char *bpf_insn; + static const char *cast_rw[] = {"b", "h", "w", "dw"}; + + *dst = *src = 'r'; + if (isload) + bpf_insn = xasprintf ("%s%s %%%s,[%%%s%s%s]", "ldx", + cast_rw[cast - BPF_CAST_U8], dst, src, + *offset != '+' ? "+" : "", + offset); + else + bpf_insn = xasprintf ("%s%s [%%%s%s%s],%%%s", "stx", + cast_rw[cast - BPF_CAST_U8], dst, + *offset != '+' ? 
"+" : "", + offset, src); + return bpf_insn; +} + +static int +look_for_reserved_word (const char *token, enum bpf_token_type *type) +{ + int i; + static struct + { + const char *name; + enum bpf_token_type type; + } reserved_words[] = + { + { + .name = "if", + .type = BPF_IF + }, + { + .name = "goto", + .type = BPF_GOTO + }, + { + .name = "le16", + .type = BPF_LE16 + }, + { + .name = "le32", + .type = BPF_LE32 + }, + { + .name = "le64", + .type = BPF_LE64 + }, + { + .name = "be16", + .type = BPF_BE16 + }, + { + .name = "be32", + .type = BPF_BE32 + }, + { + .name = "be64", + .type = BPF_BE64 + }, + { + .name = "lock", + .type = BPF_LOCK + }, + { + .name = "callx", + .type = BPF_IND_CALL + }, + { + .name = "skb", + .type = BPF_LD + }, + { + .name = "ll", + .type = BPF_LL + }, + { + .name = NULL, + } + }; + + for (i = 0; reserved_words[i].name; ++i) + if (*reserved_words[i].name == *token + && !strcmp (reserved_words[i].name, token)) + { + *type = reserved_words[i].type; + return 1; + } + + return 0; +} + +static int +is_register (const char *token, int len) +{ + if (token[0] == 'r' || token[0] == 'w') + if ((len == 2 && isdigit (token[1])) + || (len == 3 && token[1] == '1' && token[2] == '0')) + return 1; + + return 0; +} + +static enum bpf_token_type +is_cast (const char *token) +{ + static const char *cast_rw[] = {"u8", "u16", "u32", "u64"}; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE (cast_rw); ++i) + if (!strcmp (token, cast_rw[i])) + return BPF_CAST_U8 + i; + + return BPF_UNKNOWN; +} + +static enum bpf_token_type +get_token (const char **insn, char *token, size_t *tlen) +{ +#define GET() \ + (*str == '\0' \ + ? 
EOF \ + : *(unsigned char *)(str++)) + +#define UNGET() (--str) + +#define START_EXPR() \ + do \ + { \ + if (expr == NULL) \ + expr = str - 1; \ + } while (0) + +#define SCANNER_SKIP_WHITESPACE() \ + do \ + { \ + do \ + ch = GET (); \ + while (ch != EOF \ + && ((ch) == ' ' || (ch) == '\t')); \ + if (ch != EOF) \ + UNGET (); \ + } while (0) + + const char *str = *insn; + char ch, ch2 = 0; + enum bpf_token_type ttype = BPF_UNKNOWN; + size_t len = 0; + const char *expr = NULL; + const char *end_expr = NULL; + int state = 0; + int return_token = 0; + + while (1) + { + ch = GET (); + + if (ch == EOF || len > MAX_TOKEN_SZ) + break; + + switch (pseudoc_lex[(unsigned char) ch]) + { + case LEX_IS_WHITESPACE: + SCANNER_SKIP_WHITESPACE (); + return_token = 1; + + switch (state) + { + case 12: /* >' ' */ + ttype = BPF_JGT; + break; + + case 17: /* ==' ' */ + ttype = BPF_JEQ; + break; + + case 18: /* <' ' */ + ttype = BPF_JLT; + break; + + case 20: /* &' ' */ + ttype = BPF_JSET; + break; + + case 22: /* s<' '*/ + ttype = BPF_JSLT; + break; + + case 14: /* s> ' ' */ + ttype = BPF_JSGT; + break; + + case 16: /* =' ' */ + ttype = BPF_MOV; + break; + + default: + return_token = 0; + } + break; + + case LEX_IS_EXCLA: + token[len++] = ch; + state = 21; + break; + + case LEX_IS_ARITHM_OP: + if (state == 16) + { + /* ='-' is handle as '=' */ + UNGET (); + ttype = BPF_MOV; + return_token = 1; + break; + } + + START_EXPR(); + token[len++] = ch; + switch (ch) + { +#define BPF_ARITHM_OP(op, type) \ + case (op): \ + state = 6; \ + ttype = (type); \ + break; + + BPF_ARITHM_OP('+', BPF_ADD); + BPF_ARITHM_OP('-', BPF_SUB); + BPF_ARITHM_OP('*', BPF_MUL); + BPF_ARITHM_OP('/', BPF_DIV); + BPF_ARITHM_OP('|', BPF_OR); + BPF_ARITHM_OP('%', BPF_MOD); + BPF_ARITHM_OP('^', BPF_XOR); + + case '&': + state = 20; /* '&' */ + break; + + case '<': + switch (state) + { + case 0: + state = 18; /* '<' */ + break; + + case 18: + state = 19; /* <'<' */ + break; + + case 8: + state = 22; /* s'<' */ + break; + } + 
break; + + case '>': + switch (state) + { + case 0: + state = 12; /* '>' */ + break; + + case 12: + state = 13; /* >'>' */ + break; + + case 8: + state = 14; /* s'>' */ + break; + + case 14: + state = 15; /* s>'>' */ + break; + } + break; + } + break; + + case LEX_IS_STAR: + switch (state) + { + case 0: + token[len++] = ch; + START_EXPR (); + state = 2; /* '*', It could be the fist cast char. */ + break; + + case 16: /* ='*' Not valid token. */ + ttype = BPF_MOV; + return_token = 1; + UNGET (); + break; + + case 4: /* *(uXX'*' */ + token[len++] = ch; + state = 5; + break; + } + break; + + case LEX_IS_OPEN_BR: + START_EXPR (); + token[len++] = ch; + return_token = 1; + + switch (state) + { + case 2: + state = 3; /* *'(' second char of a cast or expr. */ + return_token = 0; + break; + + case 6: + if (valid_expr (expr, &end_expr)) + { + len = end_expr - expr; + memcpy (token, expr, len); + ttype = BPF_EXPR; + str = end_expr; + } + else + { + len = 0; + while (*invalid_expression) + token[len++] = *invalid_expression++; + + token[len] = 0; + ttype = BPF_UNKNOWN; + } + break; + + default: + ttype = BPF_CHR_OPEN_BR; + SCANNER_SKIP_WHITESPACE (); + ch2 = GET (); + + if ((isdigit (ch2) || ch2 == '(') + && valid_expr (expr, &end_expr)) + { + len = end_expr - expr; + memcpy (token, expr, len); + ttype = BPF_EXPR; + str = end_expr; + } + else + UNGET (); + } + break; + + case LEX_IS_CLSE_BR: + token[len++] = ch; + + if (state == 0) + { + ttype = BPF_CHR_CLSE_BR; + return_token = 1; + } + else if (state == 5) /* *(uXX*')' */ + return_token = 1; + break; + + case LEX_IS_EQUAL: + token[len++] = ch; + return_token = 1; + + switch (state) + { + case 0: + state = 16; /* '=' */ + return_token = 0; + break; + + case 16: + state = 17; /* ='=' */ + return_token = 0; + break; + + case 2: /* *'=' */ + ttype = BPF_MUL; + break; + + case 10: /* s>>'=' */ + ttype = BPF_ARSH; + break; + + case 12: /* >'=' */ + ttype = BPF_JGE; + break; + + case 13: /* >>'=' */ + ttype = BPF_RSH; + break; + + 
case 14: /* s>'=' */ + ttype = BPF_JSGE; + break; + + case 15: /* s>>'=' */ + ttype = BPF_ARSH; + break; + + case 18: /* <'=' */ + ttype = BPF_JLE; + break; + + case 19: /* <<'=' */ + ttype = BPF_LSH; + break; + + case 20: /* &'=' */ + ttype = BPF_AND; + break; + + case 21: /* !'=' */ + ttype = BPF_JNE; + break; + + case 22: /* s<'=' */ + ttype = BPF_JSLE; + break; + } + break; + + case LEX_IS_SYMBOL_COMPONENT: + return_token = 1; + + switch (state) + { + case 17: /* =='sym' */ + ttype = BPF_JEQ; + break; + + case 12: /* >'sym' */ + ttype = BPF_JGT; + break; + + case 18: /* <'sym' */ + ttype = BPF_JLT; + break; + + case 20: /* &'sym' */ + ttype = BPF_JSET; + break; + + case 14: /*s>'sym' */ + ttype = BPF_JSGT; + break; + + case 22: /* s<'sym' */ + ttype = BPF_JSLT; + break; + + case 16: /* ='sym' */ + ttype = BPF_MOV; + break; + + default: + return_token = 0; + } + + if (return_token) + { + UNGET (); + break; + } + + START_EXPR (); + token[len++] = ch; + + while ((ch2 = GET ()) != EOF) + { + int type; + + type = pseudoc_lex[(unsigned char) ch2]; + if (type != LEX_IS_SYMBOL_COMPONENT) + break; + token[len++] = ch2; + } + + if (ch2 != EOF) + UNGET (); + + if (state == 0) + { + if (len == 1 && ch == 's') + state = 8; /* signed instructions: 's' */ + else + { + ttype = BPF_SYMBOL; + if (is_register (token, len)) + ttype = BPF_REG; + else if (look_for_reserved_word (token, &ttype)) + ; + else if ((pseudoc_lex[(unsigned char) *token] == LEX_IS_ARITHM_OP + || *token == '(' || isdigit(*token)) + && valid_expr (expr, &end_expr)) + { + len = end_expr - expr; + token[len] = '\0'; + ttype = BPF_EXPR; + str = end_expr; + } + + return_token = 1; + } + } + else if (state == 3) /* *('sym' */ + { + if ((ttype = is_cast (&token[2])) != BPF_UNKNOWN) + state = 4; /* *('uXX' */ + else + { + ttype = BPF_EXPR; + return_token = 1; + } + } + else if (state == 6) + { + if (ttype == BPF_SUB) /* neg */ + { + if (is_register (&token[1], len - 1)) + ttype = BPF_NEG; + else if (valid_expr(expr, 
&end_expr)) + { + len = end_expr - expr; + memcpy(token, expr, len); + ttype = BPF_EXPR; + str = end_expr; + } + else + { + len = 0; + while (*invalid_expression) + token[len++] = *invalid_expression++; + token[len] = 0; + ttype = BPF_UNKNOWN; + } + } + else if (valid_expr (expr, &end_expr)) + { + len = end_expr - expr; + memcpy(token, expr, len); + ttype = BPF_EXPR; + str = end_expr; + } + else + ttype = BPF_UNKNOWN; + + return_token = 1; + } + break; + } + + if (return_token) + { + *tlen = len; + *insn = str; + break; + } + } + + return ttype; + +#undef GET +#undef UNGET +#undef START_EXPR +#undef SCANNER_SKIP_WHITESPACE +#undef BPF_ARITHM_OP +} + +/* + The parser represent a FSM for the grammar described above. So for example + the following rule: + + ` bpf_alu_insn : BPF_REG bpf_alu_operator register_or_imm32' + + Is parser as follows: + + 1. It starts in state 0. + + 2. Consumes next token, e.g: `BPF_REG' and set `state' variable to a + particular state to helps to identify, in this case, that a register + token has been read, a comment surrounded by a single quote in the + pseudo-c token is added along with the new `state' value to indicate + what the scanner has read, e.g.: + + state = 6; // dst_reg = str_cast ( 'src_reg' + + So, in `state 6' the scanner has consumed: a destination register + (BPF_REG), an equal character (BPF_MOV), a cast token (BPF_CAST), an + open parenthesis (BPF_CHR_OPEN_BR) and the source register (BPF_REG). + + 3. If the accumulated tokens represent a complete BPF pseudo-c syntax + instruction then, a validation of the terms is made, for example: if + the registers have the same sizes (32/64 bits), if a specific + destination register must be used, etc., after that, a builder: + build_bfp_{non_generic_load,atomic_insn,jmp_insn,arithm_insn,endianness,load_store_insn} + is invoked, internally, it translates the BPF pseudo-c instruction to + a BPF GAS instruction using the previous terms recollected by the + scanner. + + 4. 
If a successful build of BPF GAS instruction was done, a final + state is set to `ST_EOI' (End Of Instruction) meaning that is not + expecting for more tokens in such instruction. Otherwise if the + conditions to calling builder are not satisfied an error is emitted + and `parse_err' is set. +*/ + +static char * +bpf_pseudoc_to_normal_syntax (const char *str, char **errmsg) +{ +#define syntax_err(format, ...) \ + do \ + { \ + if (! parse_err) \ + { \ + parse_err = 1; \ + errbuf = xasprintf (format, ##__VA_ARGS__); \ + } \ + } while (0) + + enum bpf_token_type ttype; + enum bpf_token_type bpf_endianness, + bpf_atomic_insn; + enum bpf_token_type bpf_jmp_op = BPF_JEQ; /* Arbitrary. */ + enum bpf_token_type bpf_cast = BPF_CAST_U8; /* Arbitrary. */ + enum bpf_token_type bpf_arithm_op = BPF_ADD; /* Arbitrary. */ + char *bpf_insn = NULL; + char *errbuf = NULL; + char src_reg[3] = {0}; + char dst_reg[3] = {0}; + char str_imm32[40] = {0}; + char str_offset[40] = {0}; + char str_symbol[MAX_TOKEN_SZ] = {0}; + char token[MAX_TOKEN_SZ] = {0}; + int state = 0; + int parse_err = 0; + size_t tlen; + + while (*str) + { + ttype = get_token (&str, token, &tlen); + if (ttype == BPF_UNKNOWN || state == ST_EOI) + { + syntax_err ("unexpected token: '%s'", token); + break; + } + + switch (ttype) + { + case BPF_UNKNOWN: + case BPF_LL: + break; + + case BPF_REG: + switch (state) + { + case 0: + memcpy (dst_reg, token, tlen); + state = 1; /* 'dst_reg' */ + break; + + case 3: + /* dst_reg bpf_op 'src_reg' */ + memcpy (src_reg, token, tlen); + if (*dst_reg == *src_reg) + bpf_insn = build_bpf_arithm_insn (dst_reg, src_reg, 0, + NULL, bpf_arithm_op); + else + { + syntax_err ("different register sizes: '%s', '%s'", + dst_reg, src_reg); + break; + } + state = ST_EOI; + break; + + case 5: + memcpy (src_reg, token, tlen); + state = 6; /* dst_reg = str_cast ( 'src_reg' */ + break; + + case 9: + memcpy (dst_reg, token, tlen); + state = 10; /* str_cast ( 'dst_reg' */ + break; + + case 11: + /* str_cast 
( dst_reg offset ) = 'src_reg' */ + memcpy (src_reg, token, tlen); + bpf_insn = build_bpf_load_store_insn (dst_reg, src_reg, + bpf_cast, str_offset, 0); + state = ST_EOI; + break; + + case 14: + memcpy (dst_reg, token, tlen); + state = 15; /* if 'dst_reg' */ + break; + + case 16: + memcpy (src_reg, token, tlen); + state = 17; /* if dst_reg jmp_op 'src_reg' */ + break; + + case 24: + /* dst_reg = endianness src_reg */ + memcpy (src_reg, token, tlen); + if (*dst_reg == 'r' && !strcmp (dst_reg, src_reg)) + bpf_insn = build_bpf_endianness (dst_reg, bpf_endianness); + else + syntax_err ("invalid operand for instruction: '%s'", token); + + state = ST_EOI; + break; + + case 28: + memcpy (dst_reg, token, tlen); + state = 29; /* lock str_cast ( 'dst_reg' */ + break; + + case 32: + { + /* lock str_cast ( dst_reg offset ) atomic_insn 'src_reg' */ + int with_offset = *str_offset != '\0'; + + memcpy (src_reg, token, tlen); + if ((bpf_cast != BPF_CAST_U32 + && bpf_cast != BPF_CAST_U64) + || *dst_reg != 'r' + || *src_reg != 'r') + syntax_err ("invalid wide atomic instruction"); + else + bpf_insn = build_bpf_atomic_insn (dst_reg, src_reg, bpf_atomic_insn, + bpf_cast, with_offset ? 
str_offset : str_symbol); + } + + state = ST_EOI; + break; + + case 33: + /* callx 'dst_reg' */ + bpf_insn = xasprintf ("%s %%%s", "call", token); + state = ST_EOI; + break; + + case 35: + memcpy (src_reg, token, tlen); + state = 36; /* dst_reg = str_cast skb [ 'src_reg' */ + break; + } + break; + + case BPF_MOV: + case BPF_ADD: + case BPF_SUB: + case BPF_MUL: + case BPF_DIV: + case BPF_OR: + case BPF_AND: + case BPF_LSH: + case BPF_RSH: + case BPF_MOD: + case BPF_XOR: + case BPF_ARSH: + case BPF_NEG: + switch (state) + { + case 1: + state = 3; /* dst_reg 'arith_op' */ + bpf_arithm_op = ttype; + break; + + case 3: + if (ttype == BPF_NEG) + { + /* reg = -reg */ + bpf_arithm_op = ttype; + memcpy (src_reg, token + 1, tlen - 1); + if (strcmp (dst_reg, src_reg)) + { + syntax_err ("found: '%s', expected: -%s", token, dst_reg); + break; + } + + bpf_insn = build_bpf_arithm_insn (dst_reg, src_reg, 0, + NULL, bpf_arithm_op); + state = ST_EOI; + } + break; + + case 23: + memcpy (src_reg, token, tlen); + state = 11; /* str_cast ( dst_reg offset ) '=' */ + break; + + case 12: + if (ttype == BPF_MOV) + state = 13; /* str_cast ( dst_reg offset ) '=' */ + break; + + case 31: + bpf_atomic_insn = ttype; + state = 32; /* lock str_cast ( dst_reg offset ) 'atomic_insn' */ + break; + + default: + syntax_err ("unexpected '%s'", token); + state = ST_EOI; + } + break; + + case BPF_CAST_U8: + case BPF_CAST_U16: + case BPF_CAST_U32: + case BPF_CAST_U64: + bpf_cast = ttype; + switch (state) + { + case 3: + state = 4; /* dst_reg = 'str_cast' */ + break; + + case 0: + state = 8; /* 'str_cast' */ + break; + + case 26: + state = 27; /* lock 'str_cast' */ + break; + } + break; + + case BPF_CHR_OPEN_BR: + switch (state) + { + case 4: + state = 5; /* dst_reg = str_cast '(' */ + break; + + case 8: + state = 9; /* str_cast '(' */ + break; + + case 27: + state = 28; /* lock str_cast '(' */ + break; + + case 34: + state = 35; /* dst_reg = str_cast skb '[' */ + break; + } + break; + + case 
BPF_CHR_CLSE_BR: + switch (state) + { + case 7: + /* dst_reg = str_cast ( imm32 ')' */ + bpf_insn = build_bpf_load_store_insn (dst_reg, src_reg, + bpf_cast, str_imm32, 1); + state = ST_EOI; + break; + + case 11: + state = 12; /* str_cast ( dst_reg imm32 ')' */ + break; + + case 21: + /* dst_reg = str_cast ( src_reg offset ')' */ + bpf_insn = build_bpf_load_store_insn (dst_reg, src_reg, + bpf_cast, str_offset, 1); + state = ST_EOI; + break; + + case 22: + state = 23; /* str_cast ( dst_reg offset ')' */ + break; + + case 30: + state = 31; /* lock str_cast ( dst_reg offset ')' */ + break; + + case 37: + /* dst_reg = str_cast skb [ src_reg imm32 ']' */ + if (*dst_reg != 'w' && !strcmp ("r0", dst_reg)) + bpf_insn = build_bpf_non_generic_load (*src_reg != '\0' ? src_reg : NULL, + bpf_cast, str_imm32); + else + syntax_err ("invalid register operand: '%s'", dst_reg); + + state = ST_EOI; + break; + } + break; + + case BPF_EXPR: + switch (state) + { + case 3: + { + /* dst_reg bpf_arithm_op 'imm32' */ + int load64 = 0; + + memcpy (str_imm32, token, tlen); + memset (token, 0, tlen); + + if ((ttype = get_token (&str, token, &tlen)) == BPF_LL + && bpf_arithm_op == BPF_MOV) + load64 = 1; + else if (ttype != BPF_UNKNOWN) + syntax_err ("unexpected token: '%s'", token); + + if (load64 && *dst_reg == 'w') + syntax_err ("unexpected register size: '%s'", dst_reg); + + if (! parse_err) + bpf_insn = build_bpf_arithm_insn (dst_reg, NULL, load64, + str_imm32, bpf_arithm_op); + state = ST_EOI; + } + break; + + case 18: + { + /* if dst_reg jmp_op src_reg goto 'offset' */ + int with_src = *src_reg != '\0'; + + memcpy (str_offset, token, tlen); + if (with_src && *dst_reg != *src_reg) + syntax_err ("different register size: '%s', '%s'", + dst_reg, src_reg); + else + bpf_insn = build_bpf_jmp_insn (dst_reg, with_src ? src_reg : NULL, + with_src ? 
NULL: str_imm32, + bpf_jmp_op, NULL, str_offset); + state = ST_EOI; + } + break; + + case 19: + /* goto 'offset' */ + memcpy (str_offset, token, tlen); + bpf_insn = xasprintf ("%s %s", "ja", str_offset); + state = ST_EOI; + break; + + case 6: + memcpy (str_offset, token, tlen); + state = 21; /* dst_reg = str_cast ( src_reg 'offset' */ + break; + + case 10: + memcpy (str_offset, token, tlen); + state = 22; /* str_cast ( dst_reg 'offset' */ + break; + + case 16: + memcpy (str_imm32, token, tlen); + state = 25; /* if dst_reg jmp_op 'imm32' */ + break; + + case 29: + memcpy (str_offset, token, tlen); + state = 30; /* lock str_cast ( dst_reg 'offset' */ + break; + + case 34: + /* dst_reg = str_cast skb 'imm32' */ + if (*dst_reg != 'w' && !strcmp ("r0", dst_reg)) + { + memcpy (str_imm32, token, tlen); + bpf_insn = build_bpf_non_generic_load (*src_reg != '\0' ? src_reg : NULL, + bpf_cast, str_imm32); + } + else + syntax_err ("invalid register operand: '%s'", dst_reg); + + state = ST_EOI; + break; + + case 36: + memcpy (str_imm32, token, tlen); + state = 37; /* dst_reg = str_cast skb [ src_reg 'imm32' */ + break; + } + break; + + case BPF_IF: + if (state == 0) + state = 14; + break; + + case BPF_JSGT: + case BPF_JSLT: + case BPF_JSLE: + case BPF_JSGE: + case BPF_JGT: + case BPF_JGE: + case BPF_JLE: + case BPF_JSET: + case BPF_JNE: + case BPF_JLT: + case BPF_JEQ: + if (state == 15) + { + bpf_jmp_op = ttype; + state = 16; /* if dst_reg 'jmp_op' */ + } + break; + + case BPF_GOTO: + switch (state) + { + case 17: + case 25: + state = 18; /* if dst_reg jmp_op src_reg|imm32 'goto' */ + break; + + case 0: + state = 19; + break; + } + break; + + case BPF_SYMBOL: + switch (state) + { + case 18: + { + /* if dst_reg jmp_op src_reg goto 'sym' */ + int with_src = *src_reg != '\0'; + + memcpy (str_symbol, token, tlen); + if (with_src && *dst_reg != *src_reg) + syntax_err ("different register size: '%s', '%s'", + dst_reg, src_reg); + else + bpf_insn = build_bpf_jmp_insn (dst_reg, with_src 
? src_reg : NULL, + with_src ? NULL: str_imm32, + bpf_jmp_op, str_symbol, NULL); + state = ST_EOI; + } + break; + + case 19: + /* goto 'sym' */ + memcpy (str_symbol, token, tlen); + bpf_insn = xasprintf ("%s %s", "ja", str_symbol); + state = ST_EOI; + break; + + case 0: + state = ST_EOI; + break; + + case 3: + { + /* dst_reg arithm_op 'sym' */ + int load64 = 0; + + memcpy (str_symbol, token, tlen); + memset (token, 0, tlen); + + if ((ttype = get_token (&str, token, &tlen)) == BPF_LL + && bpf_arithm_op == BPF_MOV) + load64 = 1; + else if (ttype != BPF_UNKNOWN) + syntax_err ("unexpected token: '%s'", token); + + if (load64 && *dst_reg == 'w') + syntax_err ("unexpected register size: '%s'", dst_reg); + + if (! parse_err) + bpf_insn = build_bpf_arithm_insn (dst_reg, NULL, load64, + str_symbol, bpf_arithm_op); + state = ST_EOI; + } + break; + } + break; + + case BPF_LE16: + case BPF_LE32: + case BPF_LE64: + case BPF_BE16: + case BPF_BE32: + case BPF_BE64: + bpf_endianness = ttype; + state = 24; /* dst_reg = 'endianness' */ + break; + + case BPF_LOCK: + state = 26; + break; + + case BPF_IND_CALL: + state = 33; + break; + + case BPF_LD: + state = 34; /* dst_reg = str_cast 'skb' */ + break; + } + + memset (token, 0, tlen); + } + + if (state != ST_EOI) + syntax_err ("incomplete instruction"); + + *errmsg = errbuf; + return bpf_insn; + +#undef syntax_err +} + void md_assemble (char *str) { const CGEN_INSN *insn; char *errmsg; + char *a_errmsg; CGEN_FIELDS fields; + char *normal; #if CGEN_INT_INSN_P CGEN_INSN_INT buffer[CGEN_MAX_INSN_SIZE / sizeof (CGEN_INT_INSN_P)]; @@ -378,11 +1875,26 @@ md_assemble (char *str) gas_cgen_init_parse (); insn = bpf_cgen_assemble_insn (gas_cgen_cpu_desc, str, &fields, buffer, &errmsg); - if (insn == NULL) { - as_bad ("%s", errmsg); - return; + normal = bpf_pseudoc_to_normal_syntax (str, &a_errmsg); + if (normal) + { + insn = bpf_cgen_assemble_insn (gas_cgen_cpu_desc, normal, &fields, + buffer, &a_errmsg); + xfree (normal); + } + + if (insn == 
NULL) + { + as_bad ("%s", errmsg); + if (a_errmsg) + { + as_bad ("%s", a_errmsg); + xfree (a_errmsg); + } + return; + } } gas_cgen_finish_insn (insn, buffer, CGEN_FIELDS_BITSIZE (&fields), @@ -393,6 +1905,7 @@ md_assemble (char *str) void md_operand (expressionS *expressionP) { + invalid_expression = input_line_pointer - 1; gas_cgen_md_operand (expressionP); } diff --git a/gas/config/tc-bpf.h b/gas/config/tc-bpf.h index 1f7d76762f6..db604dbe8bc 100644 --- a/gas/config/tc-bpf.h +++ b/gas/config/tc-bpf.h @@ -51,3 +51,5 @@ /* The Linux kernel verifier expects NOPs to be encoded in this way; a jump to offset 0 means jump to the next instruction. */ #define md_single_noop_insn "ja 0" + +#define TC_EQUAL_IN_INSN(c, s) 1 -- 2.30.2