X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fvc4%2Fvc4_qir.c;h=982e8298ae90b3c3b6cad55fb6713e5a85c0fe23;hb=ee69cfd11d34e6570c579c42f9cd8b5c8ea36bcf;hp=c43b9b60597fa63f83df5f8b09daef30dccd6fb1;hpb=dadc32ac8072cf78b405d1b54414e1f020b0de41;p=mesa.git diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index c43b9b60597..982e8298ae9 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -22,7 +22,6 @@ */ #include "util/u_memory.h" -#include "util/u_simple_list.h" #include "util/ralloc.h" #include "vc4_qir.h" @@ -36,10 +35,17 @@ struct qir_op_info { static const struct qir_op_info qir_op_info[] = { [QOP_MOV] = { "mov", 1, 1 }, + [QOP_FMOV] = { "fmov", 1, 1 }, + [QOP_MMOV] = { "mmov", 1, 1 }, [QOP_FADD] = { "fadd", 1, 2 }, [QOP_FSUB] = { "fsub", 1, 2 }, [QOP_FMUL] = { "fmul", 1, 2 }, [QOP_MUL24] = { "mul24", 1, 2 }, + [QOP_V8MULD] = {"v8muld", 1, 2 }, + [QOP_V8MIN] = {"v8min", 1, 2 }, + [QOP_V8MAX] = {"v8max", 1, 2 }, + [QOP_V8ADDS] = {"v8adds", 1, 2 }, + [QOP_V8SUBS] = {"v8subs", 1, 2 }, [QOP_FMIN] = { "fmin", 1, 2 }, [QOP_FMAX] = { "fmax", 1, 2 }, [QOP_FMINABS] = { "fminabs", 1, 2 }, @@ -58,51 +64,27 @@ static const struct qir_op_info qir_op_info[] = { [QOP_XOR] = { "xor", 1, 2 }, [QOP_NOT] = { "not", 1, 1 }, - [QOP_SF] = { "sf", 0, 1 }, - [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1 }, - [QOP_SEL_X_0_NC] = { "fsel_x_0_nc", 1, 1 }, - [QOP_SEL_X_0_ZS] = { "fsel_x_0_zs", 1, 1 }, - [QOP_SEL_X_0_ZC] = { "fsel_x_0_zc", 1, 1 }, - [QOP_SEL_X_Y_NS] = { "fsel_x_y_ns", 1, 2 }, - [QOP_SEL_X_Y_NC] = { "fsel_x_y_nc", 1, 2 }, - [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2 }, - [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2 }, - [QOP_RCP] = { "rcp", 1, 1 }, [QOP_RSQ] = { "rsq", 1, 1 }, - [QOP_EXP2] = { "exp2", 1, 2 }, - [QOP_LOG2] = { "log2", 1, 2 }, - [QOP_PACK_COLORS] = { "pack_colors", 1, 4 }, - [QOP_PACK_SCALED] = { "pack_scaled", 1, 2 }, - [QOP_VPM_WRITE] = { "vpm_write", 0, 1, true }, - [QOP_VPM_READ] = { "vpm_read", 0, 1, true }, - [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true }, - [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true }, - [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true }, - [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true }, + [QOP_EXP2] = { "exp2", 1, 1 }, + [QOP_LOG2] = { "log2", 1, 1 }, [QOP_TLB_COLOR_READ] = { "tlb_color_read", 1, 0 }, + [QOP_MS_MASK] = { "ms_mask", 0, 1, true }, [QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 }, - [QOP_FRAG_X] = { "frag_x", 1, 0 }, - [QOP_FRAG_Y] = { "frag_y", 1, 0 }, [QOP_FRAG_Z] = { "frag_z", 1, 0 }, [QOP_FRAG_W] = { "frag_w", 1, 0 }, - [QOP_FRAG_REV_FLAG] = { "frag_rev_flag", 1, 0 }, - [QOP_TEX_S] = { "tex_s", 0, 2 }, - [QOP_TEX_T] = { "tex_t", 0, 2 }, - [QOP_TEX_R] = { "tex_r", 0, 2 }, - [QOP_TEX_B] = { "tex_b", 0, 2 }, - [QOP_TEX_DIRECT] = { "tex_direct", 0, 2 }, + [QOP_TEX_S] = { "tex_s", 0, 2, true }, + [QOP_TEX_T] = { "tex_t", 0, 2, true }, + [QOP_TEX_R] = { "tex_r", 0, 2, true }, + [QOP_TEX_B] = { "tex_b", 0, 2, true }, + [QOP_TEX_DIRECT] = { "tex_direct", 0, 2, true }, [QOP_TEX_RESULT] = { "tex_result", 1, 0, true }, - [QOP_R4_UNPACK_A] = { "r4_unpack_a", 1, 1 }, - [QOP_R4_UNPACK_B] = { "r4_unpack_b", 1, 1 }, - [QOP_R4_UNPACK_C] = { "r4_unpack_c", 1, 1 }, - [QOP_R4_UNPACK_D] = { "r4_unpack_d", 1, 1 }, - [QOP_UNPACK_8A] = { "unpack_8a", 1, 1 }, - [QOP_UNPACK_8B] = { "unpack_8b", 1, 1 }, - [QOP_UNPACK_8C] = { "unpack_8c", 1, 1 }, - [QOP_UNPACK_8D] = { "unpack_8d", 1, 1 }, + + [QOP_LOAD_IMM] = { "load_imm", 0, 1 }, + + [QOP_BRANCH] = { "branch", 0, 0, true }, }; static const char * @@ -129,6 +111,22 @@ qir_get_op_nsrc(enum qop qop) */ bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst) +{ + switch (inst->dst.file) { + case QFILE_TLB_Z_WRITE: + case QFILE_TLB_COLOR_WRITE: + case QFILE_TLB_COLOR_WRITE_MS: + case QFILE_TLB_STENCIL_SETUP: + return true; + default: + break; + } + + return qir_op_info[inst->op].has_side_effects; +} + +bool +qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst) { /* We can dead-code eliminate varyings, because we only tell the VS * about the live ones at the end. But we have to preserve the @@ -137,32 +135,85 @@ qir_has_side_effects(struct vc4_compile *c, struct qinst *inst) */ for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { if (inst->src[i].file == QFILE_VARY && - c->input_semantics[inst->src[i].index].semantic == 0xff) { + c->input_slots[inst->src[i].index].slot == 0xff) { return true; } + + if (inst->src[i].file == QFILE_VPM) + return true; } - return qir_op_info[inst->op].has_side_effects; + if (inst->dst.file == QFILE_VPM) + return true; + + return false; } bool -qir_depends_on_flags(struct qinst *inst) +qir_is_mul(struct qinst *inst) +{ + switch (inst->op) { + case QOP_MMOV: + case QOP_FMUL: + case QOP_MUL24: + case QOP_V8MULD: + case QOP_V8MIN: + case QOP_V8MAX: + case QOP_V8ADDS: + case QOP_V8SUBS: + return true; + default: + return false; + } +} + +bool +qir_is_float_input(struct qinst *inst) { switch (inst->op) { - case QOP_SEL_X_0_NS: - case QOP_SEL_X_0_NC: - case QOP_SEL_X_0_ZS: - case QOP_SEL_X_0_ZC: - case QOP_SEL_X_Y_NS: - case QOP_SEL_X_Y_NC: - case QOP_SEL_X_Y_ZS: - case QOP_SEL_X_Y_ZC: + case QOP_FMOV: + case QOP_FMUL: + case QOP_FADD: + case QOP_FSUB: + case QOP_FMIN: + case QOP_FMAX: + case QOP_FMINABS: + case QOP_FMAXABS: + case QOP_FTOI: return true; default: return false; } } +bool +qir_is_raw_mov(struct qinst *inst) +{ + return ((inst->op == QOP_MOV || + inst->op == QOP_FMOV || + inst->op == QOP_MMOV) && + inst->cond == QPU_COND_ALWAYS && + !inst->dst.pack && + !inst->src[0].pack); +} + +bool +qir_is_tex(struct qinst *inst) +{ + return inst->op >= QOP_TEX_S && inst->op <= QOP_TEX_DIRECT; +} + +bool +qir_depends_on_flags(struct qinst *inst) +{ + if (inst->op == QOP_BRANCH) { + return inst->cond != QPU_COND_BRANCH_ALWAYS; + } else { + return (inst->cond != QPU_COND_ALWAYS && + inst->cond != QPU_COND_NEVER); + } +} + bool qir_writes_r4(struct qinst *inst) { @@ -179,33 +230,106 @@ qir_writes_r4(struct qinst *inst) } } -bool -qir_reads_r4(struct qinst *inst) +uint8_t +qir_channels_written(struct qinst *inst) { - switch (inst->op) { - case QOP_R4_UNPACK_A: - case QOP_R4_UNPACK_B: - case QOP_R4_UNPACK_C: - case QOP_R4_UNPACK_D: - return true; - default: - return false; + if (qir_is_mul(inst)) { + switch (inst->dst.pack) { + case QPU_PACK_MUL_NOP: + case QPU_PACK_MUL_8888: + return 0xf; + case QPU_PACK_MUL_8A: + return 0x1; + case QPU_PACK_MUL_8B: + return 0x2; + case QPU_PACK_MUL_8C: + return 0x4; + case QPU_PACK_MUL_8D: + return 0x8; + } + } else { + switch (inst->dst.pack) { + case QPU_PACK_A_NOP: + case QPU_PACK_A_8888: + case QPU_PACK_A_8888_SAT: + case QPU_PACK_A_32_SAT: + return 0xf; + case QPU_PACK_A_8A: + case QPU_PACK_A_8A_SAT: + return 0x1; + case QPU_PACK_A_8B: + case QPU_PACK_A_8B_SAT: + return 0x2; + case QPU_PACK_A_8C: + case QPU_PACK_A_8C_SAT: + return 0x4; + case QPU_PACK_A_8D: + case QPU_PACK_A_8D_SAT: + return 0x8; + case QPU_PACK_A_16A: + case QPU_PACK_A_16A_SAT: + return 0x3; + case QPU_PACK_A_16B: + case QPU_PACK_A_16B_SAT: + return 0xc; + } } + unreachable("Bad pack field"); } static void -qir_print_reg(struct vc4_compile *c, struct qreg reg) +qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write) { - const char *files[] = { + static const char *files[] = { [QFILE_TEMP] = "t", [QFILE_VARY] = "v", [QFILE_UNIF] = "u", + [QFILE_TLB_COLOR_WRITE] = "tlb_c", + [QFILE_TLB_COLOR_WRITE_MS] = "tlb_c_ms", + [QFILE_TLB_Z_WRITE] = "tlb_z", + [QFILE_TLB_STENCIL_SETUP] = "tlb_stencil", + [QFILE_FRAG_X] = "frag_x", + [QFILE_FRAG_Y] = "frag_y", + [QFILE_FRAG_REV_FLAG] = "frag_rev_flag", }; - if (reg.file == QFILE_NULL) + switch (reg.file) { + + case QFILE_NULL: fprintf(stderr, "null"); - else + break; + + case QFILE_LOAD_IMM: + fprintf(stderr, "0x%08x (%f)", reg.index, uif(reg.index)); + break; + + case QFILE_SMALL_IMM: + if ((int)reg.index >= -16 && (int)reg.index <= 15) + fprintf(stderr, "%d", reg.index); + else + fprintf(stderr, "%f", uif(reg.index)); + break; + + case QFILE_VPM: + if (write) { + fprintf(stderr, "vpm"); + } else { + fprintf(stderr, "vpm%d.%d", + reg.index / 4, reg.index % 4); + } + break; + + case QFILE_TLB_COLOR_WRITE: + case QFILE_TLB_COLOR_WRITE_MS: + case QFILE_TLB_Z_WRITE: + case QFILE_TLB_STENCIL_SETUP: + fprintf(stderr, "%s", files[reg.file]); + break; + + default: fprintf(stderr, "%s%d", files[reg.file], reg.index); + break; + } if (reg.file == QFILE_UNIF && c->uniform_contents[reg.index] == QUNIFORM_CONSTANT) { @@ -218,24 +342,96 @@ qir_print_reg(struct vc4_compile *c, struct qreg reg) void qir_dump_inst(struct vc4_compile *c, struct qinst *inst) { - fprintf(stderr, "%s ", qir_get_op_name(inst->op)); + fprintf(stderr, "%s", qir_get_op_name(inst->op)); + if (inst->op == QOP_BRANCH) + vc4_qpu_disasm_cond_branch(stderr, inst->cond); + else + vc4_qpu_disasm_cond(stderr, inst->cond); + if (inst->sf) + fprintf(stderr, ".sf"); + fprintf(stderr, " "); + + if (inst->op != QOP_BRANCH) { + qir_print_reg(c, inst->dst, true); + if (inst->dst.pack) { + if (inst->dst.pack) { + if (qir_is_mul(inst)) + vc4_qpu_disasm_pack_mul(stderr, inst->dst.pack); + else + vc4_qpu_disasm_pack_a(stderr, inst->dst.pack); + } + } + } - qir_print_reg(c, inst->dst); for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { fprintf(stderr, ", "); - qir_print_reg(c, inst->src[i]); + qir_print_reg(c, inst->src[i], false); + vc4_qpu_disasm_unpack(stderr, inst->src[i].pack); } } void qir_dump(struct vc4_compile *c) { - struct simple_node *node; - - foreach(node, &c->instructions) { - struct qinst *inst = (struct qinst *)node; - qir_dump_inst(c, inst); - fprintf(stderr, "\n"); + int ip = 0; + + qir_for_each_block(block, c) { + fprintf(stderr, "BLOCK %d:\n", block->index); + qir_for_each_inst(inst, block) { + if (c->temp_start) { + bool first = true; + + for (int i = 0; i < c->num_temps; i++) { + if (c->temp_start[i] != ip) + continue; + + if (first) { + first = false; + } else { + fprintf(stderr, ", "); + } + fprintf(stderr, "S%4d", i); + } + + if (first) + fprintf(stderr, " "); + else + fprintf(stderr, " "); + } + + if (c->temp_end) { + bool first = true; + + for (int i = 0; i < c->num_temps; i++) { + if (c->temp_end[i] != ip) + continue; + + if (first) { + first = false; + } else { + fprintf(stderr, ", "); + } + fprintf(stderr, "E%4d", i); + } + + if (first) + fprintf(stderr, " "); + else + fprintf(stderr, " "); + } + + qir_dump_inst(c, inst); + fprintf(stderr, "\n"); + ip++; + } + if (block->successors[1]) { + fprintf(stderr, "-> BLOCK %d, %d\n", + block->successors[0]->index, + block->successors[1]->index); + } else if (block->successors[0]) { + fprintf(stderr, "-> BLOCK %d\n", + block->successors[0]->index); + } } } @@ -246,6 +442,16 @@ qir_get_temp(struct vc4_compile *c) reg.file = QFILE_TEMP; reg.index = c->num_temps++; + reg.pack = 0; + + if (c->num_temps > c->defs_array_size) { + uint32_t old_size = c->defs_array_size; + c->defs_array_size = MAX2(old_size * 2, 16); + c->defs = reralloc(c, c->defs, struct qinst *, + c->defs_array_size); + memset(&c->defs[old_size], 0, + sizeof(c->defs[0]) * (c->defs_array_size - old_size)); + } return reg; } @@ -260,6 +466,7 @@ qir_inst(enum qop op, struct qreg dst, struct qreg src0, struct qreg src1) inst->src = calloc(2, sizeof(inst->src[0])); inst->src[0] = src0; inst->src[1] = src1; + inst->cond = QPU_COND_ALWAYS; return inst; } @@ -284,16 +491,90 @@ qir_inst4(enum qop op, struct qreg dst, return inst; } -void +static void qir_emit(struct vc4_compile *c, struct qinst *inst) { - insert_at_tail(&c->instructions, &inst->link); + list_addtail(&inst->link, &c->cur_block->instructions); +} + +/* Updates inst to write to a new temporary, emits it, and notes the def. */ +struct qreg +qir_emit_def(struct vc4_compile *c, struct qinst *inst) +{ + assert(inst->dst.file == QFILE_NULL); + + inst->dst = qir_get_temp(c); + + if (inst->dst.file == QFILE_TEMP) + c->defs[inst->dst.index] = inst; + + qir_emit(c, inst); + + return inst->dst; +} + +struct qinst * +qir_emit_nondef(struct vc4_compile *c, struct qinst *inst) +{ + if (inst->dst.file == QFILE_TEMP) + c->defs[inst->dst.index] = NULL; + + qir_emit(c, inst); + + return inst; } bool qir_reg_equals(struct qreg a, struct qreg b) { - return a.file == b.file && a.index == b.index; + return a.file == b.file && a.index == b.index && a.pack == b.pack; +} + +struct qblock * +qir_new_block(struct vc4_compile *c) +{ + struct qblock *block = rzalloc(c, struct qblock); + + list_inithead(&block->instructions); + + block->predecessors = _mesa_set_create(block, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + + block->index = c->next_block_index++; + + return block; +} + +void +qir_set_emit_block(struct vc4_compile *c, struct qblock *block) +{ + c->cur_block = block; + list_addtail(&block->link, &c->blocks); +} + +struct qblock * +qir_entry_block(struct vc4_compile *c) +{ + return list_first_entry(&c->blocks, struct qblock, link); +} + +struct qblock * +qir_exit_block(struct vc4_compile *c) +{ + return list_last_entry(&c->blocks, struct qblock, link); +} + +void +qir_link_blocks(struct qblock *predecessor, struct qblock *successor) +{ + _mesa_set_add(successor->predecessors, predecessor); + if (predecessor->successors[0]) { + assert(!predecessor->successors[1]); + predecessor->successors[1] = successor; + } else { + predecessor->successors[0] = successor; + } } struct vc4_compile * @@ -301,31 +582,60 @@ qir_compile_init(void) { struct vc4_compile *c = rzalloc(NULL, struct vc4_compile); - make_empty_list(&c->instructions); + list_inithead(&c->blocks); + qir_set_emit_block(c, qir_new_block(c)); c->output_position_index = -1; - c->output_clipvertex_index = -1; c->output_color_index = -1; c->output_point_size_index = -1; + c->output_sample_mask_index = -1; + + c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer, + _mesa_key_pointer_equal); return c; } void -qir_remove_instruction(struct qinst *qinst) +qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst) { - remove_from_list(&qinst->link); + if (qinst->dst.file == QFILE_TEMP) + c->defs[qinst->dst.index] = NULL; + + list_del(&qinst->link); free(qinst->src); free(qinst); } +struct qreg +qir_follow_movs(struct vc4_compile *c, struct qreg reg) +{ + int pack = reg.pack; + + while (reg.file == QFILE_TEMP && + c->defs[reg.index] && + (c->defs[reg.index]->op == QOP_MOV || + c->defs[reg.index]->op == QOP_FMOV || + c->defs[reg.index]->op == QOP_MMOV)&& + !c->defs[reg.index]->dst.pack && + !c->defs[reg.index]->src[0].pack) { + reg = c->defs[reg.index]->src[0]; + } + + reg.pack = pack; + return reg; +} + void qir_compile_destroy(struct vc4_compile *c) { - while (!is_empty_list(&c->instructions)) { - struct qinst *qinst = - (struct qinst *)first_elem(&c->instructions); - qir_remove_instruction(qinst); + qir_for_each_block(block, c) { + while (!list_empty(&block->instructions)) { + struct qinst *qinst = + list_first_entry(&block->instructions, + struct qinst, link); + qir_remove_instruction(c, qinst); + } } ralloc_free(c); @@ -343,6 +653,58 @@ qir_get_stage_name(enum qstage stage) return names[stage]; } +struct qreg +qir_uniform(struct vc4_compile *c, + enum quniform_contents contents, + uint32_t data) +{ + for (int i = 0; i < c->num_uniforms; i++) { + if (c->uniform_contents[i] == contents && + c->uniform_data[i] == data) { + return qir_reg(QFILE_UNIF, i); + } + } + + uint32_t uniform = c->num_uniforms++; + + if (uniform >= c->uniform_array_size) { + c->uniform_array_size = MAX2(MAX2(16, uniform + 1), + c->uniform_array_size * 2); + + c->uniform_data = reralloc(c, c->uniform_data, + uint32_t, + c->uniform_array_size); + c->uniform_contents = reralloc(c, c->uniform_contents, + enum quniform_contents, + c->uniform_array_size); + } + + c->uniform_contents[uniform] = contents; + c->uniform_data[uniform] = data; + + return qir_reg(QFILE_UNIF, uniform); +} + +void +qir_SF(struct vc4_compile *c, struct qreg src) +{ + struct qinst *last_inst = NULL; + + if (!list_empty(&c->cur_block->instructions)) + last_inst = (struct qinst *)c->cur_block->instructions.prev; + + /* We don't have any way to guess which kind of MOV is implied. */ + assert(!src.pack); + + if (src.file != QFILE_TEMP || + !c->defs[src.index] || + last_inst != c->defs[src.index]) { + last_inst = qir_MOV_dest(c, qir_reg(QFILE_NULL, 0), src); + last_inst = (struct qinst *)c->cur_block->instructions.prev; + } + last_inst->sf = true; +} + #define OPTPASS(func) \ do { \ bool stage_progress = func(c); \ @@ -353,6 +715,7 @@ qir_get_stage_name(enum qstage stage) "QIR opt pass %2d: %s progress\n", \ pass, #func); \ } \ + qir_validate(c); \ } \ } while (0) @@ -366,9 +729,12 @@ qir_optimize(struct vc4_compile *c) bool progress = false; OPTPASS(qir_opt_algebraic); - OPTPASS(qir_opt_cse); + OPTPASS(qir_opt_constant_folding); OPTPASS(qir_opt_copy_propagation); + OPTPASS(qir_opt_peephole_sf); OPTPASS(qir_opt_dead_code); + OPTPASS(qir_opt_small_immediates); + OPTPASS(qir_opt_vpm); if (!progress) break;