X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fvc4%2Fvc4_program.c;h=97cbabbd511b57d72f256d096c8c3277136f8d39;hb=314f0c57e4c00b0a5cb544fa43e356c1069acd8f;hp=242069803690d4e1cf40231fbbccfac7c5b03ed1;hpb=e8378fee0c20ecd26451c079c725420077606cb9;p=mesa.git diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 24206980369..97cbabbd511 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -24,7 +24,7 @@ #include #include "util/u_format.h" -#include "util/u_hash.h" +#include "util/crc32.h" #include "util/u_math.h" #include "util/u_memory.h" #include "util/ralloc.h" @@ -37,9 +37,7 @@ #include "vc4_context.h" #include "vc4_qpu.h" #include "vc4_qir.h" -#ifdef USE_VC4_SIMULATOR -#include "simpenrose/simpenrose.h" -#endif +#include "mesa/state_tracker/st_glsl_types.h" static struct qreg ntq_get_src(struct vc4_compile *c, nir_src src, int i); @@ -67,6 +65,23 @@ resize_qreg_array(struct vc4_compile *c, (*regs)[i] = c->undef; } +static void +ntq_emit_thrsw(struct vc4_compile *c) +{ + if (!c->fs_threaded) + return; + + /* Always thread switch after each texture operation for now. + * + * We could do better by batching a bunch of texture fetches up and + * then doing one thread switch and collecting all their results + * afterward. + */ + qir_emit_nondef(c, qir_inst(QOP_THRSW, c->undef, + c->undef, c->undef)); + c->last_thrsw_at_top_level = (c->execute.file == QFILE_NULL); +} + static struct qreg indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr) { @@ -107,6 +122,9 @@ indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr) qir_TEX_DIRECT(c, indirect_offset, qir_uniform(c, QUNIFORM_UBO_ADDR, 0)); c->num_texture_samples++; + + ntq_emit_thrsw(c); + return qir_TEX_RESULT(c); } @@ -139,10 +157,33 @@ ntq_init_ssa_def(struct vc4_compile *c, nir_ssa_def *def) return qregs; } +/** + * This function is responsible for getting QIR results into the associated + * storage for a NIR instruction. + * + * If it's a NIR SSA def, then we just set the associated hash table entry to + * the new result. + * + * If it's a NIR reg, then we need to update the existing qreg assigned to the + * NIR destination with the incoming value. To do that without introducing + * new MOVs, we require that the incoming qreg either be a uniform, or be + * SSA-defined by the previous QIR instruction in the block and rewritable by + * this function. That lets us sneak ahead and insert the SF flag beforehand + * (knowing that the previous instruction doesn't depend on flags) and rewrite + * its destination to be the NIR reg's destination + */ static void ntq_store_dest(struct vc4_compile *c, nir_dest *dest, int chan, struct qreg result) { + struct qinst *last_inst = NULL; + if (!list_empty(&c->cur_block->instructions)) + last_inst = (struct qinst *)c->cur_block->instructions.prev; + + assert(result.file == QFILE_UNIF || + (result.file == QFILE_TEMP && + last_inst && last_inst == c->defs[result.index])); + if (dest->is_ssa) { assert(chan < dest->ssa.num_components); @@ -164,14 +205,34 @@ ntq_store_dest(struct vc4_compile *c, nir_dest *dest, int chan, _mesa_hash_table_search(c->def_ht, reg); struct qreg *qregs = entry->data; - /* Conditionally move the result to the destination if the - * channel is active. + /* Insert a MOV if the source wasn't an SSA def in the + * previous instruction. 
+ */ + if (result.file == QFILE_UNIF) { + result = qir_MOV(c, result); + last_inst = c->defs[result.index]; + } + + /* We know they're both temps, so just rewrite index. */ + c->defs[last_inst->dst.index] = NULL; + last_inst->dst.index = qregs[chan].index; + + /* If we're in control flow, then make this update of the reg + * conditional on the execution mask. */ if (c->execute.file != QFILE_NULL) { + last_inst->dst.index = qregs[chan].index; + + /* Set the flags to the current exec mask. To insert + * the SF, we temporarily remove our SSA instruction. + */ + list_del(&last_inst->link); qir_SF(c, c->execute); - qir_MOV_cond(c, QPU_COND_ZS, qregs[chan], result); - } else { - qir_MOV_dest(c, qregs[chan], result); + list_addtail(&last_inst->link, + &c->cur_block->instructions); + + last_inst->cond = QPU_COND_ZS; + last_inst->cond_is_exec_mask = true; } } } @@ -322,22 +383,21 @@ ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr) qir_TEX_DIRECT(c, addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit)); + ntq_emit_thrsw(c); + struct qreg tex = qir_TEX_RESULT(c); c->num_texture_samples++; - struct qreg dest[4]; enum pipe_format format = c->key->tex[unit].format; if (util_format_is_depth_or_stencil(format)) { struct qreg scaled = ntq_scale_depth_texture(c, tex); for (int i = 0; i < 4; i++) - dest[i] = scaled; + ntq_store_dest(c, &instr->dest, i, qir_MOV(c, scaled)); } else { for (int i = 0; i < 4; i++) - dest[i] = qir_UNPACK_8_F(c, tex, i); + ntq_store_dest(c, &instr->dest, i, + qir_UNPACK_8_F(c, tex, i)); } - - for (int i = 0; i < 4; i++) - ntq_store_dest(c, &instr->dest, i, dest[i]); } static void @@ -379,6 +439,16 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr) } } + if (c->stage != QSTAGE_FRAG && !is_txl) { + /* From the GLSL 1.20 spec: + * + * "If it is mip-mapped and running on the vertex shader, + * then the base texture is used." + */ + is_txl = true; + lod = qir_uniform_ui(c, 0); + } + if (c->key->tex[unit].force_first_level) { lod = qir_uniform(c, QUNIFORM_TEXTURE_FIRST_LEVEL, unit); is_txl = true; @@ -435,6 +505,9 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr) qir_TEX_S(c, s, texture_u[next_texture_u++]); c->num_texture_samples++; + + ntq_emit_thrsw(c); + struct qreg tex = qir_TEX_RESULT(c); enum pipe_format format = c->key->tex[unit].format; @@ -447,6 +520,15 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr) struct qreg u0 = qir_uniform_f(c, 0.0f); struct qreg u1 = qir_uniform_f(c, 1.0f); if (c->key->tex[unit].compare_mode) { + /* From the GL_ARB_shadow spec: + * + * "Let Dt (D subscript t) be the depth texture + * value, in the range [0, 1]. Let R be the + * interpolated texture coordinate clamped to the + * range [0, 1]." 
+ */ + compare = qir_SAT(c, compare); + switch (c->key->tex[unit].compare_func) { case PIPE_FUNC_NEVER: depth_output = qir_uniform_f(c, 0.0f); @@ -501,8 +583,9 @@ ntq_ffract(struct vc4_compile *c, struct qreg src) struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src)); struct qreg diff = qir_FSUB(c, src, trunc); qir_SF(c, diff); - return qir_SEL(c, QPU_COND_NS, - qir_FADD(c, diff, qir_uniform_f(c, 1.0)), diff); + return qir_MOV(c, qir_SEL(c, QPU_COND_NS, + qir_FADD(c, diff, qir_uniform_f(c, 1.0)), + diff)); } /** @@ -519,8 +602,9 @@ ntq_ffloor(struct vc4_compile *c, struct qreg src) */ qir_SF(c, qir_FSUB(c, src, trunc)); - return qir_SEL(c, QPU_COND_NS, - qir_FSUB(c, trunc, qir_uniform_f(c, 1.0)), trunc); + return qir_MOV(c, qir_SEL(c, QPU_COND_NS, + qir_FSUB(c, trunc, qir_uniform_f(c, 1.0)), + trunc)); } /** @@ -537,8 +621,9 @@ ntq_fceil(struct vc4_compile *c, struct qreg src) */ qir_SF(c, qir_FSUB(c, trunc, src)); - return qir_SEL(c, QPU_COND_NS, - qir_FADD(c, trunc, qir_uniform_f(c, 1.0)), trunc); + return qir_MOV(c, qir_SEL(c, QPU_COND_NS, + qir_FADD(c, trunc, qir_uniform_f(c, 1.0)), + trunc)); } static struct qreg @@ -619,7 +704,7 @@ ntq_fsign(struct vc4_compile *c, struct qreg src) qir_MOV_dest(c, t, qir_uniform_f(c, 0.0)); qir_MOV_dest(c, t, qir_uniform_f(c, 1.0))->cond = QPU_COND_ZC; qir_MOV_dest(c, t, qir_uniform_f(c, -1.0))->cond = QPU_COND_NS; - return t; + return qir_MOV(c, t); } static void @@ -798,7 +883,7 @@ ntq_emit_pack_unorm_4x8(struct vc4_compile *c, nir_alu_instr *instr) qir_PACK_8_F(c, result, src, i); } - ntq_store_dest(c, &instr->dest.dest, 0, result); + ntq_store_dest(c, &instr->dest.dest, 0, qir_MOV(c, result)); } /** Handles sign-extended bitfield extracts for 16 bits. */ @@ -904,6 +989,9 @@ ntq_emit_comparison(struct vc4_compile *c, struct qreg *dest, break; } + /* Make the temporary for nir_store_dest(). */ + *dest = qir_MOV(c, *dest); + return true; } @@ -917,6 +1005,8 @@ static struct qreg ntq_emit_bcsel(struct vc4_compile *c, nir_alu_instr *instr, { if (!instr->src[0].src.is_ssa) goto out; + if (instr->src[0].src.ssa->parent_instr->type != nir_instr_type_alu) + goto out; nir_alu_instr *compare = nir_instr_as_alu(instr->src[0].src.ssa->parent_instr); if (!compare) @@ -928,7 +1018,47 @@ static struct qreg ntq_emit_bcsel(struct vc4_compile *c, nir_alu_instr *instr, out: qir_SF(c, src[0]); - return qir_SEL(c, QPU_COND_NS, src[1], src[2]); + return qir_MOV(c, qir_SEL(c, QPU_COND_NS, src[1], src[2])); +} + +static struct qreg +ntq_fddx(struct vc4_compile *c, struct qreg src) +{ + /* Make sure that we have a bare temp to use for MUL rotation, so it + * can be allocated to an accumulator. + */ + if (src.pack || src.file != QFILE_TEMP) + src = qir_MOV(c, src); + + struct qreg from_left = qir_ROT_MUL(c, src, 1); + struct qreg from_right = qir_ROT_MUL(c, src, 15); + + /* Distinguish left/right pixels of the quad. */ + qir_SF(c, qir_AND(c, qir_reg(QFILE_QPU_ELEMENT, 0), + qir_uniform_ui(c, 1))); + + return qir_MOV(c, qir_SEL(c, QPU_COND_ZS, + qir_FSUB(c, from_right, src), + qir_FSUB(c, src, from_left))); +} + +static struct qreg +ntq_fddy(struct vc4_compile *c, struct qreg src) +{ + if (src.pack || src.file != QFILE_TEMP) + src = qir_MOV(c, src); + + struct qreg from_bottom = qir_ROT_MUL(c, src, 2); + struct qreg from_top = qir_ROT_MUL(c, src, 14); + + /* Distinguish top/bottom pixels of the quad. 
*/ + qir_SF(c, qir_AND(c, + qir_reg(QFILE_QPU_ELEMENT, 0), + qir_uniform_ui(c, 2))); + + return qir_MOV(c, qir_SEL(c, QPU_COND_ZS, + qir_FSUB(c, from_top, src), + qir_FSUB(c, src, from_bottom))); } static void @@ -949,7 +1079,8 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) srcs[i] = ntq_get_src(c, instr->src[i].src, instr->src[i].swizzle[0]); for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++) - ntq_store_dest(c, &instr->dest.dest, i, srcs[i]); + ntq_store_dest(c, &instr->dest.dest, i, + qir_MOV(c, srcs[i])); return; } @@ -1015,9 +1146,9 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) case nir_op_i2b: case nir_op_f2b: qir_SF(c, src[0]); - result = qir_SEL(c, QPU_COND_ZC, - qir_uniform_ui(c, ~0), - qir_uniform_ui(c, 0)); + result = qir_MOV(c, qir_SEL(c, QPU_COND_ZC, + qir_uniform_ui(c, ~0), + qir_uniform_ui(c, 0))); break; case nir_op_iadd: @@ -1081,7 +1212,7 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) break; case nir_op_fcsel: qir_SF(c, src[0]); - result = qir_SEL(c, QPU_COND_ZC, src[1], src[2]); + result = qir_MOV(c, qir_SEL(c, QPU_COND_ZC, src[1], src[2])); break; case nir_op_frcp: @@ -1157,6 +1288,18 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr) result = qir_V8MULD(c, src[0], src[1]); break; + case nir_op_fddx: + case nir_op_fddx_coarse: + case nir_op_fddx_fine: + result = ntq_fddx(c, src[0]); + break; + + case nir_op_fddy: + case nir_op_fddy_coarse: + case nir_op_fddy_fine: + result = ntq_fddy(c, src[0]); + break; + default: fprintf(stderr, "unknown NIR ALU inst: "); nir_print_instr(&instr->instr, stderr); @@ -1183,7 +1326,7 @@ emit_frag_end(struct vc4_compile *c) } uint32_t discard_cond = QPU_COND_ALWAYS; - if (c->discard.file != QFILE_NULL) { + if (c->s->info->fs.uses_discard) { qir_SF(c, c->discard); discard_cond = QPU_COND_ZS; } @@ -1312,7 +1455,7 @@ emit_vert_end(struct vc4_compile *c, struct vc4_varying_slot *fs_inputs, uint32_t num_fs_inputs) { - struct qreg rcp_w = qir_RCP(c, c->outputs[c->output_position_index + 3]); + struct qreg rcp_w = ntq_rcp(c, c->outputs[c->output_position_index + 3]); emit_stub_vpm_read(c); @@ -1370,15 +1513,14 @@ vc4_optimize_nir(struct nir_shader *s) progress = false; NIR_PASS_V(s, nir_lower_vars_to_ssa); - NIR_PASS_V(s, nir_lower_alu_to_scalar); - NIR_PASS_V(s, nir_lower_phis_to_scalar); - + NIR_PASS(progress, s, nir_lower_alu_to_scalar); + NIR_PASS(progress, s, nir_lower_phis_to_scalar); NIR_PASS(progress, s, nir_copy_prop); NIR_PASS(progress, s, nir_opt_remove_phis); NIR_PASS(progress, s, nir_opt_dce); NIR_PASS(progress, s, nir_opt_dead_cf); NIR_PASS(progress, s, nir_opt_cse); - NIR_PASS(progress, s, nir_opt_peephole_select); + NIR_PASS(progress, s, nir_opt_peephole_select, 8); NIR_PASS(progress, s, nir_opt_algebraic); NIR_PASS(progress, s, nir_opt_constant_folding); NIR_PASS(progress, s, nir_opt_undef); @@ -1427,10 +1569,11 @@ ntq_setup_inputs(struct vc4_compile *c) if (c->stage == QSTAGE_FRAG) { if (var->data.location == VARYING_SLOT_POS) { emit_fragcoord_input(c, loc); - } else if (var->data.location >= VARYING_SLOT_VAR0 && - (c->fs_key->point_sprite_mask & - (1 << (var->data.location - - VARYING_SLOT_VAR0)))) { + } else if (var->data.location == VARYING_SLOT_PNTC || + (var->data.location >= VARYING_SLOT_VAR0 && + (c->fs_key->point_sprite_mask & + (1 << (var->data.location - + VARYING_SLOT_VAR0))))) { c->inputs[loc * 4 + 0] = c->point_x; c->inputs[loc * 4 + 1] = c->point_y; } else { @@ -1485,11 +1628,11 @@ static void ntq_setup_uniforms(struct vc4_compile *c) { 
nir_foreach_variable(var, &c->s->uniforms) { - unsigned array_len = MAX2(glsl_get_length(var->type), 1); - unsigned array_elem_size = 4 * sizeof(float); + uint32_t vec4_count = st_glsl_type_size(var->type); + unsigned vec4_size = 4 * sizeof(float); - declare_uniform_range(c, var->data.driver_location * array_elem_size, - array_len * array_elem_size); + declare_uniform_range(c, var->data.driver_location * vec4_size, + vec4_count * vec4_size); } } @@ -1632,12 +1775,12 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) } } ntq_store_dest(c, &instr->dest, 0, - c->color_reads[sample_index]); + qir_MOV(c, c->color_reads[sample_index])); } else { offset = nir_intrinsic_base(instr) + const_offset->u32[0]; int comp = nir_intrinsic_component(instr); ntq_store_dest(c, &instr->dest, 0, - c->inputs[offset * 4 + comp]); + qir_MOV(c, c->inputs[offset * 4 + comp])); } break; @@ -1667,15 +1810,33 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr) break; case nir_intrinsic_discard: - c->discard = qir_uniform_ui(c, ~0); + if (c->execute.file != QFILE_NULL) { + qir_SF(c, c->execute); + qir_MOV_cond(c, QPU_COND_ZS, c->discard, + qir_uniform_ui(c, ~0)); + } else { + qir_MOV_dest(c, c->discard, qir_uniform_ui(c, ~0)); + } break; - case nir_intrinsic_discard_if: - if (c->discard.file == QFILE_NULL) - c->discard = qir_uniform_ui(c, 0); - c->discard = qir_OR(c, c->discard, + case nir_intrinsic_discard_if: { + /* true (~0) if we're discarding */ + struct qreg cond = ntq_get_src(c, instr->src[0], 0); + + if (c->execute.file != QFILE_NULL) { + /* execute == 0 means the channel is active. Invert + * the condition so that we can use zero as "executing + * and discarding." + */ + qir_SF(c, qir_AND(c, c->execute, qir_NOT(c, cond))); + qir_MOV_cond(c, QPU_COND_ZS, c->discard, cond); + } else { + qir_OR_dest(c, c->discard, c->discard, ntq_get_src(c, instr->src[0], 0)); + } + break; + } default: fprintf(stderr, "Unknown intrinsic: "); @@ -1706,11 +1867,9 @@ ntq_emit_if(struct vc4_compile *c, nir_if *if_stmt) return; } - nir_cf_node *nir_first_else_node = nir_if_first_else_node(if_stmt); - nir_cf_node *nir_last_else_node = nir_if_last_else_node(if_stmt); - nir_block *nir_else_block = nir_cf_node_as_block(nir_first_else_node); + nir_block *nir_else_block = nir_if_first_else_block(if_stmt); bool empty_else_block = - (nir_first_else_node == nir_last_else_node && + (nir_else_block == nir_if_last_else_block(if_stmt) && exec_list_is_empty(&nir_else_block->instr_list)); struct qblock *then_block = qir_new_block(c); @@ -1947,6 +2106,9 @@ ntq_emit_impl(struct vc4_compile *c, nir_function_impl *impl) static void nir_to_qir(struct vc4_compile *c) { + if (c->stage == QSTAGE_FRAG && c->s->info->fs.uses_discard) + c->discard = qir_MOV(c, qir_uniform_ui(c, 0)); + ntq_setup_inputs(c); ntq_setup_outputs(c); ntq_setup_uniforms(c); @@ -1969,8 +2131,16 @@ static const nir_shader_compiler_options nir_options = { .lower_fsat = true, .lower_fsqrt = true, .lower_negate = true, + .native_integers = true, }; +const void * +vc4_screen_get_compiler_options(struct pipe_screen *pscreen, + enum pipe_shader_ir ir, unsigned shader) +{ + return &nir_options; +} + static int count_nir_instrs(nir_shader *nir) { @@ -1988,7 +2158,7 @@ count_nir_instrs(nir_shader *nir) static struct vc4_compile * vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage, - struct vc4_key *key) + struct vc4_key *key, bool fs_threaded) { struct vc4_compile *c = qir_compile_init(); @@ -1998,6 +2168,7 @@ vc4_shader_ntq(struct vc4_context 
*vc4, enum qstage stage, c->program_id = key->shader_state->program_id; c->variant_id = p_atomic_inc_return(&key->shader_state->compiled_variant_count); + c->fs_threaded = fs_threaded; c->key = key; switch (stage) { @@ -2115,6 +2286,17 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage, switch (stage) { case QSTAGE_FRAG: + /* FS threading requires that the thread execute + * QPU_SIG_LAST_THREAD_SWITCH exactly once before terminating + * (with no other THRSW afterwards, obviously). If we didn't + * fetch a texture at a top level block, this wouldn't be + * true. + */ + if (c->fs_threaded && !c->last_thrsw_at_top_level) { + c->failed = true; + return c; + } + emit_frag_end(c); break; case QSTAGE_VERT: @@ -2179,14 +2361,24 @@ vc4_shader_state_create(struct pipe_context *pctx, so->program_id = vc4->next_uncompiled_program_id++; - if (vc4_debug & VC4_DEBUG_TGSI) { - fprintf(stderr, "prog %d TGSI:\n", - so->program_id); - tgsi_dump(cso->tokens, 0); - fprintf(stderr, "\n"); - } + nir_shader *s; - nir_shader *s = tgsi_to_nir(cso->tokens, &nir_options); + if (cso->type == PIPE_SHADER_IR_NIR) { + /* The backend takes ownership of the NIR shader on state + * creation. + */ + s = cso->ir.nir; + } else { + assert(cso->type == PIPE_SHADER_IR_TGSI); + + if (vc4_debug & VC4_DEBUG_TGSI) { + fprintf(stderr, "prog %d TGSI:\n", + so->program_id); + tgsi_dump(cso->tokens, 0); + fprintf(stderr, "\n"); + } + s = tgsi_to_nir(cso->tokens, &nir_options); + } NIR_PASS_V(s, nir_opt_global_to_local); NIR_PASS_V(s, nir_convert_to_ssa); @@ -2249,7 +2441,7 @@ vc4_setup_compiled_fs_inputs(struct vc4_context *vc4, struct vc4_compile *c, memset(input_live, 0, sizeof(input_live)); qir_for_each_inst_inorder(inst, c) { - for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { + for (int i = 0; i < qir_get_nsrc(inst); i++) { if (inst->src[i].file == QFILE_VARY) input_live[inst->src[i].index] = true; } @@ -2305,12 +2497,16 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, { struct hash_table *ht; uint32_t key_size; + bool try_threading; + if (stage == QSTAGE_FRAG) { ht = vc4->fs_cache; key_size = sizeof(struct vc4_fs_key); + try_threading = vc4->screen->has_threaded_fs; } else { ht = vc4->vs_cache; key_size = sizeof(struct vc4_vs_key); + try_threading = false; } struct vc4_compiled_shader *shader; @@ -2318,7 +2514,13 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, if (entry) return entry->data; - struct vc4_compile *c = vc4_shader_ntq(vc4, stage, key); + struct vc4_compile *c = vc4_shader_ntq(vc4, stage, key, try_threading); + /* If the FS failed to compile threaded, fall back to single threaded. */ + if (try_threading && c->failed) { + qir_compile_destroy(c); + c = vc4_shader_ntq(vc4, stage, key, false); + } + shader = rzalloc(NULL, struct vc4_compiled_shader); shader->program_id = vc4->next_compiled_program_id++; @@ -2327,7 +2529,7 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, /* Note: the temporary clone in c->s has been freed. 
*/ nir_shader *orig_shader = key->shader_state->base.ir.nir; - if (orig_shader->info.outputs_written & (1 << FRAG_RESULT_DEPTH)) + if (orig_shader->info->outputs_written & (1 << FRAG_RESULT_DEPTH)) shader->disable_early_z = true; } else { shader->num_inputs = c->num_inputs; @@ -2342,9 +2544,17 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, } } - copy_uniform_state_to_shader(shader, c); - shader->bo = vc4_bo_alloc_shader(vc4->screen, c->qpu_insts, - c->qpu_inst_count * sizeof(uint64_t)); + shader->failed = c->failed; + if (c->failed) { + shader->failed = true; + } else { + copy_uniform_state_to_shader(shader, c); + shader->bo = vc4_bo_alloc_shader(vc4->screen, c->qpu_insts, + c->qpu_inst_count * + sizeof(uint64_t)); + } + + shader->fs_threaded = c->fs_threaded; /* Copy the compiler UBO range state to the compiled shader, dropping * out arrays that were never referenced by an indirect load. @@ -2382,7 +2592,7 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, qir_compile_destroy(c); struct vc4_key *dup_key; - dup_key = ralloc_size(shader, key_size); + dup_key = rzalloc_size(shader, key_size); /* TODO: don't use rzalloc */ memcpy(dup_key, key, key_size); _mesa_hash_table_insert(ht, dup_key, shader); @@ -2427,6 +2637,7 @@ vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key, static void vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode) { + struct vc4_job *job = vc4->job; struct vc4_fs_key local_key; struct vc4_fs_key *key = &local_key; @@ -2453,7 +2664,7 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode) } else { key->logicop_func = PIPE_LOGICOP_COPY; } - if (vc4->msaa) { + if (job->msaa) { key->msaa = vc4->rasterizer->base.multisample; key->sample_coverage = (vc4->rasterizer->base.multisample && vc4->sample_mask != (1 << VC4_MAX_SAMPLES) - 1); @@ -2546,11 +2757,15 @@ vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t prim_mode) } } -void +bool vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode) { vc4_update_compiled_fs(vc4, prim_mode); vc4_update_compiled_vs(vc4, prim_mode); + + return !(vc4->prog.cs->failed || + vc4->prog.vs->failed || + vc4->prog.fs->failed); } static uint32_t
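
The new vc4_screen_get_compiler_options() hook and the PIPE_SHADER_IR_NIR branch in vc4_shader_state_create() let the state tracker hand vc4 NIR directly, while the TGSI path (tgsi_to_nir) is kept as a fallback. A sketch of the assumed screen-side wiring follows; vc4_screen.c is not part of this diff, and vc4_screen_init_compiler() is a hypothetical helper name used only for illustration, but the hook matches the pipe_screen vtable entry the new function is shaped for:

    /* Assumed wiring in vc4_screen.c (not shown in this diff). */
    static void
    vc4_screen_init_compiler(struct pipe_screen *pscreen)
    {
            /* Advertise vc4's NIR options so st/mesa builds NIR with the
             * right lowering (lower_fsat/fsqrt/negate, native integers) and
             * hands shader_state_create PIPE_SHADER_IR_NIR instead of TGSI.
             */
            pscreen->get_compiler_options = vc4_screen_get_compiler_options;
    }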
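
For context on the vc4_update_compiled_shaders() change from void to bool: the returned value reflects the new shader->failed plumbing (for example, when the threaded-FS fallback also fails to compile), so the draw path is expected to check it and drop the draw rather than submit a broken shader. A minimal sketch of such a call site, assuming the usual vc4_draw_vbo() entry point in vc4_draw.c; the body here is illustrative, not the actual code:

    static void
    vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
    {
            struct vc4_context *vc4 = vc4_context(pctx);

            /* ... vertex/index buffer and state setup elided ... */

            /* Compile (or fetch cached) shader variants for this draw; if
             * any of them failed, skip the draw instead of submitting a
             * broken program to the kernel.
             */
            if (!vc4_update_compiled_shaders(vc4, info->mode))
                    return;

            /* ... emit shader state and draw packets ... */
    }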
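
ntq_fddx()/ntq_fddy() above implement screen-space derivatives with QPU MUL rotation across the 2x2 quad, selecting on element bit 0 (left/right column) or bit 1 (top/bottom row) as described in the hunk's comments. A hypothetical scalar model of the net result for fddx, assuming v[] holds the quad's four values indexed by element-within-quad:

    /* Scalar model only: both pixels of a row receive the same coarse
     * derivative, right-column value minus left-column value.  fddy is
     * analogous, with bit 1 selecting the row instead of the column.
     */
    static float
    model_fddx(const float v[4], unsigned elem)
    {
            unsigned left  = elem & ~1u;    /* column 0 of this row */
            unsigned right = elem | 1u;     /* column 1 of this row */

            return v[right] - v[left];
    }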