From 3f929efa2872aa5a4402520ec9fd551392e2413a Mon Sep 17 00:00:00 2001
From: Paul Berry
Date: Mon, 18 Jun 2012 14:50:04 -0700
Subject: [PATCH] i965/fs: Add FS_OPCODE_MOV_DISPATCH_TO_FLAGS to fragment shader backend.

In order to compute centroid varyings correctly, the fragment shader
needs to be able to load the current pixel/sample mask into a flag
register. This patch adds an opcode to the fragment shader back-end
to do this; the opcode gets translated into the instruction

    mov(1) f0<1>UW g1.14<0,1,0>UW { align1 WE_all }

Since this instruction clobbers f0, instruction scheduling has to
treat it the same as instructions that have a conditional modifier.

Reviewed-by: Eric Anholt
---
 src/mesa/drivers/dri/i965/brw_defines.h            |  1 +
 src/mesa/drivers/dri/i965/brw_fs.h                 |  1 +
 src/mesa/drivers/dri/i965/brw_fs_emit.cpp          | 26 +++++++++++++++++++
 .../dri/i965/brw_fs_schedule_instructions.cpp      | 13 ++++++++--
 4 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index 73a8c906692..3234b0e7419 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -675,6 +675,7 @@ enum opcode {
    FS_OPCODE_SPILL,
    FS_OPCODE_UNSPILL,
    FS_OPCODE_PULL_CONSTANT_LOAD,
+   FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
 
    VS_OPCODE_URB_WRITE,
    VS_OPCODE_SCRATCH_READ,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 18d0a9cefa3..ee067321f5b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -539,6 +539,7 @@ public:
    void generate_spill(fs_inst *inst, struct brw_reg src);
    void generate_unspill(fs_inst *inst, struct brw_reg dst);
    void generate_pull_constant_load(fs_inst *inst, struct brw_reg dst);
+   void generate_mov_dispatch_to_flags();
 
    void emit_dummy_fs();
    fs_reg *emit_fragcoord_interpolation(ir_variable *ir);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 0881ad76c88..bfa62c31781 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -591,6 +591,27 @@ fs_visitor::generate_pull_constant_load(fs_inst *inst, struct brw_reg dst)
    }
 }
 
+
+/**
+ * Cause the current pixel/sample mask (from R1.7 bits 15:0) to be transferred
+ * into the flags register (f0.0).
+ *
+ * Used only on Gen6 and above.
+ */
+void
+fs_visitor::generate_mov_dispatch_to_flags()
+{
+   struct brw_reg f0 = brw_flag_reg();
+   struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+
+   assert (intel->gen >= 6);
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_MOV(p, f0, g1);
+   brw_pop_insn_state(p);
+}
+
+
 static uint32_t brw_file_from_reg(fs_reg *reg)
 {
    switch (reg->file) {
@@ -928,6 +949,11 @@ fs_visitor::generate_code()
       case FS_OPCODE_FB_WRITE:
          generate_fb_write(inst);
          break;
+
+      case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
+         generate_mov_dispatch_to_flags();
+         break;
+
       default:
          if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
             _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
index 910f3297d27..07c7c40d595 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp
@@ -334,7 +334,11 @@ instruction_scheduler::calculate_deps()
          }
       }
 
-      if (inst->conditional_mod) {
+      /* Treat FS_OPCODE_MOV_DISPATCH_TO_FLAGS as though it had a
+       * conditional_mod, because it sets the flag register.
+       */
+      if (inst->conditional_mod ||
+          inst->opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) {
          add_dep(last_conditional_mod, n, 0);
          last_conditional_mod = n;
       }
@@ -413,8 +417,13 @@ instruction_scheduler::calculate_deps()
          }
       }
 
-      if (inst->conditional_mod)
+      /* Treat FS_OPCODE_MOV_DISPATCH_TO_FLAGS as though it had a
+       * conditional_mod, because it sets the flag register.
+       */
+      if (inst->conditional_mod ||
+          inst->opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) {
          last_conditional_mod = n;
+      }
    }
 }
 
-- 
2.30.2