From e26bdf56a479f0da63da09793acbb0bcebeb29e4 Mon Sep 17 00:00:00 2001 From: Anuj Phogat Date: Thu, 24 Oct 2013 16:21:13 -0700 Subject: [PATCH] i965: Add FS backend for builtin gl_SampleMask[] V2: - Update comments - Add a special backend instruction to compute sample_mask. - Add a new variable uses_omask in brw_wm_prog_data. V3: - Make changes to support simd16 mode. - Delete redundant AND instruction and handle the register stride in FS backend instruction. Signed-off-by: Anuj Phogat Reviewed-by: Paul Berry --- src/mesa/drivers/dri/i965/brw_context.h | 1 + src/mesa/drivers/dri/i965/brw_defines.h | 1 + src/mesa/drivers/dri/i965/brw_fs.h | 5 +++ .../drivers/dri/i965/brw_fs_generator.cpp | 42 +++++++++++++++++++ src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 12 ++++++ 5 files changed, 61 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 665807c8bf8..bec4d6b5541 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -384,6 +384,7 @@ struct brw_wm_prog_data { GLuint nr_pull_params; bool dual_src_blend; bool uses_pos_offset; + bool uses_omask; uint32_t prog_offset_16; /** diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index e03ac583bfe..bfea88a670f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -794,6 +794,7 @@ enum opcode { FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_MOV_DISPATCH_TO_FLAGS, FS_OPCODE_DISCARD_JUMP, + FS_OPCODE_SET_OMASK, FS_OPCODE_SET_SAMPLE_ID, FS_OPCODE_SET_SIMD4X2_OFFSET, FS_OPCODE_PACK_HALF_2x16_SPLIT, diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 0ed8c053634..e9bf52f1f11 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -443,6 +443,7 @@ public: struct hash_table *variable_ht; fs_reg frag_depth; + fs_reg sample_mask; fs_reg 
outputs[BRW_MAX_DRAW_BUFFERS]; unsigned output_components[BRW_MAX_DRAW_BUFFERS]; fs_reg dual_src_output; @@ -549,6 +550,10 @@ private: struct brw_reg offset); void generate_mov_dispatch_to_flags(fs_inst *inst); + void generate_set_omask(fs_inst *inst, + struct brw_reg dst, + struct brw_reg sample_mask); + void generate_set_sample_id(fs_inst *inst, struct brw_reg dst, struct brw_reg src0, diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 25530983f66..d67c4e4b3ae 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1068,6 +1068,44 @@ fs_generator::generate_set_simd4x2_offset(fs_inst *inst, brw_pop_insn_state(p); } +/* Moves register mask into register dst after overriding the region of mask: vstride=16, width=8, hstride=2 when mask comes in with vstride 8, width 8, hstride 1, or vstride=0, width=1, hstride=0 + * when it comes in with vstride 0, width 1, hstride 0 (when mask is passed as a uniform); any other incoming region trips the assert. dst must be of type UW and is widened with vec16() when dispatch_width is 16. + * The source is rebuilt from mask.file and mask.nr and retyped to dst.type. The MOV is emitted with compression off and mask control disabled, between a push and a pop of the instruction state. + */ +void +fs_generator::generate_set_omask(fs_inst *inst, + struct brw_reg dst, + struct brw_reg mask) +{ + bool stride_8_8_1 = + (mask.vstride == BRW_VERTICAL_STRIDE_8 && + mask.width == BRW_WIDTH_8 && + mask.hstride == BRW_HORIZONTAL_STRIDE_1); + + bool stride_0_1_0 = + (mask.vstride == BRW_VERTICAL_STRIDE_0 && + mask.width == BRW_WIDTH_1 && + mask.hstride == BRW_HORIZONTAL_STRIDE_0); + + assert(stride_8_8_1 || stride_0_1_0); + assert(dst.type == BRW_REGISTER_TYPE_UW); + + if (dispatch_width == 16) + dst = vec16(dst); + brw_push_insn_state(p); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + + if (stride_8_8_1) { + brw_MOV(p, dst, stride(retype(brw_vec1_reg(mask.file, mask.nr, 0), + dst.type), 16, 8, 2)); + } else if (stride_0_1_0) { + brw_MOV(p, dst, stride(retype(brw_vec1_reg(mask.file, mask.nr, 0), + dst.type), 0, 1, 0)); + } + brw_pop_insn_state(p); +} + /* Sets vstride=1, width=4, hstride=0 of register src1 during * the ADD instruction. 
*/ @@ -1666,6 +1704,10 @@ fs_generator::generate_code(exec_list *instructions) generate_set_simd4x2_offset(inst, dst, src[0]); break; + case FS_OPCODE_SET_OMASK: + generate_set_omask(inst, dst, src[0]); + break; + case FS_OPCODE_SET_SAMPLE_ID: generate_set_sample_id(inst, dst, src[0], src[1]); break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 09936374fd2..dd606718b66 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -82,6 +82,8 @@ fs_visitor::visit(ir_variable *ir) } } else if (ir->location == FRAG_RESULT_DEPTH) { this->frag_depth = *reg; + } else if (ir->location == FRAG_RESULT_SAMPLE_MASK) { + this->sample_mask = *reg; } else { /* gl_FragData or a user-defined FS output */ assert(ir->location >= FRAG_RESULT_DATA0 && @@ -2526,6 +2528,16 @@ fs_visitor::emit_fb_writes() pop_force_uncompressed(); } + c->prog_data.uses_omask = + fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK); + if(c->prog_data.uses_omask) { + this->current_annotation = "FB write oMask"; + assert(this->sample_mask.file != BAD_FILE); + /* Hand over gl_SampleMask. Only lower 16 bits are relevant. */ + emit(FS_OPCODE_SET_OMASK, fs_reg(MRF, nr, BRW_REGISTER_TYPE_UW), this->sample_mask); + nr += 1; + } + /* Reserve space for color. It'll be filled in per MRT below. */ int color_mrf = nr; nr += 4 * reg_width; -- 2.30.2