From 6ced0fa57f1ad308b8cdb0ad7ccb9dffb30ad107 Mon Sep 17 00:00:00 2001 From: Paul Berry Date: Sun, 21 Apr 2013 08:51:33 -0700 Subject: [PATCH] i965/gs: Add opcodes needed for EndPrimitive(). Reviewed-by: Ian Romanick Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_defines.h | 26 ++++++ src/mesa/drivers/dri/i965/brw_shader.cpp | 4 + src/mesa/drivers/dri/i965/brw_vec4.h | 2 + src/mesa/drivers/dri/i965/brw_vec4_emit.cpp | 88 +++++++++++++++++++++ 4 files changed, 120 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 007e7fb2185..e9e0c4a0e3a 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -847,6 +847,32 @@ enum opcode { * scratch reads and writes to operate correctly. */ GS_OPCODE_SET_DWORD_2_IMMED, + + /** + * Prepare the dst register for storage in the "Channel Mask" fields of a + * URB_WRITE message header. + * + * DWORD 4 of dst is shifted left by 4 bits, so that later, + * GS_OPCODE_SET_CHANNEL_MASKS can OR DWORDs 0 and 4 together to form the + * final channel mask. + * + * Note: since GS_OPCODE_SET_CHANNEL_MASKS ORs DWORDs 0 and 4 together to + * form the final channel mask, DWORDs 0 and 4 of the dst register must not + * have any extraneous bits set prior to execution of this opcode (that is, + * they should be in the range 0x0 to 0xf). + */ + GS_OPCODE_PREPARE_CHANNEL_MASKS, + + /** + * Set the "Channel Mask" fields of a URB_WRITE message header. + * + * - dst is the MRF containing the message header. + * + * - src.x is the channel mask, as prepared by + * GS_OPCODE_PREPARE_CHANNEL_MASKS. DWORDs 0 and 4 are OR'ed together to + * form the final channel mask. + */ + GS_OPCODE_SET_CHANNEL_MASKS, }; #define BRW_PREDICATE_NONE 0 diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index e7dbdbe8920..53364a5ba60 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -507,6 +507,10 @@ brw_instruction_name(enum opcode op) return "set_vertex_count"; case GS_OPCODE_SET_DWORD_2_IMMED: return "set_dword_2_immed"; + case GS_OPCODE_PREPARE_CHANNEL_MASKS: + return "prepare_channel_masks"; + case GS_OPCODE_SET_CHANNEL_MASKS: + return "set_channel_masks"; default: /* Yes, this leaks. It's in debug code, it should never occur, and if diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index c5101d39007..cba5cd4cc95 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -610,6 +610,8 @@ private: void generate_gs_set_vertex_count(struct brw_reg dst, struct brw_reg src); void generate_gs_set_dword_2_immed(struct brw_reg dst, struct brw_reg src); + void generate_gs_prepare_channel_masks(struct brw_reg dst); + void generate_gs_set_channel_masks(struct brw_reg dst, struct brw_reg src); void generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index); void generate_scratch_write(vec4_instruction *inst, diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp index bf04bd9881e..6916134c1ac 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp @@ -515,6 +515,86 @@ vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst, brw_pop_insn_state(p); } +void +vec4_generator::generate_gs_prepare_channel_masks(struct brw_reg dst) +{ + /* We want to left shift just DWORD 4 (the x component belonging to the + * second geometry shader invocation) by 4 bits. So generate the + * instruction: + * + * shl(1) dst.4<1>UD dst.4<0,1,0>UD 4UD { align1 WE_all } + */ + dst = suboffset(vec1(dst), 4); + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_SHL(p, dst, dst, brw_imm_ud(4)); + brw_pop_insn_state(p); +} + +void +vec4_generator::generate_gs_set_channel_masks(struct brw_reg dst, + struct brw_reg src) +{ + /* From p21 of volume 4 part 2 of the Ivy Bridge PRM (2.4.3.1 Message + * Header: M0.5): + * + * 15 Vertex 1 DATA [3] / Vertex 0 DATA[7] Channel Mask + * + * When Swizzle Control = URB_INTERLEAVED this bit controls Vertex 1 + * DATA[3], when Swizzle Control = URB_NOSWIZZLE this bit controls + * Vertex 0 DATA[7]. This bit is ANDed with the corresponding + * channel enable to determine the final channel enable. For the + * URB_READ_OWORD & URB_READ_HWORD messages, when final channel + * enable is 1 it indicates that Vertex 1 DATA [3] will be included + * in the writeback message. For the URB_WRITE_OWORD & + * URB_WRITE_HWORD messages, when final channel enable is 1 it + * indicates that Vertex 1 DATA [3] will be written to the surface. + * + * 0: Vertex 1 DATA [3] / Vertex 0 DATA[7] channel not included + * 1: Vertex DATA [3] / Vertex 0 DATA[7] channel included + * + * 14 Vertex 1 DATA [2] Channel Mask + * 13 Vertex 1 DATA [1] Channel Mask + * 12 Vertex 1 DATA [0] Channel Mask + * 11 Vertex 0 DATA [3] Channel Mask + * 10 Vertex 0 DATA [2] Channel Mask + * 9 Vertex 0 DATA [1] Channel Mask + * 8 Vertex 0 DATA [0] Channel Mask + * + * (This is from a section of the PRM that is agnostic to the particular + * type of shader being executed, so "Vertex 0" and "Vertex 1" refer to + * geometry shader invocations 0 and 1, respectively). Since we have the + * enable flags for geometry shader invocation 0 in bits 3:0 of DWORD 0, + * and the enable flags for geometry shader invocation 1 in bits 7:0 of + * DWORD 4, we just need to OR them together and store the result in bits + * 15:8 of DWORD 5. + * + * It's easier to get the EU to do this if we think of the src and dst + * registers as composed of 32 bytes each; then, we want to pick up the + * contents of bytes 0 and 16 from src, OR them together, and store them in + * byte 21. + * + * We can do that by the following EU instruction: + * + * or(1) dst.21<1>UB src<0,1,0>UB src.16<0,1,0>UB { align1 WE_all } + * + * Note: this relies on the source register having zeros in (a) bits 7:4 of + * DWORD 0 and (b) bits 3:0 of DWORD 4. We can rely on (b) because the + * source register was prepared by GS_OPCODE_PREPARE_CHANNEL_MASKS (which + * shifts DWORD 4 left by 4 bits), and we can rely on (a) because prior to + * the execution of GS_OPCODE_PREPARE_CHANNEL_MASKS, DWORDs 0 and 4 need to + * contain valid channel mask values (which are in the range 0x0-0xf). + */ + dst = retype(dst, BRW_REGISTER_TYPE_UB); + src = retype(src, BRW_REGISTER_TYPE_UB); + brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_OR(p, suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16)); + brw_pop_insn_state(p); +} + void vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index) @@ -1003,6 +1083,14 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, generate_gs_set_dword_2_immed(dst, src[0]); break; + case GS_OPCODE_PREPARE_CHANNEL_MASKS: + generate_gs_prepare_channel_masks(dst); + break; + + case GS_OPCODE_SET_CHANNEL_MASKS: + generate_gs_set_channel_masks(dst, src[0]); + break; + case SHADER_OPCODE_SHADER_TIME_ADD: brw_shader_time_add(p, src[0], SURF_INDEX_VEC4_SHADER_TIME); mark_surface_used(SURF_INDEX_VEC4_SHADER_TIME); -- 2.30.2