X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_eu_emit.c;h=f9f8d49a0d0b4f78b4430ef238e19069966990a3;hb=160848d8ef96cf3a760c02cc576df7dbffc1f669;hp=fe3a0299925f77c49edd50ec71c9e3b59119faa7;hpb=dbf3a15313eed930a3d8fdde12e457259c43651b;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index fe3a0299925..f9f8d49a0d0 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -34,31 +34,71 @@ #include "brw_defines.h" #include "brw_eu.h" - - +#include "glsl/ralloc.h" /*********************************************************************** * Internal helper for constructing instructions */ -static void guess_execution_size( struct brw_instruction *insn, - struct brw_reg reg ) +static void guess_execution_size(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg reg) { - if (reg.width == BRW_WIDTH_8 && - insn->header.compression_control == BRW_COMPRESSION_COMPRESSED) + if (reg.width == BRW_WIDTH_8 && p->compressed) insn->header.execution_size = BRW_EXECUTE_16; else insn->header.execution_size = reg.width; /* note - definitions are compatible */ } -static void brw_set_dest( struct brw_instruction *insn, - struct brw_reg dest ) +/** + * Prior to Sandybridge, the SEND instruction accepted non-MRF source + * registers, implicitly moving the operand to a message register. + * + * On Sandybridge, this is no longer the case. This function performs the + * explicit move; it should be called before emitting a SEND instruction. 
+ */ +void +gen6_resolve_implied_move(struct brw_compile *p, + struct brw_reg *src, + GLuint msg_reg_nr) +{ + struct intel_context *intel = &p->brw->intel; + if (intel->gen < 6) + return; + + if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) { + brw_push_insn_state(p); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), + retype(*src, BRW_REGISTER_TYPE_UD)); + brw_pop_insn_state(p); + } + *src = brw_message_reg(msg_reg_nr); +} + +static void +gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg) +{ + struct intel_context *intel = &p->brw->intel; + if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) { + reg->file = BRW_GENERAL_REGISTER_FILE; + reg->nr += 111; + } +} + + +void +brw_set_dest(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg dest) { if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE && dest.file != BRW_MESSAGE_REGISTER_FILE) assert(dest.nr < 128); + gen7_convert_mrf_to_grf(p, &dest); + insn->bits1.da1.dest_reg_file = dest.file; insn->bits1.da1.dest_reg_type = dest.type; insn->bits1.da1.dest_address_mode = dest.address_mode; @@ -100,7 +140,7 @@ static void brw_set_dest( struct brw_instruction *insn, /* NEW: Set the execution size based on dest.width and * insn->compression_control: */ - guess_execution_size(insn, dest); + guess_execution_size(p, insn, dest); } extern int reg_type_size[]; @@ -181,12 +221,15 @@ validate_reg(struct brw_instruction *insn, struct brw_reg reg) /* 10. Check destination issues. 
*/ } -static void brw_set_src0( struct brw_instruction *insn, - struct brw_reg reg ) +void +brw_set_src0(struct brw_compile *p, struct brw_instruction *insn, + struct brw_reg reg) { if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE) assert(reg.nr < 128); + gen7_convert_mrf_to_grf(p, &reg); + validate_reg(insn, reg); insn->bits1.da1.src0_reg_file = reg.file; @@ -257,13 +300,16 @@ static void brw_set_src0( struct brw_instruction *insn, } -void brw_set_src1( struct brw_instruction *insn, - struct brw_reg reg ) +void brw_set_src1(struct brw_compile *p, + struct brw_instruction *insn, + struct brw_reg reg) { assert(reg.file != BRW_MESSAGE_REGISTER_FILE); assert(reg.nr < 128); + gen7_convert_mrf_to_grf(p, &reg); + validate_reg(insn, reg); insn->bits1.da1.src1_reg_file = reg.file; @@ -326,7 +372,7 @@ void brw_set_src1( struct brw_instruction *insn, -static void brw_set_math_message( struct brw_context *brw, +static void brw_set_math_message( struct brw_compile *p, struct brw_instruction *insn, GLuint msg_length, GLuint response_length, @@ -336,8 +382,9 @@ static void brw_set_math_message( struct brw_context *brw, GLboolean saturate, GLuint dataType ) { + struct brw_context *brw = p->brw; struct intel_context *intel = &brw->intel; - brw_set_src1(insn, brw_imm_d(0)); + brw_set_src1(p, insn, brw_imm_d(0)); if (intel->gen == 5) { insn->bits3.math_gen5.function = function; @@ -366,14 +413,15 @@ static void brw_set_math_message( struct brw_context *brw, } -static void brw_set_ff_sync_message(struct brw_context *brw, +static void brw_set_ff_sync_message(struct brw_compile *p, struct brw_instruction *insn, GLboolean allocate, GLuint response_length, GLboolean end_of_thread) { + struct brw_context *brw = p->brw; struct intel_context *intel = &brw->intel; - brw_set_src1(insn, brw_imm_d(0)); + brw_set_src1(p, insn, brw_imm_d(0)); insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */ insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */ @@ -393,7 +441,7 @@ static void 
brw_set_ff_sync_message(struct brw_context *brw, } } -static void brw_set_urb_message( struct brw_context *brw, +static void brw_set_urb_message( struct brw_compile *p, struct brw_instruction *insn, GLboolean allocate, GLboolean used, @@ -404,11 +452,25 @@ static void brw_set_urb_message( struct brw_context *brw, GLuint offset, GLuint swizzle_control ) { + struct brw_context *brw = p->brw; struct intel_context *intel = &brw->intel; - brw_set_src1(insn, brw_imm_d(0)); - - if (intel->gen >= 5) { - insn->bits3.urb_gen5.opcode = 0; /* ? */ + brw_set_src1(p, insn, brw_imm_d(0)); + + if (intel->gen == 7) { + insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */ + insn->bits3.urb_gen7.offset = offset; + assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE); + insn->bits3.urb_gen7.swizzle_control = swizzle_control; + /* per_slot_offset = 0 makes it ignore offsets in message header */ + insn->bits3.urb_gen7.per_slot_offset = 0; + insn->bits3.urb_gen7.complete = complete; + insn->bits3.urb_gen7.header_present = 1; + insn->bits3.urb_gen7.response_length = response_length; + insn->bits3.urb_gen7.msg_length = msg_length; + insn->bits3.urb_gen7.end_of_thread = end_of_thread; + insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB; + } else if (intel->gen >= 5) { + insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */ insn->bits3.urb_gen5.offset = offset; insn->bits3.urb_gen5.swizzle_control = swizzle_control; insn->bits3.urb_gen5.allocate = allocate; @@ -442,35 +504,48 @@ static void brw_set_urb_message( struct brw_context *brw, } } -static void brw_set_dp_write_message( struct brw_context *brw, - struct brw_instruction *insn, - GLuint binding_table_index, - GLuint msg_control, - GLuint msg_type, - GLuint msg_length, - GLboolean header_present, - GLuint pixel_scoreboard_clear, - GLuint response_length, - GLuint end_of_thread, - GLuint send_commit_msg) +void +brw_set_dp_write_message(struct brw_compile *p, + struct brw_instruction *insn, + GLuint binding_table_index, + GLuint 
msg_control, + GLuint msg_type, + GLuint msg_length, + GLboolean header_present, + GLuint pixel_scoreboard_clear, + GLuint response_length, + GLuint end_of_thread, + GLuint send_commit_msg) { + struct brw_context *brw = p->brw; struct intel_context *intel = &brw->intel; - brw_set_src1(insn, brw_imm_ud(0)); - - if (intel->gen >= 6) { - insn->bits3.dp_render_cache.binding_table_index = binding_table_index; - insn->bits3.dp_render_cache.msg_control = msg_control; - insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear; - insn->bits3.dp_render_cache.msg_type = msg_type; - insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg; - insn->bits3.dp_render_cache.header_present = header_present; - insn->bits3.dp_render_cache.response_length = response_length; - insn->bits3.dp_render_cache.msg_length = msg_length; - insn->bits3.dp_render_cache.end_of_thread = end_of_thread; - insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE; - /* XXX really need below? 
*/ - insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE; - insn->bits2.send_gen5.end_of_thread = end_of_thread; + brw_set_src1(p, insn, brw_imm_ud(0)); + + if (intel->gen >= 7) { + insn->bits3.gen7_dp.binding_table_index = binding_table_index; + insn->bits3.gen7_dp.msg_control = msg_control; + insn->bits3.gen7_dp.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.gen7_dp.msg_type = msg_type; + insn->bits3.gen7_dp.header_present = header_present; + insn->bits3.gen7_dp.response_length = response_length; + insn->bits3.gen7_dp.msg_length = msg_length; + insn->bits3.gen7_dp.end_of_thread = end_of_thread; + + /* We always use the render cache for write messages */ + insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE; + } else if (intel->gen == 6) { + insn->bits3.gen6_dp.binding_table_index = binding_table_index; + insn->bits3.gen6_dp.msg_control = msg_control; + insn->bits3.gen6_dp.pixel_scoreboard_clear = pixel_scoreboard_clear; + insn->bits3.gen6_dp.msg_type = msg_type; + insn->bits3.gen6_dp.send_commit_msg = send_commit_msg; + insn->bits3.gen6_dp.header_present = header_present; + insn->bits3.gen6_dp.response_length = response_length; + insn->bits3.gen6_dp.msg_length = msg_length; + insn->bits3.gen6_dp.end_of_thread = end_of_thread; + + /* We always use the render cache for write messages */ + insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE; } else if (intel->gen == 5) { insn->bits3.dp_write_gen5.binding_table_index = binding_table_index; insn->bits3.dp_write_gen5.msg_control = msg_control; @@ -496,20 +571,49 @@ static void brw_set_dp_write_message( struct brw_context *brw, } } -static void brw_set_dp_read_message( struct brw_context *brw, - struct brw_instruction *insn, - GLuint binding_table_index, - GLuint msg_control, - GLuint msg_type, - GLuint target_cache, - GLuint msg_length, - GLuint response_length, - GLuint end_of_thread ) +void +brw_set_dp_read_message(struct brw_compile *p, + 
struct brw_instruction *insn, + GLuint binding_table_index, + GLuint msg_control, + GLuint msg_type, + GLuint target_cache, + GLuint msg_length, + GLuint response_length) { + struct brw_context *brw = p->brw; struct intel_context *intel = &brw->intel; - brw_set_src1(insn, brw_imm_d(0)); - - if (intel->gen == 5) { + brw_set_src1(p, insn, brw_imm_d(0)); + + if (intel->gen >= 7) { + insn->bits3.gen7_dp.binding_table_index = binding_table_index; + insn->bits3.gen7_dp.msg_control = msg_control; + insn->bits3.gen7_dp.pixel_scoreboard_clear = 0; + insn->bits3.gen7_dp.msg_type = msg_type; + insn->bits3.gen7_dp.header_present = 1; + insn->bits3.gen7_dp.response_length = response_length; + insn->bits3.gen7_dp.msg_length = msg_length; + insn->bits3.gen7_dp.end_of_thread = 0; + insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_CONST_CACHE; + } else if (intel->gen == 6) { + uint32_t target_function; + + if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE) + target_function = GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE; + else + target_function = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE; + + insn->bits3.gen6_dp.binding_table_index = binding_table_index; + insn->bits3.gen6_dp.msg_control = msg_control; + insn->bits3.gen6_dp.pixel_scoreboard_clear = 0; + insn->bits3.gen6_dp.msg_type = msg_type; + insn->bits3.gen6_dp.send_commit_msg = 0; + insn->bits3.gen6_dp.header_present = 1; + insn->bits3.gen6_dp.response_length = response_length; + insn->bits3.gen6_dp.msg_length = msg_length; + insn->bits3.gen6_dp.end_of_thread = 0; + insn->header.destreg__conditionalmod = target_function; + } else if (intel->gen == 5) { insn->bits3.dp_read_gen5.binding_table_index = binding_table_index; insn->bits3.dp_read_gen5.msg_control = msg_control; insn->bits3.dp_read_gen5.msg_type = msg_type; @@ -518,9 +622,19 @@ static void brw_set_dp_read_message( struct brw_context *brw, insn->bits3.dp_read_gen5.response_length = response_length; insn->bits3.dp_read_gen5.msg_length = msg_length; 
insn->bits3.dp_read_gen5.pad1 = 0; - insn->bits3.dp_read_gen5.end_of_thread = end_of_thread; + insn->bits3.dp_read_gen5.end_of_thread = 0; insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ; - insn->bits2.send_gen5.end_of_thread = end_of_thread; + insn->bits2.send_gen5.end_of_thread = 0; + } else if (intel->is_g4x) { + insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/ + insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/ + insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/ + insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/ + insn->bits3.dp_read_g4x.response_length = response_length; /*16:19*/ + insn->bits3.dp_read_g4x.msg_length = msg_length; /*20:23*/ + insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ + insn->bits3.dp_read_g4x.pad1 = 0; + insn->bits3.dp_read_g4x.end_of_thread = 0; } else { insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/ insn->bits3.dp_read.msg_control = msg_control; /*8:11*/ @@ -530,11 +644,11 @@ static void brw_set_dp_read_message( struct brw_context *brw, insn->bits3.dp_read.msg_length = msg_length; /*20:23*/ insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/ insn->bits3.dp_read.pad1 = 0; /*28:30*/ - insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/ + insn->bits3.dp_read.end_of_thread = 0; /*31*/ } } -static void brw_set_sampler_message(struct brw_context *brw, +static void brw_set_sampler_message(struct brw_compile *p, struct brw_instruction *insn, GLuint binding_table_index, GLuint sampler, @@ -545,11 +659,22 @@ static void brw_set_sampler_message(struct brw_context *brw, GLuint header_present, GLuint simd_mode) { + struct brw_context *brw = p->brw; struct intel_context *intel = &brw->intel; assert(eot == 0); - brw_set_src1(insn, brw_imm_d(0)); - - if (intel->gen >= 5) { + brw_set_src1(p, insn, brw_imm_d(0)); + + if (intel->gen >= 7) { + insn->bits3.sampler_gen7.binding_table_index = 
binding_table_index; + insn->bits3.sampler_gen7.sampler = sampler; + insn->bits3.sampler_gen7.msg_type = msg_type; + insn->bits3.sampler_gen7.simd_mode = simd_mode; + insn->bits3.sampler_gen7.header_present = header_present; + insn->bits3.sampler_gen7.response_length = response_length; + insn->bits3.sampler_gen7.msg_length = msg_length; + insn->bits3.sampler_gen7.end_of_thread = eot; + insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER; + } else if (intel->gen >= 5) { insn->bits3.sampler_gen5.binding_table_index = binding_table_index; insn->bits3.sampler_gen5.sampler = sampler; insn->bits3.sampler_gen5.msg_type = msg_type; @@ -585,9 +710,9 @@ static void brw_set_sampler_message(struct brw_context *brw, } - -static struct brw_instruction *next_insn( struct brw_compile *p, - GLuint opcode ) +#define next_insn brw_next_insn +struct brw_instruction * +brw_next_insn(struct brw_compile *p, GLuint opcode) { struct brw_instruction *insn; @@ -608,15 +733,14 @@ static struct brw_instruction *next_insn( struct brw_compile *p, return insn; } - static struct brw_instruction *brw_alu1( struct brw_compile *p, GLuint opcode, struct brw_reg dest, struct brw_reg src ) { struct brw_instruction *insn = next_insn(p, opcode); - brw_set_dest(insn, dest); - brw_set_src0(insn, src); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); return insn; } @@ -627,9 +751,9 @@ static struct brw_instruction *brw_alu2(struct brw_compile *p, struct brw_reg src1 ) { struct brw_instruction *insn = next_insn(p, opcode); - brw_set_dest(insn, dest); - brw_set_src0(insn, src0); - brw_set_src1(insn, src1); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, src1); return insn; } @@ -658,6 +782,8 @@ struct brw_instruction *brw_##OP(struct brw_compile *p, \ * stores a rounded value (possibly the wrong way) in the dest register, but * also sets a per-channel "increment bit" in the flag register. 
A predicated * add of 1.0 fixes dest to contain the desired result. + * + * Sandybridge and later appear to round correctly without an ADD. */ #define ROUND(OP) \ void brw_##OP(struct brw_compile *p, \ @@ -666,12 +792,15 @@ void brw_##OP(struct brw_compile *p, \ { \ struct brw_instruction *rnd, *add; \ rnd = next_insn(p, BRW_OPCODE_##OP); \ - brw_set_dest(rnd, dest); \ - brw_set_src0(rnd, src); \ - rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \ + brw_set_dest(p, rnd, dest); \ + brw_set_src0(p, rnd, src); \ \ - add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ - add->header.predicate_control = BRW_PREDICATE_NORMAL; \ + if (p->brw->intel.gen < 6) { \ + /* turn on round-increments */ \ + rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \ + add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \ + add->header.predicate_control = BRW_PREDICATE_NORMAL; \ + } \ } @@ -765,9 +894,9 @@ struct brw_instruction *brw_MUL(struct brw_compile *p, void brw_NOP(struct brw_compile *p) { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP); - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src1(insn, brw_imm_ud(0x0)); + brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(p, insn, brw_imm_ud(0x0)); } @@ -794,6 +923,19 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, return insn; } +static void +push_if_stack(struct brw_compile *p, struct brw_instruction *inst) +{ + p->if_stack[p->if_stack_depth] = inst; + + p->if_stack_depth++; + if (p->if_stack_array_size <= p->if_stack_depth) { + p->if_stack_array_size *= 2; + p->if_stack = reralloc(p->mem_ctx, p->if_stack, struct brw_instruction *, + p->if_stack_array_size); + } +} + /* EU takes the value from the flag register and pushes it onto some * sort of a stack (presumably merging with any 
flag value already on * the stack). Within an if block, the flags at the top of the stack @@ -806,34 +948,32 @@ struct brw_instruction *brw_JMPI(struct brw_compile *p, * * When the matching 'endif' instruction is reached, the flags are * popped off. If the stack is now empty, normal execution resumes. - * - * No attempt is made to deal with stack overflow (14 elements?). */ -struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) +struct brw_instruction * +brw_IF(struct brw_compile *p, GLuint execute_size) { struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; - if (p->single_program_flow) { - assert(execute_size == BRW_EXECUTE_1); - - insn = next_insn(p, BRW_OPCODE_ADD); - insn->header.predicate_inverse = 1; - } else { - insn = next_insn(p, BRW_OPCODE_IF); - } + insn = next_insn(p, BRW_OPCODE_IF); /* Override the defaults for this instruction: */ if (intel->gen < 6) { - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); - } else { - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + } else if (intel->gen == 6) { + brw_set_dest(p, insn, brw_imm_w(0)); insn->bits1.branch_gen6.jump_count = 0; - brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + } else { + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, brw_imm_ud(0)); + insn->bits3.break_cont.jip = 0; + insn->bits3.break_cont.uip = 0; } insn->header.execution_size = execute_size; @@ -845,22 +985,30 @@ struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size) 
p->current->header.predicate_control = BRW_PREDICATE_NONE; + push_if_stack(p, insn); return insn; } +/* This function is only used for gen6-style IF instructions with an + * embedded comparison (conditional modifier). It is not used on gen7. + */ struct brw_instruction * -brw_IF_gen6(struct brw_compile *p, uint32_t conditional, - struct brw_reg src0, struct brw_reg src1) +gen6_IF(struct brw_compile *p, uint32_t conditional, + struct brw_reg src0, struct brw_reg src1) { struct brw_instruction *insn; insn = next_insn(p, BRW_OPCODE_IF); - brw_set_dest(insn, brw_imm_w(0)); - insn->header.execution_size = BRW_EXECUTE_8; + brw_set_dest(p, insn, brw_imm_w(0)); + if (p->compressed) { + insn->header.execution_size = BRW_EXECUTE_16; + } else { + insn->header.execution_size = BRW_EXECUTE_8; + } insn->bits1.branch_gen6.jump_count = 0; - brw_set_src0(insn, src0); - brw_set_src1(insn, src1); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, src1); assert(insn->header.compression_control == BRW_COMPRESSION_NONE); assert(insn->header.predicate_control == BRW_PREDICATE_NONE); @@ -869,162 +1017,251 @@ brw_IF_gen6(struct brw_compile *p, uint32_t conditional, if (!p->single_program_flow) insn->header.thread_control = BRW_THREAD_SWITCH; + push_if_stack(p, insn); return insn; } -struct brw_instruction *brw_ELSE(struct brw_compile *p, - struct brw_instruction *if_insn) +/** + * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs. 
+ */ +static void +convert_IF_ELSE_to_ADD(struct brw_compile *p, + struct brw_instruction *if_inst, + struct brw_instruction *else_inst) +{ + /* The next instruction (where the ENDIF would be, if it existed) */ + struct brw_instruction *next_inst = &p->store[p->nr_insn]; + + assert(p->single_program_flow); + assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); + assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); + assert(if_inst->header.execution_size == BRW_EXECUTE_1); + + /* Convert IF to an ADD instruction that moves the instruction pointer + * to the first instruction of the ELSE block. If there is no ELSE + * block, point to where ENDIF would be. Reverse the predicate. + * + * There's no need to execute an ENDIF since we don't need to do any + * stack operations, and if we're currently executing, we just want to + * continue normally. + */ + if_inst->header.opcode = BRW_OPCODE_ADD; + if_inst->header.predicate_inverse = 1; + + if (else_inst != NULL) { + /* Convert ELSE to an ADD instruction that points where the ENDIF + * would be. + */ + else_inst->header.opcode = BRW_OPCODE_ADD; + + if_inst->bits3.ud = (else_inst - if_inst + 1) * 16; + else_inst->bits3.ud = (next_inst - else_inst) * 16; + } else { + if_inst->bits3.ud = (next_inst - if_inst) * 16; + } +} + +/** + * Patch IF and ELSE instructions with appropriate jump targets. + */ +static void +patch_IF_ELSE(struct brw_compile *p, + struct brw_instruction *if_inst, + struct brw_instruction *else_inst, + struct brw_instruction *endif_inst) { struct intel_context *intel = &p->brw->intel; - struct brw_instruction *insn; - GLuint br = 1; - /* jump count is for 64bit data chunk each, so one 128bit - instruction requires 2 chunks. 
*/ + assert(!p->single_program_flow); + assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF); + assert(endif_inst != NULL); + assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE); + + unsigned br = 1; + /* Jump count is for 64bit data chunk each, so one 128bit instruction + * requires 2 chunks. + */ if (intel->gen >= 5) br = 2; - if (p->single_program_flow) { - insn = next_insn(p, BRW_OPCODE_ADD); + assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF); + endif_inst->header.execution_size = if_inst->header.execution_size; + + if (else_inst == NULL) { + /* Patch IF -> ENDIF */ + if (intel->gen < 6) { + /* Turn it into an IFF, which means no mask stack operations for + * all-false and jumping past the ENDIF. + */ + if_inst->header.opcode = BRW_OPCODE_IFF; + if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1); + if_inst->bits3.if_else.pop_count = 0; + if_inst->bits3.if_else.pad0 = 0; + } else if (intel->gen == 6) { + /* As of gen6, there is no IFF and IF must point to the ENDIF. */ + if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst); + } else { + if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); + if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst); + } } else { - insn = next_insn(p, BRW_OPCODE_ELSE); + else_inst->header.execution_size = if_inst->header.execution_size; + + /* Patch IF -> ELSE */ + if (intel->gen < 6) { + if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst); + if_inst->bits3.if_else.pop_count = 0; + if_inst->bits3.if_else.pad0 = 0; + } else if (intel->gen == 6) { + if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1); + } + + /* Patch ELSE -> ENDIF */ + if (intel->gen < 6) { + /* BRW_OPCODE_ELSE pre-gen6 should point just past the + * matching ENDIF. 
+ */ + else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1); + else_inst->bits3.if_else.pop_count = 1; + else_inst->bits3.if_else.pad0 = 0; + } else if (intel->gen == 6) { + /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */ + else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst); + } else { + /* The IF instruction's JIP should point just past the ELSE */ + if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1); + /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */ + if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst); + else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst); + } } +} + +void +brw_ELSE(struct brw_compile *p) +{ + struct intel_context *intel = &p->brw->intel; + struct brw_instruction *insn; + + insn = next_insn(p, BRW_OPCODE_ELSE); if (intel->gen < 6) { - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); - } else { - brw_set_dest(insn, brw_imm_w(0)); + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + } else if (intel->gen == 6) { + brw_set_dest(p, insn, brw_imm_w(0)); insn->bits1.branch_gen6.jump_count = 0; - brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + } else { + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, brw_imm_ud(0)); + insn->bits3.break_cont.jip = 0; + insn->bits3.break_cont.uip = 0; } insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = if_insn->header.execution_size; insn->header.mask_control = BRW_MASK_ENABLE; if (!p->single_program_flow) 
insn->header.thread_control = BRW_THREAD_SWITCH; - /* Patch the if instruction to point at this instruction. - */ - if (p->single_program_flow) { - assert(if_insn->header.opcode == BRW_OPCODE_ADD); - - if_insn->bits3.ud = (insn - if_insn + 1) * 16; - } else { - assert(if_insn->header.opcode == BRW_OPCODE_IF); - - if (intel->gen < 6) { - if_insn->bits3.if_else.jump_count = br * (insn - if_insn); - if_insn->bits3.if_else.pop_count = 0; - if_insn->bits3.if_else.pad0 = 0; - } else { - if_insn->bits1.branch_gen6.jump_count = br * (insn - if_insn + 1); - } - } - - return insn; + push_if_stack(p, insn); } -void brw_ENDIF(struct brw_compile *p, - struct brw_instruction *patch_insn) +void +brw_ENDIF(struct brw_compile *p) { struct intel_context *intel = &p->brw->intel; - GLuint br = 1; + struct brw_instruction *insn; + struct brw_instruction *else_inst = NULL; + struct brw_instruction *if_inst = NULL; + + /* Pop the IF and (optional) ELSE instructions from the stack */ + p->if_stack_depth--; + if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) { + else_inst = p->if_stack[p->if_stack_depth]; + p->if_stack_depth--; + } + if_inst = p->if_stack[p->if_stack_depth]; - if (intel->gen >= 5) - br = 2; - if (p->single_program_flow) { - /* In single program flow mode, there's no need to execute an ENDIF, - * since we don't need to do any stack operations, and if we're executing - * currently, we want to just continue executing. - */ - struct brw_instruction *next = &p->store[p->nr_insn]; + /* ENDIF is useless; don't bother emitting it. 
*/ + convert_IF_ELSE_to_ADD(p, if_inst, else_inst); + return; + } - assert(patch_insn->header.opcode == BRW_OPCODE_ADD); + insn = next_insn(p, BRW_OPCODE_ENDIF); - patch_insn->bits3.ud = (next - patch_insn) * 16; + if (intel->gen < 6) { + brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); + brw_set_src1(p, insn, brw_imm_d(0x0)); + } else if (intel->gen == 6) { + brw_set_dest(p, insn, brw_imm_w(0)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); } else { - struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF); - - if (intel->gen < 6) { - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD)); - brw_set_src1(insn, brw_imm_d(0x0)); - } else { - brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_W)); - brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - } - - insn->header.compression_control = BRW_COMPRESSION_NONE; - insn->header.execution_size = patch_insn->header.execution_size; - insn->header.mask_control = BRW_MASK_ENABLE; - insn->header.thread_control = BRW_THREAD_SWITCH; - - if (intel->gen < 6) - assert(patch_insn->bits3.if_else.jump_count == 0); - else - assert(patch_insn->bits1.branch_gen6.jump_count == 0); + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, brw_imm_ud(0)); + } - /* Patch the if or else instructions to point at this or the next - * instruction respectively. - */ - if (patch_insn->header.opcode == BRW_OPCODE_IF) { - if (intel->gen < 6) { - /* Turn it into an IFF, which means no mask stack operations for - * all-false and jumping past the ENDIF. 
- */ - patch_insn->header.opcode = BRW_OPCODE_IFF; - patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); - patch_insn->bits3.if_else.pop_count = 0; - patch_insn->bits3.if_else.pad0 = 0; - } else { - /* As of gen6, there is no IFF and IF must point to the ENDIF. */ - patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn); - } - } else { - assert(patch_insn->header.opcode == BRW_OPCODE_ELSE); - if (intel->gen < 6) { - /* BRW_OPCODE_ELSE pre-gen6 should point just past the - * matching ENDIF. - */ - patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1); - patch_insn->bits3.if_else.pop_count = 1; - patch_insn->bits3.if_else.pad0 = 0; - } else { - /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */ - patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn); - } - } + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.mask_control = BRW_MASK_ENABLE; + insn->header.thread_control = BRW_THREAD_SWITCH; - /* Also pop item off the stack in the endif instruction: - */ - if (intel->gen < 6) { - insn->bits3.if_else.jump_count = 0; - insn->bits3.if_else.pop_count = 1; - insn->bits3.if_else.pad0 = 0; - } else { - insn->bits1.branch_gen6.jump_count = 2; - } + /* Also pop item off the stack in the endif instruction: */ + if (intel->gen < 6) { + insn->bits3.if_else.jump_count = 0; + insn->bits3.if_else.pop_count = 1; + insn->bits3.if_else.pad0 = 0; + } else if (intel->gen == 6) { + insn->bits1.branch_gen6.jump_count = 2; + } else { + insn->bits3.break_cont.jip = 2; } + patch_IF_ELSE(p, if_inst, else_inst, insn); } struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; + insn = next_insn(p, BRW_OPCODE_BREAK); - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); + if (intel->gen >= 6) { + brw_set_dest(p, insn, retype(brw_null_reg(), 
BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, brw_imm_d(0x0)); + } else { + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + insn->bits3.if_else.pad0 = 0; + insn->bits3.if_else.pop_count = pop_count; + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + + return insn; +} + +struct brw_instruction *gen6_CONT(struct brw_compile *p, + struct brw_instruction *do_insn) +{ + struct brw_instruction *insn; + + insn = next_insn(p, BRW_OPCODE_CONTINUE); + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); + insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = BRW_EXECUTE_8; - /* insn->header.mask_control = BRW_MASK_DISABLE; */ - insn->bits3.if_else.pad0 = 0; - insn->bits3.if_else.pop_count = pop_count; return insn; } @@ -1032,9 +1269,9 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) { struct brw_instruction *insn; insn = next_insn(p, BRW_OPCODE_CONTINUE); - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0x0)); insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = BRW_EXECUTE_8; /* insn->header.mask_control = BRW_MASK_DISABLE; */ @@ -1044,19 +1281,35 @@ struct brw_instruction *brw_CONT(struct brw_compile *p, int pop_count) } /* DO/WHILE loop: + * + * The DO/WHILE is just an unterminated loop -- break or continue are + * used for control within the loop. We have a few ways they can be + * done. 
+ * + * For uniform control flow, the WHILE is just a jump, so ADD ip, ip, + * jip and no DO instruction. + * + * For non-uniform control flow pre-gen6, there's a DO instruction to + * push the mask, and a WHILE to jump back, and BREAK to get out and + * pop the mask. + * + * For gen6, there's no more mask stack, so no need for DO. WHILE + * just points back to the first instruction of the loop. */ struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size) { - if (p->single_program_flow) { + struct intel_context *intel = &p->brw->intel; + + if (intel->gen >= 6 || p->single_program_flow) { return &p->store[p->nr_insn]; } else { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO); /* Override the defaults for this instruction: */ - brw_set_dest(insn, brw_null_reg()); - brw_set_src0(insn, brw_null_reg()); - brw_set_src1(insn, brw_null_reg()); + brw_set_dest(p, insn, brw_null_reg()); + brw_set_src0(p, insn, brw_null_reg()); + brw_set_src1(p, insn, brw_null_reg()); insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.execution_size = execute_size; @@ -1080,34 +1333,50 @@ struct brw_instruction *brw_WHILE(struct brw_compile *p, if (intel->gen >= 5) br = 2; - if (p->single_program_flow) - insn = next_insn(p, BRW_OPCODE_ADD); - else + if (intel->gen >= 7) { insn = next_insn(p, BRW_OPCODE_WHILE); - brw_set_dest(insn, brw_ip_reg()); - brw_set_src0(insn, brw_ip_reg()); - brw_set_src1(insn, brw_imm_d(0x0)); + brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, brw_imm_ud(0)); + insn->bits3.break_cont.jip = br * (do_insn - insn); - insn->header.compression_control = BRW_COMPRESSION_NONE; + insn->header.execution_size = BRW_EXECUTE_8; + } else if (intel->gen == 6) { + insn = next_insn(p, BRW_OPCODE_WHILE); - if (p->single_program_flow) { - insn->header.execution_size = BRW_EXECUTE_1; + brw_set_dest(p, insn, brw_imm_w(0)); + 
insn->bits1.branch_gen6.jump_count = br * (do_insn - insn); + brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); + brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D)); - insn->bits3.d = (do_insn - insn) * 16; + insn->header.execution_size = BRW_EXECUTE_8; } else { - insn->header.execution_size = do_insn->header.execution_size; + if (p->single_program_flow) { + insn = next_insn(p, BRW_OPCODE_ADD); - assert(do_insn->header.opcode == BRW_OPCODE_DO); - insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); - insn->bits3.if_else.pop_count = 0; - insn->bits3.if_else.pad0 = 0; - } + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16)); + insn->header.execution_size = BRW_EXECUTE_1; + } else { + insn = next_insn(p, BRW_OPCODE_WHILE); -/* insn->header.mask_control = BRW_MASK_ENABLE; */ + assert(do_insn->header.opcode == BRW_OPCODE_DO); + + brw_set_dest(p, insn, brw_ip_reg()); + brw_set_src0(p, insn, brw_ip_reg()); + brw_set_src1(p, insn, brw_imm_d(0)); + + insn->header.execution_size = do_insn->header.execution_size; + insn->bits3.if_else.jump_count = br * (do_insn - insn + 1); + insn->bits3.if_else.pop_count = 0; + insn->bits3.if_else.pad0 = 0; + } + } + insn->header.compression_control = BRW_COMPRESSION_NONE; + p->current->header.predicate_control = BRW_PREDICATE_NONE; - /* insn->header.mask_control = BRW_MASK_DISABLE; */ - p->current->header.predicate_control = BRW_PREDICATE_NONE; return insn; } @@ -1145,9 +1414,9 @@ void brw_CMP(struct brw_compile *p, struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP); insn->header.destreg__conditionalmod = conditional; - brw_set_dest(insn, dest); - brw_set_src0(insn, src0); - brw_set_src1(insn, src1); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, src1); /* guess_execution_size(insn, src0); */ @@ -1170,9 +1439,9 @@ void brw_WAIT (struct brw_compile *p) struct 
brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT); struct brw_reg src = brw_notification_1_reg(); - brw_set_dest(insn, src); - brw_set_src0(insn, src); - brw_set_src1(insn, brw_null_reg()); + brw_set_dest(p, insn, src); + brw_set_src0(p, insn, src); + brw_set_src1(p, insn, brw_null_reg()); insn->header.execution_size = 0; /* must */ insn->header.predicate_control = 0; insn->header.compression_control = 0; @@ -1205,6 +1474,10 @@ void brw_math( struct brw_compile *p, assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1); assert(src.hstride == BRW_HORIZONTAL_STRIDE_1); + /* Source modifiers are ignored for extended math instructions. */ + assert(!src.negate); + assert(!src.abs); + if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT && function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) { assert(src.type == BRW_REGISTER_TYPE_F); @@ -1214,10 +1487,11 @@ void brw_math( struct brw_compile *p, * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ insn->header.destreg__conditionalmod = function; + insn->header.saturate = saturate; - brw_set_dest(insn, dest); - brw_set_src0(insn, src); - brw_set_src1(insn, brw_null_reg()); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + brw_set_src1(p, insn, brw_null_reg()); } else { struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1; @@ -1228,9 +1502,9 @@ void brw_math( struct brw_compile *p, insn->header.predicate_control = 0; insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_dest(insn, dest); - brw_set_src0(insn, src); - brw_set_math_message(p->brw, + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + brw_set_math_message(p, insn, msg_length, response_length, function, @@ -1270,14 +1544,20 @@ void brw_math2(struct brw_compile *p, assert(src1.type == BRW_REGISTER_TYPE_F); } + /* Source modifiers are ignored for extended math instructions. 
*/ + assert(!src0.negate); + assert(!src0.abs); + assert(!src1.negate); + assert(!src1.abs); + /* Math is the same ISA format as other opcodes, except that CondModifier * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ insn->header.destreg__conditionalmod = function; - brw_set_dest(insn, dest); - brw_set_src0(insn, src0); - brw_set_src1(insn, src1); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, src1); } /** @@ -1304,10 +1584,15 @@ void brw_math_16( struct brw_compile *p, * becomes FC[3:0] and ThreadCtrl becomes FC[5:4]. */ insn->header.destreg__conditionalmod = function; + insn->header.saturate = saturate; + + /* Source modifiers are ignored for extended math instructions. */ + assert(!src.negate); + assert(!src.abs); - brw_set_dest(insn, dest); - brw_set_src0(insn, src); - brw_set_src1(insn, brw_null_reg()); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + brw_set_src1(p, insn, brw_null_reg()); return; } @@ -1320,9 +1605,9 @@ void brw_math_16( struct brw_compile *p, insn = next_insn(p, BRW_OPCODE_SEND); insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_dest(insn, dest); - brw_set_src0(insn, src); - brw_set_math_message(p->brw, + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); + brw_set_math_message(p, insn, msg_length, response_length, function, @@ -1337,9 +1622,9 @@ void brw_math_16( struct brw_compile *p, insn->header.compression_control = BRW_COMPRESSION_2NDHALF; insn->header.destreg__conditionalmod = msg_reg_nr+1; - brw_set_dest(insn, offset(dest,1)); - brw_set_src0(insn, src); - brw_set_math_message(p->brw, + brw_set_dest(p, insn, offset(dest,1)); + brw_set_src0(p, insn, src); + brw_set_math_message(p, insn, msg_length, response_length, function, @@ -1365,9 +1650,12 @@ void brw_oword_block_write_scratch(struct brw_compile *p, GLuint offset) { struct intel_context *intel = &p->brw->intel; - uint32_t msg_control; + uint32_t msg_control, msg_type; int mlen; + if (intel->gen >= 6) + 
offset /= 16; + mrf = retype(mrf, BRW_REGISTER_TYPE_UD); if (num_regs == 1) { @@ -1432,14 +1720,23 @@ void brw_oword_block_write_scratch(struct brw_compile *p, send_commit_msg = 1; } - brw_set_dest(insn, dest); - brw_set_src0(insn, brw_null_reg()); + brw_set_dest(p, insn, dest); + if (intel->gen >= 6) { + brw_set_src0(p, insn, mrf); + } else { + brw_set_src0(p, insn, brw_null_reg()); + } + + if (intel->gen >= 6) + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; + else + msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE; - brw_set_dp_write_message(p->brw, + brw_set_dp_write_message(p, insn, 255, /* binding table index (255=stateless) */ msg_control, - BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */ + msg_type, mlen, GL_TRUE, /* header_present */ 0, /* pixel scoreboard */ @@ -1464,9 +1761,13 @@ brw_oword_block_read_scratch(struct brw_compile *p, int num_regs, GLuint offset) { + struct intel_context *intel = &p->brw->intel; uint32_t msg_control; int rlen; + if (intel->gen >= 6) + offset /= 16; + mrf = retype(mrf, BRW_REGISTER_TYPE_UD); dest = retype(dest, BRW_REGISTER_TYPE_UW); @@ -1502,18 +1803,21 @@ brw_oword_block_read_scratch(struct brw_compile *p, insn->header.compression_control = BRW_COMPRESSION_NONE; insn->header.destreg__conditionalmod = mrf.nr; - brw_set_dest(insn, dest); /* UW? */ - brw_set_src0(insn, brw_null_reg()); + brw_set_dest(p, insn, dest); /* UW? 
*/ + if (intel->gen >= 6) { + brw_set_src0(p, insn, mrf); + } else { + brw_set_src0(p, insn, brw_null_reg()); + } - brw_set_dp_read_message(p->brw, + brw_set_dp_read_message(p, insn, 255, /* binding table index (255=stateless) */ msg_control, BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ - 1, /* target cache (render/scratch) */ + BRW_DATAPORT_READ_TARGET_RENDER_CACHE, 1, /* msg_length */ - rlen, - 0); /* eot */ + rlen); } } @@ -1528,6 +1832,12 @@ void brw_oword_block_read(struct brw_compile *p, uint32_t offset, uint32_t bind_table_index) { + struct intel_context *intel = &p->brw->intel; + + /* On newer hardware, offset is in units of owords. */ + if (intel->gen >= 6) + offset /= 16; + mrf = retype(mrf, BRW_REGISTER_TYPE_UD); brw_push_insn_state(p); @@ -1550,22 +1860,64 @@ void brw_oword_block_read(struct brw_compile *p, /* cast dest to a uword[8] vector */ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); - brw_set_dest(insn, dest); - brw_set_src0(insn, brw_null_reg()); + brw_set_dest(p, insn, dest); + if (intel->gen >= 6) { + brw_set_src0(p, insn, mrf); + } else { + brw_set_src0(p, insn, brw_null_reg()); + } - brw_set_dp_read_message(p->brw, + brw_set_dp_read_message(p, insn, bind_table_index, BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, - 0, /* source cache = data cache */ + BRW_DATAPORT_READ_TARGET_DATA_CACHE, 1, /* msg_length */ - 1, /* response_length (1 reg, 2 owords!) */ - 0); /* eot */ + 1); /* response_length (1 reg, 2 owords!) */ brw_pop_insn_state(p); } +/** + * Read a set of dwords from the data port Data Cache (const buffer). + * + * Location (in buffer) appears as UD offsets in the register after + * the provided mrf header reg. 
+ */ +void brw_dword_scattered_read(struct brw_compile *p, + struct brw_reg dest, + struct brw_reg mrf, + uint32_t bind_table_index) +{ + mrf = retype(mrf, BRW_REGISTER_TYPE_UD); + + brw_push_insn_state(p); + brw_set_predicate_control(p, BRW_PREDICATE_NONE); + brw_set_compression_control(p, BRW_COMPRESSION_NONE); + brw_set_mask_control(p, BRW_MASK_DISABLE); + brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + brw_pop_insn_state(p); + + struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); + insn->header.destreg__conditionalmod = mrf.nr; + + /* cast dest to a uword[8] vector */ + dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW); + + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, brw_null_reg()); + + brw_set_dp_read_message(p, + insn, + bind_table_index, + BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS, + BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ, + BRW_DATAPORT_READ_TARGET_DATA_CACHE, + 2, /* msg_length */ + 1); /* response_length */ +} + /** @@ -1578,29 +1930,22 @@ void brw_dp_READ_4_vs(struct brw_compile *p, GLuint location, GLuint bind_table_index) { + struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint msg_reg_nr = 1; - struct brw_reg b; - /* - printf("vs const read msg, location %u, msg_reg_nr %d\n", - location, msg_reg_nr); - */ + if (intel->gen >= 6) + location /= 16; /* Setup MRF[1] with location/offset into const buffer */ brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_predicate_control(p, BRW_PREDICATE_NONE); - - /* XXX I think we're setting all the dwords of MRF[1] to 'location'. - * when the docs say only dword[2] should be set. Hmmm. But it works. 
- */ - b = brw_message_reg(msg_reg_nr); - b = retype(b, BRW_REGISTER_TYPE_UD); - /*b = get_element_ud(b, 2);*/ - brw_MOV(p, b, brw_imm_ud(location)); - + brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2), + BRW_REGISTER_TYPE_UD), + brw_imm_ud(location)); brw_pop_insn_state(p); insn = next_insn(p, BRW_OPCODE_SEND); @@ -1610,18 +1955,21 @@ void brw_dp_READ_4_vs(struct brw_compile *p, insn->header.destreg__conditionalmod = msg_reg_nr; insn->header.mask_control = BRW_MASK_DISABLE; - brw_set_dest(insn, dest); - brw_set_src0(insn, brw_null_reg()); + brw_set_dest(p, insn, dest); + if (intel->gen >= 6) { + brw_set_src0(p, insn, brw_message_reg(msg_reg_nr)); + } else { + brw_set_src0(p, insn, brw_null_reg()); + } - brw_set_dp_read_message(p->brw, + brw_set_dp_read_message(p, insn, bind_table_index, 0, BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ - 0, /* source cache = data cache */ + BRW_DATAPORT_READ_TARGET_DATA_CACHE, 1, /* msg_length */ - 1, /* response_length (1 Oword) */ - 0); /* eot */ + 1); /* response_length (1 Oword) */ } /** @@ -1635,10 +1983,12 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, GLuint bind_table_index) { struct intel_context *intel = &p->brw->intel; + struct brw_reg src = brw_vec8_grf(0, 0); int msg_type; /* Setup MRF[1] with offset into const buffer */ brw_push_insn_state(p); + brw_set_access_mode(p, BRW_ALIGN_1); brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); brw_set_predicate_control(p, BRW_PREDICATE_NONE); @@ -1646,10 +1996,11 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, /* M1.0 is block offset 0, M1.4 is block offset 1, all other * fields ignored. 
*/ - brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), + brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D), addr_reg, brw_imm_d(offset)); brw_pop_insn_state(p); + gen6_resolve_implied_move(p, &src, 0); struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = BRW_PREDICATE_NONE; @@ -1657,56 +2008,62 @@ void brw_dp_READ_4_vs_relative(struct brw_compile *p, insn->header.destreg__conditionalmod = 0; insn->header.mask_control = BRW_MASK_DISABLE; - brw_set_dest(insn, dest); - brw_set_src0(insn, brw_vec8_grf(0, 0)); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src); - if (intel->gen == 6) + if (intel->gen >= 6) msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; else if (intel->gen == 5 || intel->is_g4x) msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; else msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ; - brw_set_dp_read_message(p->brw, + brw_set_dp_read_message(p, insn, bind_table_index, BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD, msg_type, - 0, /* source cache = data cache */ + BRW_DATAPORT_READ_TARGET_DATA_CACHE, 2, /* msg_length */ - 1, /* response_length */ - 0); /* eot */ + 1); /* response_length */ } void brw_fb_WRITE(struct brw_compile *p, int dispatch_width, - struct brw_reg dest, GLuint msg_reg_nr, struct brw_reg src0, GLuint binding_table_index, GLuint msg_length, GLuint response_length, - GLboolean eot) + GLboolean eot, + GLboolean header_present) { struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; GLuint msg_control, msg_type; - GLboolean header_present = GL_TRUE; + struct brw_reg dest; - insn = next_insn(p, BRW_OPCODE_SEND); - insn->header.predicate_control = 0; /* XXX */ + if (dispatch_width == 16) + dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW); + else + dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW); + + if (intel->gen >= 6 && binding_table_index == 0) { + insn = next_insn(p, BRW_OPCODE_SENDC); + } else { + insn 
= next_insn(p, BRW_OPCODE_SEND); + } + /* The execution mask is ignored for render target writes. */ + insn->header.predicate_control = 0; insn->header.compression_control = BRW_COMPRESSION_NONE; if (intel->gen >= 6) { - if (msg_length == 4) - header_present = GL_FALSE; - /* headerless version, just submit color payload */ src0 = brw_message_reg(msg_reg_nr); - msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6; + msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE; } else { insn->header.destreg__conditionalmod = msg_reg_nr; @@ -1718,9 +2075,9 @@ void brw_fb_WRITE(struct brw_compile *p, else msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; - brw_set_dest(insn, dest); - brw_set_src0(insn, src0); - brw_set_dp_write_message(p->brw, + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_dp_write_message(p, insn, binding_table_index, msg_control, @@ -1796,7 +2153,7 @@ void brw_SAMPLE(struct brw_compile *p, struct brw_reg m1 = brw_message_reg(msg_reg_nr); - guess_execution_size(p->current, dest); + guess_execution_size(p, p->current, dest); if (p->current->header.execution_size == BRW_EXECUTE_16) dispatch_16 = GL_TRUE; @@ -1807,7 +2164,8 @@ void brw_SAMPLE(struct brw_compile *p, brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_set_mask_control(p, BRW_MASK_DISABLE); - brw_MOV(p, m1, brw_vec8_grf(0,0)); + brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD), + retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD)); brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12)); brw_pop_insn_state(p); @@ -1827,17 +2185,7 @@ void brw_SAMPLE(struct brw_compile *p, { struct brw_instruction *insn; - /* Sandybridge doesn't have the implied move for SENDs, - * and the first message register index comes from src0. - */ - if (intel->gen >= 6) { - brw_push_insn_state(p); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - /* m1 contains header? 
*/ - brw_MOV(p, brw_message_reg(msg_reg_nr), src0); - brw_pop_insn_state(p); - src0 = brw_message_reg(msg_reg_nr); - } + gen6_resolve_implied_move(p, &src0, msg_reg_nr); insn = next_insn(p, BRW_OPCODE_SEND); insn->header.predicate_control = 0; /* XXX */ @@ -1845,9 +2193,9 @@ void brw_SAMPLE(struct brw_compile *p, if (intel->gen < 6) insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_dest(insn, dest); - brw_set_src0(insn, src0); - brw_set_sampler_message(p->brw, insn, + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_sampler_message(p, insn, binding_table_index, sampler, msg_type, @@ -1865,7 +2213,8 @@ void brw_SAMPLE(struct brw_compile *p, */ brw_push_insn_state(p); brw_set_compression_control(p, BRW_COMPRESSION_NONE); - brw_MOV(p, reg, reg); + brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD), + retype(reg, BRW_REGISTER_TYPE_UD)); brw_pop_insn_state(p); } @@ -1891,29 +2240,31 @@ void brw_urb_WRITE(struct brw_compile *p, struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; - /* Sandybridge doesn't have the implied move for SENDs, - * and the first message register index comes from src0. 
- */ - if (intel->gen >= 6) { + gen6_resolve_implied_move(p, &src0, msg_reg_nr); + + if (intel->gen == 7) { + /* Enable Channel Masks in the URB_WRITE_HWORD message header */ brw_push_insn_state(p); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_MOV(p, brw_message_reg(msg_reg_nr), src0); + brw_set_access_mode(p, BRW_ALIGN_1); + brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5), + BRW_REGISTER_TYPE_UD), + retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD), + brw_imm_ud(0xff00)); brw_pop_insn_state(p); - src0 = brw_message_reg(msg_reg_nr); } insn = next_insn(p, BRW_OPCODE_SEND); assert(msg_length < BRW_MAX_MRF); - brw_set_dest(insn, dest); - brw_set_src0(insn, src0); - brw_set_src1(insn, brw_imm_d(0)); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, brw_imm_d(0)); if (intel->gen < 6) insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_urb_message(p->brw, + brw_set_urb_message(p, insn, allocate, used, @@ -1925,6 +2276,84 @@ void brw_urb_WRITE(struct brw_compile *p, swizzle); } +static int +brw_find_next_block_end(struct brw_compile *p, int start) +{ + int ip; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_ENDIF: + case BRW_OPCODE_ELSE: + case BRW_OPCODE_WHILE: + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* There is no DO instruction on gen6, so to find the end of the loop + * we have to see if the loop is jumping back before our start + * instruction. + */ +static int +brw_find_loop_end(struct brw_compile *p, int start) +{ + struct intel_context *intel = &p->brw->intel; + int ip; + int br = 2; + + for (ip = start + 1; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + if (insn->header.opcode == BRW_OPCODE_WHILE) { + int jip = intel->gen == 6 ? 
insn->bits1.branch_gen6.jump_count + : insn->bits3.break_cont.jip; + if (ip + jip / br <= start) + return ip; + } + } + assert(!"not reached"); + return start + 1; +} + +/* After program generation, go back and update the UIP and JIP of + * BREAK and CONT instructions to their correct locations. + */ +void +brw_set_uip_jip(struct brw_compile *p) +{ + struct intel_context *intel = &p->brw->intel; + int ip; + int br = 2; + + if (intel->gen < 6) + return; + + for (ip = 0; ip < p->nr_insn; ip++) { + struct brw_instruction *insn = &p->store[ip]; + + switch (insn->header.opcode) { + case BRW_OPCODE_BREAK: + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + /* Gen7 UIP points to WHILE; Gen6 points just after it */ + insn->bits3.break_cont.uip = + br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0)); + break; + case BRW_OPCODE_CONTINUE: + insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip); + insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip); + + assert(insn->bits3.break_cont.uip != 0); + assert(insn->bits3.break_cont.jip != 0); + break; + } + } +} + void brw_ff_sync(struct brw_compile *p, struct brw_reg dest, GLuint msg_reg_nr, @@ -1936,27 +2365,17 @@ void brw_ff_sync(struct brw_compile *p, struct intel_context *intel = &p->brw->intel; struct brw_instruction *insn; - /* Sandybridge doesn't have the implied move for SENDs, - * and the first message register index comes from src0. 
- */ - if (intel->gen >= 6) { - brw_push_insn_state(p); - brw_set_mask_control( p, BRW_MASK_DISABLE ); - brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD), - retype(src0, BRW_REGISTER_TYPE_UD)); - brw_pop_insn_state(p); - src0 = brw_message_reg(msg_reg_nr); - } + gen6_resolve_implied_move(p, &src0, msg_reg_nr); insn = next_insn(p, BRW_OPCODE_SEND); - brw_set_dest(insn, dest); - brw_set_src0(insn, src0); - brw_set_src1(insn, brw_imm_d(0)); + brw_set_dest(p, insn, dest); + brw_set_src0(p, insn, src0); + brw_set_src1(p, insn, brw_imm_d(0)); if (intel->gen < 6) insn->header.destreg__conditionalmod = msg_reg_nr; - brw_set_ff_sync_message(p->brw, + brw_set_ff_sync_message(p, insn, allocate, response_length,