#include "brw_defines.h"
#include "brw_eu.h"
-
-
+#include "glsl/ralloc.h"
/***********************************************************************
* Internal helper for constructing instructions
*/
-static void guess_execution_size( struct brw_instruction *insn,
- struct brw_reg reg )
+static void guess_execution_size(struct brw_compile *p,
+ struct brw_instruction *insn,
+ struct brw_reg reg)
{ /* Choose insn's execution size from the width of the given register. */
- if (reg.width == BRW_WIDTH_8 &&
- insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
+ if (reg.width == BRW_WIDTH_8 && p->compressed) /* 8-wide register with p->compressed set: use a 16-wide execution size */
insn->header.execution_size = BRW_EXECUTE_16;
else
insn->header.execution_size = reg.width; /* note - definitions are compatible */
}
-static void brw_set_dest( struct brw_instruction *insn,
- struct brw_reg dest )
+/**
+ * Prior to Sandybridge, the SEND instruction accepted non-MRF source
+ * registers, implicitly moving the operand to a message register.
+ *
+ * On Sandybridge, this is no longer the case. This function performs the
+ * explicit move; it should be called before emitting a SEND instruction.
+ *
+ * On hardware older than gen6 this returns immediately and leaves *src
+ * untouched. Otherwise, unless *src is the ARF null register, a MOV is
+ * emitted (mask disabled, uncompressed, both operands retyped to UD) that
+ * copies *src into message register msg_reg_nr; the instruction state is
+ * pushed before and popped after so those overrides do not leak. In both
+ * gen6+ cases *src is then overwritten to name that message register.
+ *
+ * p is the compile state the MOV is emitted into, src is the in/out
+ * operand, and msg_reg_nr is the destination message register number.
+ */
+void
+gen6_resolve_implied_move(struct brw_compile *p,
+ struct brw_reg *src,
+ GLuint msg_reg_nr)
+{
+ struct intel_context *intel = &p->brw->intel;
+ if (intel->gen < 6)
+ return;
+
+ if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
+ brw_push_insn_state(p);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
+ retype(*src, BRW_REGISTER_TYPE_UD));
+ brw_pop_insn_state(p);
+ }
+ *src = brw_message_reg(msg_reg_nr);
+}
+
+static void
+gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
+{ /* On gen7 only, rewrite a message-register operand in place as a
+ * general register whose number is 111 higher; any other generation or
+ * register file leaves *reg untouched.
+ * NOTE(review): presumably gen7 has no separate MRF file and a range at
+ * the top of the GRF stands in for it -- confirm, and confirm that 111
+ * is the intended base offset. */
+ struct intel_context *intel = &p->brw->intel;
+ if (intel->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
+ reg->file = BRW_GENERAL_REGISTER_FILE;
+ reg->nr += 111;
+ }
+}
+
+
+void
+brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+ struct brw_reg dest)
{ /* Encode dest into insn's destination fields and derive the execution size from it. */
/* Only ARF and MRF destinations are exempt from the 128-register limit. */
if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
dest.file != BRW_MESSAGE_REGISTER_FILE)
assert(dest.nr < 128);
+ gen7_convert_mrf_to_grf(p, &dest); /* on gen7 an MRF dest is rewritten as a GRF */
+
insn->bits1.da1.dest_reg_file = dest.file;
insn->bits1.da1.dest_reg_type = dest.type;
insn->bits1.da1.dest_address_mode = dest.address_mode;
/* Set the execution size based on dest.width and p->compressed
* (see guess_execution_size()):
*/
- guess_execution_size(insn, dest);
+ guess_execution_size(p, insn, dest);
}
extern int reg_type_size[];
/* 10. Check destination issues. */
}
-static void brw_set_src0( struct brw_instruction *insn,
- struct brw_reg reg )
+/**
+ * Encode reg as the first source operand of insn.
+ *
+ * Registers outside the architecture register file must have a number
+ * below 128. On gen7 a message-register source is rewritten as a general
+ * register (gen7_convert_mrf_to_grf()) before validation and encoding.
+ */
+void
+brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+ struct brw_reg reg)
{
- if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
+ /* Compare the register FILE (not its data type) against the ARF
+ * constant, matching the check in brw_set_dest().
+ */
+ if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
+ gen7_convert_mrf_to_grf(p, &reg);
+
validate_reg(insn, reg);
insn->bits1.da1.src0_reg_file = reg.file;
}
-void brw_set_src1( struct brw_instruction *insn,
- struct brw_reg reg )
+/* Encode reg as the second source operand of insn. Message registers are
+ * not accepted here, and the register number must be below 128.
+ */
+void brw_set_src1(struct brw_compile *p,
+ struct brw_instruction *insn,
+ struct brw_reg reg)
{
assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
assert(reg.nr < 128);
+ gen7_convert_mrf_to_grf(p, &reg);
+
validate_reg(insn, reg);
insn->bits1.da1.src1_reg_file = reg.file;
-static void brw_set_math_message( struct brw_context *brw,
+static void brw_set_math_message( struct brw_compile *p,
struct brw_instruction *insn,
GLuint msg_length,
GLuint response_length,
GLboolean saturate,
GLuint dataType )
{
+ struct brw_context *brw = p->brw;
struct intel_context *intel = &brw->intel;
- brw_set_src1(insn, brw_imm_d(0));
+ brw_set_src1(p, insn, brw_imm_d(0));
if (intel->gen == 5) {
insn->bits3.math_gen5.function = function;
}
-static void brw_set_ff_sync_message(struct brw_context *brw,
+static void brw_set_ff_sync_message(struct brw_compile *p,
struct brw_instruction *insn,
GLboolean allocate,
GLuint response_length,
GLboolean end_of_thread)
{
+ struct brw_context *brw = p->brw;
struct intel_context *intel = &brw->intel;
- brw_set_src1(insn, brw_imm_d(0));
+ brw_set_src1(p, insn, brw_imm_d(0));
insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
}
}
-static void brw_set_urb_message( struct brw_context *brw,
+static void brw_set_urb_message( struct brw_compile *p,
struct brw_instruction *insn,
GLboolean allocate,
GLboolean used,
GLuint offset,
GLuint swizzle_control )
{
+ struct brw_context *brw = p->brw;
struct intel_context *intel = &brw->intel;
- brw_set_src1(insn, brw_imm_d(0));
-
- if (intel->gen >= 5) {
- insn->bits3.urb_gen5.opcode = 0; /* ? */
+ brw_set_src1(p, insn, brw_imm_d(0));
+
+ if (intel->gen == 7) {
+ insn->bits3.urb_gen7.opcode = 0; /* URB_WRITE_HWORD */
+ insn->bits3.urb_gen7.offset = offset;
+ assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
+ insn->bits3.urb_gen7.swizzle_control = swizzle_control;
+ /* per_slot_offset = 0 makes it ignore offsets in message header */
+ insn->bits3.urb_gen7.per_slot_offset = 0;
+ insn->bits3.urb_gen7.complete = complete;
+ insn->bits3.urb_gen7.header_present = 1;
+ insn->bits3.urb_gen7.response_length = response_length;
+ insn->bits3.urb_gen7.msg_length = msg_length;
+ insn->bits3.urb_gen7.end_of_thread = end_of_thread;
+ insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_URB;
+ } else if (intel->gen >= 5) {
+ insn->bits3.urb_gen5.opcode = 0; /* URB_WRITE */
insn->bits3.urb_gen5.offset = offset;
insn->bits3.urb_gen5.swizzle_control = swizzle_control;
insn->bits3.urb_gen5.allocate = allocate;
}
}
-static void brw_set_dp_write_message( struct brw_context *brw,
- struct brw_instruction *insn,
- GLuint binding_table_index,
- GLuint msg_control,
- GLuint msg_type,
- GLuint msg_length,
- GLboolean header_present,
- GLuint pixel_scoreboard_clear,
- GLuint response_length,
- GLuint end_of_thread,
- GLuint send_commit_msg)
+void
+brw_set_dp_write_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint msg_control,
+ GLuint msg_type,
+ GLuint msg_length,
+ GLboolean header_present,
+ GLuint pixel_scoreboard_clear,
+ GLuint response_length,
+ GLuint end_of_thread,
+ GLuint send_commit_msg)
{
+ struct brw_context *brw = p->brw;
struct intel_context *intel = &brw->intel;
- brw_set_src1(insn, brw_imm_ud(0));
-
- if (intel->gen >= 6) {
- insn->bits3.dp_render_cache.binding_table_index = binding_table_index;
- insn->bits3.dp_render_cache.msg_control = msg_control;
- insn->bits3.dp_render_cache.pixel_scoreboard_clear = pixel_scoreboard_clear;
- insn->bits3.dp_render_cache.msg_type = msg_type;
- insn->bits3.dp_render_cache.send_commit_msg = send_commit_msg;
- insn->bits3.dp_render_cache.header_present = header_present;
- insn->bits3.dp_render_cache.response_length = response_length;
- insn->bits3.dp_render_cache.msg_length = msg_length;
- insn->bits3.dp_render_cache.end_of_thread = end_of_thread;
- insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
- /* XXX really need below? */
- insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
- insn->bits2.send_gen5.end_of_thread = end_of_thread;
+ brw_set_src1(p, insn, brw_imm_ud(0));
+
+ if (intel->gen >= 7) {
+ insn->bits3.gen7_dp.binding_table_index = binding_table_index;
+ insn->bits3.gen7_dp.msg_control = msg_control;
+ insn->bits3.gen7_dp.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.gen7_dp.msg_type = msg_type;
+ insn->bits3.gen7_dp.header_present = header_present;
+ insn->bits3.gen7_dp.response_length = response_length;
+ insn->bits3.gen7_dp.msg_length = msg_length;
+ insn->bits3.gen7_dp.end_of_thread = end_of_thread;
+
+ /* We always use the render cache for write messages */
+ insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
+ } else if (intel->gen == 6) {
+ insn->bits3.gen6_dp.binding_table_index = binding_table_index;
+ insn->bits3.gen6_dp.msg_control = msg_control;
+ insn->bits3.gen6_dp.pixel_scoreboard_clear = pixel_scoreboard_clear;
+ insn->bits3.gen6_dp.msg_type = msg_type;
+ insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
+ insn->bits3.gen6_dp.header_present = header_present;
+ insn->bits3.gen6_dp.response_length = response_length;
+ insn->bits3.gen6_dp.msg_length = msg_length;
+ insn->bits3.gen6_dp.end_of_thread = end_of_thread;
+
+ /* We always use the render cache for write messages */
+ insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
} else if (intel->gen == 5) {
insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
insn->bits3.dp_write_gen5.msg_control = msg_control;
}
}
-static void brw_set_dp_read_message( struct brw_context *brw,
- struct brw_instruction *insn,
- GLuint binding_table_index,
- GLuint msg_control,
- GLuint msg_type,
- GLuint target_cache,
- GLuint msg_length,
- GLuint response_length,
- GLuint end_of_thread )
+void
+brw_set_dp_read_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint msg_control,
+ GLuint msg_type,
+ GLuint target_cache,
+ GLuint msg_length,
+ GLuint response_length)
{
+ struct brw_context *brw = p->brw;
struct intel_context *intel = &brw->intel;
- brw_set_src1(insn, brw_imm_d(0));
-
- if (intel->gen == 5) {
+ brw_set_src1(p, insn, brw_imm_d(0));
+
+ if (intel->gen >= 7) {
+ insn->bits3.gen7_dp.binding_table_index = binding_table_index;
+ insn->bits3.gen7_dp.msg_control = msg_control;
+ insn->bits3.gen7_dp.pixel_scoreboard_clear = 0;
+ insn->bits3.gen7_dp.msg_type = msg_type;
+ insn->bits3.gen7_dp.header_present = 1;
+ insn->bits3.gen7_dp.response_length = response_length;
+ insn->bits3.gen7_dp.msg_length = msg_length;
+ insn->bits3.gen7_dp.end_of_thread = 0;
+ insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_CONST_CACHE;
+ } else if (intel->gen == 6) {
+ uint32_t target_function;
+
+ if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
+ target_function = GEN6_MESSAGE_TARGET_DP_SAMPLER_CACHE;
+ else
+ target_function = GEN6_MESSAGE_TARGET_DP_RENDER_CACHE;
+
+ insn->bits3.gen6_dp.binding_table_index = binding_table_index;
+ insn->bits3.gen6_dp.msg_control = msg_control;
+ insn->bits3.gen6_dp.pixel_scoreboard_clear = 0;
+ insn->bits3.gen6_dp.msg_type = msg_type;
+ insn->bits3.gen6_dp.send_commit_msg = 0;
+ insn->bits3.gen6_dp.header_present = 1;
+ insn->bits3.gen6_dp.response_length = response_length;
+ insn->bits3.gen6_dp.msg_length = msg_length;
+ insn->bits3.gen6_dp.end_of_thread = 0;
+ insn->header.destreg__conditionalmod = target_function;
+ } else if (intel->gen == 5) {
insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
insn->bits3.dp_read_gen5.msg_control = msg_control;
insn->bits3.dp_read_gen5.msg_type = msg_type;
insn->bits3.dp_read_gen5.response_length = response_length;
insn->bits3.dp_read_gen5.msg_length = msg_length;
insn->bits3.dp_read_gen5.pad1 = 0;
- insn->bits3.dp_read_gen5.end_of_thread = end_of_thread;
+ insn->bits3.dp_read_gen5.end_of_thread = 0;
insn->bits2.send_gen5.sfid = BRW_MESSAGE_TARGET_DATAPORT_READ;
- insn->bits2.send_gen5.end_of_thread = end_of_thread;
+ insn->bits2.send_gen5.end_of_thread = 0;
+ } else if (intel->is_g4x) {
+ insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
+ insn->bits3.dp_read_g4x.msg_control = msg_control; /*8:10*/
+ insn->bits3.dp_read_g4x.msg_type = msg_type; /*11:13*/
+ insn->bits3.dp_read_g4x.target_cache = target_cache; /*14:15*/
+ insn->bits3.dp_read_g4x.response_length = response_length; /*16:19*/
+ insn->bits3.dp_read_g4x.msg_length = msg_length; /*20:23*/
+ insn->bits3.dp_read_g4x.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
+ insn->bits3.dp_read_g4x.pad1 = 0;
+ insn->bits3.dp_read_g4x.end_of_thread = 0;
} else {
insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
insn->bits3.dp_read.msg_control = msg_control; /*8:11*/
insn->bits3.dp_read.msg_length = msg_length; /*20:23*/
insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ; /*24:27*/
insn->bits3.dp_read.pad1 = 0; /*28:30*/
- insn->bits3.dp_read.end_of_thread = end_of_thread; /*31*/
+ insn->bits3.dp_read.end_of_thread = 0; /*31*/
}
}
-static void brw_set_sampler_message(struct brw_context *brw,
+static void brw_set_sampler_message(struct brw_compile *p,
struct brw_instruction *insn,
GLuint binding_table_index,
GLuint sampler,
GLuint header_present,
GLuint simd_mode)
{
+ struct brw_context *brw = p->brw;
struct intel_context *intel = &brw->intel;
assert(eot == 0);
- brw_set_src1(insn, brw_imm_d(0));
-
- if (intel->gen >= 5) {
+ brw_set_src1(p, insn, brw_imm_d(0));
+
+ if (intel->gen >= 7) {
+ insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
+ insn->bits3.sampler_gen7.sampler = sampler;
+ insn->bits3.sampler_gen7.msg_type = msg_type;
+ insn->bits3.sampler_gen7.simd_mode = simd_mode;
+ insn->bits3.sampler_gen7.header_present = header_present;
+ insn->bits3.sampler_gen7.response_length = response_length;
+ insn->bits3.sampler_gen7.msg_length = msg_length;
+ insn->bits3.sampler_gen7.end_of_thread = eot;
+ insn->header.destreg__conditionalmod = BRW_MESSAGE_TARGET_SAMPLER;
+ } else if (intel->gen >= 5) {
insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
insn->bits3.sampler_gen5.sampler = sampler;
insn->bits3.sampler_gen5.msg_type = msg_type;
}
-
-static struct brw_instruction *next_insn( struct brw_compile *p,
- GLuint opcode )
+#define next_insn brw_next_insn
+struct brw_instruction *
+brw_next_insn(struct brw_compile *p, GLuint opcode)
{
struct brw_instruction *insn;
return insn;
}
-
/* Emit a one-source instruction with the given opcode: allocate the next
 * instruction slot, encode dest and src into it, and return it.
 */
static struct brw_instruction *brw_alu1( struct brw_compile *p,
GLuint opcode,
struct brw_reg dest,
struct brw_reg src )
{
struct brw_instruction *insn = next_insn(p, opcode);
- brw_set_dest(insn, dest);
- brw_set_src0(insn, src);
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src);
return insn;
}
struct brw_reg src1 )
{
struct brw_instruction *insn = next_insn(p, opcode);
- brw_set_dest(insn, dest);
- brw_set_src0(insn, src0);
- brw_set_src1(insn, src1);
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src0);
+ brw_set_src1(p, insn, src1);
return insn;
}
* stores a rounded value (possibly the wrong way) in the dest register, but
* also sets a per-channel "increment bit" in the flag register. A predicated
* add of 1.0 fixes dest to contain the desired result.
+ *
+ * Sandybridge and later appear to round correctly without an ADD.
*/
#define ROUND(OP) \
void brw_##OP(struct brw_compile *p, \
{ \
struct brw_instruction *rnd, *add; \
rnd = next_insn(p, BRW_OPCODE_##OP); \
- brw_set_dest(rnd, dest); \
- brw_set_src0(rnd, src); \
- rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \
+ brw_set_dest(p, rnd, dest); \
+ brw_set_src0(p, rnd, src); \
\
- add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
- add->header.predicate_control = BRW_PREDICATE_NORMAL; \
+ if (p->brw->intel.gen < 6) { \
+ /* turn on round-increments */ \
+ rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
+ add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
+ add->header.predicate_control = BRW_PREDICATE_NORMAL; \
+ } \
}
/* Emit a NOP. Its destination and first source are both g0 typed as UD,
 * and its second source is an immediate zero.
 */
void brw_NOP(struct brw_compile *p)
{
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
- brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
- brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
- brw_set_src1(insn, brw_imm_ud(0x0));
+ brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src1(p, insn, brw_imm_ud(0x0));
}
return insn;
}
+static void
+push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
+{ /* Record inst on top of the compile's IF/ELSE stack. Once the new
+ * depth reaches the array's capacity the array is doubled with
+ * reralloc(), so the slot for the next push exists before it is written.
+ * NOTE(review): the reralloc() result is not checked, and doubling only
+ * grows the array if if_stack_array_size starts out non-zero -- confirm
+ * against the code that initialises the compile state. */
+ p->if_stack[p->if_stack_depth] = inst;
+
+ p->if_stack_depth++;
+ if (p->if_stack_array_size <= p->if_stack_depth) {
+ p->if_stack_array_size *= 2;
+ p->if_stack = reralloc(p->mem_ctx, p->if_stack, struct brw_instruction *,
+ p->if_stack_array_size);
+ }
+}
+
/* EU takes the value from the flag register and pushes it onto some
* sort of a stack (presumably merging with any flag value already on
* the stack). Within an if block, the flags at the top of the stack
*
* When the matching 'endif' instruction is reached, the flags are
* popped off. If the stack is now empty, normal execution resumes.
- *
- * No attempt is made to deal with stack overflow (14 elements?).
*/
-struct brw_instruction *brw_IF(struct brw_compile *p, GLuint execute_size)
+struct brw_instruction *
+brw_IF(struct brw_compile *p, GLuint execute_size)
{ /* Emit an IF with no jump target yet and push it on the IF stack; brw_ENDIF() pops it and fills in the target. */
struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
- if (p->single_program_flow) {
- assert(execute_size == BRW_EXECUTE_1);
-
- insn = next_insn(p, BRW_OPCODE_ADD);
- insn->header.predicate_inverse = 1;
- } else {
- insn = next_insn(p, BRW_OPCODE_IF);
- }
+ insn = next_insn(p, BRW_OPCODE_IF); /* always an IF here, including in single-program-flow mode */
/* Override the defaults for this instruction:
*/
if (intel->gen < 6) {
- brw_set_dest(insn, brw_ip_reg());
- brw_set_src0(insn, brw_ip_reg());
- brw_set_src1(insn, brw_imm_d(0x0));
- } else {
- brw_set_dest(insn, brw_imm_w(0));
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d(0x0));
+ } else if (intel->gen == 6) {
+ brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
- brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ } else { /* gen7 and later: both jump fields (JIP/UIP) start at zero */
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, brw_imm_ud(0));
+ insn->bits3.break_cont.jip = 0;
+ insn->bits3.break_cont.uip = 0;
}
insn->header.execution_size = execute_size;
p->current->header.predicate_control = BRW_PREDICATE_NONE;
+ push_if_stack(p, insn);
return insn;
}
+/* This function is only used for gen6-style IF instructions with an
+ * embedded comparison (conditional modifier). It is not used on gen7.
+ *
+ * The execution size is 16 when p->compressed is set and 8 otherwise.
+ * The jump count is left at zero and the instruction is pushed on the IF
+ * stack so that brw_ENDIF() can patch the target later. The instruction
+ * must be uncompressed and unpredicated (asserted).
+ */
struct brw_instruction *
-brw_IF_gen6(struct brw_compile *p, uint32_t conditional,
- struct brw_reg src0, struct brw_reg src1)
+gen6_IF(struct brw_compile *p, uint32_t conditional,
+ struct brw_reg src0, struct brw_reg src1)
{
struct brw_instruction *insn;
insn = next_insn(p, BRW_OPCODE_IF);
- brw_set_dest(insn, brw_imm_w(0));
- insn->header.execution_size = BRW_EXECUTE_8;
+ brw_set_dest(p, insn, brw_imm_w(0));
+ if (p->compressed) {
+ insn->header.execution_size = BRW_EXECUTE_16;
+ } else {
+ insn->header.execution_size = BRW_EXECUTE_8;
+ }
insn->bits1.branch_gen6.jump_count = 0;
- brw_set_src0(insn, src0);
- brw_set_src1(insn, src1);
+ brw_set_src0(p, insn, src0);
+ brw_set_src1(p, insn, src1);
assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
if (!p->single_program_flow)
insn->header.thread_control = BRW_THREAD_SWITCH;
+ push_if_stack(p, insn);
return insn;
}
-struct brw_instruction *brw_ELSE(struct brw_compile *p,
- struct brw_instruction *if_insn)
+/**
+ * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
+ *
+ * if_inst is required and must still carry the IF opcode with an
+ * execution size of 1; else_inst may be NULL when there is no ELSE block.
+ * The branch distance is stored in bits3.ud as an instruction count
+ * multiplied by 16, measured from the instruction being rewritten.
+ * NOTE(review): 16 is presumably the size in bytes of one instruction --
+ * confirm.
+ */
+static void
+convert_IF_ELSE_to_ADD(struct brw_compile *p,
+ struct brw_instruction *if_inst,
+ struct brw_instruction *else_inst)
+{
+ /* The next instruction (where the ENDIF would be, if it existed) */
+ struct brw_instruction *next_inst = &p->store[p->nr_insn];
+
+ assert(p->single_program_flow);
+ assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
+ assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
+ assert(if_inst->header.execution_size == BRW_EXECUTE_1);
+
+ /* Convert IF to an ADD instruction that moves the instruction pointer
+ * to the first instruction of the ELSE block. If there is no ELSE
+ * block, point to where ENDIF would be. Reverse the predicate.
+ *
+ * There's no need to execute an ENDIF since we don't need to do any
+ * stack operations, and if we're currently executing, we just want to
+ * continue normally.
+ */
+ if_inst->header.opcode = BRW_OPCODE_ADD;
+ if_inst->header.predicate_inverse = 1;
+
+ if (else_inst != NULL) {
+ /* Convert ELSE to an ADD instruction that points where the ENDIF
+ * would be.
+ */
+ else_inst->header.opcode = BRW_OPCODE_ADD;
+
+ if_inst->bits3.ud = (else_inst - if_inst + 1) * 16; /* +1: land just past the converted ELSE */
+ else_inst->bits3.ud = (next_inst - else_inst) * 16;
+ } else {
+ if_inst->bits3.ud = (next_inst - if_inst) * 16;
+ }
+}
+
+/**
+ * Patch IF and ELSE instructions with appropriate jump targets.
+ *
+ * Must not be called in single-program-flow mode (asserted). if_inst and
+ * endif_inst are required; else_inst may be NULL when the IF has no ELSE.
+ * The ENDIF, and the ELSE when present, also inherit the IF's execution
+ * size here. Distances are instruction counts scaled by br, which is 2
+ * on gen5 and later and 1 otherwise. Which instruction fields receive
+ * the distances depends on the generation: bits3.if_else before gen6,
+ * bits1.branch_gen6 on gen6, and bits3.break_cont (JIP/UIP) after that.
+ */
+static void
+patch_IF_ELSE(struct brw_compile *p,
+ struct brw_instruction *if_inst,
+ struct brw_instruction *else_inst,
+ struct brw_instruction *endif_inst)
{
struct intel_context *intel = &p->brw->intel;
- struct brw_instruction *insn;
- GLuint br = 1;
- /* jump count is for 64bit data chunk each, so one 128bit
- instruction requires 2 chunks. */
+ assert(!p->single_program_flow);
+ assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
+ assert(endif_inst != NULL);
+ assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
+
+ unsigned br = 1;
+ /* Jump count is for 64bit data chunk each, so one 128bit instruction
+ * requires 2 chunks.
+ */
if (intel->gen >= 5)
br = 2;
- if (p->single_program_flow) {
- insn = next_insn(p, BRW_OPCODE_ADD);
+ assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
+ endif_inst->header.execution_size = if_inst->header.execution_size;
+
+ if (else_inst == NULL) {
+ /* Patch IF -> ENDIF */
+ if (intel->gen < 6) {
+ /* Turn it into an IFF, which means no mask stack operations for
+ * all-false and jumping past the ENDIF.
+ */
+ if_inst->header.opcode = BRW_OPCODE_IFF;
+ if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
+ if_inst->bits3.if_else.pop_count = 0;
+ if_inst->bits3.if_else.pad0 = 0;
+ } else if (intel->gen == 6) {
+ /* As of gen6, there is no IFF and IF must point to the ENDIF. */
+ if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
+ } else { /* later generations: with no ELSE, UIP and JIP both point at the ENDIF */
+ if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
+ if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
+ }
} else {
- insn = next_insn(p, BRW_OPCODE_ELSE);
+ else_inst->header.execution_size = if_inst->header.execution_size;
+
+ /* Patch IF -> ELSE (later generations patch the IF in the ELSE -> ENDIF step below) */
+ if (intel->gen < 6) {
+ if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
+ if_inst->bits3.if_else.pop_count = 0;
+ if_inst->bits3.if_else.pad0 = 0;
+ } else if (intel->gen == 6) {
+ if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
+ }
+
+ /* Patch ELSE -> ENDIF */
+ if (intel->gen < 6) {
+ /* BRW_OPCODE_ELSE pre-gen6 should point just past the
+ * matching ENDIF.
+ */
+ else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
+ else_inst->bits3.if_else.pop_count = 1;
+ else_inst->bits3.if_else.pad0 = 0;
+ } else if (intel->gen == 6) {
+ /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
+ else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
+ } else {
+ /* The IF instruction's JIP should point just past the ELSE */
+ if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
+ /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
+ if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
+ else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
+ }
}
+}
+
+void
+brw_ELSE(struct brw_compile *p)
+{ /* Emit an ELSE without a jump target and push it on the IF stack.
+ * The matching IF is no longer passed in or patched here: brw_ENDIF()
+ * pops both instructions and fills in their targets (and the ELSE's
+ * execution size) once the end of the block is known. */
+ struct intel_context *intel = &p->brw->intel;
+ struct brw_instruction *insn;
+
+ insn = next_insn(p, BRW_OPCODE_ELSE);
if (intel->gen < 6) {
- brw_set_dest(insn, brw_ip_reg());
- brw_set_src0(insn, brw_ip_reg());
- brw_set_src1(insn, brw_imm_d(0x0));
- } else {
- brw_set_dest(insn, brw_imm_w(0));
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d(0x0));
+ } else if (intel->gen == 6) {
+ brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
- brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ } else { /* gen7 and later: both jump fields (JIP/UIP) start at zero */
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, brw_imm_ud(0));
+ insn->bits3.break_cont.jip = 0;
+ insn->bits3.break_cont.uip = 0;
}
insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.execution_size = if_insn->header.execution_size;
insn->header.mask_control = BRW_MASK_ENABLE;
if (!p->single_program_flow)
insn->header.thread_control = BRW_THREAD_SWITCH;
- /* Patch the if instruction to point at this instruction.
- */
- if (p->single_program_flow) {
- assert(if_insn->header.opcode == BRW_OPCODE_ADD);
-
- if_insn->bits3.ud = (insn - if_insn + 1) * 16;
- } else {
- assert(if_insn->header.opcode == BRW_OPCODE_IF);
-
- if (intel->gen < 6) {
- if_insn->bits3.if_else.jump_count = br * (insn - if_insn);
- if_insn->bits3.if_else.pop_count = 0;
- if_insn->bits3.if_else.pad0 = 0;
- } else {
- if_insn->bits1.branch_gen6.jump_count = br * (insn - if_insn + 1);
- }
- }
-
- return insn;
+ push_if_stack(p, insn);
}
-void brw_ENDIF(struct brw_compile *p,
- struct brw_instruction *patch_insn)
+/**
+ * Close the innermost open IF block.
+ *
+ * Pops the IF, and the ELSE if one was emitted, from the compile's IF
+ * stack. In single-program-flow mode no ENDIF is emitted and the popped
+ * instructions are converted to ADDs instead. Otherwise an ENDIF is
+ * emitted and the popped instructions are patched to jump to it.
+ *
+ * Every call must be balanced by an earlier brw_IF()/gen6_IF(); an
+ * unmatched call would index below the start of the stack, so the depth
+ * is asserted before each pop.
+ */
+void
+brw_ENDIF(struct brw_compile *p)
{
struct intel_context *intel = &p->brw->intel;
- GLuint br = 1;
+ struct brw_instruction *insn;
+ struct brw_instruction *else_inst = NULL;
+ struct brw_instruction *if_inst = NULL;
+
+ /* Pop the IF and (optional) ELSE instructions from the stack */
+ assert(p->if_stack_depth > 0);
+ p->if_stack_depth--;
+ if (p->if_stack[p->if_stack_depth]->header.opcode == BRW_OPCODE_ELSE) {
+ else_inst = p->if_stack[p->if_stack_depth];
+ /* An ELSE always sits on top of the IF that opened the block. */
+ assert(p->if_stack_depth > 0);
+ p->if_stack_depth--;
+ }
+ if_inst = p->if_stack[p->if_stack_depth];
- if (intel->gen >= 5)
- br = 2;
-
if (p->single_program_flow) {
- /* In single program flow mode, there's no need to execute an ENDIF,
- * since we don't need to do any stack operations, and if we're executing
- * currently, we want to just continue executing.
- */
- struct brw_instruction *next = &p->store[p->nr_insn];
+ /* ENDIF is useless; don't bother emitting it. */
+ convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
+ return;
+ }
- assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
+ insn = next_insn(p, BRW_OPCODE_ENDIF);
- patch_insn->bits3.ud = (next - patch_insn) * 16;
+ if (intel->gen < 6) {
+ brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
+ brw_set_src1(p, insn, brw_imm_d(0x0));
+ } else if (intel->gen == 6) {
+ brw_set_dest(p, insn, brw_imm_w(0));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
} else {
- struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
-
- if (intel->gen < 6) {
- brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
- brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
- brw_set_src1(insn, brw_imm_d(0x0));
- } else {
- brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_W));
- brw_set_src0(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- brw_set_src1(insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- }
-
- insn->header.compression_control = BRW_COMPRESSION_NONE;
- insn->header.execution_size = patch_insn->header.execution_size;
- insn->header.mask_control = BRW_MASK_ENABLE;
- insn->header.thread_control = BRW_THREAD_SWITCH;
-
- if (intel->gen < 6)
- assert(patch_insn->bits3.if_else.jump_count == 0);
- else
- assert(patch_insn->bits1.branch_gen6.jump_count == 0);
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, brw_imm_ud(0));
+ }
- /* Patch the if or else instructions to point at this or the next
- * instruction respectively.
- */
- if (patch_insn->header.opcode == BRW_OPCODE_IF) {
- if (intel->gen < 6) {
- /* Turn it into an IFF, which means no mask stack operations for
- * all-false and jumping past the ENDIF.
- */
- patch_insn->header.opcode = BRW_OPCODE_IFF;
- patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
- patch_insn->bits3.if_else.pop_count = 0;
- patch_insn->bits3.if_else.pad0 = 0;
- } else {
- /* As of gen6, there is no IFF and IF must point to the ENDIF. */
- patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
- }
- } else {
- assert(patch_insn->header.opcode == BRW_OPCODE_ELSE);
- if (intel->gen < 6) {
- /* BRW_OPCODE_ELSE pre-gen6 should point just past the
- * matching ENDIF.
- */
- patch_insn->bits3.if_else.jump_count = br * (insn - patch_insn + 1);
- patch_insn->bits3.if_else.pop_count = 1;
- patch_insn->bits3.if_else.pad0 = 0;
- } else {
- /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
- patch_insn->bits1.branch_gen6.jump_count = br * (insn - patch_insn);
- }
- }
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.mask_control = BRW_MASK_ENABLE;
+ insn->header.thread_control = BRW_THREAD_SWITCH;
- /* Also pop item off the stack in the endif instruction:
- */
- if (intel->gen < 6) {
- insn->bits3.if_else.jump_count = 0;
- insn->bits3.if_else.pop_count = 1;
- insn->bits3.if_else.pad0 = 0;
- } else {
- insn->bits1.branch_gen6.jump_count = 2;
- }
+ /* Also pop item off the stack in the endif instruction: */
+ if (intel->gen < 6) {
+ insn->bits3.if_else.jump_count = 0;
+ insn->bits3.if_else.pop_count = 1;
+ insn->bits3.if_else.pad0 = 0;
+ } else if (intel->gen == 6) {
+ insn->bits1.branch_gen6.jump_count = 2;
+ } else {
+ insn->bits3.break_cont.jip = 2;
}
+ /* The ENDIF's execution size is copied from the IF in patch_IF_ELSE(). */
+ patch_IF_ELSE(p, if_inst, else_inst, insn);
}
/* Emit a BREAK and return it. On gen6 and later the destination and
 * first source are the null register; on earlier hardware they are the
 * IP register and pop_count is stored in the instruction. The
 * instruction is always uncompressed with an execution size of 8.
 */
struct brw_instruction *brw_BREAK(struct brw_compile *p, int pop_count)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
+
insn = next_insn(p, BRW_OPCODE_BREAK);
- brw_set_dest(insn, brw_ip_reg());
- brw_set_src0(insn, brw_ip_reg());
- brw_set_src1(insn, brw_imm_d(0x0));
+ if (intel->gen >= 6) { /* pop_count is not used on this path */
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, brw_imm_d(0x0));
+ } else {
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d(0x0));
+ insn->bits3.if_else.pad0 = 0;
+ insn->bits3.if_else.pop_count = pop_count;
+ }
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = BRW_EXECUTE_8;
+
+ return insn;
+}
+
+struct brw_instruction *gen6_CONT(struct brw_compile *p,
+ struct brw_instruction *do_insn)
+{ /* Emit a CONTINUE (uncompressed, execution size 8) and return it.
+ * NOTE(review): do_insn is not referenced anywhere in this body.
+ * NOTE(review): dest and src0 are set twice below -- first to the null
+ * register, then to the IP register -- so only the IP-register values
+ * take effect in the final encoding. brw_BREAK() uses the null register
+ * on gen6+; confirm which pair is intended here. */
+ struct brw_instruction *insn;
+
+ insn = next_insn(p, BRW_OPCODE_CONTINUE);
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d(0x0));
+
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
- /* insn->header.mask_control = BRW_MASK_DISABLE; */
- insn->bits3.if_else.pad0 = 0;
- insn->bits3.if_else.pop_count = pop_count;
return insn;
}
{
struct brw_instruction *insn;
insn = next_insn(p, BRW_OPCODE_CONTINUE);
- brw_set_dest(insn, brw_ip_reg());
- brw_set_src0(insn, brw_ip_reg());
- brw_set_src1(insn, brw_imm_d(0x0));
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d(0x0));
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
/* insn->header.mask_control = BRW_MASK_DISABLE; */
}
/* DO/WHILE loop:
+ *
+ * The DO/WHILE is just an unterminated loop -- break or continue are
+ * used for control within the loop. We have a few ways they can be
+ * done.
+ *
+ * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
+ * jip and no DO instruction.
+ *
+ * For non-uniform control flow pre-gen6, there's a DO instruction to
+ * push the mask, and a WHILE to jump back, and BREAK to get out and
+ * pop the mask.
+ *
+ * For gen6, there's no more mask stack, so no need for DO. WHILE
+ * just points back to the first instruction of the loop.
*/
struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
{
- if (p->single_program_flow) {
+ struct intel_context *intel = &p->brw->intel;
+
+ if (intel->gen >= 6 || p->single_program_flow) {
return &p->store[p->nr_insn];
} else {
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
/* Override the defaults for this instruction:
*/
- brw_set_dest(insn, brw_null_reg());
- brw_set_src0(insn, brw_null_reg());
- brw_set_src1(insn, brw_null_reg());
+ brw_set_dest(p, insn, brw_null_reg());
+ brw_set_src0(p, insn, brw_null_reg());
+ brw_set_src1(p, insn, brw_null_reg());
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = execute_size;
if (intel->gen >= 5)
br = 2;
- if (p->single_program_flow)
- insn = next_insn(p, BRW_OPCODE_ADD);
- else
+ if (intel->gen >= 7) {
insn = next_insn(p, BRW_OPCODE_WHILE);
- brw_set_dest(insn, brw_ip_reg());
- brw_set_src0(insn, brw_ip_reg());
- brw_set_src1(insn, brw_imm_d(0x0));
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, brw_imm_ud(0));
+ insn->bits3.break_cont.jip = br * (do_insn - insn);
- insn->header.compression_control = BRW_COMPRESSION_NONE;
+ insn->header.execution_size = BRW_EXECUTE_8;
+ } else if (intel->gen == 6) {
+ insn = next_insn(p, BRW_OPCODE_WHILE);
- if (p->single_program_flow) {
- insn->header.execution_size = BRW_EXECUTE_1;
+ brw_set_dest(p, insn, brw_imm_w(0));
+ insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- insn->bits3.d = (do_insn - insn) * 16;
+ insn->header.execution_size = BRW_EXECUTE_8;
} else {
- insn->header.execution_size = do_insn->header.execution_size;
+ if (p->single_program_flow) {
+ insn = next_insn(p, BRW_OPCODE_ADD);
- assert(do_insn->header.opcode == BRW_OPCODE_DO);
- insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
- insn->bits3.if_else.pop_count = 0;
- insn->bits3.if_else.pad0 = 0;
- }
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
+ insn->header.execution_size = BRW_EXECUTE_1;
+ } else {
+ insn = next_insn(p, BRW_OPCODE_WHILE);
-/* insn->header.mask_control = BRW_MASK_ENABLE; */
+ assert(do_insn->header.opcode == BRW_OPCODE_DO);
+
+ brw_set_dest(p, insn, brw_ip_reg());
+ brw_set_src0(p, insn, brw_ip_reg());
+ brw_set_src1(p, insn, brw_imm_d(0));
+
+ insn->header.execution_size = do_insn->header.execution_size;
+ insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
+ insn->bits3.if_else.pop_count = 0;
+ insn->bits3.if_else.pad0 = 0;
+ }
+ }
+ insn->header.compression_control = BRW_COMPRESSION_NONE;
+ p->current->header.predicate_control = BRW_PREDICATE_NONE;
- /* insn->header.mask_control = BRW_MASK_DISABLE; */
- p->current->header.predicate_control = BRW_PREDICATE_NONE;
return insn;
}
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
insn->header.destreg__conditionalmod = conditional;
- brw_set_dest(insn, dest);
- brw_set_src0(insn, src0);
- brw_set_src1(insn, src1);
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src0);
+ brw_set_src1(p, insn, src1);
/* guess_execution_size(insn, src0); */
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
struct brw_reg src = brw_notification_1_reg();
- brw_set_dest(insn, src);
- brw_set_src0(insn, src);
- brw_set_src1(insn, brw_null_reg());
+ brw_set_dest(p, insn, src);
+ brw_set_src0(p, insn, src);
+ brw_set_src1(p, insn, brw_null_reg());
insn->header.execution_size = 0; /* must */
insn->header.predicate_control = 0;
insn->header.compression_control = 0;
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
+ /* Source modifiers are ignored for extended math instructions. */
+ assert(!src.negate);
+ assert(!src.abs);
+
if (function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT &&
function != BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
assert(src.type == BRW_REGISTER_TYPE_F);
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
+ insn->header.saturate = saturate;
- brw_set_dest(insn, dest);
- brw_set_src0(insn, src);
- brw_set_src1(insn, brw_null_reg());
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src);
+ brw_set_src1(p, insn, brw_null_reg());
} else {
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
GLuint msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
insn->header.predicate_control = 0;
insn->header.destreg__conditionalmod = msg_reg_nr;
- brw_set_dest(insn, dest);
- brw_set_src0(insn, src);
- brw_set_math_message(p->brw,
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src);
+ brw_set_math_message(p,
insn,
msg_length, response_length,
function,
assert(src1.type == BRW_REGISTER_TYPE_F);
}
+ /* Source modifiers are ignored for extended math instructions. */
+ assert(!src0.negate);
+ assert(!src0.abs);
+ assert(!src1.negate);
+ assert(!src1.abs);
+
/* Math is the same ISA format as other opcodes, except that CondModifier
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
- brw_set_dest(insn, dest);
- brw_set_src0(insn, src0);
- brw_set_src1(insn, src1);
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src0);
+ brw_set_src1(p, insn, src1);
}
/**
* becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
*/
insn->header.destreg__conditionalmod = function;
+ insn->header.saturate = saturate;
+
+ /* Source modifiers are ignored for extended math instructions. */
+ assert(!src.negate);
+ assert(!src.abs);
- brw_set_dest(insn, dest);
- brw_set_src0(insn, src);
- brw_set_src1(insn, brw_null_reg());
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src);
+ brw_set_src1(p, insn, brw_null_reg());
return;
}
insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.destreg__conditionalmod = msg_reg_nr;
- brw_set_dest(insn, dest);
- brw_set_src0(insn, src);
- brw_set_math_message(p->brw,
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src);
+ brw_set_math_message(p,
insn,
msg_length, response_length,
function,
insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
insn->header.destreg__conditionalmod = msg_reg_nr+1;
- brw_set_dest(insn, offset(dest,1));
- brw_set_src0(insn, src);
- brw_set_math_message(p->brw,
+ brw_set_dest(p, insn, offset(dest,1));
+ brw_set_src0(p, insn, src);
+ brw_set_math_message(p,
insn,
msg_length, response_length,
function,
GLuint offset)
{
struct intel_context *intel = &p->brw->intel;
- uint32_t msg_control;
+ uint32_t msg_control, msg_type;
int mlen;
+ if (intel->gen >= 6)
+ offset /= 16;
+
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
if (num_regs == 1) {
send_commit_msg = 1;
}
- brw_set_dest(insn, dest);
- brw_set_src0(insn, brw_null_reg());
+ brw_set_dest(p, insn, dest);
+ if (intel->gen >= 6) {
+ brw_set_src0(p, insn, mrf);
+ } else {
+ brw_set_src0(p, insn, brw_null_reg());
+ }
+
+ if (intel->gen >= 6)
+ msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
+ else
+ msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
- brw_set_dp_write_message(p->brw,
+ brw_set_dp_write_message(p,
insn,
255, /* binding table index (255=stateless) */
msg_control,
- BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
+ msg_type,
mlen,
GL_TRUE, /* header_present */
0, /* pixel scoreboard */
int num_regs,
GLuint offset)
{
+ struct intel_context *intel = &p->brw->intel;
uint32_t msg_control;
int rlen;
+ if (intel->gen >= 6)
+ offset /= 16;
+
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
dest = retype(dest, BRW_REGISTER_TYPE_UW);
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.destreg__conditionalmod = mrf.nr;
- brw_set_dest(insn, dest); /* UW? */
- brw_set_src0(insn, brw_null_reg());
+ brw_set_dest(p, insn, dest); /* UW? */
+ if (intel->gen >= 6) {
+ brw_set_src0(p, insn, mrf);
+ } else {
+ brw_set_src0(p, insn, brw_null_reg());
+ }
- brw_set_dp_read_message(p->brw,
+ brw_set_dp_read_message(p,
insn,
255, /* binding table index (255=stateless) */
msg_control,
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
- 1, /* target cache (render/scratch) */
+ BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
1, /* msg_length */
- rlen,
- 0); /* eot */
+ rlen);
}
}
uint32_t offset,
uint32_t bind_table_index)
{
+ struct intel_context *intel = &p->brw->intel;
+
+ /* On newer hardware, offset is in units of owords. */
+ if (intel->gen >= 6)
+ offset /= 16;
+
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
brw_push_insn_state(p);
/* cast dest to a uword[8] vector */
dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
- brw_set_dest(insn, dest);
- brw_set_src0(insn, brw_null_reg());
+ brw_set_dest(p, insn, dest);
+ if (intel->gen >= 6) {
+ brw_set_src0(p, insn, mrf);
+ } else {
+ brw_set_src0(p, insn, brw_null_reg());
+ }
- brw_set_dp_read_message(p->brw,
+ brw_set_dp_read_message(p,
insn,
bind_table_index,
BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
- 0, /* source cache = data cache */
+ BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1, /* msg_length */
- 1, /* response_length (1 reg, 2 owords!) */
- 0); /* eot */
+ 1); /* response_length (1 reg, 2 owords!) */
brw_pop_insn_state(p);
}
+/**
+ * Read a set of dwords from the data port Data Cache (const buffer).
+ *
+ * Location (in buffer) appears as UD offsets in the register after
+ * the provided mrf header reg.
+ *
+ * The header is built by MOVing brw_vec8_grf(0, 0) into mrf with
+ * predication off, compression off and the mask disabled; the pushed
+ * instruction state is popped again before the SEND is emitted.
+ *
+ * p:                compile context the instructions are appended to
+ * dest:             destination register; retyped to a vec8 of UW
+ * mrf:              message header register; retyped to UD, and its
+ *                   register number is written to the SEND's
+ *                   destreg__conditionalmod field
+ * bind_table_index: binding table index handed to the read message
+ */
+void brw_dword_scattered_read(struct brw_compile *p,
+ struct brw_reg dest,
+ struct brw_reg mrf,
+ uint32_t bind_table_index)
+{
+ mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
+
+ brw_push_insn_state(p); /* the overrides below apply only to the header MOV */
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+ brw_pop_insn_state(p);
+
+ struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+ insn->header.destreg__conditionalmod = mrf.nr;
+
+ /* cast dest to a uword[8] vector */
+ dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
+
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, brw_null_reg()); /* NOTE(review): sibling read helpers in this file pass mrf as src0 when intel->gen >= 6; confirm null is intended here */
+
+ brw_set_dp_read_message(p,
+ insn,
+ bind_table_index,
+ BRW_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS,
+ BRW_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ,
+ BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+ 2, /* msg_length */
+ 1); /* response_length */
+}
+
/**
GLuint location,
GLuint bind_table_index)
{
+ struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
GLuint msg_reg_nr = 1;
- struct brw_reg b;
- /*
- printf("vs const read msg, location %u, msg_reg_nr %d\n",
- location, msg_reg_nr);
- */
+ if (intel->gen >= 6)
+ location /= 16;
/* Setup MRF[1] with location/offset into const buffer */
brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
-
- /* XXX I think we're setting all the dwords of MRF[1] to 'location'.
- * when the docs say only dword[2] should be set. Hmmm. But it works.
- */
- b = brw_message_reg(msg_reg_nr);
- b = retype(b, BRW_REGISTER_TYPE_UD);
- /*b = get_element_ud(b, 2);*/
- brw_MOV(p, b, brw_imm_ud(location));
-
+ brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 2),
+ BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(location));
brw_pop_insn_state(p);
insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.destreg__conditionalmod = msg_reg_nr;
insn->header.mask_control = BRW_MASK_DISABLE;
- brw_set_dest(insn, dest);
- brw_set_src0(insn, brw_null_reg());
+ brw_set_dest(p, insn, dest);
+ if (intel->gen >= 6) {
+ brw_set_src0(p, insn, brw_message_reg(msg_reg_nr));
+ } else {
+ brw_set_src0(p, insn, brw_null_reg());
+ }
- brw_set_dp_read_message(p->brw,
+ brw_set_dp_read_message(p,
insn,
bind_table_index,
0,
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
- 0, /* source cache = data cache */
+ BRW_DATAPORT_READ_TARGET_DATA_CACHE,
1, /* msg_length */
- 1, /* response_length (1 Oword) */
- 0); /* eot */
+ 1); /* response_length (1 Oword) */
}
/**
GLuint bind_table_index)
{
struct intel_context *intel = &p->brw->intel;
+ struct brw_reg src = brw_vec8_grf(0, 0);
int msg_type;
/* Setup MRF[1] with offset into const buffer */
brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_predicate_control(p, BRW_PREDICATE_NONE);
/* M1.0 is block offset 0, M1.4 is block offset 1, all other
* fields ignored.
*/
- brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD),
+ brw_ADD(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_D),
addr_reg, brw_imm_d(offset));
brw_pop_insn_state(p);
+ gen6_resolve_implied_move(p, &src, 0);
struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = BRW_PREDICATE_NONE;
insn->header.destreg__conditionalmod = 0;
insn->header.mask_control = BRW_MASK_DISABLE;
- brw_set_dest(insn, dest);
- brw_set_src0(insn, brw_vec8_grf(0, 0));
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src);
- if (intel->gen == 6)
+ if (intel->gen >= 6)
msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
else if (intel->gen == 5 || intel->is_g4x)
msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
else
msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
- brw_set_dp_read_message(p->brw,
+ brw_set_dp_read_message(p,
insn,
bind_table_index,
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
msg_type,
- 0, /* source cache = data cache */
+ BRW_DATAPORT_READ_TARGET_DATA_CACHE,
2, /* msg_length */
- 1, /* response_length */
- 0); /* eot */
+ 1); /* response_length */
}
void brw_fb_WRITE(struct brw_compile *p,
int dispatch_width,
- struct brw_reg dest,
GLuint msg_reg_nr,
struct brw_reg src0,
GLuint binding_table_index,
GLuint msg_length,
GLuint response_length,
- GLboolean eot)
+ GLboolean eot,
+ GLboolean header_present)
{
struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
GLuint msg_control, msg_type;
- GLboolean header_present = GL_TRUE;
+ struct brw_reg dest;
- insn = next_insn(p, BRW_OPCODE_SEND);
- insn->header.predicate_control = 0; /* XXX */
+ if (dispatch_width == 16)
+ dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+ else
+ dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
+
+ if (intel->gen >= 6 && binding_table_index == 0) {
+ insn = next_insn(p, BRW_OPCODE_SENDC);
+ } else {
+ insn = next_insn(p, BRW_OPCODE_SEND);
+ }
+ /* The execution mask is ignored for render target writes. */
+ insn->header.predicate_control = 0;
insn->header.compression_control = BRW_COMPRESSION_NONE;
if (intel->gen >= 6) {
- if (msg_length == 4)
- header_present = GL_FALSE;
-
/* headerless version, just submit color payload */
src0 = brw_message_reg(msg_reg_nr);
- msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE_GEN6;
+ msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
} else {
insn->header.destreg__conditionalmod = msg_reg_nr;
else
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
- brw_set_dest(insn, dest);
- brw_set_src0(insn, src0);
- brw_set_dp_write_message(p->brw,
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src0);
+ brw_set_dp_write_message(p,
insn,
binding_table_index,
msg_control,
struct brw_reg m1 = brw_message_reg(msg_reg_nr);
- guess_execution_size(p->current, dest);
+ guess_execution_size(p, p->current, dest);
if (p->current->header.execution_size == BRW_EXECUTE_16)
dispatch_16 = GL_TRUE;
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_MOV(p, m1, brw_vec8_grf(0,0));
+ brw_MOV(p, retype(m1, BRW_REGISTER_TYPE_UD),
+ retype(brw_vec8_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
brw_pop_insn_state(p);
{
struct brw_instruction *insn;
- /* Sandybridge doesn't have the implied move for SENDs,
- * and the first message register index comes from src0.
- */
- if (intel->gen >= 6) {
- brw_push_insn_state(p);
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- /* m1 contains header? */
- brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
- brw_pop_insn_state(p);
- src0 = brw_message_reg(msg_reg_nr);
- }
+ gen6_resolve_implied_move(p, &src0, msg_reg_nr);
insn = next_insn(p, BRW_OPCODE_SEND);
insn->header.predicate_control = 0; /* XXX */
if (intel->gen < 6)
insn->header.destreg__conditionalmod = msg_reg_nr;
- brw_set_dest(insn, dest);
- brw_set_src0(insn, src0);
- brw_set_sampler_message(p->brw, insn,
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src0);
+ brw_set_sampler_message(p, insn,
binding_table_index,
sampler,
msg_type,
*/
brw_push_insn_state(p);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
- brw_MOV(p, reg, reg);
+ brw_MOV(p, retype(reg, BRW_REGISTER_TYPE_UD),
+ retype(reg, BRW_REGISTER_TYPE_UD));
brw_pop_insn_state(p);
}
struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
- /* Sandybridge doesn't have the implied move for SENDs,
- * and the first message register index comes from src0.
- */
- if (intel->gen >= 6) {
+ gen6_resolve_implied_move(p, &src0, msg_reg_nr);
+
+ if (intel->gen == 7) {
+ /* Enable Channel Masks in the URB_WRITE_HWORD message header */
brw_push_insn_state(p);
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_MOV(p, brw_message_reg(msg_reg_nr), src0);
+ brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
+ BRW_REGISTER_TYPE_UD),
+ retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0xff00));
brw_pop_insn_state(p);
- src0 = brw_message_reg(msg_reg_nr);
}
insn = next_insn(p, BRW_OPCODE_SEND);
assert(msg_length < BRW_MAX_MRF);
- brw_set_dest(insn, dest);
- brw_set_src0(insn, src0);
- brw_set_src1(insn, brw_imm_d(0));
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src0);
+ brw_set_src1(p, insn, brw_imm_d(0));
if (intel->gen < 6)
insn->header.destreg__conditionalmod = msg_reg_nr;
- brw_set_urb_message(p->brw,
+ brw_set_urb_message(p,
insn,
allocate,
used,
swizzle);
}
+static int
+brw_find_next_block_end(struct brw_compile *p, int start)
+{ /* Return the index in p->store of the first ENDIF, ELSE or WHILE
+   * instruction located after index start. */
+ int ip;
+
+ for (ip = start + 1; ip < p->nr_insn; ip++) { /* the instruction at start itself is skipped */
+ struct brw_instruction *insn = &p->store[ip];
+
+ switch (insn->header.opcode) {
+ case BRW_OPCODE_ENDIF:
+ case BRW_OPCODE_ELSE:
+ case BRW_OPCODE_WHILE:
+ return ip;
+ }
+ }
+ assert(!"not reached"); /* fires when no block-ending instruction follows start */
+ return start + 1; /* fallback value when assert() is compiled out */
+}
+
+/* There is no DO instruction on gen6, so to find the end of the loop
+ * we have to see if the loop is jumping back before our start
+ * instruction.
+ *
+ * Returns the index in p->store of the first WHILE after start whose
+ * jump target, computed as ip + jip / br, lies at or before start.
+ * The jump count is read from bits1.branch_gen6.jump_count on gen6
+ * and from bits3.break_cont.jip otherwise.
+ */
+static int
+brw_find_loop_end(struct brw_compile *p, int start)
+{
+ struct intel_context *intel = &p->brw->intel;
+ int ip;
+ int br = 2; /* divisor applied to the stored jump count to get an instruction distance */
+
+ for (ip = start + 1; ip < p->nr_insn; ip++) {
+ struct brw_instruction *insn = &p->store[ip];
+
+ if (insn->header.opcode == BRW_OPCODE_WHILE) {
+ int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
+ : insn->bits3.break_cont.jip;
+ if (ip + jip / br <= start) /* this WHILE jumps back to or before start */
+ return ip;
+ }
+ }
+ assert(!"not reached"); /* fires when no enclosing WHILE follows start */
+ return start + 1; /* fallback value when assert() is compiled out */
+}
+
+/* After program generation, go back and update the UIP and JIP of
+ * BREAK and CONT instructions to their correct locations.
+ *
+ * Returns immediately when intel->gen < 6.  Otherwise every BREAK and
+ * CONTINUE in p->store gets jip set to br times the distance to the
+ * index returned by brw_find_next_block_end(), and uip set to br times
+ * the distance to the index returned by brw_find_loop_end().
+ */
+void
+brw_set_uip_jip(struct brw_compile *p)
+{
+ struct intel_context *intel = &p->brw->intel;
+ int ip;
+ int br = 2; /* multiplier turning an instruction distance into the stored jip/uip value */
+
+ if (intel->gen < 6)
+ return;
+
+ for (ip = 0; ip < p->nr_insn; ip++) {
+ struct brw_instruction *insn = &p->store[ip];
+
+ switch (insn->header.opcode) { /* opcodes other than BREAK/CONTINUE are left untouched */
+ case BRW_OPCODE_BREAK:
+ insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
+ /* Gen7 UIP points to WHILE; Gen6 points just after it */
+ insn->bits3.break_cont.uip =
+ br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
+ break;
+ case BRW_OPCODE_CONTINUE:
+ insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
+ insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip); /* no gen6 +1 here, unlike BREAK */
+
+ assert(insn->bits3.break_cont.uip != 0);
+ assert(insn->bits3.break_cont.jip != 0);
+ break;
+ }
+ }
+}
+
void brw_ff_sync(struct brw_compile *p,
struct brw_reg dest,
GLuint msg_reg_nr,
struct intel_context *intel = &p->brw->intel;
struct brw_instruction *insn;
- /* Sandybridge doesn't have the implied move for SENDs,
- * and the first message register index comes from src0.
- */
- if (intel->gen >= 6) {
- brw_push_insn_state(p);
- brw_set_mask_control( p, BRW_MASK_DISABLE );
- brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
- retype(src0, BRW_REGISTER_TYPE_UD));
- brw_pop_insn_state(p);
- src0 = brw_message_reg(msg_reg_nr);
- }
+ gen6_resolve_implied_move(p, &src0, msg_reg_nr);
insn = next_insn(p, BRW_OPCODE_SEND);
- brw_set_dest(insn, dest);
- brw_set_src0(insn, src0);
- brw_set_src1(insn, brw_imm_d(0));
+ brw_set_dest(p, insn, dest);
+ brw_set_src0(p, insn, src0);
+ brw_set_src1(p, insn, brw_imm_d(0));
if (intel->gen < 6)
insn->header.destreg__conditionalmod = msg_reg_nr;
- brw_set_ff_sync_message(p->brw,
+ brw_set_ff_sync_message(p,
insn,
allocate,
response_length,