#include "brw_defines.h"
#include "brw_eu.h"
-#include "../glsl/ralloc.h"
+#include "glsl/ralloc.h"
/***********************************************************************
* Internal helper for constructing instructions
* On Sandybridge, this is no longer the case. This function performs the
* explicit move; it should be called before emitting a SEND instruction.
*/
-static void
+void
gen6_resolve_implied_move(struct brw_compile *p,
struct brw_reg *src,
GLuint msg_reg_nr)
}
-static void brw_set_dest(struct brw_compile *p,
- struct brw_instruction *insn,
- struct brw_reg dest)
+void
+brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
+ struct brw_reg dest)
{
- struct intel_context *intel = &p->brw->intel;
-
if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
dest.file != BRW_MESSAGE_REGISTER_FILE)
assert(dest.nr < 128);
/* 10. Check destination issues. */
}
-static void brw_set_src0(struct brw_compile *p,
- struct brw_instruction *insn,
- struct brw_reg reg)
+void
+brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
+ struct brw_reg reg)
{
if (reg.type != BRW_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
}
}
-static void brw_set_dp_write_message( struct brw_compile *p,
- struct brw_instruction *insn,
- GLuint binding_table_index,
- GLuint msg_control,
- GLuint msg_type,
- GLuint msg_length,
- GLboolean header_present,
- GLuint pixel_scoreboard_clear,
- GLuint response_length,
- GLuint end_of_thread,
- GLuint send_commit_msg)
+void
+brw_set_dp_write_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ GLuint binding_table_index,
+ GLuint msg_control,
+ GLuint msg_type,
+ GLuint msg_length,
+ GLboolean header_present,
+ GLuint pixel_scoreboard_clear,
+ GLuint response_length,
+ GLuint end_of_thread,
+ GLuint send_commit_msg)
{
struct brw_context *brw = p->brw;
struct intel_context *intel = &brw->intel;
}
}
-static void
+void
brw_set_dp_read_message(struct brw_compile *p,
struct brw_instruction *insn,
GLuint binding_table_index,
struct intel_context *intel = &brw->intel;
brw_set_src1(p, insn, brw_imm_d(0));
- if (intel->gen >= 6) {
+ if (intel->gen >= 7) {
+ insn->bits3.gen7_dp.binding_table_index = binding_table_index;
+ insn->bits3.gen7_dp.msg_control = msg_control;
+ insn->bits3.gen7_dp.pixel_scoreboard_clear = 0;
+ insn->bits3.gen7_dp.msg_type = msg_type;
+ insn->bits3.gen7_dp.header_present = 1;
+ insn->bits3.gen7_dp.response_length = response_length;
+ insn->bits3.gen7_dp.msg_length = msg_length;
+ insn->bits3.gen7_dp.end_of_thread = 0;
+ insn->header.destreg__conditionalmod = GEN6_MESSAGE_TARGET_DP_CONST_CACHE;
+ } else if (intel->gen == 6) {
uint32_t target_function;
if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
}
-
-static struct brw_instruction *next_insn( struct brw_compile *p,
- GLuint opcode )
+#define next_insn brw_next_insn
+struct brw_instruction *
+brw_next_insn(struct brw_compile *p, GLuint opcode)
{
struct brw_instruction *insn;
return insn;
}
-
static struct brw_instruction *brw_alu1( struct brw_compile *p,
GLuint opcode,
struct brw_reg dest,
* stores a rounded value (possibly the wrong way) in the dest register, but
* also sets a per-channel "increment bit" in the flag register. A predicated
* add of 1.0 fixes dest to contain the desired result.
+ *
+ * Sandybridge and later appear to round correctly without an ADD.
*/
#define ROUND(OP) \
void brw_##OP(struct brw_compile *p, \
rnd = next_insn(p, BRW_OPCODE_##OP); \
brw_set_dest(p, rnd, dest); \
brw_set_src0(p, rnd, src); \
- rnd->header.destreg__conditionalmod = 0x7; /* turn on round-increments */ \
\
- add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
- add->header.predicate_control = BRW_PREDICATE_NORMAL; \
+ if (p->brw->intel.gen < 6) { \
+ /* turn on round-increments */ \
+ rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R; \
+ add = brw_ADD(p, dest, dest, brw_imm_f(1.0f)); \
+ add->header.predicate_control = BRW_PREDICATE_NORMAL; \
+ } \
}
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
- } else {
+ } else if (intel->gen == 6) {
brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ } else {
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, brw_imm_ud(0));
+ insn->bits3.break_cont.jip = 0;
+ insn->bits3.break_cont.uip = 0;
}
insn->header.execution_size = execute_size;
return insn;
}
+/* This function is only used for gen6-style IF instructions with an
+ * embedded comparison (conditional modifier). It is not used on gen7.
+ */
struct brw_instruction *
gen6_IF(struct brw_compile *p, uint32_t conditional,
struct brw_reg src0, struct brw_reg src1)
insn = next_insn(p, BRW_OPCODE_IF);
brw_set_dest(p, insn, brw_imm_w(0));
- insn->header.execution_size = BRW_EXECUTE_8;
+ if (p->compressed) {
+ insn->header.execution_size = BRW_EXECUTE_16;
+ } else {
+ insn->header.execution_size = BRW_EXECUTE_8;
+ }
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(p, insn, src0);
brw_set_src1(p, insn, src1);
if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
if_inst->bits3.if_else.pop_count = 0;
if_inst->bits3.if_else.pad0 = 0;
- } else {
+ } else if (intel->gen == 6) {
/* As of gen6, there is no IFF and IF must point to the ENDIF. */
if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
+ } else {
+ if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
+ if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
}
} else {
else_inst->header.execution_size = if_inst->header.execution_size;
else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
else_inst->bits3.if_else.pop_count = 1;
else_inst->bits3.if_else.pad0 = 0;
- } else {
+ } else if (intel->gen == 6) {
/* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
+ } else {
+ /* The IF instruction's JIP should point just past the ELSE */
+ if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
+ /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
+ if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
+ else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
}
}
}
brw_set_dest(p, insn, brw_ip_reg());
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
- } else {
+ } else if (intel->gen == 6) {
brw_set_dest(p, insn, brw_imm_w(0));
insn->bits1.branch_gen6.jump_count = 0;
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ } else {
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, brw_imm_ud(0));
+ insn->bits3.break_cont.jip = 0;
+ insn->bits3.break_cont.uip = 0;
}
insn->header.compression_control = BRW_COMPRESSION_NONE;
brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
brw_set_src1(p, insn, brw_imm_d(0x0));
- } else {
+ } else if (intel->gen == 6) {
brw_set_dest(p, insn, brw_imm_w(0));
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ } else {
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, brw_imm_ud(0));
}
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->bits3.if_else.jump_count = 0;
insn->bits3.if_else.pop_count = 1;
insn->bits3.if_else.pad0 = 0;
- } else {
+ } else if (intel->gen == 6) {
insn->bits1.branch_gen6.jump_count = 2;
+ } else {
+ insn->bits3.break_cont.jip = 2;
}
patch_IF_ELSE(p, if_inst, else_inst, insn);
}
struct brw_instruction *do_insn)
{
struct brw_instruction *insn;
- int br = 2;
insn = next_insn(p, BRW_OPCODE_CONTINUE);
brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src0(p, insn, brw_ip_reg());
brw_set_src1(p, insn, brw_imm_d(0x0));
- insn->bits3.break_cont.uip = br * (do_insn - insn);
-
insn->header.compression_control = BRW_COMPRESSION_NONE;
insn->header.execution_size = BRW_EXECUTE_8;
return insn;
if (intel->gen >= 5)
br = 2;
- if (intel->gen >= 6) {
+ if (intel->gen >= 7) {
+ insn = next_insn(p, BRW_OPCODE_WHILE);
+
+ brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ brw_set_src1(p, insn, brw_imm_ud(0));
+ insn->bits3.break_cont.jip = br * (do_insn - insn);
+
+ insn->header.execution_size = BRW_EXECUTE_8;
+ } else if (intel->gen == 6) {
insn = next_insn(p, BRW_OPCODE_WHILE);
brw_set_dest(p, insn, brw_imm_w(0));
brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
- insn->header.execution_size = do_insn->header.execution_size;
- assert(insn->header.execution_size == BRW_EXECUTE_8);
+ insn->header.execution_size = BRW_EXECUTE_8;
} else {
if (p->single_program_flow) {
insn = next_insn(p, BRW_OPCODE_ADD);
brw_set_dest(p, insn, dest);
brw_set_src0(p, insn, src);
- if (intel->gen == 6)
+ if (intel->gen >= 6)
msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
else if (intel->gen == 5 || intel->is_g4x)
msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
if (intel->gen == 7) {
/* Enable Channel Masks in the URB_WRITE_HWORD message header */
+ brw_push_insn_state(p);
+ brw_set_access_mode(p, BRW_ALIGN_1);
brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
BRW_REGISTER_TYPE_UD),
retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
brw_imm_ud(0xff00));
+ brw_pop_insn_state(p);
}
insn = next_insn(p, BRW_OPCODE_SEND);
static int
brw_find_loop_end(struct brw_compile *p, int start)
{
+ struct intel_context *intel = &p->brw->intel;
int ip;
int br = 2;
struct brw_instruction *insn = &p->store[ip];
if (insn->header.opcode == BRW_OPCODE_WHILE) {
- if (ip + insn->bits1.branch_gen6.jump_count / br < start)
+ int jip = intel->gen == 6 ? insn->bits1.branch_gen6.jump_count
+ : insn->bits3.break_cont.jip;
+ if (ip + jip / br <= start)
return ip;
}
}
switch (insn->header.opcode) {
case BRW_OPCODE_BREAK:
insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
- insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip + 1);
+ /* Gen7 UIP points to WHILE; Gen6 points just after it */
+ insn->bits3.break_cont.uip =
+ br * (brw_find_loop_end(p, ip) - ip + (intel->gen == 6 ? 1 : 0));
break;
case BRW_OPCODE_CONTINUE:
- /* JIP is set at CONTINUE emit time, since that's when we
- * know where the start of the loop is.
- */
insn->bits3.break_cont.jip = br * (brw_find_next_block_end(p, ip) - ip);
+ insn->bits3.break_cont.uip = br * (brw_find_loop_end(p, ip) - ip);
+
assert(insn->bits3.break_cont.uip != 0);
assert(insn->bits3.break_cont.jip != 0);
break;