#include "brw_defines.h"
#include "brw_eu.h"
-static INLINE
-GLboolean brw_is_arithmetic_inst(const struct brw_instruction *inst)
-{
- switch (inst->header.opcode) {
- case BRW_OPCODE_MOV:
- case BRW_OPCODE_SEL:
- case BRW_OPCODE_NOT:
- case BRW_OPCODE_AND:
- case BRW_OPCODE_OR:
- case BRW_OPCODE_XOR:
- case BRW_OPCODE_SHR:
- case BRW_OPCODE_SHL:
- case BRW_OPCODE_RSR:
- case BRW_OPCODE_RSL:
- case BRW_OPCODE_ADD:
- case BRW_OPCODE_MUL:
- case BRW_OPCODE_AVG:
- case BRW_OPCODE_FRC:
- case BRW_OPCODE_RNDU:
- case BRW_OPCODE_RNDD:
- case BRW_OPCODE_RNDE:
- case BRW_OPCODE_RNDZ:
- case BRW_OPCODE_MAC:
- case BRW_OPCODE_MACH:
- case BRW_OPCODE_LINE:
- return GL_TRUE;
- default:
- return GL_FALSE;
- }
-}
-
-static const struct {
- char *name;
- int nsrc;
- int ndst;
-} inst_opcode[128] = {
- [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
+const struct brw_instruction_info brw_opcodes[128] = {
+ [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1, .is_arith = 1 },
[BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
- [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1, .is_arith = 1 },
[BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
- [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
+ [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1, .is_arith = 1 },
+ [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1, .is_arith = 1 },
[BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
[BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
};
+static INLINE
+GLboolean brw_is_arithmetic_inst(const struct brw_instruction *inst)
+{
+ return brw_opcodes[inst->header.opcode].is_arith;
+}
+
static const GLuint inst_stride[7] = {
[0] = 0,
[1] = 1,
};
static const GLuint inst_type_size[8] = {
- [0] = 4,
- [1] = 4,
- [2] = 2,
- [3] = 2,
- [4] = 1,
- [5] = 1,
- [7] = 4
+ [BRW_REGISTER_TYPE_UD] = 4,
+ [BRW_REGISTER_TYPE_D] = 4,
+ [BRW_REGISTER_TYPE_UW] = 2,
+ [BRW_REGISTER_TYPE_W] = 2,
+ [BRW_REGISTER_TYPE_UB] = 1,
+ [BRW_REGISTER_TYPE_B] = 1,
+ [BRW_REGISTER_TYPE_F] = 4
};
-#define BRW_MAX_OFFSET(x0,x1) ((x0) > (x1) ? (x0) : (x1))
-#define BRW_MIN_OFFSET(x0,x1) ((x0) < (x1) ? (x0) : (x1));
-
static INLINE GLboolean
brw_is_grf_written(const struct brw_instruction *inst,
int reg_index, int size,
int gen)
{
- if (inst_opcode[inst->header.opcode].ndst == 0)
+ if (brw_opcodes[inst->header.opcode].ndst == 0)
return GL_FALSE;
if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
/* If the two intervals intersect, we overwrite the register */
write_end = write_start + length;
- const int left = BRW_MAX_OFFSET(write_start, reg_start);
- const int right = BRW_MIN_OFFSET(write_end, reg_end);
+ const int left = MAX2(write_start, reg_start);
+ const int right = MIN2(write_end, reg_end);
return left < right;
}
-/* Specific path for message register since we need to handle the compr4 case */
-static INLINE GLboolean
-brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size)
+static GLboolean
+brw_is_mrf_written_alu(const struct brw_instruction *inst,
+ int reg_index, int size)
{
- if (inst_opcode[inst->header.opcode].ndst == 0)
+ if (brw_opcodes[inst->header.opcode].ndst == 0)
return GL_FALSE;
- if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
- if (inst->bits1.ia1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE)
- return GL_TRUE;
-
if (inst->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE)
return GL_FALSE;
+ if (inst->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT)
+ return GL_TRUE;
+
const int reg_start = reg_index * REG_SIZE;
const int reg_end = reg_start + size;
const int mrf_index = inst->bits1.da1.dest_reg_nr & 0x0f;
- const int is_compr4 = inst->bits1.da1.dest_reg_nr & 0xf0;
+ const int is_compr4 = inst->bits1.da1.dest_reg_nr & BRW_MRF_COMPR4;
const int type_size = inst_type_size[inst->bits1.da1.dest_reg_type];
/* We use compr4 with a size != 16 elements. Strange, we conservatively
if (is_compr4 && inst->header.execution_size != BRW_EXECUTE_16)
return GL_TRUE;
- GLboolean is_written = GL_FALSE;
-
/* Here we write mrf_{i} and mrf_{i+4}. So we read two times 8 elements */
if (is_compr4) {
const int length = 8 * type_size * inst->bits1.da1.dest_horiz_stride;
const int write_end1 = write_start1 + length;
/* If the two intervals intersect, we overwrite the register */
- const int left0 = BRW_MAX_OFFSET(write_start0, reg_start);
- const int right0 = BRW_MIN_OFFSET(write_end0, reg_end);
- const int left1 = BRW_MAX_OFFSET(write_start1, reg_start);
- const int right1 = BRW_MIN_OFFSET(write_end1, reg_end);
+ const int left0 = MAX2(write_start0, reg_start);
+ const int right0 = MIN2(write_end0, reg_end);
+ const int left1 = MAX2(write_start1, reg_start);
+ const int right1 = MIN2(write_end1, reg_end);
- is_written = left0 < right0 || left1 < right1;
+ if (left0 < right0 || left1 < right1)
+ return GL_TRUE;
}
else {
int length;
const int write_start = inst->bits1.da1.dest_reg_nr*REG_SIZE
+ inst->bits1.da1.dest_subreg_nr;
const int write_end = write_start + length;
- const int left = BRW_MAX_OFFSET(write_start, reg_start);
- const int right = BRW_MIN_OFFSET(write_end, reg_end);;
+ const int left = MAX2(write_start, reg_start);
+ const int right = MIN2(write_end, reg_end);
- is_written = left < right;
+ if (left < right)
+ return GL_TRUE;
}
- /* SEND may perform an implicit mov to a mrf register */
- if (is_written == GL_FALSE &&
- inst->header.opcode == BRW_OPCODE_SEND &&
- inst->bits1.da1.src0_reg_file != 0) {
-
- const int mrf_start = inst->header.destreg__conditionalmod;
- const int write_start = mrf_start * REG_SIZE;
- const int write_end = write_start + REG_SIZE;
- const int left = BRW_MAX_OFFSET(write_start, reg_start);
- const int right = BRW_MIN_OFFSET(write_end, reg_end);;
- is_written = left < right;
- }
+ return GL_FALSE;
+}
+
+/* SEND may perform an implicit mov to a mrf register */
+static GLboolean brw_is_mrf_written_send(const struct brw_instruction *inst,
+ int reg_index, int size)
+{
- return is_written;
+ const int reg_start = reg_index * REG_SIZE;
+ const int reg_end = reg_start + size;
+ const int mrf_start = inst->header.destreg__conditionalmod;
+ const int write_start = mrf_start * REG_SIZE;
+ const int write_end = write_start + REG_SIZE;
+ const int left = MAX2(write_start, reg_start);
+ const int right = MIN2(write_end, reg_end);
+
+ if (inst->header.opcode != BRW_OPCODE_SEND ||
+ inst->bits1.da1.src0_reg_file == 0)
+ return GL_FALSE;
+
+ return left < right;
+}
+
+/* Specific path for message register since we need to handle the compr4 case */
+static INLINE GLboolean
+brw_is_mrf_written(const struct brw_instruction *inst, int reg_index, int size)
+{
+ return (brw_is_mrf_written_alu(inst, reg_index, size) ||
+ brw_is_mrf_written_send(inst, reg_index, size));
}
static INLINE GLboolean
read_start *= REG_SIZE;
read_end = read_start + length;
- const int left = BRW_MAX_OFFSET(read_start, reg_start);
- const int right = BRW_MIN_OFFSET(read_end, reg_end);
+ const int left = MAX2(read_start, reg_start);
+ const int right = MIN2(read_end, reg_end);
return left < right;
}
brw_is_grf_read(const struct brw_instruction *inst, int reg_index, int size)
{
int i, j;
- if (inst_opcode[inst->header.opcode].nsrc == 0)
+ if (brw_opcodes[inst->header.opcode].nsrc == 0)
return GL_FALSE;
/* Look at first source. We must take into account register regions to
* since we do not take into account the fact that some complete registers
* may be skipped
*/
- if (inst_opcode[inst->header.opcode].nsrc >= 1) {
+ if (brw_opcodes[inst->header.opcode].nsrc >= 1) {
if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT)
if (inst->bits1.ia1.src0_reg_file == BRW_GENERAL_REGISTER_FILE)
}
/* Second src register */
- if (inst_opcode[inst->header.opcode].nsrc >= 2) {
+ if (brw_opcodes[inst->header.opcode].nsrc >= 2) {
if (inst->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT)
if (inst->bits1.ia1.src1_reg_file == BRW_GENERAL_REGISTER_FILE)
if (mov->bits1.da1.dest_address_mode != BRW_ADDRESS_DIRECT ||
mov->bits1.da1.dest_reg_file != BRW_MESSAGE_REGISTER_FILE ||
- mov->bits1.da1.dest_reg_type != 7 ||
- mov->bits1.da1.dest_horiz_stride != 1 ||
+ mov->bits1.da1.dest_reg_type != BRW_REGISTER_TYPE_F ||
+ mov->bits1.da1.dest_horiz_stride != BRW_HORIZONTAL_STRIDE_1 ||
mov->bits1.da1.dest_subreg_nr != 0)
return GL_FALSE;
if (mov->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT ||
mov->bits1.da1.src0_reg_file != BRW_GENERAL_REGISTER_FILE ||
- mov->bits1.da1.src0_reg_type != 7 ||
- mov->bits2.da1.src0_width != 3 ||
- mov->bits2.da1.src0_horiz_stride != 1 ||
- mov->bits2.da1.src0_vert_stride != 4 ||
+ mov->bits1.da1.src0_reg_type != BRW_REGISTER_TYPE_F ||
+ mov->bits2.da1.src0_width != BRW_WIDTH_8 ||
+ mov->bits2.da1.src0_horiz_stride != BRW_HORIZONTAL_STRIDE_1 ||
+ mov->bits2.da1.src0_vert_stride != BRW_VERTICAL_STRIDE_8 ||
mov->bits2.da1.src0_subreg_nr != 0 ||
mov->bits2.da1.src0_abs != 0 ||
mov->bits2.da1.src0_negate != 0)
*grf_index = mov->bits2.da1.src0_reg_nr;
*mrf_index = mov->bits1.da1.dest_reg_nr & 0x0f;
- *is_compr4 = (mov->bits1.da1.dest_reg_nr & 0xf0) != 0;
+ *is_compr4 = (mov->bits1.da1.dest_reg_nr & BRW_MRF_COMPR4) != 0;
return GL_TRUE;
}
inst->header.access_mode == BRW_ALIGN_1 &&
inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT &&
inst->bits1.da1.dest_reg_file == BRW_GENERAL_REGISTER_FILE &&
- inst->bits1.da1.dest_reg_type == 7 &&
- inst->bits1.da1.dest_horiz_stride == 1 &&
+ inst->bits1.da1.dest_reg_type == BRW_REGISTER_TYPE_F &&
+ inst->bits1.da1.dest_horiz_stride == BRW_HORIZONTAL_STRIDE_1 &&
inst->bits1.da1.dest_reg_nr == grf_index &&
inst->bits1.da1.dest_subreg_nr == 0 &&
brw_is_arithmetic_inst(inst))
p->nr_insn = nr_insn;
}
-/* The gen code emitter generates a lot of duplications in the mrf-to-grf moves.
- * Here, we monitor same mov mrf-to-grf instrutions and remove them as soon as
- * none of the two operands have been written
+/* The gen code emitter generates a lot of duplications in the
+ * grf-to-mrf moves, for example when texture sampling with the same
+ * coordinates from multiple textures.. Here, we monitor same mov
+ * grf-to-mrf instrutions and remove repeated ones where the operands
+ * and dst ahven't changed in between.
*/
void brw_remove_duplicate_mrf_moves(struct brw_compile *p)
{
free(removeInst);
}
-void brw_remove_mrf_to_grf_moves(struct brw_compile *p)
+/* Replace moves to MRFs where the value moved is the result of a
+ * normal arithmetic operation with computation right into the MRF.
+ */
+void brw_remove_grf_to_mrf_moves(struct brw_compile *p)
{
int i, j, prev;
struct brw_context *brw = p->brw;