* Calculate the high 32-bits of a 32x32 multiply.
*/
SHADER_OPCODE_MULH,
+
+ /**
+ * A MOV that uses VxH indirect addressing.
+ *
+ * Source 0: A register to start from (HW_REG).
+ * Source 1: An indirect offset (in bytes, UD GRF).
+ * Source 2: The length of the region that could be accessed (in bytes,
+ * UD immediate).
+ */
+ SHADER_OPCODE_MOV_INDIRECT,
};
enum brw_urb_write_flags {
case SHADER_OPCODE_BARRIER:
return 1;
+ case SHADER_OPCODE_MOV_INDIRECT:
+ if (arg == 0) {
+ assert(src[2].file == IMM);
+ unsigned region_length = src[2].ud;
+
+ if (src[0].file == FIXED_GRF) {
+ /* If the start of the region is not register aligned, then
+ * there's some portion of the register that's technically
+ * unread at the beginning.
+ *
+ * However, the register allocator works in terms of whole
+ * registers, and does not use subnr. It assumes that the
+ * read starts at the beginning of the register, and extends
+ * regs_read() whole registers beyond that.
+ *
+ * To compensate, we extend the region length to include this
+ * unread portion at the beginning.
+ */
+ if (src[0].subnr)
+ region_length += src[0].subnr * type_sz(src[0].type);
+
+ return DIV_ROUND_UP(region_length, REG_SIZE);
+ } else {
+ assert(!"Invalid register file");
+ }
+ }
+ break;
+
default:
if (is_tex() && arg == 0 && src[0].file == VGRF)
return mlen;
struct brw_reg offset,
struct brw_reg value);
+ void generate_mov_indirect(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg reg,
+ struct brw_reg indirect_byte_offset);
+
bool patch_discard_jumps_to_fb_writes();
const struct brw_compiler *compiler;
case FS_OPCODE_LINTERP:
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
case SHADER_OPCODE_BROADCAST:
+ case SHADER_OPCODE_MOV_INDIRECT:
return true;
case SHADER_OPCODE_RCP:
case SHADER_OPCODE_RSQ:
}
}
+void
+fs_generator::generate_mov_indirect(fs_inst *inst,
+ struct brw_reg dst,
+ struct brw_reg reg,
+ struct brw_reg indirect_byte_offset)
+{
+ assert(indirect_byte_offset.type == BRW_REGISTER_TYPE_UD);
+ assert(indirect_byte_offset.file == BRW_GENERAL_REGISTER_FILE);
+
+ unsigned imm_byte_offset = reg.nr * REG_SIZE + reg.subnr;
+
+ /* We use VxH indirect addressing, clobbering a0.0 through a0.7. */
+ struct brw_reg addr = vec8(brw_address_reg(0));
+
+ /* The destination stride of an instruction (in bytes) must be greater
+ * than or equal to the size of the rest of the instruction. Since the
+ * address register is of type UW, we can't use a D-type instruction.
+ * In order to get around this, re re-type to UW and use a stride.
+ */
+ indirect_byte_offset =
+ retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
+
+ /* Prior to Broadwell, there are only 8 address registers. */
+ assert(inst->exec_size == 8 || devinfo->gen >= 8);
+
+ brw_MOV(p, addr, indirect_byte_offset);
+ brw_inst_set_mask_control(devinfo, brw_last_inst, BRW_MASK_DISABLE);
+ brw_MOV(p, dst, retype(brw_VxH_indirect(0, imm_byte_offset), dst.type));
+}
+
void
fs_generator::generate_urb_read(fs_inst *inst,
struct brw_reg dst,
fill_count++;
break;
+ case SHADER_OPCODE_MOV_INDIRECT:
+ generate_mov_indirect(inst, dst, src[0], src[1]);
+ break;
+
case SHADER_OPCODE_URB_READ_SIMD8:
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
generate_urb_read(inst, dst, src[0]);
return "barrier";
case SHADER_OPCODE_MULH:
return "mulh";
+ case SHADER_OPCODE_MOV_INDIRECT:
+ return "mov_indirect";
}
unreachable("not reached");