*/
-#include "brw_context.h"
-#include "brw_defines.h"
+#include "brw_eu_defines.h"
#include "brw_eu.h"
#include "util/ralloc.h"
struct brw_reg *src,
unsigned msg_reg_nr)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
if (devinfo->gen < 6)
return;
* Since we're pretending to have 16 MRFs anyway, we may as well use the
* registers required for messages with EOT.
*/
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
if (devinfo->gen >= 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
reg->file = BRW_GENERAL_REGISTER_FILE;
reg->nr += GEN7_MRF_HACK_START;
* The hardware encoding may depend on whether the value is an immediate.
*/
unsigned
-brw_reg_type_to_hw_type(const struct brw_device_info *devinfo,
+brw_reg_type_to_hw_type(const struct gen_device_info *devinfo,
enum brw_reg_type type, enum brw_reg_file file)
{
if (file == BRW_IMMEDIATE_VALUE) {
assert(type < ARRAY_SIZE(hw_types));
assert(hw_types[type] != -1);
assert(devinfo->gen >= 7 || type < BRW_REGISTER_TYPE_DF);
- assert(devinfo->gen >= 8 || type < BRW_REGISTER_TYPE_HF);
+ assert(devinfo->gen >= 8 || type < BRW_REGISTER_TYPE_Q);
return hw_types[type];
}
}
+/**
+ * Return the element size given a hardware register type and file.
+ *
+ * The hardware encoding may depend on whether the value is an immediate.
+ * For that reason two separate lookup tables are kept below: one indexed by
+ * the immediate encodings and one indexed by the non-immediate encodings.
+ *
+ * \param devinfo  device description; only its \c gen field is read, and only
+ *                 to assert that \p type is accepted on that generation
+ * \param type     hardware register type encoding, used directly as the
+ *                 table index
+ * \param file     register file; BRW_IMMEDIATE_VALUE selects the immediate
+ *                 table, any other value selects the non-immediate table
+ *
+ * \return the size stored in the selected table for \p type.  A slot inside
+ *         a table that has no designated initializer holds 0.
+ *         NOTE(review): the unit is presumably bytes -- confirm.
+ */
+unsigned
+brw_hw_reg_type_to_size(const struct gen_device_info *devinfo,
+ unsigned type, enum brw_reg_file file)
+{
+ if (file == BRW_IMMEDIATE_VALUE) {
+ static const unsigned imm_hw_sizes[] = {
+ [BRW_HW_REG_TYPE_UD] = 4,
+ [BRW_HW_REG_TYPE_D] = 4,
+ [BRW_HW_REG_TYPE_UW] = 2,
+ [BRW_HW_REG_TYPE_W] = 2,
+ [BRW_HW_REG_IMM_TYPE_UV] = 2,
+ [BRW_HW_REG_IMM_TYPE_VF] = 4,
+ [BRW_HW_REG_IMM_TYPE_V] = 2,
+ [BRW_HW_REG_TYPE_F] = 4,
+ [GEN8_HW_REG_TYPE_UQ] = 8,
+ [GEN8_HW_REG_TYPE_Q] = 8,
+ [GEN8_HW_REG_IMM_TYPE_DF] = 8,
+ [GEN8_HW_REG_IMM_TYPE_HF] = 2,
+ };
+ assert(type < ARRAY_SIZE(imm_hw_sizes)); /* bounds check before indexing */
+ assert(devinfo->gen >= 6 || type != BRW_HW_REG_IMM_TYPE_UV); /* UV immediates are rejected before Gen6 */
+ assert(devinfo->gen >= 8 || type <= BRW_HW_REG_TYPE_F); /* encodings above F are rejected before Gen8 */
+ return imm_hw_sizes[type];
+ } else {
+ /* Non-immediate registers */
+ static const unsigned hw_sizes[] = {
+ [BRW_HW_REG_TYPE_UD] = 4,
+ [BRW_HW_REG_TYPE_D] = 4,
+ [BRW_HW_REG_TYPE_UW] = 2,
+ [BRW_HW_REG_TYPE_W] = 2,
+ [BRW_HW_REG_NON_IMM_TYPE_UB] = 1,
+ [BRW_HW_REG_NON_IMM_TYPE_B] = 1,
+ [GEN7_HW_REG_NON_IMM_TYPE_DF] = 8,
+ [BRW_HW_REG_TYPE_F] = 4,
+ [GEN8_HW_REG_TYPE_UQ] = 8,
+ [GEN8_HW_REG_TYPE_Q] = 8,
+ [GEN8_HW_REG_NON_IMM_TYPE_HF] = 2,
+ };
+ assert(type < ARRAY_SIZE(hw_sizes)); /* bounds check before indexing */
+ assert(devinfo->gen >= 7 ||
+ (type < GEN7_HW_REG_NON_IMM_TYPE_DF || type == BRW_HW_REG_TYPE_F)); /* before Gen7 only encodings below DF, or F itself, are accepted */
+ assert(devinfo->gen >= 8 || type <= BRW_HW_REG_TYPE_F); /* encodings above F are rejected before Gen8 */
+ return hw_sizes[type];
+ }
+}
+
void
brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
if (dest.file == BRW_MESSAGE_REGISTER_FILE)
assert((dest.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
brw_inst_set_exec_size(devinfo, inst, dest.width);
}
-extern int reg_type_size[];
-
static void
-validate_reg(const struct brw_device_info *devinfo,
+validate_reg(const struct gen_device_info *devinfo,
brw_inst *inst, struct brw_reg reg)
{
const int hstride_for_reg[] = {0, 1, 2, 4};
* destination horiz stride has to be a word.
*/
if (reg.type == BRW_REGISTER_TYPE_V) {
+ unsigned UNUSED elem_size = brw_element_size(devinfo, inst, dst);
assert(hstride_for_reg[brw_inst_dst_hstride(devinfo, inst)] *
- reg_type_size[brw_inst_dst_reg_type(devinfo, inst)] == 2);
+ elem_size == 2);
}
return;
void
brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
if (reg.file == BRW_MESSAGE_REGISTER_FILE)
assert((reg.nr & ~BRW_MRF_COMPR4) < BRW_MAX_MRF(devinfo->gen));
brw_inst_set_src0_address_mode(devinfo, inst, reg.address_mode);
if (reg.file == BRW_IMMEDIATE_VALUE) {
- if (reg.type == BRW_REGISTER_TYPE_DF)
+ if (reg.type == BRW_REGISTER_TYPE_DF ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_DIM)
brw_inst_set_imm_df(devinfo, inst, reg.df);
+ else if (reg.type == BRW_REGISTER_TYPE_UQ ||
+ reg.type == BRW_REGISTER_TYPE_Q)
+ brw_inst_set_imm_uq(devinfo, inst, reg.u64);
else
brw_inst_set_imm_ud(devinfo, inst, reg.ud);
* If we see a 0.0:F, change the type to VF so that it can be compacted.
*/
if (brw_inst_imm_ud(devinfo, inst) == 0x0 &&
- brw_inst_src0_reg_type(devinfo, inst) == BRW_HW_REG_TYPE_F) {
+ brw_inst_src0_reg_type(devinfo, inst) == BRW_HW_REG_TYPE_F &&
+ brw_inst_dst_reg_type(devinfo, inst) != GEN7_HW_REG_NON_IMM_TYPE_DF) {
brw_inst_set_src0_reg_type(devinfo, inst, BRW_HW_REG_IMM_TYPE_VF);
}
void
brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
assert(reg.nr < 128);
bool header_present,
bool end_of_thread)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_set_src1(p, inst, brw_imm_d(0));
bool low_precision,
unsigned dataType )
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
unsigned msg_length;
unsigned response_length;
unsigned response_length,
bool end_of_thread)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_set_message_descriptor(p, insn, BRW_SFID_URB,
1, response_length, true, end_of_thread);
unsigned offset,
unsigned swizzle_control )
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
assert(devinfo->gen < 7 || swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
assert(devinfo->gen < 7 || !(flags & BRW_URB_WRITE_ALLOCATE));
unsigned binding_table_index,
unsigned msg_control,
unsigned msg_type,
+ unsigned target_cache,
unsigned msg_length,
bool header_present,
unsigned last_render_target,
unsigned end_of_thread,
unsigned send_commit_msg)
{
- const struct brw_device_info *devinfo = p->devinfo;
- unsigned sfid;
-
- if (devinfo->gen >= 7) {
- /* Use the Render Cache for RT writes; otherwise use the Data Cache */
- if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
- sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
- else
- sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
- } else if (devinfo->gen == 6) {
- /* Use the render cache for all write messages. */
- sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
- } else {
- sfid = BRW_SFID_DATAPORT_WRITE;
- }
+ const struct gen_device_info *devinfo = p->devinfo;
+ const unsigned sfid = (devinfo->gen >= 6 ? target_cache :
+ BRW_SFID_DATAPORT_WRITE);
brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
header_present, end_of_thread);
bool header_present,
unsigned response_length)
{
- const struct brw_device_info *devinfo = p->devinfo;
- unsigned sfid;
-
- if (devinfo->gen >= 7) {
- sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
- } else if (devinfo->gen == 6) {
- if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
- sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
- else
- sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
- } else {
- sfid = BRW_SFID_DATAPORT_READ;
- }
+ const struct gen_device_info *devinfo = p->devinfo;
+ const unsigned sfid = (devinfo->gen >= 6 ? target_cache :
+ BRW_SFID_DATAPORT_READ);
brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
header_present, false);
unsigned simd_mode,
unsigned return_format)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_set_message_descriptor(p, inst, BRW_SFID_SAMPLER, msg_length,
response_length, header_present, false);
unsigned rlen,
bool header_present)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
assert(num_regs == 1 || num_regs == 2 || num_regs == 4 ||
(devinfo->gen >= 8 && num_regs == 8));
+ const unsigned block_size = (devinfo->gen >= 8 ? _mesa_logbase2(num_regs) :
+ num_regs - 1);
+
brw_set_message_descriptor(p, inst, GEN7_SFID_DATAPORT_DATA_CACHE,
mlen, rlen, header_present, false);
brw_inst_set_dp_category(devinfo, inst, 1); /* Scratch Block Read/Write msgs */
brw_inst_set_scratch_read_write(devinfo, inst, write);
brw_inst_set_scratch_type(devinfo, inst, dword);
brw_inst_set_scratch_invalidate_after_read(devinfo, inst, invalidate_after_read);
- brw_inst_set_scratch_block_size(devinfo, inst, ffs(num_regs) - 1);
+ brw_inst_set_scratch_block_size(devinfo, inst, block_size);
brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset);
}
brw_inst *
brw_next_insn(struct brw_codegen *p, unsigned opcode)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn;
if (p->nr_insn + 1 > p->store_size) {
brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
struct brw_reg src0, struct brw_reg src1, struct brw_reg src2)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *inst = next_insn(p, opcode);
gen7_convert_mrf_to_grf(p, &dest);
struct brw_reg dest, \
struct brw_reg src) \
{ \
- const struct brw_device_info *devinfo = p->devinfo; \
+ const struct gen_device_info *devinfo = p->devinfo; \
brw_inst *rnd, *add; \
rnd = next_insn(p, BRW_OPCODE_##OP); \
brw_set_dest(p, rnd, dest); \
ALU2(XOR)
ALU2(SHR)
ALU2(SHL)
+ALU1(DIM)
ALU2(ASR)
ALU1(FRC)
ALU1(RNDD)
brw_inst *
brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
const bool align16 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_16;
/* The F32TO16 instruction doesn't support 32-bit destination types in
* Align1 mode, and neither does the Gen8 implementation in terms of a
if (needs_zero_fill) {
brw_inst_set_no_dd_clear(devinfo, inst, true);
- inst = brw_MOV(p, suboffset(dst, 1), brw_imm_ud(0u));
+ inst = brw_MOV(p, suboffset(dst, 1), brw_imm_w(0));
brw_inst_set_no_dd_check(devinfo, inst, true);
}
brw_inst *
brw_F16TO32(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
bool align16 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_16;
if (align16) {
void brw_NOP(struct brw_codegen *p)
{
brw_inst *insn = next_insn(p, BRW_OPCODE_NOP);
- brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_1);
- brw_set_dest(p, insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
- brw_set_src0(p, insn, retype(brw_vec1_grf(0,0), BRW_REGISTER_TYPE_UD));
- brw_set_src1(p, insn, brw_imm_ud(0x0));
+ memset(insn, 0, sizeof(*insn)); /* wipe everything next_insn() filled in, leaving an all-zero instruction */
+ brw_inst_set_opcode(p->devinfo, insn, BRW_OPCODE_NOP); /* the opcode is then the only field set explicitly */
}
brw_JMPI(struct brw_codegen *p, struct brw_reg index,
unsigned predicate_control)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
struct brw_reg ip = brw_ip_reg();
brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);
brw_inst *
brw_IF(struct brw_codegen *p, unsigned execute_size)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn;
insn = next_insn(p, BRW_OPCODE_IF);
gen6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
struct brw_reg src0, struct brw_reg src1)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn;
insn = next_insn(p, BRW_OPCODE_IF);
convert_IF_ELSE_to_ADD(struct brw_codegen *p,
brw_inst *if_inst, brw_inst *else_inst)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
/* The next instruction (where the ENDIF would be, if it existed) */
brw_inst *next_inst = &p->store[p->nr_insn];
patch_IF_ELSE(struct brw_codegen *p,
brw_inst *if_inst, brw_inst *else_inst, brw_inst *endif_inst)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
/* We shouldn't be patching IF and ELSE instructions in single program flow
* mode when gen < 6, because in single program flow mode on those
void
brw_ELSE(struct brw_codegen *p)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn;
insn = next_insn(p, BRW_OPCODE_ELSE);
void
brw_ENDIF(struct brw_codegen *p)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn = NULL;
brw_inst *else_inst = NULL;
brw_inst *if_inst = NULL;
brw_inst *
brw_BREAK(struct brw_codegen *p)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn;
insn = next_insn(p, BRW_OPCODE_BREAK);
brw_inst *
brw_CONT(struct brw_codegen *p)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn;
insn = next_insn(p, BRW_OPCODE_CONTINUE);
brw_inst *
gen6_HALT(struct brw_codegen *p)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn;
insn = next_insn(p, BRW_OPCODE_HALT);
brw_inst *
brw_DO(struct brw_codegen *p, unsigned execute_size)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
if (devinfo->gen >= 6 || p->single_program_flow) {
push_loop_stack(p, &p->store[p->nr_insn]);
static void
brw_patch_break_cont(struct brw_codegen *p, brw_inst *while_inst)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *do_inst = get_inner_do_insn(p);
brw_inst *inst;
unsigned br = brw_jump_scale(devinfo);
brw_inst *
brw_WHILE(struct brw_codegen *p)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn, *do_insn;
unsigned br = brw_jump_scale(devinfo);
*/
void brw_land_fwd_jump(struct brw_codegen *p, int jmp_insn_idx)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *jmp_insn = &p->store[jmp_insn_idx];
unsigned jmpi = 1;
struct brw_reg src0,
struct brw_reg src1)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn = next_insn(p, BRW_OPCODE_CMP);
brw_inst_set_cond_modifier(devinfo, insn, conditional);
struct brw_reg src,
unsigned precision )
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
unsigned data_type;
if (has_scalar_region(src)) {
struct brw_reg src0,
struct brw_reg src1)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn = next_insn(p, BRW_OPCODE_MATH);
assert(devinfo->gen >= 6);
assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
(devinfo->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
- assert(src0.file == BRW_GENERAL_REGISTER_FILE ||
- (devinfo->gen >= 8 && src0.file == BRW_IMMEDIATE_VALUE));
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
if (devinfo->gen == 6) {
} else {
assert(src0.type == BRW_REGISTER_TYPE_F);
assert(src1.type == BRW_REGISTER_TYPE_F);
- if (function == BRW_MATH_FUNCTION_POW) {
- assert(src1.file == BRW_GENERAL_REGISTER_FILE ||
- (devinfo->gen >= 8 && src1.file == BRW_IMMEDIATE_VALUE));
- } else {
- assert(src1.file == BRW_ARCHITECTURE_REGISTER_FILE &&
- src1.nr == BRW_ARF_NULL);
- }
}
/* Source modifiers are ignored for extended math instructions on Gen6. */
int num_regs,
unsigned offset)
{
- const struct brw_device_info *devinfo = p->devinfo;
- uint32_t msg_control, msg_type;
- int mlen;
+ const struct gen_device_info *devinfo = p->devinfo;
+ const unsigned target_cache =
+ (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
+ devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
+ BRW_DATAPORT_READ_TARGET_RENDER_CACHE);
+ uint32_t msg_type;
if (devinfo->gen >= 6)
offset /= 16;
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
- if (num_regs == 1) {
- msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
- mlen = 2;
- } else {
- msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
- mlen = 3;
- }
+ const unsigned mlen = 1 + num_regs;
/* Set up the message header. This is g0, with g0.2 filled with
* the offset. We don't want to leave our offset around in g0 or
brw_set_dp_write_message(p,
insn,
brw_scratch_surface_idx(p),
- msg_control,
+ BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
msg_type,
+ target_cache,
mlen,
true, /* header_present */
0, /* not a render target */
int num_regs,
unsigned offset)
{
- const struct brw_device_info *devinfo = p->devinfo;
- uint32_t msg_control;
- int rlen;
+ const struct gen_device_info *devinfo = p->devinfo;
if (devinfo->gen >= 6)
offset /= 16;
}
dest = retype(dest, BRW_REGISTER_TYPE_UW);
- if (num_regs == 1) {
- msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
- rlen = 1;
- } else {
- msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
- rlen = 2;
- }
+ const unsigned rlen = num_regs;
+ const unsigned target_cache =
+ (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
+ devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
+ BRW_DATAPORT_READ_TARGET_RENDER_CACHE);
{
brw_push_insn_state(p);
brw_set_dp_read_message(p,
insn,
brw_scratch_surface_idx(p),
- msg_control,
+ BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
- BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
+ target_cache,
1, /* msg_length */
true, /* header_present */
rlen);
int num_regs,
unsigned offset)
{
- const struct brw_device_info *devinfo = p->devinfo;
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
- assert(brw_inst_pred_control(devinfo, insn) == BRW_PREDICATE_NONE);
+ assert(brw_inst_pred_control(p->devinfo, insn) == BRW_PREDICATE_NONE);
brw_set_dest(p, insn, retype(dest, BRW_REGISTER_TYPE_UW));
}
/**
- * Read a float[4] vector from the data port Data Cache (const buffer).
+ * Read float[4] vectors from the data port constant cache.
* Location (in buffer) should be a multiple of 16.
* Used for fetching shader constants.
*/
uint32_t offset,
uint32_t bind_table_index)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
+ const unsigned target_cache =
+ (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE :
+ BRW_DATAPORT_READ_TARGET_DATA_CACHE);
+ const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current);
/* On newer hardware, offset is in units of owords. */
if (devinfo->gen >= 6)
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
brw_push_insn_state(p);
- brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_push_insn_state(p);
+ brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
/* set message header global offset field (reg 0, element 2) */
mrf.nr,
2), BRW_REGISTER_TYPE_UD),
brw_imm_ud(offset));
+ brw_pop_insn_state(p);
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
}
- brw_set_dp_read_message(p,
- insn,
- bind_table_index,
- BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+ brw_set_dp_read_message(p, insn, bind_table_index,
+ BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size),
BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
- BRW_DATAPORT_READ_TARGET_DATA_CACHE,
+ target_cache,
1, /* msg_length */
true, /* header_present */
- 1); /* response_length (1 reg, 2 owords!) */
+ DIV_ROUND_UP(exec_size, 8)); /* response_length */
brw_pop_insn_state(p);
}
bool last_render_target,
bool header_present)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
+ const unsigned target_cache =
+ (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
+ BRW_DATAPORT_READ_TARGET_RENDER_CACHE);
brw_inst *insn;
unsigned msg_type;
struct brw_reg dest, src0;
binding_table_index,
msg_control,
msg_type,
+ target_cache,
msg_length,
header_present,
last_render_target,
0 /* send_commit_msg */);
}
+brw_inst *
+gen9_fb_READ(struct brw_codegen *p,
+ struct brw_reg dst,
+ struct brw_reg payload,
+ unsigned binding_table_index,
+ unsigned msg_length,
+ unsigned response_length,
+ bool per_sample)
+{ /* Emit a SENDC instruction carrying a GEN9_DATAPORT_RC_RENDER_TARGET_READ
+   * message addressed to GEN6_SFID_DATAPORT_RENDER_CACHE and return it.
+   *
+   * dst and payload become the instruction's destination and src0.
+   * binding_table_index, msg_length and response_length are forwarded
+   * unchanged to brw_set_dp_read_message(); a header is always declared
+   * present.  per_sample is folded into the message control word.
+   * Asserts that the device is Gen9 or later.
+   */
+ const struct gen_device_info *devinfo = p->devinfo;
+ assert(devinfo->gen >= 9);
+ const unsigned msg_subtype = /* 0 when the current exec size is BRW_EXECUTE_16, 1 for anything else */
+ brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16 ? 0 : 1;
+ brw_inst *insn = next_insn(p, BRW_OPCODE_SENDC);
+
+ brw_set_dest(p, insn, dst);
+ brw_set_src0(p, insn, payload);
+ brw_set_dp_read_message(p, insn, binding_table_index,
+ per_sample << 5 | msg_subtype, /* msg_control: per_sample in bit 5, OR'ed with the subtype */
+ GEN9_DATAPORT_RC_RENDER_TARGET_READ,
+ GEN6_SFID_DATAPORT_RENDER_CACHE,
+ msg_length, true /* header_present */,
+ response_length);
+ brw_inst_set_rt_slot_group(devinfo, insn, /* slot group = current quarter control / 2; NOTE(review): presumably picks the 16-channel half -- confirm */
+ brw_inst_qtr_control(devinfo, p->current) / 2);
+
+ return insn;
+}
/**
* Texture sample instruction.
unsigned simd_mode,
unsigned return_format)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn;
if (msg_reg_nr != -1)
* exclusively use the offset - we have to use both.
*/
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
if (sampler_index.file == BRW_IMMEDIATE_VALUE) {
const int sampler_state_size = 16; /* 16 bytes */
unsigned offset,
unsigned swizzle)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn;
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
struct brw_reg payload,
struct brw_reg desc)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
struct brw_inst *send;
int setup;
unsigned response_len,
bool header_present)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
struct brw_inst *insn;
if (surface.file != BRW_IMMEDIATE_VALUE) {
}
static bool
-while_jumps_before_offset(const struct brw_device_info *devinfo,
+while_jumps_before_offset(const struct gen_device_info *devinfo,
brw_inst *insn, int while_offset, int start_offset)
{
int scale = 16 / brw_jump_scale(devinfo);
int jip = devinfo->gen == 6 ? brw_inst_gen6_jump_count(devinfo, insn)
: brw_inst_jip(devinfo, insn);
+ assert(jip < 0); /* The jump distance read from insn must be negative here
+                   * (presumably because a WHILE only jumps backwards --
+                   * confirm).  The function then reports whether
+                   * while_offset plus the scaled distance lands at or
+                   * before start_offset.
+                   */
return while_offset + jip * scale <= start_offset;
}
{
int offset;
void *store = p->store;
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
int depth = 0;
static int
brw_find_loop_end(struct brw_codegen *p, int start_offset)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
int offset;
void *store = p->store;
* BREAK, CONT, and HALT instructions to their correct locations.
*/
void
-brw_set_uip_jip(struct brw_codegen *p)
+brw_set_uip_jip(struct brw_codegen *p, int start_offset)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
int offset;
int br = brw_jump_scale(devinfo);
int scale = 16 / br;
if (devinfo->gen < 6)
return;
- for (offset = 0; offset < p->next_insn_offset;
- offset = next_offset(devinfo, store, offset)) {
+ for (offset = start_offset; offset < p->next_insn_offset; offset += 16) {
brw_inst *insn = store + offset;
-
- if (brw_inst_cmpt_control(devinfo, insn)) {
- /* Fixups for compacted BREAK/CONTINUE not supported yet. */
- assert(brw_inst_opcode(devinfo, insn) != BRW_OPCODE_BREAK &&
- brw_inst_opcode(devinfo, insn) != BRW_OPCODE_CONTINUE &&
- brw_inst_opcode(devinfo, insn) != BRW_OPCODE_HALT);
- continue;
- }
+ assert(brw_inst_cmpt_control(devinfo, insn) == 0);
int block_end_offset = brw_find_next_block_end(p, offset);
switch (brw_inst_opcode(devinfo, insn)) {
unsigned response_length,
bool eot)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_inst *insn;
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
unsigned binding_table_index,
bool send_commit_msg)
{
+ const struct gen_device_info *devinfo = p->devinfo;
+ const unsigned target_cache =
+ (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
+ devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
+ BRW_DATAPORT_READ_TARGET_RENDER_CACHE);
brw_inst *insn;
gen6_resolve_implied_move(p, &src0, msg_reg_nr);
binding_table_index,
0, /* msg_control: ignored */
GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
+ target_cache,
1, /* msg_length */
true, /* header_present */
0, /* last_render_target: ignored */
unsigned atomic_op,
bool response_expected)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
unsigned msg_control =
atomic_op | /* Atomic Operation Type: BRW_AOP_* */
(response_expected ? 1 << 5 : 0); /* Return data expected */
if (devinfo->gen >= 8 || devinfo->is_haswell) {
if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
- if (brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_8)
+ if (brw_inst_exec_size(devinfo, p->current) != BRW_EXECUTE_16)
msg_control |= 1 << 4; /* SIMD8 mode */
brw_inst_set_dp_msg_type(devinfo, insn,
brw_inst_set_dp_msg_type(devinfo, insn,
GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP);
- if (brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_8)
+ if (brw_inst_exec_size(devinfo, p->current) != BRW_EXECUTE_16)
msg_control |= 1 << 4; /* SIMD8 mode */
}
unsigned msg_length,
bool response_expected)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
GEN7_SFID_DATAPORT_DATA_CACHE);
struct brw_inst *insn,
unsigned num_channels)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
/* Set mask of 32-bit channels to drop. */
unsigned msg_control = 0xf & (0xf << num_channels);
unsigned msg_length,
unsigned num_channels)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
GEN7_SFID_DATAPORT_DATA_CACHE);
struct brw_inst *insn,
unsigned num_channels)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
/* Set mask of 32-bit channels to drop. */
unsigned msg_control = 0xf & (0xf << num_channels);
unsigned msg_length,
unsigned num_channels)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
GEN7_SFID_DATAPORT_DATA_CACHE);
unsigned atomic_op,
bool response_expected)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
unsigned msg_control =
atomic_op | /* Atomic Operation Type: BRW_AOP_* */
(response_expected ? 1 << 5 : 0); /* Return data expected */
if (devinfo->gen >= 8 || devinfo->is_haswell) {
if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
- if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
+ if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */
brw_inst_set_dp_msg_type(devinfo, insn,
brw_inst_set_dp_msg_type(devinfo, insn,
GEN7_DATAPORT_RC_TYPED_ATOMIC_OP);
- if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
+ if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */
}
unsigned atomic_op,
unsigned msg_length,
bool response_expected) {
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
GEN6_SFID_DATAPORT_RENDER_CACHE);
struct brw_inst *insn,
unsigned num_channels)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
/* Set mask of unused channels. */
unsigned msg_control = 0xf & (0xf << num_channels);
if (devinfo->gen >= 8 || devinfo->is_haswell) {
if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
- if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
+ if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */
else
msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */
HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ);
} else {
if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
- if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
+ if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */
}
unsigned msg_length,
unsigned num_channels)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
GEN6_SFID_DATAPORT_RENDER_CACHE);
struct brw_inst *insn,
unsigned num_channels)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
/* Set mask of unused channels. */
unsigned msg_control = 0xf & (0xf << num_channels);
if (devinfo->gen >= 8 || devinfo->is_haswell) {
if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
- if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
+ if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */
else
msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */
} else {
if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
- if (brw_inst_qtr_control(devinfo, p->current) == GEN6_COMPRESSION_2Q)
+ if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */
}
unsigned msg_length,
unsigned num_channels)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
HSW_SFID_DATAPORT_DATA_CACHE_1 :
GEN6_SFID_DATAPORT_RENDER_CACHE);
enum brw_message_target sfid,
bool commit_enable)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
brw_set_message_descriptor(p, insn, sfid,
1 /* message length */,
brw_memory_fence(struct brw_codegen *p,
struct brw_reg dst)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell;
struct brw_inst *insn;
+ brw_push_insn_state(p);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
+ dst = vec1(dst);
+
/* Set dst as destination for dependency tracking, the MEMORY_FENCE
* message doesn't write anything back.
*/
* cache messages will be properly ordered with respect to past data and
* render cache messages.
*/
- brw_push_insn_state(p);
- brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
- brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, dst, offset(dst, 1));
- brw_pop_insn_state(p);
}
+
+ brw_pop_insn_state(p);
}
void
unsigned msg_length,
unsigned response_length)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
struct brw_inst *insn;
const uint16_t exec_size = brw_inst_exec_size(devinfo, p->current);
}
void
-brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst)
+brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
+ struct brw_reg mask)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
+ const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current);
+ const unsigned qtr_control = brw_inst_qtr_control(devinfo, p->current);
brw_inst *inst;
assert(devinfo->gen >= 7);
+ assert(mask.type == BRW_REGISTER_TYPE_UD);
brw_push_insn_state(p);
if (devinfo->gen >= 8) {
/* Getting the first active channel index is easy on Gen8: Just find
- * the first bit set in the mask register. The same register exists
- * on HSW already but it reads back as all ones when the current
+ * the first bit set in the execution mask. The register exists on
+ * HSW already but it reads back as all ones when the current
* instruction has execution masking disabled, so it's kind of
* useless.
*/
- inst = brw_FBL(p, vec1(dst),
- retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD));
+ struct brw_reg exec_mask =
+ retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD);
+
+ if (mask.file != BRW_IMMEDIATE_VALUE || mask.ud != 0xffffffff) {
+ /* Unfortunately, ce0 does not take into account the thread
+ * dispatch mask, which may be a problem in cases where it's not
+ * tightly packed (i.e. it doesn't have the form '2^n - 1' for
+ * some n). Combine ce0 with the given dispatch (or vector) mask
+ * to mask off those channels which were never dispatched by the
+ * hardware.
+ */
+ brw_SHR(p, vec1(dst), mask, brw_imm_ud(qtr_control * 8));
+ brw_AND(p, vec1(dst), exec_mask, vec1(dst));
+ exec_mask = vec1(dst);
+ }
/* Quarter control has the effect of magically shifting the value of
- * this register. Make sure it's set to zero.
+ * ce0 so you'll get the first active channel relative to the
+ * specified quarter control as the result.
*/
- brw_inst_set_qtr_control(devinfo, inst, GEN6_COMPRESSION_1Q);
+ inst = brw_FBL(p, vec1(dst), exec_mask);
} else {
- const struct brw_reg flag = retype(brw_flag_reg(1, 0),
- BRW_REGISTER_TYPE_UD);
+ const struct brw_reg flag = brw_flag_reg(1, 0);
- brw_MOV(p, flag, brw_imm_ud(0));
+ brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
- /* Run a 16-wide instruction returning zero with execution masking
- * and a conditional modifier enabled in order to get the current
- * execution mask in f1.0.
+ /* Run enough instructions returning zero with execution masking and
+ * a conditional modifier enabled in order to get the full execution
+ * mask in f1.0. We could use a single 32-wide move here if it
+ * weren't for the hardware bug that causes channel enables to
+ * be applied incorrectly to the second half of 32-wide instructions
+ * on Gen7.
*/
- inst = brw_MOV(p, brw_null_reg(), brw_imm_ud(0));
- brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_16);
- brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
- brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z);
- brw_inst_set_flag_reg_nr(devinfo, inst, 1);
+ const unsigned lower_size = MIN2(16, exec_size);
+ for (unsigned i = 0; i < exec_size / lower_size; i++) {
+ inst = brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW),
+ brw_imm_uw(0));
+ brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
+ brw_inst_set_group(devinfo, inst, lower_size * i + 8 * qtr_control);
+ brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z);
+ brw_inst_set_flag_reg_nr(devinfo, inst, 1);
+ brw_inst_set_exec_size(devinfo, inst, cvt(lower_size) - 1);
+ }
- brw_FBL(p, vec1(dst), flag);
+ /* Find the first bit set in the exec_size-wide portion of the flag
+ * register that was updated by the last sequence of MOV
+ * instructions.
+ */
+ const enum brw_reg_type type = brw_int_type(exec_size / 8, false);
+ brw_FBL(p, vec1(dst), byte_offset(retype(flag, type), qtr_control));
}
} else {
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- if (devinfo->gen >= 8) {
+ if (devinfo->gen >= 8 &&
+ mask.file == BRW_IMMEDIATE_VALUE && mask.ud == 0xffffffff) {
/* In SIMD4x2 mode the first active channel index is just the
- * negation of the first bit of the mask register.
+ * negation of the first bit of the mask register. Note that ce0
+ * doesn't take into account the dispatch mask, so the Gen7 path
+ * should be used instead unless you have the guarantee that the
+ * dispatch mask is tightly packed (i.e. it has the form '2^n - 1'
+ * for some n).
*/
inst = brw_AND(p, brw_writemask(dst, WRITEMASK_X),
negate(retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)),
struct brw_reg src,
struct brw_reg idx)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1;
brw_inst *inst;
+ brw_push_insn_state(p);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_exec_size(p, align1 ? BRW_EXECUTE_1 : BRW_EXECUTE_4);
+
assert(src.file == BRW_GENERAL_REGISTER_FILE &&
src.address_mode == BRW_ADDRESS_DIRECT);
*/
inst = brw_MOV(p,
brw_null_reg(),
- stride(brw_swizzle(idx, BRW_SWIZZLE_XXXX), 0, 4, 1));
+ stride(brw_swizzle(idx, BRW_SWIZZLE_XXXX), 4, 4, 1));
brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NONE);
brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_NZ);
brw_inst_set_flag_reg_nr(devinfo, inst, 1);
/* and use predicated SEL to pick the right channel. */
inst = brw_SEL(p, dst,
- stride(suboffset(src, 4), 0, 4, 1),
- stride(src, 0, 4, 1));
+ stride(suboffset(src, 4), 4, 4, 1),
+ stride(src, 4, 4, 1));
brw_inst_set_pred_control(devinfo, inst, BRW_PREDICATE_NORMAL);
brw_inst_set_flag_reg_nr(devinfo, inst, 1);
}
}
+
+ brw_pop_insn_state(p);
}
/**
void
brw_barrier(struct brw_codegen *p, struct brw_reg src)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
struct brw_inst *inst;
assert(devinfo->gen >= 7);
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
inst = next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
brw_set_src0(p, inst, src);
BRW_MESSAGE_GATEWAY_SFID_BARRIER_MSG);
brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
+ brw_pop_insn_state(p);
}
void
brw_WAIT(struct brw_codegen *p)
{
- const struct brw_device_info *devinfo = p->devinfo;
+ const struct gen_device_info *devinfo = p->devinfo;
struct brw_inst *insn;
struct brw_reg src = brw_notification_reg();