SHADER_OPCODE_GEN4_SCRATCH_WRITE,
SHADER_OPCODE_GEN7_SCRATCH_READ,
+ SHADER_OPCODE_URB_WRITE_SIMD8,
+
VEC4_OPCODE_PACK_BYTES,
VEC4_OPCODE_UNPACK_UNIFORM,
#define BRW_URB_OPCODE_WRITE_HWORD 0
#define BRW_URB_OPCODE_WRITE_OWORD 1
+#define GEN8_URB_OPCODE_SIMD8_WRITE 7
#define BRW_URB_SWIZZLE_NONE 0
#define BRW_URB_SWIZZLE_INTERLEAVE 1
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_URB_WRITE_SIMD8:
return true;
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
return src[1].file == GRF;
return mlen;
} else if (opcode == FS_OPCODE_FB_WRITE && arg == 0) {
return mlen;
+ } else if (opcode == SHADER_OPCODE_URB_WRITE_SIMD8 && arg == 0) {
+ return mlen;
} else if (opcode == SHADER_OPCODE_UNTYPED_ATOMIC && arg == 0) {
return mlen;
} else if (opcode == SHADER_OPCODE_UNTYPED_SURFACE_READ && arg == 0) {
return 2;
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_URB_WRITE_SIMD8:
case FS_OPCODE_INTERPOLATE_AT_CENTROID:
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
struct brw_reg implied_header,
GLuint nr);
void generate_fb_write(fs_inst *inst, struct brw_reg payload);
+ void generate_urb_write(fs_inst *inst, struct brw_reg payload);
void generate_blorp_fb_write(fs_inst *inst);
void generate_pixel_xy(struct brw_reg dst, bool is_x);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
}
}
+void
+fs_generator::generate_urb_write(fs_inst *inst, struct brw_reg payload)
+{
+ brw_inst *insn;
+
+ insn = brw_next_insn(p, BRW_OPCODE_SEND);
+
+ brw_set_dest(p, insn, brw_null_reg());
+ brw_set_src0(p, insn, payload);
+ brw_set_src1(p, insn, brw_imm_d(0));
+
+ brw_inst_set_sfid(brw, insn, BRW_SFID_URB);
+ brw_inst_set_urb_opcode(brw, insn, GEN8_URB_OPCODE_SIMD8_WRITE);
+
+ brw_inst_set_mlen(brw, insn, inst->mlen);
+ brw_inst_set_rlen(brw, insn, 0);
+ brw_inst_set_eot(brw, insn, inst->eot);
+ brw_inst_set_header_present(brw, insn, true);
+ brw_inst_set_urb_global_offset(brw, insn, inst->offset);
+}
+
void
fs_generator::generate_blorp_fb_write(fs_inst *inst)
{
generate_scratch_read_gen7(inst, dst);
break;
+ case SHADER_OPCODE_URB_WRITE_SIMD8:
+ generate_urb_write(inst, src[0]);
+ break;
+
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
generate_uniform_pull_constant_load(inst, dst, src[0], src[1]);
break;
/* Special case instructions which have extra implied registers used. */
switch (inst->opcode) {
+ case SHADER_OPCODE_URB_WRITE_SIMD8:
case FS_OPCODE_FB_WRITE:
/* We could omit this for the !inst->header_present case, except that
* the simulator apparently incorrectly reads from g0/g1 instead of
}
}
+static bool
+is_last_send(fs_inst *inst)
+{
+ switch (inst->opcode) {
+ case SHADER_OPCODE_URB_WRITE_SIMD8:
+ case FS_OPCODE_FB_WRITE:
+ return inst->eot;
+ default:
+ assert(!inst->eot);
+ return false;
+ }
+}
+
bool
fs_visitor::assign_regs(bool allow_spilling)
{
* We could just do "something high". Instead, we just pick the
* highest register that works.
*/
- if (inst->opcode == FS_OPCODE_FB_WRITE && inst->eot) {
+ if (is_last_send(inst)) {
int size = virtual_grf_sizes[inst->src[0].reg];
int reg = screen->wm_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
ra_set_node_reg(g, inst->src[0].reg, reg);
return "gen4_scratch_write";
case SHADER_OPCODE_GEN7_SCRATCH_READ:
return "gen7_scratch_read";
+ case SHADER_OPCODE_URB_WRITE_SIMD8:
+ return "gen8_urb_write_simd8";
case VEC4_OPCODE_PACK_BYTES:
return "pack_bytes";
{
switch (opcode) {
case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_URB_WRITE_SIMD8:
case FS_OPCODE_FB_WRITE:
return true;
default: