+
+/**
+ * Emit the Gen6 SEND instruction that writes stream output data (used for
+ * transform feedback).
+ *
+ * When send_commit_msg is true, this is the last piece of stream output data
+ * from this thread, so the data is sent as a committed write. According to
+ * the Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
+ *
+ * "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
+ * writes are complete by sending the final write as a committed write."
+ *
+ * The response length handed to the data port write message follows the
+ * commit flag: 1 for a committed write, 0 otherwise.
+ */
+void
+brw_svb_write(struct brw_compile *p,
+              struct brw_reg dest,
+              unsigned msg_reg_nr,
+              struct brw_reg src0,
+              unsigned binding_table_index,
+              bool send_commit_msg)
+{
+   /* The response length mirrors the commit flag. */
+   const unsigned response_length = send_commit_msg ? 1 : 0;
+
+   /* Must run before the SEND is allocated; it may rewrite src0. */
+   gen6_resolve_implied_move(p, &src0, msg_reg_nr);
+
+   struct brw_instruction *send = next_insn(p, BRW_OPCODE_SEND);
+
+   brw_set_dest(p, send, dest);
+   brw_set_src0(p, send, src0);
+   brw_set_src1(p, send, brw_imm_d(0));
+   brw_set_dp_write_message(p, send,
+                            binding_table_index,
+                            0, /* msg_control: ignored */
+                            GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
+                            1, /* msg_length */
+                            true, /* header_present */
+                            0, /* last_render_target: ignored */
+                            response_length,
+                            0, /* end_of_thread */
+                            send_commit_msg);
+}
+
+/**
+ * Fill in the message descriptor of insn for an untyped atomic operation.
+ *
+ * On Haswell the message targets HSW_SFID_DATAPORT_DATA_CACHE_1 and the
+ * message type depends on the instruction's access mode: align1 instructions
+ * get the regular untyped atomic op, anything else gets the SIMD4x2 variant.
+ * Otherwise the message targets GEN7_SFID_DATAPORT_DATA_CACHE with a single
+ * message type.
+ *
+ * msg_length, response_length and header_present are forwarded to
+ * brw_set_message_descriptor(). A non-zero response_length additionally sets
+ * the "return data expected" bit. bind_table_index is stored in the
+ * descriptor's binding table index field and atomic_op is ORed in at bit 8.
+ */
+static void
+brw_set_dp_untyped_atomic_message(struct brw_compile *p,
+                                  struct brw_instruction *insn,
+                                  unsigned atomic_op,
+                                  unsigned bind_table_index,
+                                  unsigned msg_length,
+                                  unsigned response_length,
+                                  bool header_present)
+{
+   const bool is_hsw = p->brw->is_haswell;
+
+   /* The descriptor is set up first; the bits below are layered on top. */
+   brw_set_message_descriptor(p, insn,
+                              is_hsw ? HSW_SFID_DATAPORT_DATA_CACHE_1
+                                     : GEN7_SFID_DATAPORT_DATA_CACHE,
+                              msg_length, response_length,
+                              header_present, false);
+
+   const bool simd8 = insn->header.execution_size != BRW_EXECUTE_16;
+
+   if (!is_hsw) {
+      insn->bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
+
+      if (simd8) {
+         insn->bits3.ud |= 1 << 12; /* SIMD8 mode */
+      }
+   } else if (insn->header.access_mode != BRW_ALIGN_1) {
+      /* No SIMD8 bit is set for the SIMD4x2 message. */
+      insn->bits3.gen7_dp.msg_type =
+         HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2;
+   } else {
+      if (simd8) {
+         insn->bits3.ud |= 1 << 12; /* SIMD8 mode */
+      }
+
+      insn->bits3.gen7_dp.msg_type =
+         HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
+   }
+
+   if (response_length != 0) {
+      insn->bits3.ud |= 1 << 13; /* Return data expected */
+   }
+
+   insn->bits3.gen7_dp.binding_table_index = bind_table_index;
+   insn->bits3.ud |= atomic_op << 8;
+}
+
+/**
+ * Emit a SEND instruction carrying an untyped atomic message.
+ *
+ * dest and mrf are retyped to UD before being installed as the destination
+ * and first source; the second source is an immediate zero. The message is
+ * flagged as having a header only when the new instruction uses align1
+ * access mode.
+ */
+void
+brw_untyped_atomic(struct brw_compile *p,
+                   struct brw_reg dest,
+                   struct brw_reg mrf,
+                   unsigned atomic_op,
+                   unsigned bind_table_index,
+                   unsigned msg_length,
+                   unsigned response_length)
+{
+   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+   bool header_present;
+
+   brw_set_dest(p, send, retype(dest, BRW_REGISTER_TYPE_UD));
+   brw_set_src0(p, send, retype(mrf, BRW_REGISTER_TYPE_UD));
+   brw_set_src1(p, send, brw_imm_d(0));
+
+   /* Read the access mode only once the operands have been set. */
+   header_present = send->header.access_mode == BRW_ALIGN_1;
+
+   brw_set_dp_untyped_atomic_message(p, send, atomic_op, bind_table_index,
+                                     msg_length, response_length,
+                                     header_present);
+}
+
+/**
+ * Fill in the message descriptor of insn for an untyped surface read.
+ *
+ * The shared function and message type are chosen by hardware: Haswell uses
+ * HSW_SFID_DATAPORT_DATA_CACHE_1, everything else uses
+ * GEN7_SFID_DATAPORT_DATA_CACHE. For align1 instructions the SIMD mode is
+ * also encoded, derived from the instruction's execution size.
+ *
+ * msg_length, response_length and header_present are forwarded to
+ * brw_set_message_descriptor(). The number of channels read is
+ * response_length divided by (dispatch width / 8); the remaining channels
+ * out of four are marked as dropped in the descriptor.
+ */
+static void
+brw_set_dp_untyped_surface_read_message(struct brw_compile *p,
+                                        struct brw_instruction *insn,
+                                        unsigned bind_table_index,
+                                        unsigned msg_length,
+                                        unsigned response_length,
+                                        bool header_present)
+{
+   const bool simd16 = insn->header.execution_size == BRW_EXECUTE_16;
+   /* Dispatch width in units of eight channels: 16 / 8 or 8 / 8. */
+   const unsigned simd8_groups = simd16 ? 2 : 1;
+   const unsigned num_channels = response_length / simd8_groups;
+   /* One bit per 32-bit channel that should be dropped. */
+   const unsigned drop_mask = 0xf & (0xf << num_channels);
+   const bool is_hsw = p->brw->is_haswell;
+
+   brw_set_message_descriptor(p, insn,
+                              is_hsw ? HSW_SFID_DATAPORT_DATA_CACHE_1
+                                     : GEN7_SFID_DATAPORT_DATA_CACHE,
+                              msg_length, response_length,
+                              header_present, false);
+
+   insn->bits3.gen7_dp.msg_type =
+      is_hsw ? HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ
+             : GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ;
+
+   if (insn->header.access_mode == BRW_ALIGN_1) {
+      /* Mode field at bit 12: 1 selects SIMD16, 2 selects SIMD8. */
+      insn->bits3.ud |= (simd16 ? 1 : 2) << 12;
+   }
+
+   insn->bits3.gen7_dp.binding_table_index = bind_table_index;
+
+   /* Set mask of 32-bit channels to drop. */
+   insn->bits3.ud |= drop_mask << 8;
+}
+
+/**
+ * Emit a SEND instruction carrying an untyped surface read message.
+ *
+ * dest and mrf are retyped to UD before being installed as the destination
+ * and first source. The message is flagged as having a header only when the
+ * new instruction uses align1 access mode.
+ */
+void
+brw_untyped_surface_read(struct brw_compile *p,
+                         struct brw_reg dest,
+                         struct brw_reg mrf,
+                         unsigned bind_table_index,
+                         unsigned msg_length,
+                         unsigned response_length)
+{
+   struct brw_instruction *send = next_insn(p, BRW_OPCODE_SEND);
+   bool header_present;
+
+   brw_set_dest(p, send, retype(dest, BRW_REGISTER_TYPE_UD));
+   brw_set_src0(p, send, retype(mrf, BRW_REGISTER_TYPE_UD));
+
+   /* Read the access mode only once the operands have been set. */
+   header_present = send->header.access_mode == BRW_ALIGN_1;
+
+   brw_set_dp_untyped_surface_read_message(p, send, bind_table_index,
+                                           msg_length, response_length,
+                                           header_present);
+}
+
+/**
+ * This instruction is generated as a single-channel align1 instruction by
+ * both the VS and FS stages when using INTEL_DEBUG=shader_time.
+ *
+ * We can't use the typed atomic op in the FS because that has the execution
+ * mask ANDed with the pixel mask, but we just want to write the one dword for
+ * all the pixels.
+ *
+ * We don't use the SIMD4x2 atomic ops in the VS because we just want to
+ * write one u32. So we use the same untyped atomic write message as the
+ * pixel shader.
+ *
+ * The untyped atomic operation requires a BUFFER surface type with RAW
+ * format, and is only accessible through the legacy DATA_CACHE dataport
+ * messages.
+ */
+void brw_shader_time_add(struct brw_compile *p,
+                         struct brw_reg payload,
+                         uint32_t surf_index)
+{
+   assert(p->brw->gen >= 7);
+
+   /* Request align1 access and disabled masking for just this SEND; the
+    * settings are bracketed by a push/pop of the instruction state.
+    */
+   brw_push_insn_state(p);
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
+   brw_pop_insn_state(p);
+
+   /* We use brw_vec1_reg and unmasked because we want to increment the given
+    * offset only once.
+    */
+   const struct brw_reg null_dst =
+      brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0);
+   brw_set_dest(p, send, null_dst);
+
+   const struct brw_reg payload_scalar =
+      brw_vec1_reg(payload.file, payload.nr, 0);
+   brw_set_src0(p, send, payload_scalar);
+
+   /* Atomic add with no returned data and no message header. */
+   brw_set_dp_untyped_atomic_message(p, send, BRW_AOP_ADD, surf_index,
+                                     2 /* message length */,
+                                     0 /* response length */,
+                                     false /* header present */);
+}