gen_mi_value_unref(b, dst);
}
+/**
+ * Fills `size` bytes starting at `dst` with the repeated 32-bit pattern
+ * `value` by issuing one gen_mi_store() of an immediate per dword through
+ * builder `b`.
+ *
+ * `size` is in bytes and must be a multiple of 4.
+ */
+static inline void
+gen_mi_memset(struct gen_mi_builder *b, __gen_address_type dst,
+              uint32_t value, uint32_t size)
+{
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+   /* NOTE(review): presumably this requires that no partially built MI_MATH
+    * is pending in the builder when the stores are issued -- confirm against
+    * the builder's MI_MATH code.
+    */
+   assert(b->num_math_dwords == 0);
+#endif
+
+   /* The fill is performed one dword at a time. */
+   assert(size % 4 == 0);
+
+   uint32_t offset = 0;
+   while (offset < size) {
+      struct gen_mi_value dst_dword =
+         gen_mi_mem32(__gen_address_offset(dst, offset));
+      gen_mi_store(b, dst_dword, gen_mi_imm(value));
+      offset += 4;
+   }
+}
+
+/**
+ * Copies `size` bytes from `src` to `dst` one dword at a time by issuing
+ * gen_mi_store() operations through builder `b`.
+ *
+ * `size` is in bytes and must be a multiple of 4.
+ *
+ * NOTE: On IVB, this function stomps GEN7_3DPRIM_BASE_VERTEX
+ */
+static inline void
+gen_mi_memcpy(struct gen_mi_builder *b, __gen_address_type dst,
+              __gen_address_type src, uint32_t size)
+{
+#if GEN_GEN >= 8 || GEN_IS_HASWELL
+   /* NOTE(review): presumably this requires that no partially built MI_MATH
+    * is pending in the builder when the stores are issued -- confirm against
+    * the builder's MI_MATH code.
+    */
+   assert(b->num_math_dwords == 0);
+#endif
+
+   /* The copy is performed one dword at a time. */
+   assert(size % 4 == 0);
+
+   for (uint32_t offset = 0; offset < size; offset += 4) {
+      struct gen_mi_value to =
+         gen_mi_mem32(__gen_address_offset(dst, offset));
+      struct gen_mi_value from =
+         gen_mi_mem32(__gen_address_offset(src, offset));
+
+#if !(GEN_GEN >= 8 || GEN_IS_HASWELL)
+      /* IVB does not have a general purpose register for command streamer
+       * commands, so every dword is bounced through an alternate temporary
+       * register: 0x2440 is GEN7_3DPRIM_BASE_VERTEX.
+       */
+      struct gen_mi_value scratch = gen_mi_reg32(0x2440);
+      gen_mi_store(b, scratch, from);
+      gen_mi_store(b, to, scratch);
+#else
+      gen_mi_store(b, to, from);
+#endif
+   }
+}
+
/*
* MI_MATH Section. Only available on Haswell+
*/
EXPECT_EQ(*(uint64_t *)(output + 24), (uint64_t)(uint32_t)value);
}
+/* Fills the start of the output buffer with a known pattern through
+ * gen_mi_memset() and checks every dword after the batch has been submitted.
+ */
+TEST_F(gen_mi_builder_test, memset)
+{
+   const unsigned memset_size = 256;
+
+   gen_mi_memset(&b, out_addr(0), 0xdeadbeef, memset_size);
+   submit_batch();
+
+   uint32_t *dwords = (uint32_t *)output;
+   const unsigned num_dwords = memset_size / sizeof(*dwords);
+   for (unsigned dw = 0; dw < num_dwords; dw++)
+      EXPECT_EQ(dwords[dw], 0xdeadbeef);
+}
+
+/* Seeds the input buffer with an incrementing byte pattern, copies it to the
+ * output buffer through gen_mi_memcpy() and checks every byte after the batch
+ * has been submitted.
+ */
+TEST_F(gen_mi_builder_test, memcpy)
+{
+   const unsigned memcpy_size = 256;
+
+   uint8_t *in_u8 = (uint8_t *)input;
+   for (unsigned i = 0; i < memcpy_size; i++)
+      in_u8[i] = i;
+
+   /* Copy exactly the range that was seeded above and is verified below,
+    * rather than repeating the size as a separate literal.
+    */
+   gen_mi_memcpy(&b, out_addr(0), in_addr(0), memcpy_size);
+
+   submit_batch();
+
+   uint8_t *out_u8 = (uint8_t *)output;
+   for (unsigned i = 0; i < memcpy_size; i++)
+      EXPECT_EQ(out_u8[i], i);
+}
+
/* Start of MI_MATH section */
#if GEN_GEN >= 8 || GEN_IS_HASWELL
struct anv_address dst, struct anv_address src,
uint32_t size);
-void genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
- struct anv_address dst, struct anv_address src,
- uint32_t size);
-
-void genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer *cmd_buffer,
- struct anv_address dst, uint32_t value,
- uint32_t size);
-
void genX(blorp_exec)(struct blorp_batch *batch,
const struct blorp_params *params);
anv_image_get_clear_color_addr(cmd_buffer->device, image, aspect);
unsigned copy_size = cmd_buffer->device->isl_dev.ss.clear_value_size;
+#if GEN_GEN == 7
+ /* On gen7, the combination of commands used here (MI_LOAD_REGISTER_MEM
+ * and MI_STORE_REGISTER_MEM) can cause GPU hangs if any rendering is
+ * in-flight when they are issued even if the memory touched is not
+ * currently active for rendering. The weird bit is that it is not the
+ * MI_LOAD/STORE_REGISTER_MEM commands which hang but rather the in-flight
+ * rendering hangs such that the next stalling command after the
+ * MI_LOAD/STORE_REGISTER_MEM commands will catch the hang.
+ *
+ * It is unclear exactly why this hang occurs. Both MI commands come with
+ * warnings about the 3D pipeline but that doesn't seem to fully explain
+ * it. My (Jason's) best theory is that it has something to do with the
+ * fact that we're using a GPU state register as our temporary and that
+ * something with reading/writing it is causing problems.
+ *
+ * In order to work around this issue, we emit a PIPE_CONTROL with the
+ * command streamer stall bit set.
+ */
+ cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
+ genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
+#endif
+
+ struct gen_mi_builder b;
+ gen_mi_builder_init(&b, &cmd_buffer->batch);
+
if (copy_from_surface_state) {
- genX(cmd_buffer_mi_memcpy)(cmd_buffer, entry_addr,
- ss_clear_addr, copy_size);
+ gen_mi_memcpy(&b, entry_addr, ss_clear_addr, copy_size);
} else {
- genX(cmd_buffer_mi_memcpy)(cmd_buffer, ss_clear_addr,
- entry_addr, copy_size);
+ gen_mi_memcpy(&b, ss_clear_addr, entry_addr, copy_size);
/* Updating a surface state object may require that the state cache be
* invalidated. From the SKL PRM, Shared Functions -> State -> State
return 1 << MIN2(a_log2, b_log2);
}
-void
-genX(cmd_buffer_mi_memcpy)(struct anv_cmd_buffer *cmd_buffer,
- struct anv_address dst, struct anv_address src,
- uint32_t size)
-{
- /* This memcpy operates in units of dwords. */
- assert(size % 4 == 0);
- assert(dst.offset % 4 == 0);
- assert(src.offset % 4 == 0);
-
-#if GEN_GEN == 7
- /* On gen7, the combination of commands used here(MI_LOAD_REGISTER_MEM
- * and MI_STORE_REGISTER_MEM) can cause GPU hangs if any rendering is
- * in-flight when they are issued even if the memory touched is not
- * currently active for rendering. The weird bit is that it is not the
- * MI_LOAD/STORE_REGISTER_MEM commands which hang but rather the in-flight
- * rendering hangs such that the next stalling command after the
- * MI_LOAD/STORE_REGISTER_MEM commands will catch the hang.
- *
- * It is unclear exactly why this hang occurs. Both MI commands come with
- * warnings about the 3D pipeline but that doesn't seem to fully explain
- * it. My (Jason's) best theory is that it has something to do with the
- * fact that we're using a GPU state register as our temporary and that
- * something with reading/writing it is causing problems.
- *
- * In order to work around this issue, we emit a PIPE_CONTROL with the
- * command streamer stall bit set.
- */
- cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
- genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
-#endif
-
- for (uint32_t i = 0; i < size; i += 4) {
-#if GEN_GEN >= 8
- anv_batch_emit(&cmd_buffer->batch, GENX(MI_COPY_MEM_MEM), cp) {
- cp.DestinationMemoryAddress = anv_address_add(dst, i);
- cp.SourceMemoryAddress = anv_address_add(src, i);
- }
-#else
- /* IVB does not have a general purpose register for command streamer
- * commands. Therefore, we use an alternate temporary register.
- */
-#define TEMP_REG 0x2440 /* GEN7_3DPRIM_BASE_VERTEX */
- anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_MEM), load) {
- load.RegisterAddress = TEMP_REG;
- load.MemoryAddress = anv_address_add(src, i);
- }
- anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_REGISTER_MEM), store) {
- store.RegisterAddress = TEMP_REG;
- store.MemoryAddress = anv_address_add(dst, i);
- }
-#undef TEMP_REG
-#endif
- }
- return;
-}
-
-void
-genX(cmd_buffer_mi_memset)(struct anv_cmd_buffer *cmd_buffer,
- struct anv_address dst, uint32_t value,
- uint32_t size)
-{
- /* This memset operates in units of dwords. */
- assert(size % 4 == 0);
- assert(dst.offset % 4 == 0);
-
- for (uint32_t i = 0; i < size; i += 4) {
- anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
- sdi.Address = anv_address_add(dst, i);
- sdi.ImmediateData = value;
- }
- }
-}
-
void
genX(cmd_buffer_so_memcpy)(struct anv_cmd_buffer *cmd_buffer,
struct anv_address dst, struct anv_address src,
*/
static void
emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
- struct anv_query_pool *pool,
+ struct gen_mi_builder *b, struct anv_query_pool *pool,
uint32_t first_index, uint32_t num_queries)
{
for (uint32_t i = 0; i < num_queries; i++) {
struct anv_address slot_addr =
anv_query_address(pool, first_index + i);
- genX(cmd_buffer_mi_memset)(cmd_buffer, anv_address_add(slot_addr, 8),
- 0, pool->stride - 8);
+ /* Zero bytes [8, pool->stride) of the slot; the first 8 bytes are skipped (presumably the availability value written below -- confirm). */
+ gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8);
emit_query_availability(cmd_buffer, slot_addr);
}
}
const uint32_t num_queries =
util_bitcount(cmd_buffer->state.subpass->view_mask);
if (num_queries > 1)
- emit_zero_queries(cmd_buffer, pool, query + 1, num_queries - 1);
+ emit_zero_queries(cmd_buffer, &b, pool, query + 1, num_queries - 1);
}
}
const uint32_t num_queries =
util_bitcount(cmd_buffer->state.subpass->view_mask);
if (num_queries > 1)
- emit_zero_queries(cmd_buffer, pool, query + 1, num_queries - 1);
+ emit_zero_queries(cmd_buffer, &b, pool, query + 1, num_queries - 1);
}
}