+/*
+ * Emit a two-stage cache flush into cs.
+ *
+ * Stage one issues a CACHE_FLUSH_AND_INV_EVENT event write and follows it
+ * with a CP_WAIT_REG_MEM packet that carries the scratch BO address and the
+ * value returned by the event write.  Stage two issues a CACHE_FLUSH_TS
+ * event write and passes its returned value, again with the scratch BO
+ * address, to a CP_UNK_A6XX_14 packet.
+ *
+ * NOTE(review): the value returned by tu6_emit_event_write() is presumably
+ * a sequence number that the GPU writes to the scratch BO -- confirm
+ * against its definition.  The meaning of the raw 0x13 and 0x10 dwords is
+ * not documented in this file.
+ */
+static void
+tu6_cache_flush(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   /* Stage 1: flush + invalidate, then wait on the scratch BO. */
+   const unsigned flush_seqno =
+      tu6_emit_event_write(cmd, cs, CACHE_FLUSH_AND_INV_EVENT, true);
+
+   tu_cs_emit_pkt7(cs, CP_WAIT_REG_MEM, 6);
+   tu_cs_emit(cs, 0x00000013);
+   tu_cs_emit_qw(cs, cmd->scratch_bo.iova);
+   tu_cs_emit(cs, flush_seqno);
+   tu_cs_emit(cs, 0xffffffff);
+   tu_cs_emit(cs, 0x00000010);
+
+   /* Stage 2: timestamped flush, consumed by CP_UNK_A6XX_14. */
+   const unsigned ts_seqno =
+      tu6_emit_event_write(cmd, cs, CACHE_FLUSH_TS, true);
+
+   tu_cs_emit_pkt7(cs, CP_UNK_A6XX_14, 4);
+   tu_cs_emit(cs, 0x00000000);
+   tu_cs_emit_qw(cs, cmd->scratch_bo.iova);
+   tu_cs_emit(cs, ts_seqno);
+}
+
+/*
+ * Program the VSC registers from the command buffer's current tiling
+ * configuration and its vsc_data / vsc_data2 buffers.
+ *
+ * Registers are written in this order: VSC_BIN_SIZE (together with the
+ * 64-bit size address), VSC_BIN_COUNT, the 32 VSC_PIPE_CONFIG_REG entries,
+ * the VSC_PIPE_DATA2 address/pitch/size group and finally the
+ * VSC_PIPE_DATA address/pitch/size group.
+ */
+static void
+update_vsc_pipe(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   const struct tu_tiling_config *cfg = &cmd->state.tiling_config;
+
+   /* Bin dimensions come from the first tile's extent. */
+   const uint32_t bin_size =
+      A6XX_VSC_BIN_SIZE_WIDTH(cfg->tile0.extent.width) |
+      A6XX_VSC_BIN_SIZE_HEIGHT(cfg->tile0.extent.height);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_VSC_BIN_SIZE, 3);
+   tu_cs_emit(cs, bin_size);
+   /* VSC_SIZE_ADDRESS_LO/HI: located 32 pitches past the start of vsc_data */
+   tu_cs_emit_qw(cs, cmd->vsc_data.iova + 32 * cmd->vsc_data_pitch);
+
+   /* Number of bins in each direction. */
+   const uint32_t bin_count =
+      A6XX_VSC_BIN_COUNT_NX(cfg->tile_count.width) |
+      A6XX_VSC_BIN_COUNT_NY(cfg->tile_count.height);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_VSC_BIN_COUNT, 1);
+   tu_cs_emit(cs, bin_count);
+
+   /* One config dword per pipe, written as a single 32-register burst. */
+   tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
+   for (unsigned pipe = 0; pipe < 32; pipe++) {
+      tu_cs_emit(cs, cfg->pipe_config[pipe]);
+   }
+
+   /* vsc_data2: address (2 dwords), pitch, total size. */
+   tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_DATA2_ADDRESS_LO, 4);
+   tu_cs_emit_qw(cs, cmd->vsc_data2.iova);
+   tu_cs_emit(cs, cmd->vsc_data2_pitch);
+   tu_cs_emit(cs, cmd->vsc_data2.size);
+
+   /* vsc_data: address (2 dwords), pitch, total size. */
+   tu_cs_emit_pkt4(cs, REG_A6XX_VSC_PIPE_DATA_ADDRESS_LO, 4);
+   tu_cs_emit_qw(cs, cmd->vsc_data.iova);
+   tu_cs_emit(cs, cmd->vsc_data_pitch);
+   tu_cs_emit(cs, cmd->vsc_data.size);
+}
+
+/*
+ * Emit one CP_COND_WRITE5 packet that polls the register at offset
+ * size_reg using the WRITE_GE function with pitch as the reference value
+ * and, when the condition holds, writes write_data to the VSC_SCRATCH slot
+ * of cmd->scratch_bo.
+ *
+ * NOTE(review): WRITE_GE presumably means "polled value >= reference" --
+ * confirm against the packet definition.
+ */
+static void
+emit_vsc_overflow_cond_write(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
+                             uint32_t size_reg, uint32_t pitch,
+                             uint32_t write_data)
+{
+   tu_cs_emit_pkt7(cs, CP_COND_WRITE5, 8);
+   tu_cs_emit(cs, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
+                  CP_COND_WRITE5_0_WRITE_MEMORY);
+   tu_cs_emit(cs, CP_COND_WRITE5_1_POLL_ADDR_LO(size_reg));
+   tu_cs_emit(cs, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
+   tu_cs_emit(cs, CP_COND_WRITE5_3_REF(pitch));
+   tu_cs_emit(cs, CP_COND_WRITE5_4_MASK(~0));
+   tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_SCRATCH);
+   tu_cs_emit(cs, CP_COND_WRITE5_7_WRITE_DATA(write_data));
+}
+
+/*
+ * Emit the GPU-side test for VSC_DATA / VSC_DATA2 overflow.
+ *
+ * The VSC_SCRATCH slot of the scratch BO is cleared, then for every pipe
+ * in use (pipe_count.width * pipe_count.height) two conditional writes
+ * compare VSC_SIZE_REG(i) against vsc_data_pitch and VSC_SIZE2_REG(i)
+ * against vsc_data2_pitch, storing a non-zero marker in VSC_SCRATCH when
+ * the comparison holds.  The slot is then loaded into OVERFLOW_FLAG_REG
+ * and its bit 0 is inverted through a CP_REG_TEST / CP_COND_REG_EXEC
+ * sequence, so that on exit the register holds 0 on overflow and 1
+ * otherwise.  On overflow the pre-inversion value is also copied to the
+ * VSC_OVERFLOW slot of the scratch BO.
+ */
+static void
+emit_vsc_overflow_test(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+   const uint32_t used_pipe_count =
+      tiling->pipe_count.width * tiling->pipe_count.height;
+
+   /* Clear vsc_scratch: */
+   tu_cs_emit_pkt7(cs, CP_MEM_WRITE, 3);
+   tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_SCRATCH);
+   tu_cs_emit(cs, 0x0);
+
+   /*
+    * Check for overflow, write vsc_scratch if detected.  Both marker
+    * values (1 + pitch, 3 + pitch2) are written to the same slot; bit 0
+    * of the slot is what gets tested below.
+    */
+   for (uint32_t i = 0; i < used_pipe_count; i++) {
+      emit_vsc_overflow_cond_write(cmd, cs, REG_A6XX_VSC_SIZE_REG(i),
+                                   cmd->vsc_data_pitch,
+                                   1 + cmd->vsc_data_pitch);
+      emit_vsc_overflow_cond_write(cmd, cs, REG_A6XX_VSC_SIZE2_REG(i),
+                                   cmd->vsc_data2_pitch,
+                                   3 + cmd->vsc_data2_pitch);
+   }
+
+   tu_cs_emit_pkt7(cs, CP_WAIT_MEM_WRITES, 0);
+
+   tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
+
+   /* Load the scratch slot into OVERFLOW_FLAG_REG (one register). */
+   tu_cs_emit_pkt7(cs, CP_MEM_TO_REG, 3);
+   tu_cs_emit(cs, CP_MEM_TO_REG_0_REG(OVERFLOW_FLAG_REG) |
+                  CP_MEM_TO_REG_0_CNT(1 - 1));
+   tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_SCRATCH);
+
+   /*
+    * This is a bit awkward: we really want a way to invert the
+    * CP_REG_TEST/CP_COND_REG_EXEC logic, so that we can conditionally
+    * execute cmds to use hwbinning when a bit is *not* set.  This
+    * dance is to invert OVERFLOW_FLAG_REG.
+    *
+    * A CP_NOP packet is used to skip executing the 'else' clause
+    * if (b0 set).
+    */
+
+   /* b0 will be set if VSC_DATA or VSC_DATA2 overflow: */
+   tu_cs_emit_pkt7(cs, CP_REG_TEST, 1);
+   tu_cs_emit(cs, A6XX_CP_REG_TEST_0_REG(OVERFLOW_FLAG_REG) |
+                  A6XX_CP_REG_TEST_0_BIT(0) |
+                  A6XX_CP_REG_TEST_0_UNK25);
+
+   /*
+    * The 'if' clause below is 7 dwords long: CP_REG_TO_MEM (header + 3),
+    * the OVERFLOW_FLAG_REG write (header + 1) and the CP_NOP header.
+    * Keep this count in sync when editing the clause.
+    */
+   tu_cs_emit_pkt7(cs, CP_COND_REG_EXEC, 2);
+   tu_cs_emit(cs, 0x10000000);
+   tu_cs_emit(cs, 7); /* conditionally execute next 7 dwords */
+
+   /* if (b0 set) */ {
+      /*
+       * On overflow, copy OVERFLOW_FLAG_REG to the VSC_OVERFLOW slot of
+       * the scratch BO.
+       *
+       * NOTE(review): presumably the CPU reads this slot to detect the
+       * overflow; the reader is not visible in this part of the file --
+       * confirm.
+       */
+      tu_cs_emit_pkt7(cs, CP_REG_TO_MEM, 3);
+      tu_cs_emit(cs, CP_REG_TO_MEM_0_REG(OVERFLOW_FLAG_REG) |
+                     CP_REG_TO_MEM_0_CNT(1 - 1));
+      tu_cs_emit_qw(cs, cmd->scratch_bo.iova + VSC_OVERFLOW);
+
+      tu_cs_emit_pkt4(cs, OVERFLOW_FLAG_REG, 1);
+      tu_cs_emit(cs, 0x0);
+
+      /*
+       * The NOP payload of 2 dwords swallows the 'else' clause (register
+       * write header + value), so it is skipped when 'if' is taken.
+       */
+      tu_cs_emit_pkt7(cs, CP_NOP, 2);
+   } /* else */ {
+      tu_cs_emit_pkt4(cs, OVERFLOW_FLAG_REG, 1);
+      tu_cs_emit(cs, 0x1);
+   }
+}
+
+/*
+ * Emit the binning pass for the current tiling configuration into cs.
+ *
+ * The sequence is:
+ *  - set the window scissor from tile0's offset to the bottom-right corner
+ *    of the render area;
+ *  - switch to RM6_BINNING via CP_SET_MARKER, enable the visibility
+ *    override and CP_SET_MODE, and select VFD binning mode;
+ *  - program the VSC registers (update_vsc_pipe());
+ *  - zero the RB and SP_TP window offsets and call cmd->draw_cs;
+ *  - disable all draw-state groups, flush caches and wait;
+ *  - emit the VSC overflow test (emit_vsc_overflow_test());
+ *  - clear the visibility override and CP_SET_MODE again and write
+ *    RB_CCU_CNTL.
+ *
+ * cmd->wait_for_idle is reset to false on return.
+ */
+static void
+tu6_emit_binning_pass(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
+{
+   const struct tu_tiling_config *tiling = &cmd->state.tiling_config;
+
+   /* Inclusive scissor rectangle covering the whole render area. */
+   uint32_t x1 = tiling->tile0.offset.x;
+   uint32_t y1 = tiling->tile0.offset.y;
+   uint32_t x2 = tiling->render_area.offset.x +
+                 tiling->render_area.extent.width - 1;
+   /* The bottom edge must be derived from the y offset, not the x offset. */
+   uint32_t y2 = tiling->render_area.offset.y +
+                 tiling->render_area.extent.height - 1;
+
+   tu6_emit_window_scissor(cmd, cs, x1, y1, x2, y2);
+
+   tu6_emit_marker(cmd, cs);
+   tu_cs_emit_pkt7(cs, CP_SET_MARKER, 1);
+   tu_cs_emit(cs, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
+   tu6_emit_marker(cmd, cs);
+
+   tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
+   tu_cs_emit(cs, 0x1);
+
+   tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
+   tu_cs_emit(cs, 0x1);
+
+   tu_cs_emit_wfi(cs);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_VFD_MODE_CNTL, 1);
+   tu_cs_emit(cs, A6XX_VFD_MODE_CNTL_BINNING_PASS);
+
+   update_vsc_pipe(cmd, cs);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_PC_UNKNOWN_9805, 1);
+   tu_cs_emit(cs, 0x1);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_SP_UNKNOWN_A0F8, 1);
+   tu_cs_emit(cs, 0x1);
+
+   tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
+   tu_cs_emit(cs, UNK_2C);
+
+   /* Binning runs with both window offsets at the origin. */
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_WINDOW_OFFSET, 1);
+   tu_cs_emit(cs, A6XX_RB_WINDOW_OFFSET_X(0) |
+                  A6XX_RB_WINDOW_OFFSET_Y(0));
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
+   tu_cs_emit(cs, A6XX_SP_TP_WINDOW_OFFSET_X(0) |
+                  A6XX_SP_TP_WINDOW_OFFSET_Y(0));
+
+   /* emit IB to binning drawcmds: */
+   tu_cs_emit_call(cs, &cmd->draw_cs);
+
+   /* Disable every draw-state group after the draws have been issued. */
+   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3);
+   tu_cs_emit(cs, CP_SET_DRAW_STATE__0_COUNT(0) |
+                  CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
+                  CP_SET_DRAW_STATE__0_GROUP_ID(0));
+   tu_cs_emit(cs, CP_SET_DRAW_STATE__1_ADDR_LO(0));
+   tu_cs_emit(cs, CP_SET_DRAW_STATE__2_ADDR_HI(0));
+
+   tu_cs_emit_pkt7(cs, CP_EVENT_WRITE, 1);
+   tu_cs_emit(cs, UNK_2D);
+
+   tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE, false);
+   tu6_cache_flush(cmd, cs);
+
+   tu_cs_emit_wfi(cs);
+
+   tu_cs_emit_pkt7(cs, CP_WAIT_FOR_ME, 0);
+
+   emit_vsc_overflow_test(cmd, cs);
+
+   /* Leave binning mode: undo the override and mode set above. */
+   tu_cs_emit_pkt7(cs, CP_SET_VISIBILITY_OVERRIDE, 1);
+   tu_cs_emit(cs, 0x0);
+
+   tu_cs_emit_pkt7(cs, CP_SET_MODE, 1);
+   tu_cs_emit(cs, 0x0);
+
+   tu_cs_emit_wfi(cs);
+
+   tu_cs_emit_pkt4(cs, REG_A6XX_RB_CCU_CNTL, 1);
+   tu_cs_emit(cs, 0x7c400004);
+
+   cmd->wait_for_idle = false;
+}
+