+/* MI_MATH only exists on Haswell+ */
+#if GEN_IS_HASWELL || GEN_GEN >= 8
+
+static uint32_t
+mi_alu(uint32_t opcode, uint32_t op1, uint32_t op2)
+{
+ struct GENX(MI_MATH_ALU_INSTRUCTION) instr = {
+ .ALUOpcode = opcode,
+ .Operand1 = op1,
+ .Operand2 = op2,
+ };
+
+ uint32_t dw;
+ GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr);
+
+ return dw;
+}
+
+#define CS_GPR(n) (0x2600 + (n) * 8)
+
+/* Emit dwords to multiply GPR0 by N */
+static void
+build_alu_multiply_gpr0(uint32_t *dw, unsigned *dw_count, uint32_t N)
+{
+ VK_OUTARRAY_MAKE(out, dw, dw_count);
+
+#define append_alu(opcode, operand1, operand2) \
+ vk_outarray_append(&out, alu_dw) *alu_dw = mi_alu(opcode, operand1, operand2)
+
+ assert(N > 0);
+ unsigned top_bit = 31 - __builtin_clz(N);
+ for (int i = top_bit - 1; i >= 0; i--) {
+ /* We get our initial data in GPR0 and we write the final data out to
+ * GPR0 but we use GPR1 as our scratch register.
+ */
+ unsigned src_reg = i == top_bit - 1 ? MI_ALU_REG0 : MI_ALU_REG1;
+ unsigned dst_reg = i == 0 ? MI_ALU_REG0 : MI_ALU_REG1;
+
+ /* Shift the current value left by 1 */
+ append_alu(MI_ALU_LOAD, MI_ALU_SRCA, src_reg);
+ append_alu(MI_ALU_LOAD, MI_ALU_SRCB, src_reg);
+ append_alu(MI_ALU_ADD, 0, 0);
+
+ if (N & (1 << i)) {
+ /* Store ACCU to R1 and add R0 to R1 */
+ append_alu(MI_ALU_STORE, MI_ALU_REG1, MI_ALU_ACCU);
+ append_alu(MI_ALU_LOAD, MI_ALU_SRCA, MI_ALU_REG0);
+ append_alu(MI_ALU_LOAD, MI_ALU_SRCB, MI_ALU_REG1);
+ append_alu(MI_ALU_ADD, 0, 0);
+ }
+
+ append_alu(MI_ALU_STORE, dst_reg, MI_ALU_ACCU);
+ }
+
+#undef append_alu
+}
+
+static void
+emit_mul_gpr0(struct anv_batch *batch, uint32_t N)
+{
+ uint32_t num_dwords;
+ build_alu_multiply_gpr0(NULL, &num_dwords, N);
+
+ uint32_t *dw = anv_batch_emitn(batch, 1 + num_dwords, GENX(MI_MATH));
+ build_alu_multiply_gpr0(dw + 1, &num_dwords, N);
+}
+
+#endif /* GEN_IS_HASWELL || GEN_GEN >= 8 */
+
+static void
+load_indirect_parameters(struct anv_cmd_buffer *cmd_buffer,
+ struct anv_buffer *buffer, uint64_t offset,
+ bool indexed)
+{
+ struct anv_batch *batch = &cmd_buffer->batch;
+ struct anv_bo *bo = buffer->bo;
+ uint32_t bo_offset = buffer->offset + offset;
+
+ emit_lrm(batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
+
+ unsigned view_count = anv_subpass_view_count(cmd_buffer->state.subpass);
+ if (view_count > 1) {
+#if GEN_IS_HASWELL || GEN_GEN >= 8
+ emit_lrm(batch, CS_GPR(0), bo, bo_offset + 4);
+ emit_mul_gpr0(batch, view_count);
+ emit_lrr(batch, GEN7_3DPRIM_INSTANCE_COUNT, CS_GPR(0));
+#else
+ anv_finishme("Multiview + indirect draw requires MI_MATH\n"
+ "MI_MATH is not supported on Ivy Bridge");
+ emit_lrm(batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
+#endif
+ } else {
+ emit_lrm(batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
+ }
+
+ emit_lrm(batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
+
+ if (indexed) {
+ emit_lrm(batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
+ emit_lrm(batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);
+ } else {
+ emit_lrm(batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
+ emit_lri(batch, GEN7_3DPRIM_BASE_VERTEX, 0);
+ }
+}
+