projects
/
mesa.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
pan/mdg: Implement nir_intrinsic_load_sample_mask_in
[mesa.git]
/
src
/
panfrost
/
midgard
/
midgard_compile.c
diff --git
a/src/panfrost/midgard/midgard_compile.c
b/src/panfrost/midgard/midgard_compile.c
index d8d1852f5e212291d9876ad4d607978f363838bc..544a707453ff3895eef96d4adfade9f46ac5bbd8 100644
(file)
--- a/
src/panfrost/midgard/midgard_compile.c
+++ b/
src/panfrost/midgard/midgard_compile.c
@@
-461,6
+461,23
@@
midgard_nir_reorder_writeout(nir_shader *nir)
return progress;
}
return progress;
}
+static bool
+mdg_is_64(const nir_instr *instr, const void *_unused)
+{
+ const nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+ if (nir_dest_bit_size(alu->dest.dest) == 64)
+ return true;
+
+ switch (alu->op) {
+ case nir_op_umul_high:
+ case nir_op_imul_high:
+ return true;
+ default:
+ return false;
+ }
+}
+
/* Flushes undefined values to zero */
static void
/* Flushes undefined values to zero */
static void
@@
-543,6
+560,8
@@
optimise_nir(nir_shader *nir, unsigned quirks, bool is_blend)
NIR_PASS(progress, nir, nir_opt_vectorize);
} while (progress);
NIR_PASS(progress, nir, nir_opt_vectorize);
} while (progress);
+ NIR_PASS_V(nir, nir_lower_alu_to_scalar, mdg_is_64, NULL);
+
/* Run after opts so it can hit more */
if (!is_blend)
NIR_PASS(progress, nir, nir_fuse_io_16);
/* Run after opts so it can hit more */
if (!is_blend)
NIR_PASS(progress, nir, nir_fuse_io_16);
@@
-858,8
+877,8
@@
emit_alu(compiler_context *ctx, nir_alu_instr *instr)
/* Should we swap arguments? */
bool flip_src12 = false;
/* Should we swap arguments? */
bool flip_src12 = false;
- unsigned src_bitsize = nir_src_bit_size(instr->src[0].src);
- unsigned dst_bitsize = nir_dest_bit_size(*dest);
+
ASSERTED
unsigned src_bitsize = nir_src_bit_size(instr->src[0].src);
+
ASSERTED
unsigned dst_bitsize = nir_dest_bit_size(*dest);
enum midgard_roundmode roundmode = MIDGARD_RTE;
enum midgard_roundmode roundmode = MIDGARD_RTE;
@@
-881,6
+900,8
@@
emit_alu(compiler_context *ctx, nir_alu_instr *instr)
ALU_CASE(iadd, iadd);
ALU_CASE(isub, isub);
ALU_CASE(imul, imul);
ALU_CASE(iadd, iadd);
ALU_CASE(isub, isub);
ALU_CASE(imul, imul);
+ ALU_CASE(imul_high, imul);
+ ALU_CASE(umul_high, imul);
/* Zero shoved as second-arg */
ALU_CASE(iabs, iabsdiff);
/* Zero shoved as second-arg */
ALU_CASE(iabs, iabsdiff);
@@
-888,7
+909,7
@@
emit_alu(compiler_context *ctx, nir_alu_instr *instr)
ALU_CASE(mov, imov);
ALU_CASE_CMP(feq32, feq, false);
ALU_CASE(mov, imov);
ALU_CASE_CMP(feq32, feq, false);
- ALU_CASE_CMP(fne32, fne, false);
+ ALU_CASE_CMP(fne
u
32, fne, false);
ALU_CASE_CMP(flt32, flt, false);
ALU_CASE_CMP(ieq32, ieq, true);
ALU_CASE_CMP(ine32, ine, true);
ALU_CASE_CMP(flt32, flt, false);
ALU_CASE_CMP(ieq32, ieq, true);
ALU_CASE_CMP(ine32, ine, true);
@@
-1059,7
+1080,9
@@
emit_alu(compiler_context *ctx, nir_alu_instr *instr)
unsigned outmod = 0;
bool is_int = midgard_is_integer_op(op);
unsigned outmod = 0;
bool is_int = midgard_is_integer_op(op);
- if (midgard_is_integer_out_op(op)) {
+ if (instr->op == nir_op_umul_high || instr->op == nir_op_imul_high) {
+ outmod = midgard_outmod_int_high;
+ } else if (midgard_is_integer_out_op(op)) {
outmod = midgard_outmod_int_wrap;
} else if (instr->op == nir_op_fsat) {
outmod = midgard_outmod_sat;
outmod = midgard_outmod_int_wrap;
} else if (instr->op == nir_op_fsat) {
outmod = midgard_outmod_sat;
@@
-1334,6
+1357,15
@@
emit_global(
mir_set_offset(ctx, &ins, offset, is_shared);
mir_set_intr_mask(instr, &ins, is_read);
mir_set_offset(ctx, &ins, offset, is_shared);
mir_set_intr_mask(instr, &ins, is_read);
+ /* Set a valid swizzle for masked out components */
+ assert(ins.mask);
+ unsigned first_component = __builtin_ffs(ins.mask) - 1;
+
+ for (unsigned i = 0; i < ARRAY_SIZE(ins.swizzle[0]); ++i) {
+ if (!(ins.mask & (1 << i)))
+ ins.swizzle[0][i] = first_component;
+ }
+
emit_mir_instruction(ctx, ins);
}
emit_mir_instruction(ctx, ins);
}
@@
-1547,13
+1579,13
@@
emit_vertex_builtin(compiler_context *ctx, nir_intrinsic_instr *instr)
}
static void
}
static void
-emit_
msaa_builtin(compiler_context *ctx, nir_intrinsic_instr *instr
)
+emit_
special(compiler_context *ctx, nir_intrinsic_instr *instr, unsigned idx
)
{
unsigned reg = nir_dest_index(&instr->dest);
midgard_instruction ld = m_ld_color_buffer_32u(reg, 0);
ld.op = midgard_op_ld_color_buffer_32u_old;
{
unsigned reg = nir_dest_index(&instr->dest);
midgard_instruction ld = m_ld_color_buffer_32u(reg, 0);
ld.op = midgard_op_ld_color_buffer_32u_old;
- ld.load_store.address =
97
;
+ ld.load_store.address =
idx
;
ld.load_store.arg_2 = 0x1E;
for (int i = 0; i < 4; ++i)
ld.load_store.arg_2 = 0x1E;
for (int i = 0; i < 4; ++i)
@@
-1836,7
+1868,7
@@
emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
else if (combined)
rt = MIDGARD_ZS_RT;
else
else if (combined)
rt = MIDGARD_ZS_RT;
else
-
assert(0
);
+
unreachable("bad rt"
);
unsigned reg_z = ~0, reg_s = ~0;
if (combined) {
unsigned reg_z = ~0, reg_s = ~0;
if (combined) {
@@
-1947,8
+1979,12
@@
emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
emit_vertex_builtin(ctx, instr);
break;
emit_vertex_builtin(ctx, instr);
break;
+ case nir_intrinsic_load_sample_mask_in:
+ emit_special(ctx, instr, 96);
+ break;
+
case nir_intrinsic_load_sample_id:
case nir_intrinsic_load_sample_id:
- emit_
msaa_builtin(ctx, instr
);
+ emit_
special(ctx, instr, 97
);
break;
case nir_intrinsic_memory_barrier_buffer:
break;
case nir_intrinsic_memory_barrier_buffer:
@@
-2392,6
+2428,13
@@
max_bitsize_for_alu(midgard_instruction *ins)
break;
}
break;
}
+ /* High implies computing at a higher bitsize, e.g umul_high of 32-bit
+ * requires computing at 64-bit */
+ if (midgard_is_integer_out_op(ins->op) && ins->outmod == midgard_outmod_int_high) {
+ max_bitsize *= 2;
+ assert(max_bitsize <= 64);
+ }
+
return max_bitsize;
}
return max_bitsize;
}
@@
-2855,7
+2898,6
@@
midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, bool is_b
/* Initialize at a global (not block) level hash tables */
ctx->ssa_constants = _mesa_hash_table_u64_create(NULL);
/* Initialize at a global (not block) level hash tables */
ctx->ssa_constants = _mesa_hash_table_u64_create(NULL);
- ctx->hash_to_temp = _mesa_hash_table_u64_create(NULL);
/* Lower gl_Position pre-optimisation, but after lowering vars to ssa
* (so we don't accidentally duplicate the epilogue since mesa/st has
/* Lower gl_Position pre-optimisation, but after lowering vars to ssa
* (so we don't accidentally duplicate the epilogue since mesa/st has
@@
-2898,7
+2940,7
@@
midgard_compile_shader_nir(nir_shader *nir, panfrost_program *program, bool is_b
/* Assign sysvals and counts, now that we're sure
* (post-optimisation) */
/* Assign sysvals and counts, now that we're sure
* (post-optimisation) */
- panfrost_nir_assign_sysvals(&ctx->sysvals, nir);
+ panfrost_nir_assign_sysvals(&ctx->sysvals,
ctx,
nir);
program->sysval_count = ctx->sysvals.sysval_count;
memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count);
program->sysval_count = ctx->sysvals.sysval_count;
memcpy(program->sysvals, ctx->sysvals.sysvals, sizeof(ctx->sysvals.sysvals[0]) * ctx->sysvals.sysval_count);