#include "util/ralloc.h"
#include "util/hash_table.h"
#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_lowering.h"
#include "tgsi/tgsi_parse.h"
-#include "glsl/nir/nir.h"
-#include "glsl/nir/nir_builder.h"
+#include "compiler/nir/nir.h"
+#include "compiler/nir/nir_builder.h"
#include "nir/tgsi_to_nir.h"
#include "vc4_context.h"
#include "vc4_qpu.h"
intr->const_index[0] = (VC4_NIR_STATE_UNIFORM_OFFSET + contents) * 4;
intr->num_components = 1;
intr->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
- nir_ssa_dest_init(&intr->instr, &intr->dest, 1, NULL);
+ nir_ssa_dest_init(&intr->instr, &intr->dest, 1, 32, NULL);
nir_builder_instr_insert(b, &intr->instr);
return &intr->dest.ssa;
}
uint32_t tile_size = (tile_height * tile_width *
VC4_MAX_SAMPLES * sizeof(uint32_t));
- unsigned unit = instr->sampler_index;
+ unsigned unit = instr->texture_index;
uint32_t w = align(c->key->tex[unit].msaa_width, tile_width);
uint32_t w_tiles = w / tile_width;
uint32_t h = align(c->key->tex[unit].msaa_height, tile_height);
{
struct qreg s, t, r, lod, proj, compare;
bool is_txb = false, is_txl = false, has_proj = false;
- unsigned unit = instr->sampler_index;
+ unsigned unit = instr->texture_index;
if (instr->op == nir_texop_txf) {
ntq_emit_txf(c, instr);
struct qreg src0 = ntq_get_alu_src(c, compare_instr, 0);
struct qreg src1 = ntq_get_alu_src(c, compare_instr, 1);
- if (nir_op_infos[compare_instr->op].input_types[0] == nir_type_float)
+ unsigned unsized_type =
+ nir_alu_type_get_base_type(nir_op_infos[compare_instr->op].input_types[0]);
+ if (unsized_type == nir_type_float)
qir_SF(c, qir_FSUB(c, src0, src1));
else
qir_SF(c, qir_SUB(c, src0, src1));
color = qir_uniform_ui(c, 0);
}
- if (c->discard.file != QFILE_NULL)
- qir_TLB_DISCARD_SETUP(c, c->discard);
+ uint32_t discard_cond = QPU_COND_ALWAYS;
+ if (c->discard.file != QFILE_NULL) {
+ qir_SF(c, c->discard);
+ discard_cond = QPU_COND_ZS;
+ }
if (c->fs_key->stencil_enabled) {
qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 0));
} else {
z = qir_FRAG_Z(c);
}
- qir_TLB_Z_WRITE(c, z);
+ struct qinst *inst = qir_TLB_Z_WRITE(c, z);
+ inst->cond = discard_cond;
}
if (!c->msaa_per_sample_output) {
- qir_TLB_COLOR_WRITE(c, color);
+ struct qinst *inst = qir_TLB_COLOR_WRITE(c, color);
+ inst->cond = discard_cond;
} else {
- for (int i = 0; i < VC4_MAX_SAMPLES; i++)
- qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]);
+ for (int i = 0; i < VC4_MAX_SAMPLES; i++) {
+ struct qinst *inst = qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]);
+ inst->cond = discard_cond;
+ }
}
}
do {
progress = false;
- nir_lower_vars_to_ssa(s);
- nir_lower_alu_to_scalar(s);
+ NIR_PASS_V(s, nir_lower_vars_to_ssa);
+ NIR_PASS_V(s, nir_lower_alu_to_scalar);
- progress = nir_copy_prop(s) || progress;
- progress = nir_opt_dce(s) || progress;
- progress = nir_opt_cse(s) || progress;
- progress = nir_opt_peephole_select(s) || progress;
- progress = nir_opt_algebraic(s) || progress;
- progress = nir_opt_constant_folding(s) || progress;
- progress = nir_opt_undef(s) || progress;
+ NIR_PASS(progress, s, nir_copy_prop);
+ NIR_PASS(progress, s, nir_opt_dce);
+ NIR_PASS(progress, s, nir_opt_cse);
+ NIR_PASS(progress, s, nir_opt_peephole_select);
+ NIR_PASS(progress, s, nir_opt_algebraic);
+ NIR_PASS(progress, s, nir_opt_constant_folding);
+ NIR_PASS(progress, s, nir_opt_undef);
} while (progress);
}
{
struct qreg *qregs = ntq_init_ssa_def(c, &instr->def);
for (int i = 0; i < instr->def.num_components; i++)
- qregs[i] = qir_uniform_ui(c, instr->value.u[i]);
+ qregs[i] = qir_uniform_ui(c, instr->value.u32[i]);
_mesa_hash_table_insert(c->def_ht, &instr->def, qregs);
}
assert(instr->num_components == 1);
const_offset = nir_src_as_const_value(instr->src[0]);
if (const_offset) {
- offset = instr->const_index[0] + const_offset->u[0];
+ offset = instr->const_index[0] + const_offset->u32[0];
assert(offset % 4 == 0);
/* We need dwords */
offset = offset / 4;
break;
case nir_intrinsic_load_user_clip_plane:
- *dest = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
- instr->const_index[0]);
+ for (int i = 0; i < instr->num_components; i++) {
+ dest[i] = qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
+ instr->const_index[0] * 4 + i);
+ }
break;
case nir_intrinsic_load_sample_mask_in:
const_offset = nir_src_as_const_value(instr->src[0]);
assert(const_offset && "vc4 doesn't support indirect inputs");
if (instr->const_index[0] >= VC4_NIR_TLB_COLOR_READ_INPUT) {
- assert(const_offset->u[0] == 0);
+ assert(const_offset->u32[0] == 0);
/* Reads of the per-sample color need to be done in
* order.
*/
}
*dest = c->color_reads[sample_index];
} else {
- offset = instr->const_index[0] + const_offset->u[0];
+ offset = instr->const_index[0] + const_offset->u32[0];
*dest = c->inputs[offset];
}
break;
case nir_intrinsic_store_output:
const_offset = nir_src_as_const_value(instr->src[1]);
assert(const_offset && "vc4 doesn't support indirect outputs");
- offset = instr->const_index[0] + const_offset->u[0];
+ offset = instr->const_index[0] + const_offset->u32[0];
/* MSAA color outputs are the only case where we have an
* output that's not lowered to being a store of a single 32
}
}
+static void ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list);
+
+/**
+ * Placeholder for NIR loop nodes: real looping is not implemented here.
+ *
+ * A warning is written to stderr, then the loop's body list is handed to
+ * ntq_emit_cf_list() a single time.
+ */
+static void
+ntq_emit_loop(struct vc4_compile *c, nir_loop *nloop)
+{
+        struct exec_list *body = &nloop->body;
+
+        fputs("LOOPS not fully handled. Rendering errors likely.\n", stderr);
+        ntq_emit_cf_list(c, body);
+}
+
+/**
+ * Handler for NIR function nodes, which are not handled by this code.
+ *
+ * Neither argument is examined: a message is written to stderr and the
+ * process is then terminated with abort(), so this never returns.
+ */
+static void
+ntq_emit_function(struct vc4_compile *c, nir_function_impl *func)
+{
+        fputs("FUNCTIONS not handled.\n", stderr);
+        abort();
+}
+
static void
ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list)
{
foreach_list_typed(nir_cf_node, node, node, list) {
switch (node->type) {
- /* case nir_cf_node_loop: */
case nir_cf_node_block:
ntq_emit_block(c, nir_cf_node_as_block(node));
break;
ntq_emit_if(c, nir_cf_node_as_if(node));
break;
+ case nir_cf_node_loop:
+ ntq_emit_loop(c, nir_cf_node_as_loop(node));
+ break;
+
+ case nir_cf_node_function:
+ ntq_emit_function(c, nir_cf_node_as_function(node));
+ break;
+
default:
- assert(0);
+ fprintf(stderr, "Unknown NIR node type\n");
+ abort();
}
}
}
}
static const nir_shader_compiler_options nir_options = {
+ .lower_extract_byte = true,
+ .lower_extract_word = true,
.lower_ffma = true,
.lower_flrp = true,
.lower_fpow = true,
}
c->s = tgsi_to_nir(tokens, &nir_options);
- nir_opt_global_to_local(c->s);
- nir_convert_to_ssa(c->s);
+ NIR_PASS_V(c->s, nir_opt_global_to_local);
+ NIR_PASS_V(c->s, nir_convert_to_ssa);
if (stage == QSTAGE_FRAG)
- vc4_nir_lower_blend(c);
+ NIR_PASS_V(c->s, vc4_nir_lower_blend, c);
struct nir_lower_tex_options tex_options = {
/* We would need to implement txs, but we don't want the
}
}
- nir_lower_tex(c->s, &tex_options);
+ NIR_PASS_V(c->s, nir_lower_tex, &tex_options);
if (c->fs_key && c->fs_key->light_twoside)
- nir_lower_two_sided_color(c->s);
+ NIR_PASS_V(c->s, nir_lower_two_sided_color);
if (stage == QSTAGE_FRAG)
- nir_lower_clip_fs(c->s, c->key->ucp_enables);
+ NIR_PASS_V(c->s, nir_lower_clip_fs, c->key->ucp_enables);
else
- nir_lower_clip_vs(c->s, c->key->ucp_enables);
+ NIR_PASS_V(c->s, nir_lower_clip_vs, c->key->ucp_enables);
- vc4_nir_lower_io(c);
- vc4_nir_lower_txf_ms(c);
- nir_lower_idiv(c->s);
- nir_lower_load_const_to_scalar(c->s);
+ NIR_PASS_V(c->s, vc4_nir_lower_io, c);
+ NIR_PASS_V(c->s, vc4_nir_lower_txf_ms, c);
+ NIR_PASS_V(c->s, nir_lower_idiv);
+ NIR_PASS_V(c->s, nir_lower_load_const_to_scalar);
vc4_optimize_nir(c->s);
- nir_remove_dead_variables(c->s);
-
- nir_convert_from_ssa(c->s, true);
+ NIR_PASS_V(c->s, nir_remove_dead_variables);
+ NIR_PASS_V(c->s, nir_convert_from_ssa, true);
if (vc4_debug & VC4_DEBUG_SHADERDB) {
fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d NIR instructions\n",