#include "util/ralloc.h"
#include "util/bitscan.h"
#include "compiler/nir/nir.h"
+#include "pipe/p_state.h"
+
#include "ppir.h"
}
static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
- nir_reg_dest *reg, unsigned mask)
+ nir_register *reg, unsigned mask)
{
- ppir_node *node = ppir_node_create(block, op, reg->reg->index, mask);
+ ppir_node *node = ppir_node_create(block, op, reg->index, mask);
if (!node)
return NULL;
ppir_dest *dest = ppir_node_get_dest(node);
list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
- if (r->index == reg->reg->index) {
+ if (r->index == reg->index) {
dest->reg = r;
break;
}
if (dest->is_ssa)
return ppir_node_create_ssa(block, op, &dest->ssa);
else
- return ppir_node_create_reg(block, op, &dest->reg, mask);
+ return ppir_node_create_reg(block, op, dest->reg.reg, mask);
}
return ppir_node_create(block, op, index, 0);
if (ns->is_ssa) {
child = comp->var_nodes[ns->ssa->index];
+ /* Clone consts for each successor */
+ switch (child->op) {
+ case ppir_op_const:
+ child = ppir_node_clone(node->block, child);
+ break;
+ /* Clone uniforms and load textures for each block */
+ case ppir_op_load_texture:
+ case ppir_op_load_uniform:
+ case ppir_op_load_varying:
+ if (child->block != node->block) {
+ child = ppir_node_clone(node->block, child);
+ comp->var_nodes[ns->ssa->index] = child;
+ }
+ break;
+ default:
+ break;
+ }
+
ppir_node_add_dep(node, child);
}
else {
while (mask) {
int swizzle = ps->swizzle[u_bit_scan(&mask)];
child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
- ppir_node_add_dep(node, child);
+ /* Reg is read before it was written, create a dummy node for it */
+ if (!child) {
+ child = ppir_node_create_reg(node->block, ppir_op_dummy, reg,
+ u_bit_consecutive(0, 4));
+ comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child;
+ }
+ /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
+ if (child && node != child && child->op != ppir_op_dummy)
+ ppir_node_add_dep(node, child);
}
}
- ppir_dest *dest = ppir_node_get_dest(child);
- ppir_node_target_assign(ps, dest);
+ ppir_node_target_assign(ps, child);
}
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
/* not supported */
[0 ... nir_last_opcode] = -1,
- [nir_op_fmov] = ppir_op_mov,
- [nir_op_imov] = ppir_op_mov,
+ [nir_op_mov] = ppir_op_mov,
[nir_op_fmul] = ppir_op_mul,
+ [nir_op_fabs] = ppir_op_abs,
+ [nir_op_fneg] = ppir_op_neg,
[nir_op_fadd] = ppir_op_add,
- [nir_op_fdot2] = ppir_op_dot2,
- [nir_op_fdot3] = ppir_op_dot3,
- [nir_op_fdot4] = ppir_op_dot4,
+ [nir_op_fsum3] = ppir_op_sum3,
+ [nir_op_fsum4] = ppir_op_sum4,
[nir_op_frsq] = ppir_op_rsqrt,
[nir_op_flog2] = ppir_op_log2,
[nir_op_fexp2] = ppir_op_exp2,
[nir_op_fmin] = ppir_op_min,
[nir_op_frcp] = ppir_op_rcp,
[nir_op_ffloor] = ppir_op_floor,
+ [nir_op_fceil] = ppir_op_ceil,
[nir_op_ffract] = ppir_op_fract,
- [nir_op_fand] = ppir_op_and,
- [nir_op_for] = ppir_op_or,
- [nir_op_fxor] = ppir_op_xor,
[nir_op_sge] = ppir_op_ge,
[nir_op_fge] = ppir_op_ge,
[nir_op_slt] = ppir_op_lt,
[nir_op_feq] = ppir_op_eq,
[nir_op_sne] = ppir_op_ne,
[nir_op_fne] = ppir_op_ne,
- [nir_op_fnot] = ppir_op_not,
[nir_op_fcsel] = ppir_op_select,
[nir_op_inot] = ppir_op_not,
+ [nir_op_ftrunc] = ppir_op_trunc,
+ [nir_op_fsat] = ppir_op_sat,
+ [nir_op_fddx] = ppir_op_ddx,
+ [nir_op_fddy] = ppir_op_ddy,
};
static ppir_node *ppir_emit_alu(ppir_block *block, nir_instr *ni)
unsigned src_mask;
switch (op) {
- case ppir_op_dot2:
- src_mask = 0b0011;
- break;
- case ppir_op_dot3:
+ case ppir_op_sum3:
src_mask = 0b0111;
break;
- case ppir_op_dot4:
+ case ppir_op_sum4:
src_mask = 0b1111;
break;
default:
return &node->node;
}
+static ppir_block *ppir_block_create(ppir_compiler *comp);
+
+/* Lazily create the shared "discard block": a block holding a single
+ * ppir_op_discard node, used as the branch target for discard_if.
+ * The block is remembered in comp->discard_block (callers append it to
+ * the block list at the end of compilation).
+ *
+ * Returns false if either the block or the discard node cannot be
+ * allocated.  NOTE(review): on node-allocation failure the half-built
+ * block is left assigned to comp->discard_block — presumably harmless
+ * since compilation aborts on false; confirm against callers. */
+static bool ppir_emit_discard_block(ppir_compiler *comp)
+{
+   ppir_block *block = ppir_block_create(comp);
+   ppir_discard_node *discard;
+   if (!block)
+      return false;
+
+   comp->discard_block = block;
+   block->comp = comp;
+
+   discard = ppir_node_create(block, ppir_op_discard, -1, 0);
+   if (discard)
+      list_addtail(&discard->node.list, &block->node_list);
+   else
+      return false;
+
+   return true;
+}
+
+/* Emit a conditional discard: a one-source branch whose target is the
+ * shared discard block (created on first use).  Only src[0] is filled in
+ * here; the second source and the branch condition are patched later
+ * during lowering, per the inline comment below.
+ *
+ * Returns the branch node, or NULL on allocation failure. */
+static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
+{
+   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
+   ppir_node *node;
+   ppir_compiler *comp = block->comp;
+   ppir_branch_node *branch;
+
+   /* Create the discard block on demand; bail if that fails. */
+   if (!comp->discard_block && !ppir_emit_discard_block(comp))
+      return NULL;
+
+   node = ppir_node_create(block, ppir_op_branch, -1, 0);
+   if (!node)
+      return NULL;
+   branch = ppir_node_to_branch(node);
+
+   /* second src and condition will be updated during lowering */
+   ppir_node_add_src(block->comp, node, &branch->src[0],
+                     &instr->src[0], u_bit_consecutive(0, instr->num_components));
+   branch->num_src = 1;
+   branch->target = comp->discard_block;
+
+   return node;
+}
+
+/* Emit an unconditional discard node.  Returns NULL on allocation
+ * failure (the caller treats a NULL node as a compile error). */
+static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
+{
+   ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);
+
+   return node;
+}
+
static ppir_node *ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
return &lnode->node;
+ case nir_intrinsic_load_frag_coord:
+ case nir_intrinsic_load_point_coord:
+ case nir_intrinsic_load_front_face:
+ if (!instr->dest.is_ssa)
+ mask = u_bit_consecutive(0, instr->num_components);
+
+ ppir_op op;
+ switch (instr->intrinsic) {
+ case nir_intrinsic_load_frag_coord:
+ op = ppir_op_load_fragcoord;
+ break;
+ case nir_intrinsic_load_point_coord:
+ op = ppir_op_load_pointcoord;
+ break;
+ case nir_intrinsic_load_front_face:
+ op = ppir_op_load_frontface;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
+ if (!lnode)
+ return NULL;
+
+ lnode->num_components = instr->num_components;
+ return &lnode->node;
+
case nir_intrinsic_load_uniform:
if (!instr->dest.is_ssa)
mask = u_bit_consecutive(0, instr->num_components);
return &snode->node;
+ case nir_intrinsic_discard:
+ return ppir_emit_discard(block, ni);
+
+ case nir_intrinsic_discard_if:
+ return ppir_emit_discard_if(block, ni);
+
default:
- ppir_error("unsupported nir_intrinsic_instr %d\n", instr->intrinsic);
+ ppir_error("unsupported nir_intrinsic_instr %s\n",
+ nir_intrinsic_infos[instr->intrinsic].name);
return NULL;
}
}
return NULL;
}
- node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, 0);
+ unsigned mask = 0;
+ if (!instr->dest.is_ssa)
+ mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));
+
+ node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
if (!node)
return NULL;
case GLSL_SAMPLER_DIM_EXTERNAL:
break;
default:
- ppir_debug("unsupported sampler dim: %d\n", instr->sampler_dim);
+ ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
return NULL;
}
for (int i = 0; i < instr->coord_components; i++)
node->src_coords.swizzle[i] = i;
- assert(instr->num_srcs == 1);
for (int i = 0; i < instr->num_srcs; i++) {
switch (instr->src[i].src_type) {
case nir_tex_src_coord:
u_bit_consecutive(0, instr->coord_components));
break;
default:
- ppir_debug("unknown texture source");
+ ppir_error("unsupported texture source type\n");
+ assert(0);
return NULL;
}
}
nir_foreach_instr(instr, nblock) {
assert(instr->type < nir_instr_type_phi);
ppir_node *node = ppir_emit_instr[instr->type](block, instr);
- if (node)
- list_addtail(&node->list, &block->node_list);
+ if (!node)
+ return false;
+
+ list_addtail(&node->list, &block->node_list);
}
return true;
return comp;
}
+static void ppir_add_ordering_deps(ppir_compiler *comp)
+{
+   /* Some intrinsics do not have explicit dependencies and thus depend
+    * on instructions order. Consider discard_if and store_output as
+    * example. If we don't add fake dependency of discard_if to store_output
+    * scheduler may put store_output first and since store_output terminates
+    * shader on Utgard PP, rest of it will never be executed.
+    * Add fake dependencies for discard/branch/store to preserve
+    * instruction order.
+    *
+    * TODO: scheduler should schedule discard_if as early as possible otherwise
+    * we may end up with suboptimal code for cases like this:
+    *
+    * s3 = s1 < s2
+    * discard_if s3
+    * s4 = s1 + s2
+    * store s4
+    *
+    * In this case store depends on discard_if and s4, but since dependencies can
+    * be scheduled in any order it can result in code like this:
+    *
+    * instr1: s3 = s1 < s2
+    * instr2: s4 = s1 + s2
+    * instr3: discard_if s3
+    * instr4: store s4
+    */
+   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
+      ppir_node *prev_node = NULL;
+      /* Walk each block backwards; prev_node is the nearest ordering-
+       * sensitive op (discard/store/branch) AFTER the current node in
+       * program order. */
+      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
+         /* Roots (nodes nothing depends on) before an ordering-sensitive
+          * op must not drift past it; consts are position-independent. */
+         if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
+            ppir_node_add_dep(prev_node, node);
+         }
+         if (node->op == ppir_op_discard ||
+             node->op == ppir_op_store_color ||
+             node->op == ppir_op_store_temp ||
+             node->op == ppir_op_branch) {
+            prev_node = node;
+         }
+      }
+   }
+}
+
+/* Report shader statistics (instruction count, loops, spills/fills) via
+ * the pipe debug callback, and to stderr when LIMA_DEBUG_SHADERDB is set.
+ * The string is heap-allocated by asprintf and freed before returning.
+ *
+ * NOTE(review): asprintf failure is only caught by the assert; in NDEBUG
+ * builds ret is unused and shaderdb would be indeterminate on failure —
+ * confirm this is acceptable for a debug-only path. */
+static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
+                                 struct pipe_debug_callback *debug)
+{
+   const struct shader_info *info = &nir->info;
+   char *shaderdb;
+   int ret = asprintf(&shaderdb,
+                      "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
+                      gl_shader_stage_name(info->stage),
+                      comp->cur_instr_index,
+                      comp->num_loops,
+                      comp->num_spills,
+                      comp->num_fills);
+   assert(ret >= 0);
+
+   if (lima_debug & LIMA_DEBUG_SHADERDB)
+      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);
+
+   pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
+   free(shaderdb);
+}
+
+/* Add write-after-read (WAR) dependencies so a register write cannot be
+ * scheduled before earlier reads of the same register.  For every
+ * (block, reg) pair, walk the block backwards: `write` tracks the most
+ * recent write to reg seen so far (i.e. the next write in program
+ * order); any node read of reg encountered before it gets a dep edge
+ * write -> reader, forcing the write to wait. */
+static void ppir_add_write_after_read_deps(ppir_compiler *comp)
+{
+   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
+      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
+         ppir_node *write = NULL;
+         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
+            /* Any source reading reg before `write` creates a WAR hazard. */
+            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
+               ppir_src *src = ppir_node_get_src(node, i);
+               if (src && src->type == ppir_target_register &&
+                   src->reg == reg &&
+                   write)
+                  ppir_node_add_dep(write, node);
+            }
+            /* Dest check comes after the src check so a node that both
+             * reads and writes reg depends on the *later* write, not
+             * itself. */
+            ppir_dest *dest = ppir_node_get_dest(node);
+            if (dest && dest->type == ppir_target_register &&
+                dest->reg == reg)
+               write = node;
+         }
+      }
+   }
+}
+
bool ppir_compile_nir(struct lima_fs_shader_state *prog, struct nir_shader *nir,
- struct ra_regs *ra)
+ struct ra_regs *ra,
+ struct pipe_debug_callback *debug)
{
nir_function_impl *func = nir_shader_get_entrypoint(nir);
ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
if (!ppir_emit_cf_list(comp, &func->body))
goto err_out0;
+
+ /* If we have discard block add it to the very end */
+ if (comp->discard_block)
+ list_addtail(&comp->discard_block->list, &comp->block_list);
+
ppir_node_print_prog(comp);
if (!ppir_lower_prog(comp))
goto err_out0;
+ ppir_add_ordering_deps(comp);
+ ppir_add_write_after_read_deps(comp);
+
+ ppir_node_print_prog(comp);
+
if (!ppir_node_to_instr(comp))
goto err_out0;
if (!ppir_codegen_prog(comp))
goto err_out0;
+ ppir_print_shader_db(nir, comp, debug);
+
ralloc_free(comp);
return true;