/* for conditionally setting boolean flag(s): evaluates to 'val' when
 * 'bool' is true, else 0 -- intended to be OR'd into a flags bitfield.
 */
#define COND(bool, val) ((bool) ? (val) : 0)

/* debug printf, automatically prefixed with function name and line number: */
#define DBG(fmt, ...) \
		do { debug_printf("%s:%d: "fmt "\n", \
				__FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)
-
/* Per-shader-compile state for the NIR -> ir3 frontend.  Allocated in
 * compile_init(), freed (along with everything ralloc'd off of it) in
 * compile_free().
 */
struct ir3_context {
	struct ir3_compiler *compiler;  /* backend compiler / gpu info */

	struct nir_shader *s;           /* the shader being compiled */

	struct nir_instr *cur_instr;  /* current instruction, just for debug */

	struct ir3 *ir;                 /* the ir3 being built */
	struct ir3_shader_variant *so;  /* the variant being compiled */

	struct ir3_block *block;      /* the current block */
	struct ir3_block *in_block;   /* block created for shader inputs */

	nir_function_impl *impl;        /* impl of the function being compiled */

	/* For fragment shaders, varyings are not actual shader inputs,
	 * instead the hw passes a varying-coord which is used with
	 * bary.f.
	 *
	 * But NIR doesn't know that, it still declares varyings as
	 * inputs.  So we do all the input tracking normally and fix
	 * things up after compile_instructions()
	 *
	 * NOTE that frag_vcoord is the hardware position (possibly it
	 * is actually an index or tag or some such.. it is *not*
	 * values that can be directly used for gl_FragCoord..)
	 */
	struct ir3_instruction *frag_vcoord;

	/* for fragment shaders, for gl_FrontFacing and gl_FragCoord: */
	struct ir3_instruction *frag_face, *frag_coord;

	/* For vertex shaders, keep track of the system values sources */
	struct ir3_instruction *vertex_id, *basevertex, *instance_id;

	/* For fragment shaders: */
	struct ir3_instruction *samp_id, *samp_mask_in;

	/* Compute shader inputs: */
	struct ir3_instruction *local_invocation_id, *work_group_id;

	/* mapping from nir_register to defining instruction: */
	struct hash_table *def_ht;

	unsigned num_arrays;            /* used to assign unique array ids */

	/* a common pattern for indirect addressing is to request the
	 * same address register multiple times.  To avoid generating
	 * duplicate instruction sequences (which our backend does not
	 * try to clean up, since that should be done as the NIR stage)
	 * we cache the address value generated for a given src value:
	 *
	 * Note that we have to cache these per alignment, since same
	 * src used for an array of vec1 cannot be also used for an
	 * array of vec4.
	 */
	struct hash_table *addr_ht[4];

	/* last dst array, for indirect we need to insert a var-store.
	 */
	struct ir3_instruction **last_dst;
	unsigned last_dst_n;

	/* maps nir_block to ir3_block, mostly for the purposes of
	 * figuring out the blocks successors
	 */
	struct hash_table *block_ht;

	/* on a4xx, bitmask of samplers which need astc+srgb workaround: */
	unsigned astc_srgb;

	unsigned samples;               /* bitmask of x,y sample shifts */

	unsigned max_texture_index;     /* highest texture index seen */

	/* set if we encounter something we can't handle yet, so we
	 * can bail cleanly and fallback to TGSI compiler f/e
	 */
	bool error;
};
-
-/* gpu pointer size in units of 32bit registers/slots */
-static unsigned pointer_size(struct ir3_context *ctx)
-{
- return (ctx->compiler->gpu_id >= 500) ? 2 : 1;
-}
-
-static struct ir3_instruction * create_immed(struct ir3_block *block, uint32_t val);
-static struct ir3_block * get_block(struct ir3_context *ctx, const nir_block *nblock);
-
-
-static struct ir3_context *
-compile_init(struct ir3_compiler *compiler,
- struct ir3_shader_variant *so)
-{
- struct ir3_context *ctx = rzalloc(NULL, struct ir3_context);
-
- if (compiler->gpu_id >= 400) {
- if (so->type == MESA_SHADER_VERTEX) {
- ctx->astc_srgb = so->key.vastc_srgb;
- } else if (so->type == MESA_SHADER_FRAGMENT) {
- ctx->astc_srgb = so->key.fastc_srgb;
- }
-
- } else {
- if (so->type == MESA_SHADER_VERTEX) {
- ctx->samples = so->key.vsamples;
- } else if (so->type == MESA_SHADER_FRAGMENT) {
- ctx->samples = so->key.fsamples;
- }
- }
-
- ctx->compiler = compiler;
- ctx->so = so;
- ctx->def_ht = _mesa_hash_table_create(ctx,
- _mesa_hash_pointer, _mesa_key_pointer_equal);
- ctx->block_ht = _mesa_hash_table_create(ctx,
- _mesa_hash_pointer, _mesa_key_pointer_equal);
-
- /* TODO: maybe generate some sort of bitmask of what key
- * lowers vs what shader has (ie. no need to lower
- * texture clamp lowering if no texture sample instrs)..
- * although should be done further up the stack to avoid
- * creating duplicate variants..
- */
-
- if (ir3_key_lowers_nir(&so->key)) {
- nir_shader *s = nir_shader_clone(ctx, so->shader->nir);
- ctx->s = ir3_optimize_nir(so->shader, s, &so->key);
- } else {
- /* fast-path for shader key that lowers nothing in NIR: */
- ctx->s = so->shader->nir;
- }
-
- /* this needs to be the last pass run, so do this here instead of
- * in ir3_optimize_nir():
- */
- NIR_PASS_V(ctx->s, nir_lower_locals_to_regs);
- NIR_PASS_V(ctx->s, nir_convert_from_ssa, true);
-
- if (ir3_shader_debug & IR3_DBG_DISASM) {
- printf("dump nir%dv%d: type=%d, k={cts=%u,hp=%u}",
- so->shader->id, so->id, so->type,
- so->key.color_two_side, so->key.half_precision);
- nir_print_shader(ctx->s, stdout);
- }
-
- if (shader_debug_enabled(so->type)) {
- fprintf(stderr, "NIR (final form) for %s shader:\n",
- _mesa_shader_stage_to_string(so->type));
- nir_print_shader(ctx->s, stderr);
- }
-
- ir3_nir_scan_driver_consts(ctx->s, &so->const_layout);
-
- so->num_uniforms = ctx->s->num_uniforms;
- so->num_ubos = ctx->s->info.num_ubos;
-
- /* Layout of constant registers, each section aligned to vec4. Note
- * that pointer size (ubo, etc) changes depending on generation.
- *
- * user consts
- * UBO addresses
- * SSBO sizes
- * if (vertex shader) {
- * driver params (IR3_DP_*)
- * if (stream_output.num_outputs > 0)
- * stream-out addresses
- * }
- * immediates
- *
- * Immediates go last mostly because they are inserted in the CP pass
- * after the nir -> ir3 frontend.
- */
- unsigned constoff = align(ctx->s->num_uniforms, 4);
- unsigned ptrsz = pointer_size(ctx);
-
- memset(&so->constbase, ~0, sizeof(so->constbase));
-
- if (so->num_ubos > 0) {
- so->constbase.ubo = constoff;
- constoff += align(ctx->s->info.num_ubos * ptrsz, 4) / 4;
- }
-
- if (so->const_layout.ssbo_size.count > 0) {
- unsigned cnt = so->const_layout.ssbo_size.count;
- so->constbase.ssbo_sizes = constoff;
- constoff += align(cnt, 4) / 4;
- }
-
- if (so->const_layout.image_dims.count > 0) {
- unsigned cnt = so->const_layout.image_dims.count;
- so->constbase.image_dims = constoff;
- constoff += align(cnt, 4) / 4;
- }
-
- unsigned num_driver_params = 0;
- if (so->type == MESA_SHADER_VERTEX) {
- num_driver_params = IR3_DP_VS_COUNT;
- } else if (so->type == MESA_SHADER_COMPUTE) {
- num_driver_params = IR3_DP_CS_COUNT;
- }
-
- so->constbase.driver_param = constoff;
- constoff += align(num_driver_params, 4) / 4;
-
- if ((so->type == MESA_SHADER_VERTEX) &&
- (compiler->gpu_id < 500) &&
- so->shader->stream_output.num_outputs > 0) {
- so->constbase.tfbo = constoff;
- constoff += align(IR3_MAX_SO_BUFFERS * ptrsz, 4) / 4;
- }
-
- so->constbase.immediate = constoff;
-
- return ctx;
-}
-
/* Report a compile error.  When we know which instruction triggered it
 * (ctx->cur_instr set) the message is attached to that instruction via an
 * annotated NIR dump; otherwise the message is just printed.  Either way
 * the shader is dumped, ctx->error is set so callers can bail cleanly,
 * and we assert in debug builds.
 */
static void
compile_error(struct ir3_context *ctx, const char *format, ...)
{
	struct hash_table *errors = NULL;
	va_list ap;
	va_start(ap, format);
	if (ctx->cur_instr) {
		/* map of instr -> message, consumed by
		 * nir_print_shader_annotated() below; the message is
		 * ralloc'd off the table so one ralloc_free() cleans up both:
		 */
		errors = _mesa_hash_table_create(NULL,
				_mesa_hash_pointer,
				_mesa_key_pointer_equal);
		char *msg = ralloc_vasprintf(errors, format, ap);
		_mesa_hash_table_insert(errors, ctx->cur_instr, msg);
	} else {
		_debug_vprintf(format, ap);
	}
	va_end(ap);
	/* NULL 'errors' is fine here -- shader prints without annotations: */
	nir_print_shader_annotated(ctx->s, stdout, errors);
	ralloc_free(errors);
	ctx->error = true;
	debug_assert(0);
}
-
/* soft assert: on failure reports via compile_error() (setting ctx->error)
 * instead of aborting release builds:
 */
#define compile_assert(ctx, cond) do { \
		if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \
	} while (0)
-
/* free the context and everything ralloc'd off of it (hash tables,
 * cloned NIR, arrays, ..):
 */
static void
compile_free(struct ir3_context *ctx)
{
	ralloc_free(ctx);
}
-
-static void
-declare_array(struct ir3_context *ctx, nir_register *reg)
-{
- struct ir3_array *arr = rzalloc(ctx, struct ir3_array);
- arr->id = ++ctx->num_arrays;
- /* NOTE: sometimes we get non array regs, for example for arrays of
- * length 1. See fs-const-array-of-struct-of-array.shader_test. So
- * treat a non-array as if it was an array of length 1.
- *
- * It would be nice if there was a nir pass to convert arrays of
- * length 1 to ssa.
- */
- arr->length = reg->num_components * MAX2(1, reg->num_array_elems);
- compile_assert(ctx, arr->length > 0);
- arr->r = reg;
- list_addtail(&arr->node, &ctx->ir->array_list);
-}
-
-static struct ir3_array *
-get_array(struct ir3_context *ctx, nir_register *reg)
-{
- list_for_each_entry (struct ir3_array, arr, &ctx->ir->array_list, node) {
- if (arr->r == reg)
- return arr;
- }
- compile_error(ctx, "bogus reg: %s\n", reg->name);
- return NULL;
-}
-
-/* relative (indirect) if address!=NULL */
-static struct ir3_instruction *
-create_array_load(struct ir3_context *ctx, struct ir3_array *arr, int n,
- struct ir3_instruction *address)
-{
- struct ir3_block *block = ctx->block;
- struct ir3_instruction *mov;
- struct ir3_register *src;
-
- mov = ir3_instr_create(block, OPC_MOV);
- mov->cat1.src_type = TYPE_U32;
- mov->cat1.dst_type = TYPE_U32;
- mov->barrier_class = IR3_BARRIER_ARRAY_R;
- mov->barrier_conflict = IR3_BARRIER_ARRAY_W;
- ir3_reg_create(mov, 0, 0);
- src = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
- COND(address, IR3_REG_RELATIV));
- src->instr = arr->last_write;
- src->size = arr->length;
- src->array.id = arr->id;
- src->array.offset = n;
-
- if (address)
- ir3_instr_set_address(mov, address);
-
- return mov;
-}
-
-/* relative (indirect) if address!=NULL */
-static void
-create_array_store(struct ir3_context *ctx, struct ir3_array *arr, int n,
- struct ir3_instruction *src, struct ir3_instruction *address)
-{
- struct ir3_block *block = ctx->block;
- struct ir3_instruction *mov;
- struct ir3_register *dst;
-
- /* if not relative store, don't create an extra mov, since that
- * ends up being difficult for cp to remove.
- */
- if (!address) {
- dst = src->regs[0];
-
- src->barrier_class |= IR3_BARRIER_ARRAY_W;
- src->barrier_conflict |= IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
-
- dst->flags |= IR3_REG_ARRAY;
- dst->instr = arr->last_write;
- dst->size = arr->length;
- dst->array.id = arr->id;
- dst->array.offset = n;
-
- arr->last_write = src;
-
- array_insert(block, block->keeps, src);
-
- return;
- }
-
- mov = ir3_instr_create(block, OPC_MOV);
- mov->cat1.src_type = TYPE_U32;
- mov->cat1.dst_type = TYPE_U32;
- mov->barrier_class = IR3_BARRIER_ARRAY_W;
- mov->barrier_conflict = IR3_BARRIER_ARRAY_R | IR3_BARRIER_ARRAY_W;
- dst = ir3_reg_create(mov, 0, IR3_REG_ARRAY |
- COND(address, IR3_REG_RELATIV));
- dst->instr = arr->last_write;
- dst->size = arr->length;
- dst->array.id = arr->id;
- dst->array.offset = n;
- ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = src;
-
- if (address)
- ir3_instr_set_address(mov, address);
-
- arr->last_write = mov;
-
- /* the array store may only matter to something in an earlier
- * block (ie. loops), but since arrays are not in SSA, depth
- * pass won't know this.. so keep all array stores:
- */
- array_insert(block, block->keeps, mov);
-}
-
-static inline type_t utype_for_size(unsigned bit_size)
-{
- switch (bit_size) {
- case 32: return TYPE_U32;
- case 16: return TYPE_U16;
- case 8: return TYPE_U8;
- default: unreachable("bad bitsize"); return ~0;
- }
-}
-
/* unsigned ir3 type matching a nir src's bit size: */
static inline type_t utype_src(nir_src src)
{ return utype_for_size(nir_src_bit_size(src)); }

/* unsigned ir3 type matching a nir dest's bit size: */
static inline type_t utype_dst(nir_dest dst)
{ return utype_for_size(nir_dest_bit_size(dst)); }
-
-/* allocate a n element value array (to be populated by caller) and
- * insert in def_ht
- */
-static struct ir3_instruction **
-get_dst_ssa(struct ir3_context *ctx, nir_ssa_def *dst, unsigned n)
-{
- struct ir3_instruction **value =
- ralloc_array(ctx->def_ht, struct ir3_instruction *, n);
- _mesa_hash_table_insert(ctx->def_ht, dst, value);
- return value;
-}
-
-static struct ir3_instruction **
-get_dst(struct ir3_context *ctx, nir_dest *dst, unsigned n)
-{
- struct ir3_instruction **value;
-
- if (dst->is_ssa) {
- value = get_dst_ssa(ctx, &dst->ssa, n);
- } else {
- value = ralloc_array(ctx, struct ir3_instruction *, n);
- }
-
- /* NOTE: in non-ssa case, we don't really need to store last_dst
- * but this helps us catch cases where put_dst() call is forgotten
- */
- compile_assert(ctx, !ctx->last_dst);
- ctx->last_dst = value;
- ctx->last_dst_n = n;
-
- return value;
-}
-
static struct ir3_instruction * get_addr(struct ir3_context *ctx, struct ir3_instruction *src, int align);

/* Get the value array for a nir src.  For ssa srcs this is a def_ht
 * lookup; for register srcs we emit array loads (indirect when
 * src->reg.indirect is set -- note the recursive get_src() for the
 * indirect offset).
 */
static struct ir3_instruction * const *
get_src(struct ir3_context *ctx, nir_src *src)
{
	if (src->is_ssa) {
		struct hash_entry *entry;
		entry = _mesa_hash_table_search(ctx->def_ht, src->ssa);
		compile_assert(ctx, entry);
		return entry->data;
	} else {
		nir_register *reg = src->reg.reg;
		struct ir3_array *arr = get_array(ctx, reg);
		unsigned num_components = arr->r->num_components;
		struct ir3_instruction *addr = NULL;
		struct ir3_instruction **value =
			ralloc_array(ctx, struct ir3_instruction *, num_components);

		if (src->reg.indirect)
			addr = get_addr(ctx, get_src(ctx, src->reg.indirect)[0],
					reg->num_components);

		for (unsigned i = 0; i < num_components; i++) {
			unsigned n = src->reg.base_offset * reg->num_components + i;
			compile_assert(ctx, n < arr->length);
			value[i] = create_array_load(ctx, arr, n, addr);
		}

		return value;
	}
}
-
-static void
-put_dst(struct ir3_context *ctx, nir_dest *dst)
-{
- unsigned bit_size = nir_dest_bit_size(*dst);
-
- if (bit_size < 32) {
- for (unsigned i = 0; i < ctx->last_dst_n; i++) {
- struct ir3_instruction *dst = ctx->last_dst[i];
- dst->regs[0]->flags |= IR3_REG_HALF;
- if (ctx->last_dst[i]->opc == OPC_META_FO)
- dst->regs[1]->instr->regs[0]->flags |= IR3_REG_HALF;
- }
- }
-
- if (!dst->is_ssa) {
- nir_register *reg = dst->reg.reg;
- struct ir3_array *arr = get_array(ctx, reg);
- unsigned num_components = ctx->last_dst_n;
- struct ir3_instruction *addr = NULL;
-
- if (dst->reg.indirect)
- addr = get_addr(ctx, get_src(ctx, dst->reg.indirect)[0],
- reg->num_components);
-
- for (unsigned i = 0; i < num_components; i++) {
- unsigned n = dst->reg.base_offset * reg->num_components + i;
- compile_assert(ctx, n < arr->length);
- if (!ctx->last_dst[i])
- continue;
- create_array_store(ctx, arr, n, ctx->last_dst[i], addr);
- }
-
- ralloc_free(ctx->last_dst);
- }
- ctx->last_dst = NULL;
- ctx->last_dst_n = 0;
-}
-
-static struct ir3_instruction *
-create_immed_typed(struct ir3_block *block, uint32_t val, type_t type)
-{
- struct ir3_instruction *mov;
- unsigned flags = (type_size(type) < 32) ? IR3_REG_HALF : 0;
-
- mov = ir3_instr_create(block, OPC_MOV);
- mov->cat1.src_type = type;
- mov->cat1.dst_type = type;
- ir3_reg_create(mov, 0, flags);
- ir3_reg_create(mov, 0, IR3_REG_IMMED)->uim_val = val;
-
- return mov;
-}
-
/* convenience wrapper: 32bit unsigned immediate */
static struct ir3_instruction *
create_immed(struct ir3_block *block, uint32_t val)
{
	return create_immed_typed(block, val, TYPE_U32);
}
-
-static struct ir3_instruction *
-create_addr(struct ir3_block *block, struct ir3_instruction *src, int align)
-{
- struct ir3_instruction *instr, *immed;
-
- /* TODO in at least some cases, the backend could probably be
- * made clever enough to propagate IR3_REG_HALF..
- */
- instr = ir3_COV(block, src, TYPE_U32, TYPE_S16);
- instr->regs[0]->flags |= IR3_REG_HALF;
-
- switch(align){
- case 1:
- /* src *= 1: */
- break;
- case 2:
- /* src *= 2 => src <<= 1: */
- immed = create_immed(block, 1);
- immed->regs[0]->flags |= IR3_REG_HALF;
-
- instr = ir3_SHL_B(block, instr, 0, immed, 0);
- instr->regs[0]->flags |= IR3_REG_HALF;
- instr->regs[1]->flags |= IR3_REG_HALF;
- break;
- case 3:
- /* src *= 3: */
- immed = create_immed(block, 3);
- immed->regs[0]->flags |= IR3_REG_HALF;
-
- instr = ir3_MULL_U(block, instr, 0, immed, 0);
- instr->regs[0]->flags |= IR3_REG_HALF;
- instr->regs[1]->flags |= IR3_REG_HALF;
- break;
- case 4:
- /* src *= 4 => src <<= 2: */
- immed = create_immed(block, 2);
- immed->regs[0]->flags |= IR3_REG_HALF;
-
- instr = ir3_SHL_B(block, instr, 0, immed, 0);
- instr->regs[0]->flags |= IR3_REG_HALF;
- instr->regs[1]->flags |= IR3_REG_HALF;
- break;
- default:
- unreachable("bad align");
- return NULL;
- }
-
- instr = ir3_MOV(block, instr, TYPE_S16);
- instr->regs[0]->num = regid(REG_A0, 0);
- instr->regs[0]->flags |= IR3_REG_HALF;
- instr->regs[1]->flags |= IR3_REG_HALF;
-
- return instr;
-}
-
-/* caches addr values to avoid generating multiple cov/shl/mova
- * sequences for each use of a given NIR level src as address
- */
-static struct ir3_instruction *
-get_addr(struct ir3_context *ctx, struct ir3_instruction *src, int align)
-{
- struct ir3_instruction *addr;
- unsigned idx = align - 1;
-
- compile_assert(ctx, idx < ARRAY_SIZE(ctx->addr_ht));
-
- if (!ctx->addr_ht[idx]) {
- ctx->addr_ht[idx] = _mesa_hash_table_create(ctx,
- _mesa_hash_pointer, _mesa_key_pointer_equal);
- } else {
- struct hash_entry *entry;
- entry = _mesa_hash_table_search(ctx->addr_ht[idx], src);
- if (entry)
- return entry->data;
- }
-
- addr = create_addr(ctx->block, src, align);
- _mesa_hash_table_insert(ctx->addr_ht[idx], src, addr);
-
- return addr;
-}
-
-static struct ir3_instruction *
-get_predicate(struct ir3_context *ctx, struct ir3_instruction *src)
-{
- struct ir3_block *b = ctx->block;
- struct ir3_instruction *cond;
-
- /* NOTE: only cmps.*.* can write p0.x: */
- cond = ir3_CMPS_S(b, src, 0, create_immed(b, 0), 0);
- cond->cat2.condition = IR3_COND_NE;
-
- /* condition always goes in predicate register: */
- cond->regs[0]->num = regid(REG_P0, 0);
-
- return cond;
-}
-
-static struct ir3_instruction *
-create_uniform(struct ir3_context *ctx, unsigned n)
-{
- struct ir3_instruction *mov;
-
- mov = ir3_instr_create(ctx->block, OPC_MOV);
- /* TODO get types right? */
- mov->cat1.src_type = TYPE_F32;
- mov->cat1.dst_type = TYPE_F32;
- ir3_reg_create(mov, 0, 0);
- ir3_reg_create(mov, n, IR3_REG_CONST);
-
- return mov;
-}
-
-static struct ir3_instruction *
-create_uniform_indirect(struct ir3_context *ctx, int n,
- struct ir3_instruction *address)
-{
- struct ir3_instruction *mov;
-
- mov = ir3_instr_create(ctx->block, OPC_MOV);
- mov->cat1.src_type = TYPE_U32;
- mov->cat1.dst_type = TYPE_U32;
- ir3_reg_create(mov, 0, 0);
- ir3_reg_create(mov, 0, IR3_REG_CONST | IR3_REG_RELATIV)->array.offset = n;
-
- ir3_instr_set_address(mov, address);
-
- return mov;
-}
-
/* Gather 'arrsz' scalar values into a single vec via a meta "fanin"
 * (OPC_META_FI) instruction.  Returns NULL for arrsz==0.  All elements
 * must agree with arr[0] on half-ness.
 */
static struct ir3_instruction *
create_collect(struct ir3_context *ctx, struct ir3_instruction *const *arr,
		unsigned arrsz)
{
	struct ir3_block *block = ctx->block;
	struct ir3_instruction *collect;

	if (arrsz == 0)
		return NULL;

	/* half-ness of the collect follows the first element: */
	unsigned flags = arr[0]->regs[0]->flags & IR3_REG_HALF;

	collect = ir3_instr_create2(block, OPC_META_FI, 1 + arrsz);
	ir3_reg_create(collect, 0, flags); /* dst */
	for (unsigned i = 0; i < arrsz; i++) {
		struct ir3_instruction *elem = arr[i];

		/* Since arrays are pre-colored in RA, we can't assume that
		 * things will end up in the right place.  (Ie. if a collect
		 * joins elements from two different arrays.)  So insert an
		 * extra mov.
		 *
		 * We could possibly skip this if all the collected elements
		 * are contiguous elements in a single array.. not sure how
		 * likely that is to happen.
		 *
		 * Fixes a problem with glamor shaders, that in effect do
		 * something like:
		 *
		 *     if (foo)
		 *       texcoord = ..
		 *     else
		 *       texcoord = ..
		 *     color = texture2D(tex, texcoord);
		 *
		 * In this case, texcoord will end up as nir registers (which
		 * translate to ir3 array's of length 1.  And we can't assume
		 * the two (or more) arrays will get allocated in consecutive
		 * scalar registers.
		 *
		 */
		if (elem->regs[0]->flags & IR3_REG_ARRAY) {
			type_t type = (flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
			elem = ir3_MOV(block, elem, type);
		}

		compile_assert(ctx, (elem->regs[0]->flags & IR3_REG_HALF) == flags);
		ir3_reg_create(collect, 0, IR3_REG_SSA | flags)->instr = elem;
	}

	return collect;
}