freedreno/a3xx/compiler: prepare for new compiler
author: Rob Clark <robclark@freedesktop.org>
Wed, 29 Jan 2014 21:25:52 +0000 (16:25 -0500)
committer: Rob Clark <robclark@freedesktop.org>
Mon, 3 Feb 2014 23:26:53 +0000 (18:26 -0500)
Shuffle things around to prepare for new compiler.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/Makefile.sources
src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c
src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
src/gallium/drivers/freedreno/a3xx/fd3_compiler.h
src/gallium/drivers/freedreno/a3xx/fd3_program.c
src/gallium/drivers/freedreno/a3xx/fd3_program.h
src/gallium/drivers/freedreno/a3xx/instr-a3xx.h
src/gallium/drivers/freedreno/a3xx/ir-a3xx.c [deleted file]
src/gallium/drivers/freedreno/a3xx/ir-a3xx.h [deleted file]
src/gallium/drivers/freedreno/a3xx/ir3.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/ir3.h [new file with mode: 0644]

index 3dcec9dceac80f59501301d780248456c45873fe..7d67bf28066b8fa7ec85392528fe72d781fdbe2e 100644 (file)
@@ -42,4 +42,4 @@ a3xx_SOURCES := \
        a3xx/fd3_util.c \
        a3xx/fd3_zsa.c \
        a3xx/disasm-a3xx.c \
-       a3xx/ir-a3xx.c
+       a3xx/ir3.c
index 2d5ae62a64a3563bf7705667087c72e6f586ad1a..0e45ec54b380155833aee7bab10f7e35dd92ef64 100644 (file)
@@ -285,21 +285,7 @@ static void print_instr_cat2(instr_t *instr)
 static void print_instr_cat3(instr_t *instr)
 {
        instr_cat3_t *cat3 = &instr->cat3;
-       bool full = true;
-
-       // XXX is this based on opc or some other bit?
-       switch (cat3->opc) {
-       case OPC_MAD_F16:
-       case OPC_MAD_U16:
-       case OPC_MAD_S16:
-       case OPC_SEL_B16:
-       case OPC_SEL_S16:
-       case OPC_SEL_F16:
-       case OPC_SAD_S16:
-       case OPC_SAD_S32:  // really??
-               full = false;
-               break;
-       }
+       bool full = instr_cat3_full(cat3);
 
        printf(" ");
        print_reg_dst((reg_t)(cat3->dst), full ^ cat3->dst_half, false);
@@ -747,26 +733,12 @@ struct opc_info {
 #undef OPC
 };
 
-#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | getopc(instr)]))
-
-static uint32_t getopc(instr_t *instr)
-{
-       switch (instr->opc_cat) {
-       case 0:  return instr->cat0.opc;
-       case 1:  return 0;
-       case 2:  return instr->cat2.opc;
-       case 3:  return instr->cat3.opc;
-       case 4:  return instr->cat4.opc;
-       case 5:  return instr->cat5.opc;
-       case 6:  return instr->cat6.opc;
-       default: return 0;
-       }
-}
+#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr)]))
 
 static void print_instr(uint32_t *dwords, int level, int n)
 {
        instr_t *instr = (instr_t *)dwords;
-       uint32_t opc = getopc(instr);
+       uint32_t opc = instr_opc(instr);
        const char *name;
 
        printf("%s%04d[%08xx_%08xx] ", levels[level], n, dwords[1], dwords[0]);
index 2c32c0fa2a7602c8d964f5d431aa02e3b1cfa295..5ab34e557b9ce5ff01740174cb87973ab5bab487 100644 (file)
 #include "fd3_util.h"
 
 #include "instr-a3xx.h"
-#include "ir-a3xx.h"
+#include "ir3.h"
 
 
 struct fd3_compile_context {
        const struct tgsi_token *tokens;
        struct ir3_shader *ir;
+       struct ir3_block *block;
        struct fd3_shader_stateobj *so;
 
        struct tgsi_parse_context parser;
@@ -124,6 +125,7 @@ compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
 
        ctx->tokens = tokens;
        ctx->ir = so->ir;
+       ctx->block = ir3_block_create(ctx->ir, 0, 0, 0);
        ctx->so = so;
        ctx->last_input = NULL;
        ctx->last_rel = NULL;
@@ -176,7 +178,7 @@ compile_error(struct fd3_compile_context *ctx, const char *format, ...)
        _debug_vprintf(format, ap);
        va_end(ap);
        tgsi_dump(ctx->tokens, 0);
-       assert(0);
+       debug_assert(0);
 }
 
 #define compile_assert(ctx, cond) do { \
@@ -208,11 +210,17 @@ handle_last_rel(struct fd3_compile_context *ctx)
        }
 }
 
+static struct ir3_instruction *
+instr_create(struct fd3_compile_context *ctx, int category, opc_t opc)
+{
+       return ir3_instr_create(ctx->block, category, opc);
+}
+
 static void
 add_nop(struct fd3_compile_context *ctx, unsigned count)
 {
        while (count-- > 0)
-               ir3_instr_create(ctx->ir, 0, OPC_NOP);
+               instr_create(ctx, 0, OPC_NOP);
 }
 
 static unsigned
@@ -241,6 +249,7 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
                const struct tgsi_dst_register *dst, unsigned chan)
 {
        unsigned flags = 0, num = 0;
+       struct ir3_register *reg;
 
        switch (dst->File) {
        case TGSI_FILE_OUTPUT:
@@ -256,10 +265,17 @@ add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
                break;
        }
 
+       if (dst->Indirect)
+               flags |= IR3_REG_RELATIV;
        if (ctx->so->half_precision)
                flags |= IR3_REG_HALF;
 
-       return ir3_reg_create(instr, regid(num, chan), flags);
+       reg = ir3_reg_create(instr, regid(num, chan), flags);
+
+       if (dst->Indirect)
+               ctx->last_rel = instr;
+
+       return reg;
 }
 
 static struct ir3_register *
@@ -517,9 +533,9 @@ create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
                                /* can't have abs or neg on a mov instr, so use
                                 * absneg.f instead to handle these cases:
                                 */
-                               instr = ir3_instr_create(ctx->ir, 2, OPC_ABSNEG_F);
+                               instr = instr_create(ctx, 2, OPC_ABSNEG_F);
                        } else {
-                               instr = ir3_instr_create(ctx->ir, 1, 0);
+                               instr = instr_create(ctx, 1, 0);
                                instr->cat1.src_type = type_mov;
                                instr->cat1.dst_type = type_mov;
                        }
@@ -539,10 +555,10 @@ create_clamp(struct fd3_compile_context *ctx,
 {
        struct ir3_instruction *instr;
 
-       instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
+       instr = instr_create(ctx, 2, OPC_MAX_F);
        vectorize(ctx, instr, dst, 2, val, 0, minval, 0);
 
-       instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F);
+       instr = instr_create(ctx, 2, OPC_MIN_F);
        vectorize(ctx, instr, dst, 2, val, 0, maxval, 0);
 }
 
@@ -707,7 +723,7 @@ trans_arl(const struct instr_translater *t,
        tmp_src = get_internal_temp_hr(ctx, &tmp_dst);
 
        /* cov.{f32,f16}s16 Rtmp, Rsrc */
-       instr = ir3_instr_create(ctx->ir, 1, 0);
+       instr = instr_create(ctx, 1, 0);
        instr->cat1.src_type = get_ftype(ctx);
        instr->cat1.dst_type = TYPE_S16;
        add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
@@ -716,7 +732,7 @@ trans_arl(const struct instr_translater *t,
        add_nop(ctx, 3);
 
        /* shl.b Rtmp, Rtmp, 2 */
-       instr = ir3_instr_create(ctx->ir, 2, OPC_SHL_B);
+       instr = instr_create(ctx, 2, OPC_SHL_B);
        add_dst_reg(ctx, instr, &tmp_dst, chan)->flags |= IR3_REG_HALF;
        add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
        ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 2;
@@ -724,7 +740,7 @@ trans_arl(const struct instr_translater *t,
        add_nop(ctx, 3);
 
        /* mova a0, Rtmp */
-       instr = ir3_instr_create(ctx->ir, 1, 0);
+       instr = instr_create(ctx, 1, 0);
        instr->cat1.src_type = TYPE_S16;
        instr->cat1.dst_type = TYPE_S16;
        add_dst_reg(ctx, instr, dst, 0)->flags |= IR3_REG_HALF;
@@ -804,7 +820,7 @@ trans_samp(const struct instr_translater *t,
                tmp_src = get_internal_temp(ctx, &tmp_dst);
 
                for (j = 0; (j < 4) && (order[j] >= 0); j++) {
-                       instr = ir3_instr_create(ctx->ir, 1, 0);
+                       instr = instr_create(ctx, 1, 0);
                        instr->cat1.src_type = type_mov;
                        instr->cat1.dst_type = type_mov;
                        add_dst_reg(ctx, instr, &tmp_dst, j);
@@ -817,7 +833,7 @@ trans_samp(const struct instr_translater *t,
                add_nop(ctx, 4 - j);
        }
 
-       instr = ir3_instr_create(ctx->ir, 5, t->opc);
+       instr = instr_create(ctx, 5, t->opc);
        instr->cat5.type = get_ftype(ctx);
        instr->cat5.samp = samp->Index;
        instr->cat5.tex  = samp->Index;
@@ -915,7 +931,7 @@ trans_cmp(const struct instr_translater *t,
                a0 = get_unconst(ctx, a0);
 
        /* cmps.f.ge tmp, a0, a1 */
-       instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
+       instr = instr_create(ctx, 2, OPC_CMPS_F);
        instr->cat2.condition = condition;
        vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
 
@@ -924,7 +940,7 @@ trans_cmp(const struct instr_translater *t,
        case TGSI_OPCODE_SGE:
        case TGSI_OPCODE_SLE:
                /* cov.u16f16 dst, tmp0 */
-               instr = ir3_instr_create(ctx->ir, 1, 0);
+               instr = instr_create(ctx, 1, 0);
                instr->cat1.src_type = get_utype(ctx);
                instr->cat1.dst_type = get_ftype(ctx);
                vectorize(ctx, instr, dst, 1, tmp_src, 0);
@@ -934,12 +950,12 @@ trans_cmp(const struct instr_translater *t,
        case TGSI_OPCODE_SLT:
        case TGSI_OPCODE_CMP:
                /* add.s tmp, tmp, -1 */
-               instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
+               instr = instr_create(ctx, 2, OPC_ADD_S);
                vectorize(ctx, instr, &tmp_dst, 2, tmp_src, 0, -1, IR3_REG_IMMED);
 
                if (t->tgsi_opc == TGSI_OPCODE_CMP) {
                        /* sel.{f32,f16} dst, src2, tmp, src1 */
-                       instr = ir3_instr_create(ctx->ir, 3,
+                       instr = instr_create(ctx, 3,
                                        ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
                        vectorize(ctx, instr, dst, 3,
                                        &inst->Src[2].Register, 0,
@@ -949,7 +965,7 @@ trans_cmp(const struct instr_translater *t,
                        get_immediate(ctx, &constval0, fui(0.0));
                        get_immediate(ctx, &constval1, fui(1.0));
                        /* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */
-                       instr = ir3_instr_create(ctx->ir, 3,
+                       instr = instr_create(ctx, 3,
                                        ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
                        vectorize(ctx, instr, dst, 3,
                                        &constval0, 0, tmp_src, 0, &constval1, 0);
@@ -990,7 +1006,7 @@ pop_branch(struct fd3_compile_context *ctx)
         * and set (jp) flag on whatever the next instruction was, rather
         * than inserting an extra nop..
         */
-       instr = ir3_instr_create(ctx->ir, 0, OPC_NOP);
+       instr = instr_create(ctx, 0, OPC_NOP);
        instr->flags |= IR3_INSTR_JP;
 
        /* pop the branch instruction from the stack and fix up branch target: */
@@ -1018,13 +1034,13 @@ trans_if(const struct instr_translater *t,
        if (is_const(src))
                src = get_unconst(ctx, src);
 
-       instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
+       instr = instr_create(ctx, 2, OPC_CMPS_F);
        ir3_reg_create(instr, regid(REG_P0, 0), 0);
        add_src_reg(ctx, instr, src, src->SwizzleX);
        add_src_reg(ctx, instr, &constval, constval.SwizzleX);
        instr->cat2.condition = IR3_COND_EQ;
 
-       instr = ir3_instr_create(ctx->ir, 0, OPC_BR);
+       instr = instr_create(ctx, 0, OPC_BR);
        push_branch(ctx, instr);
 }
 
@@ -1036,7 +1052,7 @@ trans_else(const struct instr_translater *t,
        struct ir3_instruction *instr;
 
        /* for first half of if/else/endif, generate a jump past the else: */
-       instr = ir3_instr_create(ctx->ir, 0, OPC_JUMP);
+       instr = instr_create(ctx, 0, OPC_JUMP);
 
        pop_branch(ctx);
        push_branch(ctx, instr);
@@ -1060,7 +1076,7 @@ instr_cat0(const struct instr_translater *t,
                struct fd3_compile_context *ctx,
                struct tgsi_full_instruction *inst)
 {
-       ir3_instr_create(ctx->ir, 0, t->opc);
+       instr_create(ctx, 0, t->opc);
 }
 
 static void
@@ -1083,7 +1099,7 @@ instr_cat1(const struct instr_translater *t,
                 * in the future if we start supporting widening/narrowing or
                 * conversion to/from integer..
                 */
-               instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+               instr = instr_create(ctx, 2, OPC_ADD_F);
                get_immediate(ctx, &constval, fui(0.0));
                vectorize(ctx, instr, dst, 2, src, 0, &constval, 0);
        } else {
@@ -1129,14 +1145,14 @@ instr_cat2(const struct instr_translater *t,
        case OPC_SETRM:
        case OPC_CBITS_B:
                /* these only have one src reg */
-               instr = ir3_instr_create(ctx->ir, 2, t->opc);
+               instr = instr_create(ctx, 2, t->opc);
                vectorize(ctx, instr, dst, 1, src0, src0_flags);
                break;
        default:
                if (is_const(src0) && is_const(src1))
                        src0 = get_unconst(ctx, src0);
 
-               instr = ir3_instr_create(ctx->ir, 2, t->opc);
+               instr = instr_create(ctx, 2, t->opc);
                vectorize(ctx, instr, dst, 2, src0, src0_flags,
                                src1, src1_flags);
                break;
@@ -1186,7 +1202,7 @@ instr_cat3(const struct instr_translater *t,
                }
        }
 
-       instr = ir3_instr_create(ctx->ir, 3,
+       instr = instr_create(ctx, 3,
                        ctx->so->half_precision ? t->hopc : t->opc);
        vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
                        &inst->Src[2].Register, 0);
@@ -1214,8 +1230,8 @@ instr_cat4(const struct instr_translater *t,
        for (i = 0, n = 0; i < 4; i++) {
                if (dst->WriteMask & (1 << i)) {
                        if (n++)
-                               ir3_instr_create(ctx->ir, 0, OPC_NOP);
-                       instr = ir3_instr_create(ctx->ir, 4, t->opc);
+                               add_nop(ctx, 1);
+                       instr = instr_create(ctx, 4, t->opc);
                        add_dst_reg(ctx, instr, dst, i);
                        add_src_reg(ctx, instr, src, src->SwizzleX);
                }
@@ -1315,7 +1331,7 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
                                struct ir3_instruction *instr;
                                struct ir3_register *dst;
 
-                               instr = ir3_instr_create(ctx->ir, 2, OPC_BARY_F);
+                               instr = instr_create(ctx, 2, OPC_BARY_F);
 
                                /* dst register: */
                                dst = ir3_reg_create(instr, r + j, flags);
index da25cdce88a49cc9be0e6e5cdeeaf8f56550b3c4..5cdb245640b68bbad4885ea3c73e7e3762ce97af 100644 (file)
 #include "fd3_util.h"
 
 
-/* ************************************************************************* */
-/* split this out or find some helper to use.. like main/bitset.h.. */
-
-#define MAX_REG 256
-
-typedef uint8_t regmask_t[2 * MAX_REG / 8];
-
-static inline unsigned regmask_idx(struct ir3_register *reg)
-{
-       unsigned num = reg->num;
-       assert(num < MAX_REG);
-       if (reg->flags & IR3_REG_HALF)
-               num += MAX_REG;
-       return num;
-}
-
-static inline void regmask_init(regmask_t *regmask)
-{
-       memset(regmask, 0, sizeof(*regmask));
-}
-
-static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
-{
-       unsigned idx = regmask_idx(reg);
-       unsigned i;
-       for (i = 0; i < 4; i++, idx++)
-               if (reg->wrmask & (1 << i))
-                       (*regmask)[idx / 8] |= 1 << (idx % 8);
-}
-
-static inline unsigned regmask_get(regmask_t *regmask,
-               struct ir3_register *reg)
-{
-       unsigned idx = regmask_idx(reg);
-       unsigned i;
-       for (i = 0; i < 4; i++, idx++)
-               if (reg->wrmask & (1 << i))
-                       if ((*regmask)[idx / 8] & (1 << (idx % 8)))
-                               return true;
-       return false;
-}
-
-/* comp:
- *   0 - x
- *   1 - y
- *   2 - z
- *   3 - w
- */
-static inline uint32_t regid(int num, int comp)
-{
-       return (num << 2) | (comp & 0x3);
-}
-
-/* ************************************************************************* */
-
 int fd3_compile_shader(struct fd3_shader_stateobj *so,
                const struct tgsi_token *tokens);
 
index 3df29ecc91151e260cc26042cb3d4bfb30763087..ddb33ca58448c7ac1ad007648b4f759179034029 100644 (file)
@@ -79,9 +79,10 @@ static void
 fixup_vp_regfootprint(struct fd3_shader_stateobj *so)
 {
        unsigned i;
-       for (i = 0; i < so->inputs_count; i++) {
+       for (i = 0; i < so->inputs_count; i++)
                so->info.max_reg = MAX2(so->info.max_reg, so->inputs[i].regid >> 2);
-       }
+       for (i = 0; i < so->outputs_count; i++)
+               so->info.max_reg = MAX2(so->info.max_reg, so->outputs[i].regid >> 2);
 }
 
 static struct fd3_shader_stateobj *
@@ -230,7 +231,7 @@ find_output(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
 }
 
 static uint32_t
-find_regid(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
+find_output_regid(const struct fd3_shader_stateobj *so, fd3_semantic semantic)
 {
        int j;
        for (j = 0; j < so->outputs_count; j++)
@@ -257,13 +258,13 @@ fd3_program_emit(struct fd_ringbuffer *ring,
                fsi = &fp->info;
        }
 
-       pos_regid = find_regid(vp,
+       pos_regid = find_output_regid(vp,
                fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
-       posz_regid = find_regid(fp,
+       posz_regid = find_output_regid(fp,
                fd3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
-       psize_regid = find_regid(vp,
+       psize_regid = find_output_regid(vp,
                fd3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
-       color_regid = find_regid(fp,
+       color_regid = find_output_regid(fp,
                fd3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
 
        /* we could probably divide this up into things that need to be
@@ -501,10 +502,11 @@ create_blit_fp(struct pipe_context *pctx)
 {
        struct fd3_shader_stateobj *so;
        struct ir3_shader *ir = ir3_shader_create();
+       struct ir3_block *block = ir3_block_create(ir, 0, 0, 0);
        struct ir3_instruction *instr;
 
        /* (sy)(ss)(rpt1)bary.f (ei)r0.z, (r)0, r0.x */
-       instr = ir3_instr_create(ir, 2, OPC_BARY_F);
+       instr = ir3_instr_create(block, 2, OPC_BARY_F);
        instr->flags = IR3_INSTR_SY | IR3_INSTR_SS;
        instr->repeat = 1;
 
@@ -514,11 +516,11 @@ create_blit_fp(struct pipe_context *pctx)
        ir3_reg_create(instr, regid(0,0), 0);             /* r0.x */
 
        /* (rpt5)nop */
-       instr = ir3_instr_create(ir, 0, OPC_NOP);
+       instr = ir3_instr_create(block, 0, OPC_NOP);
        instr->repeat = 5;
 
        /* sam (f32)(xyzw)r0.x, r0.z, s#0, t#0 */
-       instr = ir3_instr_create(ir, 5, OPC_SAM);
+       instr = ir3_instr_create(block, 5, OPC_SAM);
        instr->cat5.samp = 0;
        instr->cat5.tex  = 0;
        instr->cat5.type = TYPE_F32;
@@ -528,7 +530,7 @@ create_blit_fp(struct pipe_context *pctx)
        ir3_reg_create(instr, regid(0,2), 0);             /* r0.z */
 
        /* (sy)(rpt3)cov.f32f16 hr0.x, (r)r0.x */
-       instr = ir3_instr_create(ir, 1, 0);  /* mov/cov instructions have no opc */
+       instr = ir3_instr_create(block, 1, 0);  /* mov/cov instructions have no opc */
        instr->flags = IR3_INSTR_SY;
        instr->repeat = 3;
        instr->cat1.src_type = TYPE_F32;
@@ -538,7 +540,7 @@ create_blit_fp(struct pipe_context *pctx)
        ir3_reg_create(instr, regid(0,0), IR3_REG_R);     /* (r)r0.x */
 
        /* end */
-       instr = ir3_instr_create(ir, 0, OPC_END);
+       instr = ir3_instr_create(block, 0, OPC_END);
 
        so = create_internal_shader(pctx, SHADER_FRAGMENT, ir);
        if (!so)
@@ -573,10 +575,11 @@ create_blit_vp(struct pipe_context *pctx)
 {
        struct fd3_shader_stateobj *so;
        struct ir3_shader *ir = ir3_shader_create();
+       struct ir3_block *block = ir3_block_create(ir, 0, 0, 0);
        struct ir3_instruction *instr;
 
        /* (sy)(ss)end */
-       instr = ir3_instr_create(ir, 0, OPC_END);
+       instr = ir3_instr_create(block, 0, OPC_END);
        instr->flags = IR3_INSTR_SY | IR3_INSTR_SS;
 
        so = create_internal_shader(pctx, SHADER_VERTEX, ir);
@@ -611,10 +614,11 @@ create_solid_fp(struct pipe_context *pctx)
 {
        struct fd3_shader_stateobj *so;
        struct ir3_shader *ir = ir3_shader_create();
+       struct ir3_block *block = ir3_block_create(ir, 0, 0, 0);
        struct ir3_instruction *instr;
 
        /* (sy)(ss)(rpt3)mov.f16f16 hr0.x, (r)hc0.x */
-       instr = ir3_instr_create(ir, 1, 0);  /* mov/cov instructions have no opc */
+       instr = ir3_instr_create(block, 1, 0);  /* mov/cov instructions have no opc */
        instr->flags = IR3_INSTR_SY | IR3_INSTR_SS;
        instr->repeat = 3;
        instr->cat1.src_type = TYPE_F16;
@@ -625,7 +629,7 @@ create_solid_fp(struct pipe_context *pctx)
                        IR3_REG_CONST | IR3_REG_R);
 
        /* end */
-       instr = ir3_instr_create(ir, 0, OPC_END);
+       instr = ir3_instr_create(block, 0, OPC_END);
 
        so = create_internal_shader(pctx, SHADER_FRAGMENT, ir);
        if (!so)
@@ -650,10 +654,11 @@ create_solid_vp(struct pipe_context *pctx)
 {
        struct fd3_shader_stateobj *so;
        struct ir3_shader *ir = ir3_shader_create();
+       struct ir3_block *block = ir3_block_create(ir, 0, 0, 0);
        struct ir3_instruction *instr;
 
        /* (sy)(ss)end */
-       instr = ir3_instr_create(ir, 0, OPC_END);
+       instr = ir3_instr_create(block, 0, OPC_END);
        instr->flags = IR3_INSTR_SY | IR3_INSTR_SS;
 
 
index 4aeeb2e30062c8be3ef222f3b0693bb881151c31..c781dfe4be98921a57fe1b77ef3142a39df9f8b6 100644 (file)
@@ -33,7 +33,7 @@
 
 #include "freedreno_context.h"
 
-#include "ir-a3xx.h"
+#include "ir3.h"
 #include "disasm.h"
 
 typedef uint16_t fd3_semantic;  /* semantic name + index */
@@ -43,6 +43,16 @@ fd3_semantic_name(uint8_t name, uint16_t index)
        return (name << 8) | (index & 0xff);
 }
 
+static inline uint8_t sem2name(fd3_semantic sem)
+{
+       return sem >> 8;
+}
+
+static inline uint16_t sem2idx(fd3_semantic sem)
+{
+       return sem & 0xff;
+}
+
 struct fd3_shader_stateobj {
        enum shader_t type;
 
index 1085ddf8c12e5423f3ce4e47849d6286aa7c85fa..b0f78341131139bb1fe9db0f356f278b9c3a73d3 100644 (file)
@@ -438,6 +438,23 @@ typedef struct PACKED {
        uint32_t opc_cat  : 3;
 } instr_cat3_t;
 
+static inline bool instr_cat3_full(instr_cat3_t *cat3)
+{
+       switch (cat3->opc) {
+       case OPC_MAD_F16:
+       case OPC_MAD_U16:
+       case OPC_MAD_S16:
+       case OPC_SEL_B16:
+       case OPC_SEL_S16:
+       case OPC_SEL_F16:
+       case OPC_SAD_S16:
+       case OPC_SAD_S32:  // really??
+               return false;
+       default:
+               return true;
+       }
+}
+
 typedef struct PACKED {
        /* dword0: */
        union PACKED {
@@ -612,4 +629,18 @@ typedef union PACKED {
        };
 } instr_t;
 
+static inline uint32_t instr_opc(instr_t *instr)
+{
+       switch (instr->opc_cat) {
+       case 0:  return instr->cat0.opc;
+       case 1:  return 0;
+       case 2:  return instr->cat2.opc;
+       case 3:  return instr->cat3.opc;
+       case 4:  return instr->cat4.opc;
+       case 5:  return instr->cat5.opc;
+       case 6:  return instr->cat6.opc;
+       default: return 0;
+       }
+}
+
 #endif /* INSTR_A3XX_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c
deleted file mode 100644 (file)
index a39214e..0000000
+++ /dev/null
@@ -1,599 +0,0 @@
-/*
- * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ir-a3xx.h"
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <assert.h>
-#include <stdbool.h>
-#include <errno.h>
-
-#include "freedreno_util.h"
-#include "instr-a3xx.h"
-
-/* simple allocator to carve allocations out of an up-front allocated heap,
- * so that we can free everything easily in one shot.
- */
-static void * ir3_alloc(struct ir3_shader *shader, int sz)
-{
-       void *ptr = &shader->heap[shader->heap_idx];
-       shader->heap_idx += align(sz, 4);
-       return ptr;
-}
-
-struct ir3_shader * ir3_shader_create(void)
-{
-       return calloc(1, sizeof(struct ir3_shader));
-}
-
-void ir3_shader_destroy(struct ir3_shader *shader)
-{
-       free(shader);
-}
-
-#define iassert(cond) do { \
-       if (!(cond)) { \
-               assert(cond); \
-               return -1; \
-       } } while (0)
-
-static uint32_t reg(struct ir3_register *reg, struct ir3_shader_info *info,
-               uint32_t repeat, uint32_t valid_flags)
-{
-       reg_t val = { .dummy32 = 0 };
-
-       assert(!(reg->flags & ~valid_flags));
-
-       if (!(reg->flags & IR3_REG_R))
-               repeat = 0;
-
-       if (reg->flags & IR3_REG_IMMED) {
-               val.iim_val = reg->iim_val;
-       } else {
-               int8_t max = (reg->num + repeat) >> 2;
-
-               val.comp = reg->num & 0x3;
-               val.num  = reg->num >> 2;
-
-               if (reg->flags & IR3_REG_CONST) {
-                       info->max_const = MAX2(info->max_const, max);
-               } else if ((max != REG_A0) && (max != REG_P0)) {
-                       if (reg->flags & IR3_REG_HALF) {
-                               info->max_half_reg = MAX2(info->max_half_reg, max);
-                       } else {
-                               info->max_reg = MAX2(info->max_reg, max);
-                       }
-               }
-       }
-
-       return val.dummy32;
-}
-
-static int emit_cat0(struct ir3_instruction *instr, void *ptr,
-               struct ir3_shader_info *info)
-{
-       instr_cat0_t *cat0 = ptr;
-
-       cat0->immed    = instr->cat0.immed;
-       cat0->repeat   = instr->repeat;
-       cat0->ss       = !!(instr->flags & IR3_INSTR_SS);
-       cat0->inv      = instr->cat0.inv;
-       cat0->comp     = instr->cat0.comp;
-       cat0->opc      = instr->opc;
-       cat0->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
-       cat0->sync     = !!(instr->flags & IR3_INSTR_SY);
-       cat0->opc_cat  = 0;
-
-       return 0;
-}
-
-static uint32_t type_flags(type_t type)
-{
-       return (type_size(type) == 32) ? 0 : IR3_REG_HALF;
-}
-
-static int emit_cat1(struct ir3_instruction *instr, void *ptr,
-               struct ir3_shader_info *info)
-{
-       struct ir3_register *dst = instr->regs[0];
-       struct ir3_register *src = instr->regs[1];
-       instr_cat1_t *cat1 = ptr;
-
-       iassert(instr->regs_count == 2);
-       iassert(!((dst->flags ^ type_flags(instr->cat1.dst_type)) & IR3_REG_HALF));
-       iassert((src->flags & IR3_REG_IMMED) ||
-                       !((src->flags ^ type_flags(instr->cat1.src_type)) & IR3_REG_HALF));
-
-       if (src->flags & IR3_REG_IMMED) {
-               cat1->iim_val = src->iim_val;
-               cat1->src_im  = 1;
-       } else if (src->flags & IR3_REG_RELATIV) {
-               cat1->off       = src->offset;
-               cat1->src_rel   = 1;
-               cat1->src_rel_c = !!(src->flags & IR3_REG_CONST);
-       } else {
-               cat1->src  = reg(src, info, instr->repeat,
-                               IR3_REG_IMMED | IR3_REG_R |
-                               IR3_REG_CONST | IR3_REG_HALF);
-               cat1->src_c     = !!(src->flags & IR3_REG_CONST);
-       }
-
-       cat1->dst      = reg(dst, info, instr->repeat,
-                       IR3_REG_RELATIV | IR3_REG_EVEN |
-                       IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF);
-       cat1->repeat   = instr->repeat;
-       cat1->src_r    = !!(src->flags & IR3_REG_R);
-       cat1->ss       = !!(instr->flags & IR3_INSTR_SS);
-       cat1->ul       = !!(instr->flags & IR3_INSTR_UL);
-       cat1->dst_type = instr->cat1.dst_type;
-       cat1->dst_rel  = !!(dst->flags & IR3_REG_RELATIV);
-       cat1->src_type = instr->cat1.src_type;
-       cat1->even     = !!(dst->flags & IR3_REG_EVEN);
-       cat1->pos_inf  = !!(dst->flags & IR3_REG_POS_INF);
-       cat1->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
-       cat1->sync     = !!(instr->flags & IR3_INSTR_SY);
-       cat1->opc_cat  = 1;
-
-       return 0;
-}
-
-static int emit_cat2(struct ir3_instruction *instr, void *ptr,
-               struct ir3_shader_info *info)
-{
-       struct ir3_register *dst = instr->regs[0];
-       struct ir3_register *src1 = instr->regs[1];
-       struct ir3_register *src2 = instr->regs[2];
-       instr_cat2_t *cat2 = ptr;
-
-       iassert((instr->regs_count == 2) || (instr->regs_count == 3));
-
-       if (src1->flags & IR3_REG_RELATIV) {
-               iassert(src1->num < (1 << 10));
-               cat2->rel1.src1      = reg(src1, info, instr->repeat,
-                               IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE |
-                               IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF);
-               cat2->rel1.src1_c    = !!(src1->flags & IR3_REG_CONST);
-               cat2->rel1.src1_rel  = 1;
-       } else if (src1->flags & IR3_REG_CONST) {
-               iassert(src1->num < (1 << 12));
-               cat2->c1.src1   = reg(src1, info, instr->repeat,
-                               IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS |
-                               IR3_REG_R | IR3_REG_HALF);
-               cat2->c1.src1_c = 1;
-       } else {
-               iassert(src1->num < (1 << 11));
-               cat2->src1 = reg(src1, info, instr->repeat,
-                               IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS |
-                               IR3_REG_R | IR3_REG_HALF);
-       }
-       cat2->src1_im  = !!(src1->flags & IR3_REG_IMMED);
-       cat2->src1_neg = !!(src1->flags & IR3_REG_NEGATE);
-       cat2->src1_abs = !!(src1->flags & IR3_REG_ABS);
-       cat2->src1_r   = !!(src1->flags & IR3_REG_R);
-
-       if (src2) {
-               iassert((src2->flags & IR3_REG_IMMED) ||
-                               !((src1->flags ^ src2->flags) & IR3_REG_HALF));
-
-               if (src2->flags & IR3_REG_RELATIV) {
-                       iassert(src2->num < (1 << 10));
-                       cat2->rel2.src2      = reg(src2, info, instr->repeat,
-                                       IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE |
-                                       IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF);
-                       cat2->rel2.src2_c    = !!(src2->flags & IR3_REG_CONST);
-                       cat2->rel2.src2_rel  = 1;
-               } else if (src2->flags & IR3_REG_CONST) {
-                       iassert(src2->num < (1 << 12));
-                       cat2->c2.src2   = reg(src2, info, instr->repeat,
-                                       IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS |
-                                       IR3_REG_R | IR3_REG_HALF);
-                       cat2->c2.src2_c = 1;
-               } else {
-                       iassert(src2->num < (1 << 11));
-                       cat2->src2 = reg(src2, info, instr->repeat,
-                                       IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS |
-                                       IR3_REG_R | IR3_REG_HALF);
-               }
-
-               cat2->src2_im  = !!(src2->flags & IR3_REG_IMMED);
-               cat2->src2_neg = !!(src2->flags & IR3_REG_NEGATE);
-               cat2->src2_abs = !!(src2->flags & IR3_REG_ABS);
-               cat2->src2_r   = !!(src2->flags & IR3_REG_R);
-       }
-
-       cat2->dst      = reg(dst, info, instr->repeat,
-                       IR3_REG_R | IR3_REG_EI | IR3_REG_HALF);
-       cat2->repeat   = instr->repeat;
-       cat2->ss       = !!(instr->flags & IR3_INSTR_SS);
-       cat2->ul       = !!(instr->flags & IR3_INSTR_UL);
-       cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF);
-       cat2->ei       = !!(dst->flags & IR3_REG_EI);
-       cat2->cond     = instr->cat2.condition;
-       cat2->full     = ! (src1->flags & IR3_REG_HALF);
-       cat2->opc      = instr->opc;
-       cat2->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
-       cat2->sync     = !!(instr->flags & IR3_INSTR_SY);
-       cat2->opc_cat  = 2;
-
-       return 0;
-}
-
-static int emit_cat3(struct ir3_instruction *instr, void *ptr,
-               struct ir3_shader_info *info)
-{
-       struct ir3_register *dst = instr->regs[0];
-       struct ir3_register *src1 = instr->regs[1];
-       struct ir3_register *src2 = instr->regs[2];
-       struct ir3_register *src3 = instr->regs[3];
-       instr_cat3_t *cat3 = ptr;
-       uint32_t src_flags = 0;
-
-       switch (instr->opc) {
-       case OPC_MAD_F16:
-       case OPC_MAD_U16:
-       case OPC_MAD_S16:
-       case OPC_SEL_B16:
-       case OPC_SEL_S16:
-       case OPC_SEL_F16:
-       case OPC_SAD_S16:
-       case OPC_SAD_S32:  // really??
-               src_flags |= IR3_REG_HALF;
-               break;
-       default:
-               break;
-       }
-
-       iassert(instr->regs_count == 4);
-       iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF));
-       iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF));
-       iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF));
-
-       if (src1->flags & IR3_REG_RELATIV) {
-               iassert(src1->num < (1 << 10));
-               cat3->rel1.src1      = reg(src1, info, instr->repeat,
-                               IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE |
-                               IR3_REG_R | IR3_REG_HALF);
-               cat3->rel1.src1_c    = !!(src1->flags & IR3_REG_CONST);
-               cat3->rel1.src1_rel  = 1;
-       } else if (src1->flags & IR3_REG_CONST) {
-               iassert(src1->num < (1 << 12));
-               cat3->c1.src1   = reg(src1, info, instr->repeat,
-                               IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_R |
-                               IR3_REG_HALF);
-               cat3->c1.src1_c = 1;
-       } else {
-               iassert(src1->num < (1 << 11));
-               cat3->src1 = reg(src1, info, instr->repeat,
-                               IR3_REG_NEGATE | IR3_REG_R | IR3_REG_HALF);
-       }
-
-       cat3->src1_neg = !!(src1->flags & IR3_REG_NEGATE);
-       cat3->src1_r   = !!(src1->flags & IR3_REG_R);
-
-       cat3->src2     = reg(src2, info, instr->repeat,
-                       IR3_REG_CONST | IR3_REG_NEGATE |
-                       IR3_REG_R | IR3_REG_HALF);
-       cat3->src2_c   = !!(src2->flags & IR3_REG_CONST);
-       cat3->src2_neg = !!(src2->flags & IR3_REG_NEGATE);
-       cat3->src2_r   = !!(src2->flags & IR3_REG_R);
-
-
-       if (src3->flags & IR3_REG_RELATIV) {
-               iassert(src3->num < (1 << 10));
-               cat3->rel2.src3      = reg(src3, info, instr->repeat,
-                               IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE |
-                               IR3_REG_R | IR3_REG_HALF);
-               cat3->rel2.src3_c    = !!(src3->flags & IR3_REG_CONST);
-               cat3->rel2.src3_rel  = 1;
-       } else if (src3->flags & IR3_REG_CONST) {
-               iassert(src3->num < (1 << 12));
-               cat3->c2.src3   = reg(src3, info, instr->repeat,
-                               IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_R |
-                               IR3_REG_HALF);
-               cat3->c2.src3_c = 1;
-       } else {
-               iassert(src3->num < (1 << 11));
-               cat3->src3 = reg(src3, info, instr->repeat,
-                               IR3_REG_NEGATE | IR3_REG_R | IR3_REG_HALF);
-       }
-
-       cat3->src3_neg = !!(src3->flags & IR3_REG_NEGATE);
-       cat3->src3_r   = !!(src3->flags & IR3_REG_R);
-
-       cat3->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
-       cat3->repeat   = instr->repeat;
-       cat3->ss       = !!(instr->flags & IR3_INSTR_SS);
-       cat3->ul       = !!(instr->flags & IR3_INSTR_UL);
-       cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF);
-       cat3->opc      = instr->opc;
-       cat3->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
-       cat3->sync     = !!(instr->flags & IR3_INSTR_SY);
-       cat3->opc_cat  = 3;
-
-       return 0;
-}
-
-static int emit_cat4(struct ir3_instruction *instr, void *ptr,
-               struct ir3_shader_info *info)
-{
-       struct ir3_register *dst = instr->regs[0];
-       struct ir3_register *src = instr->regs[1];
-       instr_cat4_t *cat4 = ptr;
-
-       iassert(instr->regs_count == 2);
-
-       if (src->flags & IR3_REG_RELATIV) {
-               iassert(src->num < (1 << 10));
-               cat4->rel.src      = reg(src, info, instr->repeat,
-                               IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE |
-                               IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF);
-               cat4->rel.src_c    = !!(src->flags & IR3_REG_CONST);
-               cat4->rel.src_rel  = 1;
-       } else if (src->flags & IR3_REG_CONST) {
-               iassert(src->num < (1 << 12));
-               cat4->c.src   = reg(src, info, instr->repeat,
-                               IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS |
-                               IR3_REG_R | IR3_REG_HALF);
-               cat4->c.src_c = 1;
-       } else {
-               iassert(src->num < (1 << 11));
-               cat4->src = reg(src, info, instr->repeat,
-                               IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS |
-                               IR3_REG_R | IR3_REG_HALF);
-       }
-
-       cat4->src_im   = !!(src->flags & IR3_REG_IMMED);
-       cat4->src_neg  = !!(src->flags & IR3_REG_NEGATE);
-       cat4->src_abs  = !!(src->flags & IR3_REG_ABS);
-       cat4->src_r    = !!(src->flags & IR3_REG_R);
-
-       cat4->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
-       cat4->repeat   = instr->repeat;
-       cat4->ss       = !!(instr->flags & IR3_INSTR_SS);
-       cat4->ul       = !!(instr->flags & IR3_INSTR_UL);
-       cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF);
-       cat4->full     = ! (src->flags & IR3_REG_HALF);
-       cat4->opc      = instr->opc;
-       cat4->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
-       cat4->sync     = !!(instr->flags & IR3_INSTR_SY);
-       cat4->opc_cat  = 4;
-
-       return 0;
-}
-
-static int emit_cat5(struct ir3_instruction *instr, void *ptr,
-               struct ir3_shader_info *info)
-{
-       struct ir3_register *dst = instr->regs[0];
-       struct ir3_register *src1 = instr->regs[1];
-       struct ir3_register *src2 = instr->regs[2];
-       struct ir3_register *src3 = instr->regs[3];
-       instr_cat5_t *cat5 = ptr;
-
-       iassert(!((dst->flags ^ type_flags(instr->cat5.type)) & IR3_REG_HALF));
-
-       if (src1) {
-               cat5->full = ! (src1->flags & IR3_REG_HALF);
-               cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF);
-       }
-
-
-       if (instr->flags & IR3_INSTR_S2EN) {
-               if (src2) {
-                       iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
-                       cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
-               }
-               if (src3) {
-                       iassert(src3->flags & IR3_REG_HALF);
-                       cat5->s2en.src3 = reg(src3, info, instr->repeat, IR3_REG_HALF);
-               }
-               iassert(!(instr->cat5.samp | instr->cat5.tex));
-       } else {
-               iassert(!src3);
-               if (src2) {
-                       iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
-                       cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
-               }
-               cat5->norm.samp = instr->cat5.samp;
-               cat5->norm.tex  = instr->cat5.tex;
-       }
-
-       cat5->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
-       cat5->wrmask   = dst->wrmask;
-       cat5->type     = instr->cat5.type;
-       cat5->is_3d    = !!(instr->flags & IR3_INSTR_3D);
-       cat5->is_a     = !!(instr->flags & IR3_INSTR_A);
-       cat5->is_s     = !!(instr->flags & IR3_INSTR_S);
-       cat5->is_s2en  = !!(instr->flags & IR3_INSTR_S2EN);
-       cat5->is_o     = !!(instr->flags & IR3_INSTR_O);
-       cat5->is_p     = !!(instr->flags & IR3_INSTR_P);
-       cat5->opc      = instr->opc;
-       cat5->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
-       cat5->sync     = !!(instr->flags & IR3_INSTR_SY);
-       cat5->opc_cat  = 5;
-
-       return 0;
-}
-
-static int emit_cat6(struct ir3_instruction *instr, void *ptr,
-               struct ir3_shader_info *info)
-{
-       struct ir3_register *dst = instr->regs[0];
-       struct ir3_register *src = instr->regs[1];
-       instr_cat6_t *cat6 = ptr;
-
-       iassert(instr->regs_count == 2);
-
-       switch (instr->opc) {
-       /* load instructions: */
-       case OPC_LDG:
-       case OPC_LDP:
-       case OPC_LDL:
-       case OPC_LDLW:
-       case OPC_LDLV:
-       case OPC_PREFETCH: {
-               instr_cat6a_t *cat6a = ptr;
-
-               iassert(!((dst->flags ^ type_flags(instr->cat6.type)) & IR3_REG_HALF));
-
-               cat6a->must_be_one1  = 1;
-               cat6a->must_be_one2  = 1;
-               cat6a->off = instr->cat6.offset;
-               cat6a->src = reg(src, info, instr->repeat, 0);
-               cat6a->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
-               break;
-       }
-       /* store instructions: */
-       case OPC_STG:
-       case OPC_STP:
-       case OPC_STL:
-       case OPC_STLW:
-       case OPC_STI: {
-               instr_cat6b_t *cat6b = ptr;
-               uint32_t src_flags = type_flags(instr->cat6.type);
-               uint32_t dst_flags = (instr->opc == OPC_STI) ? IR3_REG_HALF : 0;
-
-               iassert(!((src->flags ^ src_flags) & IR3_REG_HALF));
-
-               cat6b->must_be_one1  = 1;
-               cat6b->must_be_one2  = 1;
-               cat6b->src    = reg(src, info, instr->repeat, src_flags);
-               cat6b->off_hi = instr->cat6.offset >> 8;
-               cat6b->off    = instr->cat6.offset;
-               cat6b->dst    = reg(dst, info, instr->repeat, IR3_REG_R | dst_flags);
-
-               break;
-       }
-       default:
-               // TODO
-               break;
-       }
-
-       cat6->iim_val  = instr->cat6.iim_val;
-       cat6->type     = instr->cat6.type;
-       cat6->opc      = instr->opc;
-       cat6->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
-       cat6->sync     = !!(instr->flags & IR3_INSTR_SY);
-       cat6->opc_cat  = 6;
-
-       return 0;
-}
-
-static int (*emit[])(struct ir3_instruction *instr, void *ptr,
-               struct ir3_shader_info *info) = {
-       emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6,
-};
-
-void * ir3_shader_assemble(struct ir3_shader *shader, struct ir3_shader_info *info)
-{
-       uint32_t *ptr, *dwords;
-       uint32_t i;
-
-       info->max_reg       = -1;
-       info->max_half_reg  = -1;
-       info->max_const     = -1;
-
-       /* need a integer number of instruction "groups" (sets of four
-        * instructions), so pad out w/ NOPs if needed:
-        * (each instruction is 64bits)
-        */
-       info->sizedwords = 2 * align(shader->instrs_count, 4);
-
-       ptr = dwords = calloc(1, 4 * info->sizedwords);
-
-       for (i = 0; i < shader->instrs_count; i++) {
-               struct ir3_instruction *instr = shader->instrs[i];
-               int ret = emit[instr->category](instr, dwords, info);
-               if (ret)
-                       goto fail;
-               dwords += 2;
-       }
-
-       return ptr;
-
-fail:
-       free(ptr);
-       return NULL;
-}
-
-static struct ir3_register * reg_create(struct ir3_shader *shader,
-               int num, int flags)
-{
-       struct ir3_register *reg =
-                       ir3_alloc(shader, sizeof(struct ir3_register));
-       reg->wrmask = 1;
-       reg->flags = flags;
-       reg->num = num;
-       return reg;
-}
-
-static void insert_instr(struct ir3_shader *shader,
-               struct ir3_instruction *instr)
-{
-       assert(shader->instrs_count < ARRAY_SIZE(shader->instrs));
-       shader->instrs[shader->instrs_count++] = instr;
-}
-
-struct ir3_instruction * ir3_instr_create(struct ir3_shader *shader,
-               int category, opc_t opc)
-{
-       struct ir3_instruction *instr =
-                       ir3_alloc(shader, sizeof(struct ir3_instruction));
-       instr->shader = shader;
-       instr->category = category;
-       instr->opc = opc;
-       insert_instr(shader, instr);
-       return instr;
-}
-
-struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
-{
-       struct ir3_instruction *new_instr =
-                       ir3_alloc(instr->shader, sizeof(struct ir3_instruction));
-       unsigned i;
-
-       *new_instr = *instr;
-       insert_instr(instr->shader, new_instr);
-
-       /* clone registers: */
-       new_instr->regs_count = 0;
-       for (i = 0; i < instr->regs_count; i++) {
-               struct ir3_register *reg = instr->regs[i];
-               struct ir3_register *new_reg =
-                               ir3_reg_create(new_instr, reg->num, reg->flags);
-               *new_reg = *reg;
-       }
-
-       return new_instr;
-}
-
-struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
-               int num, int flags)
-{
-       struct ir3_register *reg = reg_create(instr->shader, num, flags);
-       assert(instr->regs_count < ARRAY_SIZE(instr->regs));
-       instr->regs[instr->regs_count++] = reg;
-       return reg;
-}
diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
deleted file mode 100644 (file)
index b0afe18..0000000
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef IR3_H_
-#define IR3_H_
-
-#include <stdint.h>
-#include <stdbool.h>
-
-#include "instr-a3xx.h"
-
-/* low level intermediate representation of an adreno shader program */
-
-struct ir3_shader;
-
-struct ir3_shader * fd_asm_parse(const char *src);
-
-struct ir3_shader_info {
-       uint16_t sizedwords;
-       /* NOTE: max_reg, etc, does not include registers not touched
-        * by the shader (ie. vertex fetched via VFD_DECODE but not
-        * touched by shader)
-        */
-       int8_t   max_reg;   /* highest GPR # used by shader */
-       int8_t   max_half_reg;
-       int8_t   max_const;
-};
-
-struct ir3_register {
-       enum {
-               IR3_REG_CONST  = 0x001,
-               IR3_REG_IMMED  = 0x002,
-               IR3_REG_HALF   = 0x004,
-               IR3_REG_RELATIV= 0x008,
-               IR3_REG_R      = 0x010,
-               IR3_REG_NEGATE = 0x020,
-               IR3_REG_ABS    = 0x040,
-               IR3_REG_EVEN   = 0x080,
-               IR3_REG_POS_INF= 0x100,
-               /* (ei) flag, end-input?  Set on last bary, presumably to signal
-                * that the shader needs no more input:
-                */
-               IR3_REG_EI     = 0x200,
-       } flags;
-       union {
-               /* normal registers:
-                * the component is in the low two bits of the reg #, so
-                * rN.x becomes: (N << 2) | x
-                */
-               int num;
-               /* immediate: */
-               int     iim_val;
-               float   fim_val;
-               /* relative: */
-               int offset;
-       };
-
-       /* used for cat5 instructions, but also for internal/IR level
-        * tracking of what registers are read/written by an instruction.
-        * wrmask may be a bad name since it is used to represent both
-        * src and dst that touch multiple adjacent registers.
-        */
-       int wrmask;
-};
-
-struct ir3_instruction {
-       struct ir3_shader *shader;
-       int category;
-       opc_t opc;
-       enum {
-               /* (sy) flag is set on first instruction, and after sample
-                * instructions (probably just on RAW hazard).
-                */
-               IR3_INSTR_SY    = 0x001,
-               /* (ss) flag is set on first instruction, and first instruction
-                * to depend on the result of "long" instructions (RAW hazard):
-                *
-                *   rcp, rsq, log2, exp2, sin, cos, sqrt
-                *
-                * It seems to synchronize until all in-flight instructions are
-                * completed, for example:
-                *
-                *   rsq hr1.w, hr1.w
-                *   add.f hr2.z, (neg)hr2.z, hc0.y
-                *   mul.f hr2.w, (neg)hr2.y, (neg)hr2.y
-                *   rsq hr2.x, hr2.x
-                *   (rpt1)nop
-                *   mad.f16 hr2.w, hr2.z, hr2.z, hr2.w
-                *   nop
-                *   mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w
-                *   (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w
-                *   (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x
-                *
-                * The last mul.f does not have (ss) set, presumably because the
-                * (ss) on the previous instruction does the job.
-                *
-                * The blob driver also seems to set it on WAR hazards, although
-                * not really clear if this is needed or just blob compiler being
-                * sloppy.  So far I haven't found a case where removing the (ss)
-                * causes problems for WAR hazard, but I could just be getting
-                * lucky:
-                *
-                *   rcp r1.y, r3.y
-                *   (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z
-                *
-                */
-               IR3_INSTR_SS    = 0x002,
-               /* (jp) flag is set on jump targets:
-                */
-               IR3_INSTR_JP    = 0x004,
-               IR3_INSTR_UL    = 0x008,
-               IR3_INSTR_3D    = 0x010,
-               IR3_INSTR_A     = 0x020,
-               IR3_INSTR_O     = 0x040,
-               IR3_INSTR_P     = 0x080,
-               IR3_INSTR_S     = 0x100,
-               IR3_INSTR_S2EN  = 0x200,
-       } flags;
-       int repeat;
-       unsigned regs_count;
-       struct ir3_register *regs[4];
-       union {
-               struct {
-                       char inv;
-                       char comp;
-                       int  immed;
-               } cat0;
-               struct {
-                       type_t src_type, dst_type;
-               } cat1;
-               struct {
-                       enum {
-                               IR3_COND_LT = 0,
-                               IR3_COND_LE = 1,
-                               IR3_COND_GT = 2,
-                               IR3_COND_GE = 3,
-                               IR3_COND_EQ = 4,
-                               IR3_COND_NE = 5,
-                       } condition;
-               } cat2;
-               struct {
-                       unsigned samp, tex;
-                       type_t type;
-               } cat5;
-               struct {
-                       type_t type;
-                       int offset;
-                       int iim_val;
-               } cat6;
-       };
-};
-
-#define MAX_INSTRS 1024
-
-struct ir3_shader {
-       unsigned instrs_count;
-       struct ir3_instruction *instrs[MAX_INSTRS];
-       uint32_t heap[128 * MAX_INSTRS];
-       unsigned heap_idx;
-};
-
-struct ir3_shader * ir3_shader_create(void);
-void ir3_shader_destroy(struct ir3_shader *shader);
-void * ir3_shader_assemble(struct ir3_shader *shader,
-               struct ir3_shader_info *info);
-
-struct ir3_instruction * ir3_instr_create(struct ir3_shader *shader,
-               int category, opc_t opc);
-struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
-
-struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
-               int num, int flags);
-
-#endif /* IR3_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3.c b/src/gallium/drivers/freedreno/a3xx/ir3.c
new file mode 100644 (file)
index 0000000..2a06d42
--- /dev/null
@@ -0,0 +1,640 @@
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ir3.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <stdbool.h>
+#include <errno.h>
+
+#include "freedreno_util.h"
+#include "instr-a3xx.h"
+
+/* simple allocator to carve allocations out of an up-front allocated heap,
+ * so that we can free everything easily in one shot.
+ */
+static void * ir3_alloc(struct ir3_shader *shader, int sz)
+{
+       void *ptr = &shader->heap[shader->heap_idx];
+       shader->heap_idx += align(sz, 4);
+       return ptr;
+}
+
+struct ir3_shader * ir3_shader_create(void)
+{
+       return calloc(1, sizeof(struct ir3_shader));
+}
+
+void ir3_shader_destroy(struct ir3_shader *shader)
+{
+       free(shader);
+}
+
+#define iassert(cond) do { \
+       if (!(cond)) { \
+               assert(cond); \
+               return -1; \
+       } } while (0)
+
+static uint32_t reg(struct ir3_register *reg, struct ir3_shader_info *info,
+               uint32_t repeat, uint32_t valid_flags)
+{
+       reg_t val = { .dummy32 = 0 };
+
+       assert(!(reg->flags & ~valid_flags));
+
+       if (!(reg->flags & IR3_REG_R))
+               repeat = 0;
+
+       if (reg->flags & IR3_REG_IMMED) {
+               val.iim_val = reg->iim_val;
+       } else {
+               int8_t components = util_last_bit(reg->wrmask);
+               int8_t max = (reg->num + repeat + components - 1) >> 2;
+
+               val.comp = reg->num & 0x3;
+               val.num  = reg->num >> 2;
+
+               if (reg->flags & IR3_REG_CONST) {
+                       info->max_const = MAX2(info->max_const, max);
+               } else if ((max != REG_A0) && (max != REG_P0)) {
+                       if (reg->flags & IR3_REG_HALF) {
+                               info->max_half_reg = MAX2(info->max_half_reg, max);
+                       } else {
+                               info->max_reg = MAX2(info->max_reg, max);
+                       }
+               }
+       }
+
+       return val.dummy32;
+}
+
+static int emit_cat0(struct ir3_instruction *instr, void *ptr,
+               struct ir3_shader_info *info)
+{
+       instr_cat0_t *cat0 = ptr;
+
+       cat0->immed    = instr->cat0.immed;
+       cat0->repeat   = instr->repeat;
+       cat0->ss       = !!(instr->flags & IR3_INSTR_SS);
+       cat0->inv      = instr->cat0.inv;
+       cat0->comp     = instr->cat0.comp;
+       cat0->opc      = instr->opc;
+       cat0->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
+       cat0->sync     = !!(instr->flags & IR3_INSTR_SY);
+       cat0->opc_cat  = 0;
+
+       return 0;
+}
+
+static uint32_t type_flags(type_t type)
+{
+       return (type_size(type) == 32) ? 0 : IR3_REG_HALF;
+}
+
+static int emit_cat1(struct ir3_instruction *instr, void *ptr,
+               struct ir3_shader_info *info)
+{
+       struct ir3_register *dst = instr->regs[0];
+       struct ir3_register *src = instr->regs[1];
+       instr_cat1_t *cat1 = ptr;
+
+       iassert(instr->regs_count == 2);
+       iassert(!((dst->flags ^ type_flags(instr->cat1.dst_type)) & IR3_REG_HALF));
+       iassert((src->flags & IR3_REG_IMMED) ||
+                       !((src->flags ^ type_flags(instr->cat1.src_type)) & IR3_REG_HALF));
+
+       if (src->flags & IR3_REG_IMMED) {
+               cat1->iim_val = src->iim_val;
+               cat1->src_im  = 1;
+       } else if (src->flags & IR3_REG_RELATIV) {
+               cat1->off       = src->offset;
+               cat1->src_rel   = 1;
+               cat1->src_rel_c = !!(src->flags & IR3_REG_CONST);
+       } else {
+               cat1->src  = reg(src, info, instr->repeat,
+                               IR3_REG_IMMED | IR3_REG_R |
+                               IR3_REG_CONST | IR3_REG_HALF);
+               cat1->src_c     = !!(src->flags & IR3_REG_CONST);
+       }
+
+       cat1->dst      = reg(dst, info, instr->repeat,
+                       IR3_REG_RELATIV | IR3_REG_EVEN |
+                       IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF);
+       cat1->repeat   = instr->repeat;
+       cat1->src_r    = !!(src->flags & IR3_REG_R);
+       cat1->ss       = !!(instr->flags & IR3_INSTR_SS);
+       cat1->ul       = !!(instr->flags & IR3_INSTR_UL);
+       cat1->dst_type = instr->cat1.dst_type;
+       cat1->dst_rel  = !!(dst->flags & IR3_REG_RELATIV);
+       cat1->src_type = instr->cat1.src_type;
+       cat1->even     = !!(dst->flags & IR3_REG_EVEN);
+       cat1->pos_inf  = !!(dst->flags & IR3_REG_POS_INF);
+       cat1->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
+       cat1->sync     = !!(instr->flags & IR3_INSTR_SY);
+       cat1->opc_cat  = 1;
+
+       return 0;
+}
+
+static int emit_cat2(struct ir3_instruction *instr, void *ptr,
+               struct ir3_shader_info *info)
+{
+       struct ir3_register *dst = instr->regs[0];
+       struct ir3_register *src1 = instr->regs[1];
+       struct ir3_register *src2 = instr->regs[2];
+       instr_cat2_t *cat2 = ptr;
+
+       iassert((instr->regs_count == 2) || (instr->regs_count == 3));
+
+       if (src1->flags & IR3_REG_RELATIV) {
+               iassert(src1->num < (1 << 10));
+               cat2->rel1.src1      = reg(src1, info, instr->repeat,
+                               IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE |
+                               IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF);
+               cat2->rel1.src1_c    = !!(src1->flags & IR3_REG_CONST);
+               cat2->rel1.src1_rel  = 1;
+       } else if (src1->flags & IR3_REG_CONST) {
+               iassert(src1->num < (1 << 12));
+               cat2->c1.src1   = reg(src1, info, instr->repeat,
+                               IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS |
+                               IR3_REG_R | IR3_REG_HALF);
+               cat2->c1.src1_c = 1;
+       } else {
+               iassert(src1->num < (1 << 11));
+               cat2->src1 = reg(src1, info, instr->repeat,
+                               IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS |
+                               IR3_REG_R | IR3_REG_HALF);
+       }
+       cat2->src1_im  = !!(src1->flags & IR3_REG_IMMED);
+       cat2->src1_neg = !!(src1->flags & IR3_REG_NEGATE);
+       cat2->src1_abs = !!(src1->flags & IR3_REG_ABS);
+       cat2->src1_r   = !!(src1->flags & IR3_REG_R);
+
+       if (src2) {
+               iassert((src2->flags & IR3_REG_IMMED) ||
+                               !((src1->flags ^ src2->flags) & IR3_REG_HALF));
+
+               if (src2->flags & IR3_REG_RELATIV) {
+                       iassert(src2->num < (1 << 10));
+                       cat2->rel2.src2      = reg(src2, info, instr->repeat,
+                                       IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE |
+                                       IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF);
+                       cat2->rel2.src2_c    = !!(src2->flags & IR3_REG_CONST);
+                       cat2->rel2.src2_rel  = 1;
+               } else if (src2->flags & IR3_REG_CONST) {
+                       iassert(src2->num < (1 << 12));
+                       cat2->c2.src2   = reg(src2, info, instr->repeat,
+                                       IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS |
+                                       IR3_REG_R | IR3_REG_HALF);
+                       cat2->c2.src2_c = 1;
+               } else {
+                       iassert(src2->num < (1 << 11));
+                       cat2->src2 = reg(src2, info, instr->repeat,
+                                       IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS |
+                                       IR3_REG_R | IR3_REG_HALF);
+               }
+
+               cat2->src2_im  = !!(src2->flags & IR3_REG_IMMED);
+               cat2->src2_neg = !!(src2->flags & IR3_REG_NEGATE);
+               cat2->src2_abs = !!(src2->flags & IR3_REG_ABS);
+               cat2->src2_r   = !!(src2->flags & IR3_REG_R);
+       }
+
+       cat2->dst      = reg(dst, info, instr->repeat,
+                       IR3_REG_R | IR3_REG_EI | IR3_REG_HALF);
+       cat2->repeat   = instr->repeat;
+       cat2->ss       = !!(instr->flags & IR3_INSTR_SS);
+       cat2->ul       = !!(instr->flags & IR3_INSTR_UL);
+       cat2->dst_half = !!((src1->flags ^ dst->flags) & IR3_REG_HALF);
+       cat2->ei       = !!(dst->flags & IR3_REG_EI);
+       cat2->cond     = instr->cat2.condition;
+       cat2->full     = ! (src1->flags & IR3_REG_HALF);
+       cat2->opc      = instr->opc;
+       cat2->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
+       cat2->sync     = !!(instr->flags & IR3_INSTR_SY);
+       cat2->opc_cat  = 2;
+
+       return 0;
+}
+
+/* Encode a category 3 (three source) instruction into the instr_cat3_t
+ * pointed to by 'ptr'.
+ *
+ * Expects exactly four registers: regs[0] is the destination and
+ * regs[1..3] are the three sources.  Returns 0 once the instruction has
+ * been encoded.
+ *
+ * NOTE(review): iassert() and reg() are defined outside this part of the
+ * file; iassert() presumably bails out with an error when a check fails,
+ * and reg() presumably validates the flags and records register usage in
+ * 'info' -- confirm.
+ */
+static int emit_cat3(struct ir3_instruction *instr, void *ptr,
+               struct ir3_shader_info *info)
+{
+       struct ir3_register *dst = instr->regs[0];
+       struct ir3_register *src1 = instr->regs[1];
+       struct ir3_register *src2 = instr->regs[2];
+       struct ir3_register *src3 = instr->regs[3];
+       instr_cat3_t *cat3 = ptr;
+       uint32_t src_flags = 0;
+
+       /* these opcodes require all three sources to be half registers;
+        * every other opcode requires all three to be full registers:
+        */
+       switch (instr->opc) {
+       case OPC_MAD_F16:
+       case OPC_MAD_U16:
+       case OPC_MAD_S16:
+       case OPC_SEL_B16:
+       case OPC_SEL_S16:
+       case OPC_SEL_F16:
+       case OPC_SAD_S16:
+       case OPC_SAD_S32:  // really??
+               src_flags |= IR3_REG_HALF;
+               break;
+       default:
+               break;
+       }
+
+       iassert(instr->regs_count == 4);
+       iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF));
+       iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF));
+       iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF));
+
+       /* src1 has three encodings (relative, const, plain register), each
+        * with its own limit on the register number:
+        */
+       if (src1->flags & IR3_REG_RELATIV) {
+               iassert(src1->num < (1 << 10));
+               cat3->rel1.src1      = reg(src1, info, instr->repeat,
+                               IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE |
+                               IR3_REG_R | IR3_REG_HALF);
+               cat3->rel1.src1_c    = !!(src1->flags & IR3_REG_CONST);
+               cat3->rel1.src1_rel  = 1;
+       } else if (src1->flags & IR3_REG_CONST) {
+               iassert(src1->num < (1 << 12));
+               cat3->c1.src1   = reg(src1, info, instr->repeat,
+                               IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_R |
+                               IR3_REG_HALF);
+               cat3->c1.src1_c = 1;
+       } else {
+               iassert(src1->num < (1 << 11));
+               cat3->src1 = reg(src1, info, instr->repeat,
+                               IR3_REG_NEGATE | IR3_REG_R | IR3_REG_HALF);
+       }
+
+       cat3->src1_neg = !!(src1->flags & IR3_REG_NEGATE);
+       cat3->src1_r   = !!(src1->flags & IR3_REG_R);
+
+       /* src2 only has the single encoding, with a separate const bit: */
+       cat3->src2     = reg(src2, info, instr->repeat,
+                       IR3_REG_CONST | IR3_REG_NEGATE |
+                       IR3_REG_R | IR3_REG_HALF);
+       cat3->src2_c   = !!(src2->flags & IR3_REG_CONST);
+       cat3->src2_neg = !!(src2->flags & IR3_REG_NEGATE);
+       cat3->src2_r   = !!(src2->flags & IR3_REG_R);
+
+
+       /* src3 mirrors the three encodings used for src1: */
+       if (src3->flags & IR3_REG_RELATIV) {
+               iassert(src3->num < (1 << 10));
+               cat3->rel2.src3      = reg(src3, info, instr->repeat,
+                               IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE |
+                               IR3_REG_R | IR3_REG_HALF);
+               cat3->rel2.src3_c    = !!(src3->flags & IR3_REG_CONST);
+               cat3->rel2.src3_rel  = 1;
+       } else if (src3->flags & IR3_REG_CONST) {
+               iassert(src3->num < (1 << 12));
+               cat3->c2.src3   = reg(src3, info, instr->repeat,
+                               IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_R |
+                               IR3_REG_HALF);
+               cat3->c2.src3_c = 1;
+       } else {
+               iassert(src3->num < (1 << 11));
+               cat3->src3 = reg(src3, info, instr->repeat,
+                               IR3_REG_NEGATE | IR3_REG_R | IR3_REG_HALF);
+       }
+
+       cat3->src3_neg = !!(src3->flags & IR3_REG_NEGATE);
+       cat3->src3_r   = !!(src3->flags & IR3_REG_R);
+
+       cat3->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+       cat3->repeat   = instr->repeat;
+       cat3->ss       = !!(instr->flags & IR3_INSTR_SS);
+       cat3->ul       = !!(instr->flags & IR3_INSTR_UL);
+       /* set when the dst precision differs from the src precision: */
+       cat3->dst_half = !!((src_flags ^ dst->flags) & IR3_REG_HALF);
+       cat3->opc      = instr->opc;
+       cat3->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
+       cat3->sync     = !!(instr->flags & IR3_INSTR_SY);
+       cat3->opc_cat  = 3;
+
+       return 0;
+}
+
+/* Encode a category 4 (single source) instruction into the instr_cat4_t
+ * pointed to by 'ptr'.
+ *
+ * Expects exactly two registers: regs[0] is the destination and regs[1]
+ * the source.  Returns 0 once the instruction has been encoded.
+ *
+ * NOTE(review): iassert() and reg() are defined outside this part of the
+ * file; iassert() presumably bails out with an error when a check fails
+ * -- confirm.
+ */
+static int emit_cat4(struct ir3_instruction *instr, void *ptr,
+               struct ir3_shader_info *info)
+{
+       struct ir3_register *dst = instr->regs[0];
+       struct ir3_register *src = instr->regs[1];
+       instr_cat4_t *cat4 = ptr;
+
+       iassert(instr->regs_count == 2);
+
+       /* the source has three encodings (relative, const, plain/immediate),
+        * each with its own limit on the register number:
+        */
+       if (src->flags & IR3_REG_RELATIV) {
+               iassert(src->num < (1 << 10));
+               cat4->rel.src      = reg(src, info, instr->repeat,
+                               IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_NEGATE |
+                               IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF);
+               cat4->rel.src_c    = !!(src->flags & IR3_REG_CONST);
+               cat4->rel.src_rel  = 1;
+       } else if (src->flags & IR3_REG_CONST) {
+               iassert(src->num < (1 << 12));
+               cat4->c.src   = reg(src, info, instr->repeat,
+                               IR3_REG_CONST | IR3_REG_NEGATE | IR3_REG_ABS |
+                               IR3_REG_R | IR3_REG_HALF);
+               cat4->c.src_c = 1;
+       } else {
+               iassert(src->num < (1 << 11));
+               cat4->src = reg(src, info, instr->repeat,
+                               IR3_REG_IMMED | IR3_REG_NEGATE | IR3_REG_ABS |
+                               IR3_REG_R | IR3_REG_HALF);
+       }
+
+       cat4->src_im   = !!(src->flags & IR3_REG_IMMED);
+       cat4->src_neg  = !!(src->flags & IR3_REG_NEGATE);
+       cat4->src_abs  = !!(src->flags & IR3_REG_ABS);
+       cat4->src_r    = !!(src->flags & IR3_REG_R);
+
+       cat4->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+       cat4->repeat   = instr->repeat;
+       cat4->ss       = !!(instr->flags & IR3_INSTR_SS);
+       cat4->ul       = !!(instr->flags & IR3_INSTR_UL);
+       /* set when the dst precision differs from the src precision: */
+       cat4->dst_half = !!((src->flags ^ dst->flags) & IR3_REG_HALF);
+       /* 'full' follows the precision of the source register: */
+       cat4->full     = ! (src->flags & IR3_REG_HALF);
+       cat4->opc      = instr->opc;
+       cat4->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
+       cat4->sync     = !!(instr->flags & IR3_INSTR_SY);
+       cat4->opc_cat  = 4;
+
+       return 0;
+}
+
+/* Encode a category 5 instruction into the instr_cat5_t pointed to by
+ * 'ptr'.
+ *
+ * regs[0] is the destination; regs[1..3] are optional sources.  The
+ * precision of the destination must match instr->cat5.type.  Returns 0
+ * once the instruction has been encoded.
+ *
+ * NOTE(review): regs_count is not checked here; the sources are tested
+ * against NULL instead, which presumably relies on unused regs[] slots
+ * being zeroed by the allocator -- confirm.  The src2 checks also
+ * dereference src1, so they assume src2 is never present without src1.
+ */
+static int emit_cat5(struct ir3_instruction *instr, void *ptr,
+               struct ir3_shader_info *info)
+{
+       struct ir3_register *dst = instr->regs[0];
+       struct ir3_register *src1 = instr->regs[1];
+       struct ir3_register *src2 = instr->regs[2];
+       struct ir3_register *src3 = instr->regs[3];
+       instr_cat5_t *cat5 = ptr;
+
+       iassert(!((dst->flags ^ type_flags(instr->cat5.type)) & IR3_REG_HALF));
+
+       if (src1) {
+               cat5->full = ! (src1->flags & IR3_REG_HALF);
+               cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF);
+       }
+
+
+       if (instr->flags & IR3_INSTR_S2EN) {
+               /* s2en form: a third source may be given (it must be a half
+                * register), and the immediate samp/tex must both be zero:
+                */
+               if (src2) {
+                       iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
+                       cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
+               }
+               if (src3) {
+                       iassert(src3->flags & IR3_REG_HALF);
+                       cat5->s2en.src3 = reg(src3, info, instr->repeat, IR3_REG_HALF);
+               }
+               iassert(!(instr->cat5.samp | instr->cat5.tex));
+       } else {
+               /* normal form: no third source, samp/tex encoded directly: */
+               iassert(!src3);
+               if (src2) {
+                       iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
+                       cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
+               }
+               cat5->norm.samp = instr->cat5.samp;
+               cat5->norm.tex  = instr->cat5.tex;
+       }
+
+       cat5->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+       cat5->wrmask   = dst->wrmask;
+       cat5->type     = instr->cat5.type;
+       cat5->is_3d    = !!(instr->flags & IR3_INSTR_3D);
+       cat5->is_a     = !!(instr->flags & IR3_INSTR_A);
+       cat5->is_s     = !!(instr->flags & IR3_INSTR_S);
+       cat5->is_s2en  = !!(instr->flags & IR3_INSTR_S2EN);
+       cat5->is_o     = !!(instr->flags & IR3_INSTR_O);
+       cat5->is_p     = !!(instr->flags & IR3_INSTR_P);
+       cat5->opc      = instr->opc;
+       cat5->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
+       cat5->sync     = !!(instr->flags & IR3_INSTR_SY);
+       cat5->opc_cat  = 5;
+
+       return 0;
+}
+
+/* Encode a category 6 (load/store) instruction into the 64-bit slot
+ * pointed to by 'ptr'.
+ *
+ * Expects exactly two registers: regs[0] is the destination and regs[1]
+ * the source.  Loads are written through the instr_cat6a_t layout and
+ * stores through instr_cat6b_t; the fields common to both are written
+ * through instr_cat6_t at the end.  Opcodes that are neither only get the
+ * common fields (see the TODO below).  Returns 0 once encoded.
+ */
+static int emit_cat6(struct ir3_instruction *instr, void *ptr,
+               struct ir3_shader_info *info)
+{
+       struct ir3_register *dst = instr->regs[0];
+       struct ir3_register *src = instr->regs[1];
+       instr_cat6_t *cat6 = ptr;
+
+       iassert(instr->regs_count == 2);
+
+       switch (instr->opc) {
+       /* load instructions: */
+       case OPC_LDG:
+       case OPC_LDP:
+       case OPC_LDL:
+       case OPC_LDLW:
+       case OPC_LDLV:
+       case OPC_PREFETCH: {
+               instr_cat6a_t *cat6a = ptr;
+
+               /* the dst precision must match the load type: */
+               iassert(!((dst->flags ^ type_flags(instr->cat6.type)) & IR3_REG_HALF));
+
+               cat6a->must_be_one1  = 1;
+               cat6a->must_be_one2  = 1;
+               cat6a->off = instr->cat6.offset;
+               cat6a->src = reg(src, info, instr->repeat, 0);
+               cat6a->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+               break;
+       }
+       /* store instructions: */
+       case OPC_STG:
+       case OPC_STP:
+       case OPC_STL:
+       case OPC_STLW:
+       case OPC_STI: {
+               instr_cat6b_t *cat6b = ptr;
+               uint32_t src_flags = type_flags(instr->cat6.type);
+               /* only sti is allowed a half register as its dst: */
+               uint32_t dst_flags = (instr->opc == OPC_STI) ? IR3_REG_HALF : 0;
+
+               /* the src precision must match the store type: */
+               iassert(!((src->flags ^ src_flags) & IR3_REG_HALF));
+
+               cat6b->must_be_one1  = 1;
+               cat6b->must_be_one2  = 1;
+               cat6b->src    = reg(src, info, instr->repeat, src_flags);
+               /* the offset is split: bits above the low 8 go in off_hi,
+                * and 'off' presumably keeps just the low bits (bitfield
+                * truncation) -- confirm against instr_cat6b_t:
+                */
+               cat6b->off_hi = instr->cat6.offset >> 8;
+               cat6b->off    = instr->cat6.offset;
+               cat6b->dst    = reg(dst, info, instr->repeat, IR3_REG_R | dst_flags);
+
+               break;
+       }
+       default:
+               // TODO
+               break;
+       }
+
+       cat6->iim_val  = instr->cat6.iim_val;
+       cat6->type     = instr->cat6.type;
+       cat6->opc      = instr->opc;
+       cat6->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
+       cat6->sync     = !!(instr->flags & IR3_INSTR_SY);
+       cat6->opc_cat  = 6;
+
+       return 0;
+}
+
+/* Per-category instruction encoders, indexed by the instruction's
+ * category (0 through 6).  Each one writes a single encoded instruction
+ * to 'ptr'.
+ */
+static int (*emit[])(struct ir3_instruction *instr, void *ptr,
+               struct ir3_shader_info *info) = {
+       emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6,
+};
+
+/* Assemble all instructions of 'shader' into a newly allocated buffer.
+ *
+ * 'info' is reset and then filled in: sizedwords is the size of the
+ * returned buffer in dwords, and instrs_count is the number of
+ * instructions with repeats expanded.  The max_* fields start at -1;
+ * they are presumably raised by the per-category encoders as registers
+ * are encoded.
+ *
+ * Returns the buffer, which the caller must free(), or NULL if the
+ * allocation fails, an instruction has an unknown category, or an
+ * encoder reports an error.
+ */
+void * ir3_shader_assemble(struct ir3_shader *shader, struct ir3_shader_info *info)
+{
+       uint32_t *ptr, *dwords;
+       uint32_t i;
+
+       info->max_reg       = -1;
+       info->max_half_reg  = -1;
+       info->max_const     = -1;
+       info->instrs_count  = 0;
+
+       /* need an integer number of instruction "groups" (sets of four
+        * instructions), so pad out w/ NOPs if needed:
+        * (each instruction is 64bits)
+        *
+        * The padding is simply the zero fill from calloc().
+        */
+       info->sizedwords = 2 * align(shader->instrs_count, 4);
+
+       ptr = dwords = calloc(1, 4 * info->sizedwords);
+       if (!ptr)
+               return NULL;
+
+       for (i = 0; i < shader->instrs_count; i++) {
+               struct ir3_instruction *instr = shader->instrs[i];
+               int ret;
+
+               /* don't index past the encoder table on a bogus category: */
+               if ((instr->category < 0) ||
+                               ((unsigned)instr->category >= ARRAY_SIZE(emit)))
+                       goto fail;
+
+               ret = emit[instr->category](instr, dwords, info);
+               if (ret)
+                       goto fail;
+               info->instrs_count += 1 + instr->repeat;
+               dwords += 2;
+       }
+
+       return ptr;
+
+fail:
+       free(ptr);
+       return NULL;
+}
+
+/* Allocate a register from the shader's allocator and initialise it with
+ * the given number and flags.  The write mask starts out as 1.
+ */
+static struct ir3_register * reg_create(struct ir3_shader *shader,
+               int num, int flags)
+{
+       struct ir3_register *r = ir3_alloc(shader, sizeof(*r));
+
+       r->num    = num;
+       r->flags  = flags;
+       r->wrmask = 1;
+
+       return r;
+}
+
+/* Append 'instr' to the shader's instruction list.  In DEBUG builds each
+ * inserted instruction is also stamped with a unique, increasing serial
+ * number.
+ */
+static void insert_instr(struct ir3_shader *shader,
+               struct ir3_instruction *instr)
+{
+       unsigned slot;
+#ifdef DEBUG
+       static uint32_t serialno = 0;
+
+       serialno++;
+       instr->serialno = serialno;
+#endif
+       slot = shader->instrs_count;
+       assert(slot < ARRAY_SIZE(shader->instrs));
+       shader->instrs[slot] = instr;
+       shader->instrs_count = slot + 1;
+}
+
+/* Create a block with room for 'ntmp' temporaries, 'nin' inputs and
+ * 'nout' outputs.
+ *
+ * The block and its three pointer arrays come from one allocation: the
+ * arrays are laid out directly after the block struct, in the order
+ * temporaries, inputs, outputs.
+ */
+struct ir3_block * ir3_block_create(struct ir3_shader *shader,
+               unsigned ntmp, unsigned nin, unsigned nout)
+{
+       struct ir3_block *block;
+       unsigned total;
+       char *cursor;
+
+       total = sizeof(*block)
+                       + sizeof(block->temporaries[0]) * ntmp
+                       + sizeof(block->inputs[0]) * nin
+                       + sizeof(block->outputs[0]) * nout;
+
+       cursor = ir3_alloc(shader, total);
+
+       block = (void *)cursor;
+       block->shader = shader;
+
+       /* carve the trailing arrays out of the same allocation: */
+       cursor += sizeof(*block);
+       block->temporaries  = (void *)cursor;
+       block->ntemporaries = ntmp;
+
+       cursor += sizeof(block->temporaries[0]) * ntmp;
+       block->inputs  = (void *)cursor;
+       block->ninputs = nin;
+
+       cursor += sizeof(block->inputs[0]) * nin;
+       block->outputs  = (void *)cursor;
+       block->noutputs = nout;
+
+       return block;
+}
+
+/* Allocate a new instruction of the given category and opcode, attach it
+ * to 'block' and append it to the instruction list of the block's shader.
+ */
+struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
+               int category, opc_t opc)
+{
+       struct ir3_shader *shader = block->shader;
+       struct ir3_instruction *instr = ir3_alloc(shader, sizeof(*instr));
+
+       instr->opc      = opc;
+       instr->category = category;
+       instr->block    = block;
+
+       insert_instr(shader, instr);
+
+       return instr;
+}
+
+/* Duplicate 'instr', including its registers, and append the copy to the
+ * same shader's instruction list.  Returns the copy.
+ */
+struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
+{
+       struct ir3_shader *shader = instr->block->shader;
+       struct ir3_instruction *copy = ir3_alloc(shader, sizeof(*copy));
+       unsigned n;
+
+       *copy = *instr;
+       insert_instr(shader, copy);
+
+       /* the struct copy above left the clone pointing at the original's
+        * registers, so rebuild the register list with private copies:
+        */
+       copy->regs_count = 0;
+       for (n = 0; n < instr->regs_count; n++) {
+               struct ir3_register *orig_reg = instr->regs[n];
+               struct ir3_register *dup_reg;
+
+               dup_reg = ir3_reg_create(copy, orig_reg->num, orig_reg->flags);
+               *dup_reg = *orig_reg;
+       }
+
+       return copy;
+}
+
+/* Create a register with the given number and flags and append it to the
+ * register list of 'instr'.  Returns the new register.
+ */
+struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
+               int num, int flags)
+{
+       struct ir3_register *new_reg;
+       unsigned slot;
+
+       new_reg = reg_create(instr->block->shader, num, flags);
+
+       slot = instr->regs_count;
+       assert(slot < ARRAY_SIZE(instr->regs));
+       instr->regs[slot] = new_reg;
+       instr->regs_count = slot + 1;
+
+       return new_reg;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/ir3.h b/src/gallium/drivers/freedreno/a3xx/ir3.h
new file mode 100644 (file)
index 0000000..896bec1
--- /dev/null
@@ -0,0 +1,336 @@
+/*
+ * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IR3_H_
+#define IR3_H_
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "instr-a3xx.h"
+#include "disasm.h"  /* TODO move 'enum shader_t' somewhere else.. */
+
+/* low level intermediate representation of an adreno shader program */
+
+struct ir3_shader;
+struct ir3_instruction;
+struct ir3_block;
+
+struct ir3_shader * fd_asm_parse(const char *src);
+
+/* Summary of an assembled shader, filled in by ir3_shader_assemble().
+ * The max_* fields are reset to -1 at the start of assembly.
+ */
+struct ir3_shader_info {
+       uint16_t sizedwords;     /* size of the assembled shader, in dwords */
+       uint16_t instrs_count;   /* expanded to account for rpt's */
+       /* NOTE: max_reg, etc, does not include registers not touched
+        * by the shader (ie. vertex fetched via VFD_DECODE but not
+        * touched by shader)
+        */
+       int8_t   max_reg;   /* highest GPR # used by shader */
+       int8_t   max_half_reg;
+       int8_t   max_const;
+};
+
+/* A single register operand (source or destination) of an instruction.
+ * 'flags' selects how the anonymous union below is interpreted.
+ */
+struct ir3_register {
+       enum {
+               IR3_REG_CONST  = 0x001,
+               IR3_REG_IMMED  = 0x002,
+               IR3_REG_HALF   = 0x004,
+               IR3_REG_RELATIV= 0x008,
+               IR3_REG_R      = 0x010,
+               IR3_REG_NEGATE = 0x020,
+               IR3_REG_ABS    = 0x040,
+               IR3_REG_EVEN   = 0x080,
+               IR3_REG_POS_INF= 0x100,
+               /* (ei) flag, end-input?  Set on last bary, presumably to signal
+                * that the shader needs no more input:
+                */
+               IR3_REG_EI     = 0x200,
+       } flags;
+       union {
+               /* normal registers:
+                * the component is in the low two bits of the reg #, so
+                * rN.x becomes: (N << 2) | x
+                */
+               int num;
+               /* immediate: */
+               int     iim_val;
+               float   fim_val;
+               /* relative: */
+               int offset;
+       };
+
+       /* used for cat5 instructions, but also for internal/IR level
+        * tracking of what registers are read/written by an instruction.
+        * wrmask may be a bad name since it is used to represent both
+        * src and dst that touch multiple adjacent registers.
+        */
+       int wrmask;
+};
+
+/* A single shader instruction.  'category' selects which encoder is used
+ * when the shader is assembled, and which member of the anonymous union
+ * of per-category data applies.
+ */
+struct ir3_instruction {
+       struct ir3_block *block;
+       int category;
+       opc_t opc;
+       enum {
+               /* (sy) flag is set on first instruction, and after sample
+                * instructions (probably just on RAW hazard).
+                */
+               IR3_INSTR_SY    = 0x001,
+               /* (ss) flag is set on first instruction, and first instruction
+                * to depend on the result of "long" instructions (RAW hazard):
+                *
+                *   rcp, rsq, log2, exp2, sin, cos, sqrt
+                *
+                * It seems to synchronize until all in-flight instructions are
+                * completed, for example:
+                *
+                *   rsq hr1.w, hr1.w
+                *   add.f hr2.z, (neg)hr2.z, hc0.y
+                *   mul.f hr2.w, (neg)hr2.y, (neg)hr2.y
+                *   rsq hr2.x, hr2.x
+                *   (rpt1)nop
+                *   mad.f16 hr2.w, hr2.z, hr2.z, hr2.w
+                *   nop
+                *   mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w
+                *   (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w
+                *   (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x
+                *
+                * The last mul.f does not have (ss) set, presumably because the
+                * (ss) on the previous instruction does the job.
+                *
+                * The blob driver also seems to set it on WAR hazards, although
+                * not really clear if this is needed or just blob compiler being
+                * sloppy.  So far I haven't found a case where removing the (ss)
+                * causes problems for WAR hazard, but I could just be getting
+                * lucky:
+                *
+                *   rcp r1.y, r3.y
+                *   (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z
+                *
+                */
+               IR3_INSTR_SS    = 0x002,
+               /* (jp) flag is set on jump targets:
+                */
+               IR3_INSTR_JP    = 0x004,
+               IR3_INSTR_UL    = 0x008,
+               IR3_INSTR_3D    = 0x010,
+               IR3_INSTR_A     = 0x020,
+               IR3_INSTR_O     = 0x040,
+               IR3_INSTR_P     = 0x080,
+               IR3_INSTR_S     = 0x100,
+               IR3_INSTR_S2EN  = 0x200,
+       } flags;
+       /* repeat count; the instruction counts as (1 + repeat) instructions
+        * in ir3_shader_info::instrs_count:
+        */
+       int repeat;
+       /* registers in use: regs[0] is the dst, the rest are srcs */
+       unsigned regs_count;
+       struct ir3_register *regs[5];
+       union {
+               struct {
+                       char inv;
+                       char comp;
+                       int  immed;
+               } cat0;
+               struct {
+                       type_t src_type, dst_type;
+               } cat1;
+               struct {
+                       enum {
+                               IR3_COND_LT = 0,
+                               IR3_COND_LE = 1,
+                               IR3_COND_GT = 2,
+                               IR3_COND_GE = 3,
+                               IR3_COND_EQ = 4,
+                               IR3_COND_NE = 5,
+                       } condition;
+               } cat2;
+               struct {
+                       unsigned samp, tex;
+                       type_t type;
+               } cat5;
+               struct {
+                       type_t type;
+                       int offset;
+                       int iim_val;
+               } cat6;
+       };
+#ifdef DEBUG
+       /* unique id assigned when the instruction is inserted: */
+       uint32_t serialno;
+#endif
+};
+
+/* capacity of ir3_shader::instrs */
+#define MAX_INSTRS 1024
+
+/* A shader: the flat list of its instructions, in insertion order, plus
+ * a fixed-size heap.
+ *
+ * NOTE(review): heap/heap_idx presumably back ir3_alloc(), which is not
+ * visible in this part of the file -- confirm.
+ */
+struct ir3_shader {
+       unsigned instrs_count;
+       struct ir3_instruction *instrs[MAX_INSTRS];
+       uint32_t heap[128 * MAX_INSTRS];
+       unsigned heap_idx;
+};
+
+/* A block of instructions within a shader.  The three instruction
+ * pointer arrays are sized by ntemporaries/ninputs/noutputs and are
+ * allocated together with the block by ir3_block_create().
+ *
+ * NOTE(review): ir3_block_create() does not assign 'parent' or 'head';
+ * their initial values depend on the allocator -- confirm.
+ */
+struct ir3_block {
+       struct ir3_shader *shader;
+       unsigned ntemporaries, ninputs, noutputs;
+       /* maps TGSI_FILE_TEMPORARY index back to the assigning instruction: */
+       struct ir3_instruction **temporaries;
+       struct ir3_instruction **inputs;
+       struct ir3_instruction **outputs;
+       struct ir3_block *parent;
+       struct ir3_instruction *head;
+};
+
+/* shader lifetime: */
+struct ir3_shader * ir3_shader_create(void);
+void ir3_shader_destroy(struct ir3_shader *shader);
+/* Encode the shader; returns a malloc'd buffer (caller frees) or NULL on
+ * error, and fills in 'info':
+ */
+void * ir3_shader_assemble(struct ir3_shader *shader,
+               struct ir3_shader_info *info);
+
+/* Create a block with the given number of temporary/input/output slots: */
+struct ir3_block * ir3_block_create(struct ir3_shader *shader,
+               unsigned ntmp, unsigned nin, unsigned nout);
+
+/* Both of these append the returned instruction to the shader: */
+struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
+               int category, opc_t opc);
+struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
+
+/* Append a new register to 'instr' (dst first, then srcs): */
+struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
+               int num, int flags);
+
+
+/* comp:
+ *   0 - x
+ *   1 - y
+ *   2 - z
+ *   3 - w
+ */
+/* Pack a register number and component into a single id: the component
+ * occupies the low two bits, the register number the bits above.
+ */
+static inline uint32_t regid(int num, int comp)
+{
+       const uint32_t component = comp & 0x3;
+
+       return (num << 2) | component;
+}
+
+/* Register number part of reg->num (everything above the low two bits). */
+static inline uint32_t reg_num(struct ir3_register *reg)
+{
+       const int packed = reg->num;
+
+       return packed >> 2;
+}
+
+/* Component part of reg->num (the low two bits: 0=x, 1=y, 2=z, 3=w). */
+static inline uint32_t reg_comp(struct ir3_register *reg)
+{
+       const int packed = reg->num;
+
+       return packed & 0x3;
+}
+
+/* True for instructions in categories 1 through 3. */
+static inline bool is_alu(struct ir3_instruction *instr)
+{
+       const int cat = instr->category;
+
+       if (cat < 1)
+               return false;
+       return cat <= 3;
+}
+
+/* True for category 4 instructions. */
+static inline bool is_sfu(struct ir3_instruction *instr)
+{
+       const int cat = instr->category;
+
+       return cat == 4;
+}
+
+/* True for category 5 instructions. */
+static inline bool is_tex(struct ir3_instruction *instr)
+{
+       const int cat = instr->category;
+
+       return cat == 5;
+}
+
+/* True only for the category 2 bary.f instruction. */
+static inline bool is_input(struct ir3_instruction *instr)
+{
+       if (instr->category != 2)
+               return false;
+       return instr->opc == OPC_BARY_F;
+}
+
+/* True when the register is neither a const nor an immediate. */
+static inline bool is_gpr(struct ir3_register *reg)
+{
+       const unsigned non_gpr = IR3_REG_CONST | IR3_REG_IMMED;
+
+       return (reg->flags & non_gpr) == 0;
+}
+
+/* TODO combine is_gpr()/reg_gpr().. */
+/* Stricter variant of is_gpr(): also rejects relative registers and the
+ * special registers REG_A0 and REG_P0.
+ */
+static inline bool reg_gpr(struct ir3_register *r)
+{
+       const unsigned excluded =
+                       IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV;
+       uint32_t n;
+
+       if (r->flags & excluded)
+               return false;
+
+       n = reg_num(r);
+       return (n != REG_A0) && (n != REG_P0);
+}
+
+/* Number of elements in a true array (not valid for pointers); only
+ * defined here if nothing else has provided it already.
+ */
+#ifndef ARRAY_SIZE
+#  define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+/* ************************************************************************* */
+/* split this out or find some helper to use.. like main/bitset.h.. */
+
+#include <string.h>
+
+/* upper bound (exclusive) on ir3_register::num for regmask purposes */
+#define MAX_REG 256
+
+/* Bitmask with one bit per register: bits [0, MAX_REG) are used for full
+ * registers and bits [MAX_REG, 2 * MAX_REG) for half registers.
+ */
+typedef uint8_t regmask_t[2 * MAX_REG / 8];
+
+/* Bit index of 'reg' within a regmask_t: reg->num for full registers,
+ * reg->num + MAX_REG for half registers.
+ */
+static inline unsigned regmask_idx(struct ir3_register *reg)
+{
+       const unsigned idx = reg->num;
+
+       assert(idx < MAX_REG);
+
+       return (reg->flags & IR3_REG_HALF) ? (idx + MAX_REG) : idx;
+}
+
+/* Clear every bit of the mask. */
+static inline void regmask_init(regmask_t *regmask)
+{
+       memset(*regmask, 0, sizeof(regmask_t));
+}
+
+/* Mark in 'regmask' each of the (up to four) consecutive registers,
+ * starting at 'reg', that is selected by the low four bits of reg->wrmask.
+ *
+ * NOTE(review): nothing here bounds base + n, so a half register with
+ * num close to MAX_REG and a multi-bit wrmask could index past the end of
+ * the mask -- confirm callers never build such a register.
+ */
+static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
+{
+       const unsigned base = regmask_idx(reg);
+       unsigned n;
+
+       for (n = 0; n < 4; n++) {
+               const unsigned bit = base + n;
+
+               if (!(reg->wrmask & (1 << n)))
+                       continue;
+               (*regmask)[bit / 8] |= 1 << (bit % 8);
+       }
+}
+
+/* set bits in a if not set in b, conceptually:
+ *   a |= (reg & ~b)
+ */
+static inline void regmask_set_if_not(regmask_t *a,
+               struct ir3_register *reg, regmask_t *b)
+{
+       const unsigned base = regmask_idx(reg);
+       unsigned n;
+
+       /* walk the (up to four) registers selected by reg->wrmask: */
+       for (n = 0; n < 4; n++) {
+               const unsigned bit  = base + n;
+               const unsigned byte = bit / 8;
+               const unsigned mask = 1 << (bit % 8);
+
+               if (!(reg->wrmask & (1 << n)))
+                       continue;
+               /* already present in b, so leave a alone: */
+               if ((*b)[byte] & mask)
+                       continue;
+               (*a)[byte] |= mask;
+       }
+}
+
+/* Returns true if any of the (up to four) registers selected by
+ * reg->wrmask, starting at 'reg', is marked in 'regmask'; false otherwise.
+ */
+static inline unsigned regmask_get(regmask_t *regmask,
+               struct ir3_register *reg)
+{
+       const unsigned base = regmask_idx(reg);
+       unsigned n;
+
+       for (n = 0; n < 4; n++) {
+               const unsigned bit = base + n;
+
+               if (!(reg->wrmask & (1 << n)))
+                       continue;
+               if ((*regmask)[bit / 8] & (1 << (bit % 8)))
+                       return true;
+       }
+
+       return false;
+}
+
+/* ************************************************************************* */
+
+#endif /* IR3_H_ */