ilo: add a toy shader compiler
authorChia-I Wu <olv@lunarg.com>
Wed, 12 Dec 2012 21:48:28 +0000 (05:48 +0800)
committerChia-I Wu <olvaffe@gmail.com>
Fri, 26 Apr 2013 08:20:52 +0000 (16:20 +0800)
This is a simple shader compiler that performs almost zero optimizations.  The
generated code is usually much larger compared to that generated by i965.
The generated code also requires many more registers.

Function-wise, it lacks register spilling and does not support most TGSI
indirections.  Other than those, it works alright.

14 files changed:
src/gallium/drivers/ilo/Makefile.sources
src/gallium/drivers/ilo/shader/toy_compiler.c [new file with mode: 0644]
src/gallium/drivers/ilo/shader/toy_compiler.h [new file with mode: 0644]
src/gallium/drivers/ilo/shader/toy_compiler_asm.c [new file with mode: 0644]
src/gallium/drivers/ilo/shader/toy_compiler_disasm.c [new file with mode: 0644]
src/gallium/drivers/ilo/shader/toy_compiler_reg.h [new file with mode: 0644]
src/gallium/drivers/ilo/shader/toy_helpers.h [new file with mode: 0644]
src/gallium/drivers/ilo/shader/toy_legalize.c [new file with mode: 0644]
src/gallium/drivers/ilo/shader/toy_legalize.h [new file with mode: 0644]
src/gallium/drivers/ilo/shader/toy_legalize_ra.c [new file with mode: 0644]
src/gallium/drivers/ilo/shader/toy_optimize.c [new file with mode: 0644]
src/gallium/drivers/ilo/shader/toy_optimize.h [new file with mode: 0644]
src/gallium/drivers/ilo/shader/toy_tgsi.c [new file with mode: 0644]
src/gallium/drivers/ilo/shader/toy_tgsi.h [new file with mode: 0644]

index 448f1d1973614d1c39384c24e6f6873e698c9e94..c19801e72817797fbd034883f35dc3e67e95c5cf 100644 (file)
@@ -16,4 +16,11 @@ C_SOURCES := \
        ilo_screen.c \
        ilo_shader.c \
        ilo_state.c \
-       ilo_video.c
+       ilo_video.c \
+       shader/toy_compiler.c \
+       shader/toy_compiler_asm.c \
+       shader/toy_compiler_disasm.c \
+       shader/toy_legalize.c \
+       shader/toy_legalize_ra.c \
+       shader/toy_optimize.c \
+       shader/toy_tgsi.c
diff --git a/src/gallium/drivers/ilo/shader/toy_compiler.c b/src/gallium/drivers/ilo/shader/toy_compiler.c
new file mode 100644 (file)
index 0000000..73b03e6
--- /dev/null
@@ -0,0 +1,556 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "toy_compiler.h"
+
+/**
+ * Dump an operand.
+ *
+ * The output, written with ilo_printf(), consists of the register name (or
+ * the immediate value), the region description for non-immediate operands,
+ * and a type suffix such as ":f".
+ *
+ * For non-immediate operands, val32 is split into a register number
+ * (val32 / TOY_REG_WIDTH) and a subregister number expressed in units of the
+ * operand type.  For immediates, val32 holds the raw 32-bit value.  The
+ * indirect form is only honored for TOY_FILE_GRF.  When is_dst is true, the
+ * region is printed in the destination form <horz_stride>.
+ */
+static void
+tc_dump_operand(struct toy_compiler *tc,
+                enum toy_file file, enum toy_type type, enum toy_rect rect,
+                bool indirect, unsigned indirect_subreg, uint32_t val32,
+                bool is_dst)
+{
+   /* only the VRF, GRF, and MRF names are ever printed below */
+   static const char *toy_file_names[TOY_FILE_COUNT] = {
+      [TOY_FILE_VRF]        = "v",
+      [TOY_FILE_ARF]        = "NOT USED",
+      [TOY_FILE_GRF]        = "r",
+      [TOY_FILE_MRF]        = "m",
+      [TOY_FILE_IMM]        = "NOT USED",
+   };
+   const char *name = toy_file_names[file];
+   int reg, subreg;
+
+   /* reg and subreg stay unset for immediates, which never read them */
+   if (file != TOY_FILE_IMM) {
+      reg = val32 / TOY_REG_WIDTH;
+      subreg = (val32 % TOY_REG_WIDTH) / toy_type_size(type);
+   }
+
+   switch (file) {
+   case TOY_FILE_GRF:
+      if (indirect) {
+         /* the a0 subregister is printed in units of UW */
+         const int addr_subreg = indirect_subreg / toy_type_size(TOY_TYPE_UW);
+
+         ilo_printf("%s[a0.%d", name, addr_subreg);
+         if (val32)
+            ilo_printf("%+d", (int) val32);
+         ilo_printf("]");
+         break;
+      }
+      /* fall through */
+   case TOY_FILE_VRF:
+   case TOY_FILE_MRF:
+      ilo_printf("%s%d", name, reg);
+      if (subreg)
+         ilo_printf(".%d", subreg);
+      break;
+   case TOY_FILE_ARF:
+      /* ARF registers not listed here print nothing */
+      switch (reg) {
+      case BRW_ARF_NULL:
+         ilo_printf("null");
+         break;
+      case BRW_ARF_ADDRESS:
+         ilo_printf("a0.%d", subreg);
+         break;
+      case BRW_ARF_ACCUMULATOR:
+      case BRW_ARF_ACCUMULATOR + 1:
+         ilo_printf("acc%d.%d", (reg & 1), subreg);
+         break;
+      case BRW_ARF_FLAG:
+         ilo_printf("f0.%d", subreg);
+         break;
+      case BRW_ARF_STATE:
+         ilo_printf("sr0.%d", subreg);
+         break;
+      case BRW_ARF_CONTROL:
+         ilo_printf("cr0.%d", subreg);
+         break;
+      case BRW_ARF_NOTIFICATION_COUNT:
+      case BRW_ARF_NOTIFICATION_COUNT + 1:
+         ilo_printf("n%d.%d", (reg & 1), subreg);
+         break;
+      case BRW_ARF_IP:
+         ilo_printf("ip");
+         break;
+      }
+      break;
+   case TOY_FILE_IMM:
+      /* format the raw bits according to the immediate type */
+      switch (type) {
+      case TOY_TYPE_F:
+         {
+            union fi fi = { .ui = val32 };
+            ilo_printf("%f", fi.f);
+         }
+         break;
+      case TOY_TYPE_D:
+         ilo_printf("%d", (int32_t) val32);
+         break;
+      case TOY_TYPE_UD:
+         ilo_printf("%u", val32);
+         break;
+      case TOY_TYPE_W:
+         ilo_printf("%d", (int16_t) (val32 & 0xffff));
+         break;
+      case TOY_TYPE_UW:
+         ilo_printf("%u", val32 & 0xffff);
+         break;
+      case TOY_TYPE_V:
+         ilo_printf("0x%08x", val32);
+         break;
+      default:
+         assert(!"unknown imm type");
+         break;
+      }
+      break;
+   default:
+      assert(!"unexpected file");
+      break;
+   }
+
+   /* dump the region parameter */
+   if (file != TOY_FILE_IMM) {
+      int vert_stride, width, horz_stride;
+
+      switch (rect) {
+      case TOY_RECT_LINEAR:
+         /* the linear region follows the compiler's current linear width */
+         vert_stride = tc->rect_linear_width;
+         width = tc->rect_linear_width;
+         horz_stride = 1;
+         break;
+      case TOY_RECT_041:
+         vert_stride = 0;
+         width = 4;
+         horz_stride = 1;
+         break;
+      case TOY_RECT_010:
+         vert_stride = 0;
+         width = 1;
+         horz_stride = 0;
+         break;
+      case TOY_RECT_220:
+         vert_stride = 2;
+         width = 2;
+         horz_stride = 0;
+         break;
+      case TOY_RECT_440:
+         vert_stride = 4;
+         width = 4;
+         horz_stride = 0;
+         break;
+      case TOY_RECT_240:
+         vert_stride = 2;
+         width = 4;
+         horz_stride = 0;
+         break;
+      default:
+         assert(!"unknown rect parameter");
+         vert_stride = 0;
+         width = 0;
+         horz_stride = 0;
+         break;
+      }
+
+      /* destinations are printed with the horizontal stride only */
+      if (is_dst)
+         ilo_printf("<%d>", horz_stride);
+      else
+         ilo_printf("<%d;%d,%d>", vert_stride, width, horz_stride);
+   }
+
+   /* type suffix */
+   switch (type) {
+   case TOY_TYPE_F:
+      ilo_printf(":f");
+      break;
+   case TOY_TYPE_D:
+      ilo_printf(":d");
+      break;
+   case TOY_TYPE_UD:
+      ilo_printf(":ud");
+      break;
+   case TOY_TYPE_W:
+      ilo_printf(":w");
+      break;
+   case TOY_TYPE_UW:
+      ilo_printf(":uw");
+      break;
+   case TOY_TYPE_V:
+      ilo_printf(":v");
+      break;
+   default:
+      assert(!"unexpected type");
+      break;
+   }
+}
+
+/**
+ * Dump a source operand, including its modifiers.
+ *
+ * A negated source is prefixed with "-", an absolute-value source is wrapped
+ * in "|...|", and a swizzle is appended as ".xyzw"-style channel letters
+ * when the source is swizzled.
+ */
+static void
+tc_dump_src(struct toy_compiler *tc, struct toy_src src)
+{
+   const bool wrap_abs = src.absolute;
+
+   if (src.negate)
+      ilo_printf("-");
+   if (wrap_abs)
+      ilo_printf("|");
+
+   tc_dump_operand(tc, src.file, src.type, src.rect,
+                   src.indirect, src.indirect_subreg, src.val32, false);
+
+   if (tsrc_is_swizzled(src)) {
+      const char *channels = "xyzw";
+      const char cx = channels[src.swizzle_x];
+      const char cy = channels[src.swizzle_y];
+      const char cz = channels[src.swizzle_z];
+      const char cw = channels[src.swizzle_w];
+
+      ilo_printf(".%c%c%c%c", cx, cy, cz, cw);
+   }
+
+   /* close the absolute-value bars opened above */
+   if (wrap_abs)
+      ilo_printf("|");
+}
+
+/**
+ * Dump a destination operand.
+ *
+ * The writemask is appended as ".xyzw"-style channel letters, but only when
+ * it is not the full TOY_WRITEMASK_XYZW.
+ */
+static void
+tc_dump_dst(struct toy_compiler *tc, struct toy_dst dst)
+{
+   const unsigned mask = dst.writemask;
+
+   tc_dump_operand(tc, dst.file, dst.type, dst.rect,
+                   dst.indirect, dst.indirect_subreg, dst.val32, true);
+
+   /* a full writemask is implied and not printed */
+   if (mask == TOY_WRITEMASK_XYZW)
+      return;
+
+   ilo_printf(".");
+   if (mask & TOY_WRITEMASK_X)
+      ilo_printf("x");
+   if (mask & TOY_WRITEMASK_Y)
+      ilo_printf("y");
+   if (mask & TOY_WRITEMASK_Z)
+      ilo_printf("z");
+   if (mask & TOY_WRITEMASK_W)
+      ilo_printf("w");
+}
+
+/**
+ * Return a printable name for an opcode.
+ *
+ * Hardware opcodes (BRW_OPCODE_x) map to their lower-case mnemonics; toy
+ * opcodes map to a "group.name" string.  Opcodes not listed here yield "unk".
+ * The returned string is a literal and must not be freed.
+ */
+static const char *
+get_opcode_name(unsigned opcode)
+{
+   switch (opcode) {
+   case BRW_OPCODE_MOV:                   return "mov";
+   case BRW_OPCODE_SEL:                   return "sel";
+   case BRW_OPCODE_NOT:                   return "not";
+   case BRW_OPCODE_AND:                   return "and";
+   case BRW_OPCODE_OR:                    return "or";
+   case BRW_OPCODE_XOR:                   return "xor";
+   case BRW_OPCODE_SHR:                   return "shr";
+   case BRW_OPCODE_SHL:                   return "shl";
+   case BRW_OPCODE_RSR:                   return "rsr";
+   case BRW_OPCODE_RSL:                   return "rsl";
+   case BRW_OPCODE_ASR:                   return "asr";
+   case BRW_OPCODE_CMP:                   return "cmp";
+   case BRW_OPCODE_CMPN:                  return "cmpn";
+   case BRW_OPCODE_JMPI:                  return "jmpi";
+   case BRW_OPCODE_IF:                    return "if";
+   case BRW_OPCODE_IFF:                   return "iff";
+   case BRW_OPCODE_ELSE:                  return "else";
+   case BRW_OPCODE_ENDIF:                 return "endif";
+   case BRW_OPCODE_DO:                    return "do";
+   case BRW_OPCODE_WHILE:                 return "while";
+   case BRW_OPCODE_BREAK:                 return "break";
+   case BRW_OPCODE_CONTINUE:              return "continue";
+   case BRW_OPCODE_HALT:                  return "halt";
+   case BRW_OPCODE_MSAVE:                 return "msave";
+   case BRW_OPCODE_MRESTORE:              return "mrestore";
+   case BRW_OPCODE_PUSH:                  return "push";
+   case BRW_OPCODE_POP:                   return "pop";
+   case BRW_OPCODE_WAIT:                  return "wait";
+   case BRW_OPCODE_SEND:                  return "send";
+   case BRW_OPCODE_SENDC:                 return "sendc";
+   case BRW_OPCODE_MATH:                  return "math";
+   case BRW_OPCODE_ADD:                   return "add";
+   case BRW_OPCODE_MUL:                   return "mul";
+   case BRW_OPCODE_AVG:                   return "avg";
+   case BRW_OPCODE_FRC:                   return "frc";
+   case BRW_OPCODE_RNDU:                  return "rndu";
+   case BRW_OPCODE_RNDD:                  return "rndd";
+   case BRW_OPCODE_RNDE:                  return "rnde";
+   case BRW_OPCODE_RNDZ:                  return "rndz";
+   case BRW_OPCODE_MAC:                   return "mac";
+   case BRW_OPCODE_MACH:                  return "mach";
+   case BRW_OPCODE_LZD:                   return "lzd";
+   case BRW_OPCODE_SAD2:                  return "sad2";
+   case BRW_OPCODE_SADA2:                 return "sada2";
+   case BRW_OPCODE_DP4:                   return "dp4";
+   case BRW_OPCODE_DPH:                   return "dph";
+   case BRW_OPCODE_DP3:                   return "dp3";
+   case BRW_OPCODE_DP2:                   return "dp2";
+   case BRW_OPCODE_DPA2:                  return "dpa2";
+   case BRW_OPCODE_LINE:                  return "line";
+   case BRW_OPCODE_PLN:                   return "pln";
+   case BRW_OPCODE_MAD:                   return "mad";
+   case BRW_OPCODE_NOP:                   return "nop";
+   /* TGSI */
+   case TOY_OPCODE_TGSI_IN:               return "tgsi.in";
+   case TOY_OPCODE_TGSI_CONST:            return "tgsi.const";
+   case TOY_OPCODE_TGSI_SV:               return "tgsi.sv";
+   case TOY_OPCODE_TGSI_IMM:              return "tgsi.imm";
+   case TOY_OPCODE_TGSI_INDIRECT_FETCH:   return "tgsi.indirect_fetch";
+   case TOY_OPCODE_TGSI_INDIRECT_STORE:   return "tgsi.indirect_store";
+   case TOY_OPCODE_TGSI_TEX:              return "tgsi.tex";
+   case TOY_OPCODE_TGSI_TXB:              return "tgsi.txb";
+   case TOY_OPCODE_TGSI_TXD:              return "tgsi.txd";
+   case TOY_OPCODE_TGSI_TXL:              return "tgsi.txl";
+   case TOY_OPCODE_TGSI_TXP:              return "tgsi.txp";
+   case TOY_OPCODE_TGSI_TXF:              return "tgsi.txf";
+   case TOY_OPCODE_TGSI_TXQ:              return "tgsi.txq";
+   case TOY_OPCODE_TGSI_TXQ_LZ:           return "tgsi.txq_lz";
+   case TOY_OPCODE_TGSI_TEX2:             return "tgsi.tex2";
+   case TOY_OPCODE_TGSI_TXB2:             return "tgsi.txb2";
+   case TOY_OPCODE_TGSI_TXL2:             return "tgsi.txl2";
+   case TOY_OPCODE_TGSI_SAMPLE:           return "tgsi.sample";
+   case TOY_OPCODE_TGSI_SAMPLE_I:         return "tgsi.sample_i";
+   case TOY_OPCODE_TGSI_SAMPLE_I_MS:      return "tgsi.sample_i_ms";
+   case TOY_OPCODE_TGSI_SAMPLE_B:         return "tgsi.sample_b";
+   case TOY_OPCODE_TGSI_SAMPLE_C:         return "tgsi.sample_c";
+   case TOY_OPCODE_TGSI_SAMPLE_C_LZ:      return "tgsi.sample_c_lz";
+   case TOY_OPCODE_TGSI_SAMPLE_D:         return "tgsi.sample_d";
+   case TOY_OPCODE_TGSI_SAMPLE_L:         return "tgsi.sample_l";
+   case TOY_OPCODE_TGSI_GATHER4:          return "tgsi.gather4";
+   case TOY_OPCODE_TGSI_SVIEWINFO:        return "tgsi.sviewinfo";
+   case TOY_OPCODE_TGSI_SAMPLE_POS:       return "tgsi.sample_pos";
+   case TOY_OPCODE_TGSI_SAMPLE_INFO:      return "tgsi.sample_info";
+   /* math */
+   case TOY_OPCODE_INV:                   return "math.inv";
+   case TOY_OPCODE_LOG:                   return "math.log";
+   case TOY_OPCODE_EXP:                   return "math.exp";
+   case TOY_OPCODE_SQRT:                  return "math.sqrt";
+   case TOY_OPCODE_RSQ:                   return "math.rsq";
+   case TOY_OPCODE_SIN:                   return "math.sin";
+   case TOY_OPCODE_COS:                   return "math.cos";
+   case TOY_OPCODE_FDIV:                  return "math.fdiv";
+   case TOY_OPCODE_POW:                   return "math.pow";
+   case TOY_OPCODE_INT_DIV_QUOTIENT:      return "math.int_div_quotient";
+   case TOY_OPCODE_INT_DIV_REMAINDER:     return "math.int_div_remainder";
+   /* urb */
+   case TOY_OPCODE_URB_WRITE:             return "urb.urb_write";
+   /* gs */
+   case TOY_OPCODE_EMIT:                  return "gs.emit";
+   case TOY_OPCODE_ENDPRIM:               return "gs.endprim";
+   /* fs */
+   case TOY_OPCODE_DDX:                   return "fs.ddx";
+   case TOY_OPCODE_DDY:                   return "fs.ddy";
+   case TOY_OPCODE_FB_WRITE:              return "fs.fb_write";
+   case TOY_OPCODE_KIL:                   return "fs.kil";
+   default:                               return "unk";
+   }
+}
+
+/**
+ * Return a printable name for the conditional modifier field.
+ *
+ * The meaning of the field depends on the opcode: for SEND/SENDC it is
+ * decoded as an SFID, for MATH as a math function (FC), and for everything
+ * else as a BRW_CONDITIONAL_x value.  NULL is returned only for
+ * BRW_CONDITIONAL_NONE on a regular opcode, meaning nothing should be
+ * printed.
+ */
+static const char *
+get_cond_modifier_name(unsigned opcode, unsigned cond_modifier)
+{
+   /* SFID */
+   if (opcode == BRW_OPCODE_SEND || opcode == BRW_OPCODE_SENDC) {
+      switch (cond_modifier) {
+      case BRW_SFID_NULL:                       return "Null";
+      case BRW_SFID_SAMPLER:                    return "Sampling Engine";
+      case BRW_SFID_MESSAGE_GATEWAY:            return "Message Gateway";
+      case GEN6_SFID_DATAPORT_SAMPLER_CACHE:    return "Data Port Sampler Cache";
+      case GEN6_SFID_DATAPORT_RENDER_CACHE:     return "Data Port Render Cache";
+      case BRW_SFID_URB:                        return "URB";
+      case BRW_SFID_THREAD_SPAWNER:             return "Thread Spawner";
+      case GEN6_SFID_DATAPORT_CONSTANT_CACHE:   return "Constant Cache";
+      default:                                  return "Unknown";
+      }
+   }
+
+   /* FC */
+   if (opcode == BRW_OPCODE_MATH) {
+      switch (cond_modifier) {
+      case BRW_MATH_FUNCTION_INV:               return "INV";
+      case BRW_MATH_FUNCTION_LOG:               return "LOG";
+      case BRW_MATH_FUNCTION_EXP:               return "EXP";
+      case BRW_MATH_FUNCTION_SQRT:              return "SQRT";
+      case BRW_MATH_FUNCTION_RSQ:               return "RSQ";
+      case BRW_MATH_FUNCTION_SIN:               return "SIN";
+      case BRW_MATH_FUNCTION_COS:               return "COS";
+      case BRW_MATH_FUNCTION_FDIV:              return "FDIV";
+      case BRW_MATH_FUNCTION_POW:               return "POW";
+      case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:  return "INT DIV (quotient)";
+      case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: return "INT DIV (remainder)";
+      default:                                  return "UNK";
+      }
+   }
+
+   /* a real conditional modifier */
+   switch (cond_modifier) {
+   case BRW_CONDITIONAL_NONE:                   return NULL;
+   case BRW_CONDITIONAL_Z:                      return "z";
+   case BRW_CONDITIONAL_NZ:                     return "nz";
+   case BRW_CONDITIONAL_G:                      return "g";
+   case BRW_CONDITIONAL_GE:                     return "ge";
+   case BRW_CONDITIONAL_L:                      return "l";
+   case BRW_CONDITIONAL_LE:                     return "le";
+   default:                                     return "unk";
+   }
+}
+
+/**
+ * Dump an instruction on a single line.
+ *
+ * The line holds the opcode name, an optional ".sat" and modifier suffix,
+ * the destination, and every source up to (not including) the first null
+ * source.  A NOP is printed as the bare opcode name.
+ */
+static void
+tc_dump_inst(struct toy_compiler *tc, const struct toy_inst *inst)
+{
+   const char *modifier;
+   int idx;
+
+   ilo_printf("  %s", get_opcode_name(inst->opcode));
+
+   /* nothing else is printed for a NOP */
+   if (inst->opcode == BRW_OPCODE_NOP) {
+      ilo_printf("\n");
+      return;
+   }
+
+   if (inst->saturate)
+      ilo_printf(".sat");
+
+   modifier = get_cond_modifier_name(inst->opcode, inst->cond_modifier);
+   if (modifier)
+      ilo_printf(".%s", modifier);
+
+   ilo_printf(" ");
+
+   tc_dump_dst(tc, inst->dst);
+
+   /* sources are packed from index 0; stop at the first null one */
+   for (idx = 0;
+        idx < Elements(inst->src) && !tsrc_is_null(inst->src[idx]);
+        idx++) {
+      ilo_printf(", ");
+      tc_dump_src(tc, inst->src[idx]);
+   }
+
+   ilo_printf("\n");
+}
+
+/**
+ * Dump the instructions added to the compiler.
+ *
+ * Each real instruction is prefixed with its index; markers are prefixed
+ * with "marker:" and do not consume an index.  The iteration starts from the
+ * head of the list, so the compiler's current location is changed.
+ */
+void
+toy_compiler_dump(struct toy_compiler *tc)
+{
+   struct toy_inst *cur;
+   int next_pc = 0;
+
+   for (tc_head(tc); (cur = tc_next_no_skip(tc)) != NULL; ) {
+      if (!cur->marker) {
+         ilo_printf("%6d:", next_pc);
+         next_pc++;
+      } else {
+         /* we do not generate code for markers */
+         ilo_printf("marker:");
+      }
+
+      tc_dump_inst(tc, cur);
+   }
+}
+
+/**
+ * Clean up the toy compiler.
+ *
+ * Every instruction on the list is returned to the slab pool, and the pool
+ * itself is then destroyed.
+ */
+void
+toy_compiler_cleanup(struct toy_compiler *tc)
+{
+   struct toy_inst *cur, *following;
+
+   /* the _SAFE iterator is used because each entry is freed while visited */
+   LIST_FOR_EACH_ENTRY_SAFE(cur, following, &tc->instructions, list) {
+      util_slab_free(&tc->mempool, cur);
+   }
+
+   util_slab_destroy(&tc->mempool);
+}
+
+/**
+ * Set up tc->templ, the instruction template that tc_add() copies into
+ * every newly added instruction.
+ *
+ * The template is a NOP with default controls, a null destination, and null
+ * sources and texture offsets.
+ */
+static void
+tc_init_inst_templ(struct toy_compiler *tc)
+{
+   struct toy_inst *t = &tc->templ;
+   int n;
+
+   list_inithead(&t->list);
+
+   /* instruction controls */
+   t->opcode = BRW_OPCODE_NOP;
+   t->access_mode = BRW_ALIGN_1;
+   t->mask_ctrl = BRW_MASK_ENABLE;
+   t->dep_ctrl = BRW_DEPENDENCY_NORMAL;
+   t->qtr_ctrl = GEN6_COMPRESSION_1Q;
+   t->thread_ctrl = BRW_THREAD_NORMAL;
+   t->pred_ctrl = BRW_PREDICATE_NONE;
+   t->pred_inv = false;
+   t->exec_size = BRW_EXECUTE_1;
+   t->cond_modifier = BRW_CONDITIONAL_NONE;
+   t->acc_wr_ctrl = false;
+   t->saturate = false;
+
+   t->marker = false;
+
+   /* operands */
+   t->dst = tdst_null();
+
+   for (n = 0; n < Elements(t->src); n++)
+      t->src[n] = tsrc_null();
+
+   for (n = 0; n < Elements(t->tex.offsets); n++)
+      t->tex.offsets[n] = tsrc_null();
+}
+
+/**
+ * Initialize the toy compiler.
+ *
+ * The compiler is zeroed first, so any previous contents of \p tc are lost.
+ * On return the instruction list is empty and the current location is at
+ * the tail.
+ */
+void
+toy_compiler_init(struct toy_compiler *tc, int gen)
+{
+   memset(tc, 0, sizeof(struct toy_compiler));
+
+   tc->gen = gen;
+   tc->rect_linear_width = 1;
+
+   /* skip 0 so that util_hash_table_get() never returns NULL */
+   tc->next_vrf = 1;
+
+   tc_init_inst_templ(tc);
+
+   util_slab_create(&tc->mempool, sizeof(struct toy_inst),
+                    64, UTIL_SLAB_SINGLETHREADED);
+
+   /* start with an empty list; instructions are added to the tail */
+   list_inithead(&tc->instructions);
+   tc_tail(tc);
+}
diff --git a/src/gallium/drivers/ilo/shader/toy_compiler.h b/src/gallium/drivers/ilo/shader/toy_compiler.h
new file mode 100644 (file)
index 0000000..a6413ea
--- /dev/null
@@ -0,0 +1,473 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef TOY_COMPILER_H
+#define TOY_COMPILER_H
+
+#include "brw_defines.h"
+
+#include "util/u_slab.h"
+#include "ilo_common.h"
+#include "toy_compiler_reg.h"
+
+/**
+ * Toy opcodes.
+ *
+ * These are numbered right after the range reserved for the hardware
+ * opcodes, so that both kinds can share a single opcode field.  Note that
+ * struct toy_inst stores the opcode in an 8-bit field.
+ */
+enum toy_opcode {
+   /* 0..127 are reserved for BRW_OPCODE_x */
+   TOY_OPCODE_LAST_HW = 127,
+
+   /* TGSI register functions */
+   TOY_OPCODE_TGSI_IN,
+   TOY_OPCODE_TGSI_CONST,
+   TOY_OPCODE_TGSI_SV,
+   TOY_OPCODE_TGSI_IMM,
+   TOY_OPCODE_TGSI_INDIRECT_FETCH,
+   TOY_OPCODE_TGSI_INDIRECT_STORE,
+
+   /* TGSI sampling functions */
+   TOY_OPCODE_TGSI_TEX,
+   TOY_OPCODE_TGSI_TXB,
+   TOY_OPCODE_TGSI_TXD,
+   TOY_OPCODE_TGSI_TXL,
+   TOY_OPCODE_TGSI_TXP,
+   TOY_OPCODE_TGSI_TXF,
+   TOY_OPCODE_TGSI_TXQ,
+   TOY_OPCODE_TGSI_TXQ_LZ,
+   TOY_OPCODE_TGSI_TEX2,
+   TOY_OPCODE_TGSI_TXB2,
+   TOY_OPCODE_TGSI_TXL2,
+   TOY_OPCODE_TGSI_SAMPLE,
+   TOY_OPCODE_TGSI_SAMPLE_I,
+   TOY_OPCODE_TGSI_SAMPLE_I_MS,
+   TOY_OPCODE_TGSI_SAMPLE_B,
+   TOY_OPCODE_TGSI_SAMPLE_C,
+   TOY_OPCODE_TGSI_SAMPLE_C_LZ,
+   TOY_OPCODE_TGSI_SAMPLE_D,
+   TOY_OPCODE_TGSI_SAMPLE_L,
+   TOY_OPCODE_TGSI_GATHER4,
+   TOY_OPCODE_TGSI_SVIEWINFO,
+   TOY_OPCODE_TGSI_SAMPLE_POS,
+   TOY_OPCODE_TGSI_SAMPLE_INFO,
+
+   /* math functions */
+   TOY_OPCODE_INV,
+   TOY_OPCODE_LOG,
+   TOY_OPCODE_EXP,
+   TOY_OPCODE_SQRT,
+   TOY_OPCODE_RSQ,
+   TOY_OPCODE_SIN,
+   TOY_OPCODE_COS,
+   TOY_OPCODE_FDIV,
+   TOY_OPCODE_POW,
+   TOY_OPCODE_INT_DIV_QUOTIENT,
+   TOY_OPCODE_INT_DIV_REMAINDER,
+
+   /* URB functions */
+   TOY_OPCODE_URB_WRITE,
+
+   /* GS-specific functions */
+   TOY_OPCODE_EMIT,
+   TOY_OPCODE_ENDPRIM,
+
+   /* FS-specific functions */
+   TOY_OPCODE_DDX,
+   TOY_OPCODE_DDY,
+   TOY_OPCODE_FB_WRITE,
+   TOY_OPCODE_KIL,
+};
+
+/**
+ * Toy instruction.
+ *
+ * The control fields are packed into bitfields; the comment next to each
+ * one names the family of values it holds.  Instructions are linked into
+ * the compiler's instruction list through the list member.
+ */
+struct toy_inst {
+   unsigned opcode:8;            /* enum toy_opcode      */
+   unsigned access_mode:1;       /* BRW_ALIGN_x          */
+   unsigned mask_ctrl:1;         /* BRW_MASK_x           */
+   unsigned dep_ctrl:2;          /* BRW_DEPENDENCY_x     */
+   unsigned qtr_ctrl:2;          /* GEN6_COMPRESSION_x   */
+   unsigned thread_ctrl:2;       /* BRW_THREAD_x         */
+   unsigned pred_ctrl:4;         /* BRW_PREDICATE_x      */
+   unsigned pred_inv:1;          /* true or false        */
+   unsigned exec_size:3;         /* BRW_EXECUTE_x        */
+   unsigned cond_modifier:4;     /* BRW_CONDITIONAL_x    */
+   unsigned acc_wr_ctrl:1;       /* true or false        */
+   unsigned saturate:1;          /* true or false        */
+
+   /* true if the instruction should be ignored for instruction iteration */
+   unsigned marker:1;
+
+   /* explicit padding bit; the bitfields above add up to 32 bits with it */
+   unsigned pad:1;
+
+   struct toy_dst dst;
+   struct toy_src src[5];        /* match TGSI_FULL_MAX_SRC_REGISTERS */
+
+   struct {
+      int target;                /* TGSI_TEXTURE_x */
+      struct toy_src offsets[1]; /* needs to be 4 when GATHER4 is supported */
+   } tex;
+
+   /* link in toy_compiler::instructions */
+   struct list_head list;
+};
+
+/**
+ * Toy compiler.
+ *
+ * Holds the instruction list being built, the current location within it,
+ * and the allocation state for instructions and virtual registers.
+ */
+struct toy_compiler {
+   /* generation value passed to toy_compiler_init() */
+   int gen;
+
+   /* template copied into each instruction added by tc_add() */
+   struct toy_inst templ;
+   /* slab pool that instructions are allocated from */
+   struct util_slab_mempool mempool;
+   /* head of the instruction list */
+   struct list_head instructions;
+   /* current location; new instructions are inserted relative to iter_next */
+   struct list_head *iter, *iter_next;
+
+   /* this is not set until toy_compiler_legalize_for_asm() */
+   int num_instructions;
+
+   /* width (and vertical stride) reported for TOY_RECT_LINEAR when dumping */
+   int rect_linear_width;
+   /* next VRF number handed out by tc_alloc_vrf() */
+   int next_vrf;
+
+   /* set once by tc_fail(); reason is the message of the first failure */
+   bool fail;
+   const char *reason;
+};
+
+/**
+ * Allocate \p count consecutive VRF registers and return the number of the
+ * first one.
+ */
+static inline int
+tc_alloc_vrf(struct toy_compiler *tc, int count)
+{
+   const int first = tc->next_vrf;
+
+   tc->next_vrf = first + count;
+
+   return first;
+}
+
+/**
+ * Allocate a temporary register: a destination operand that refers to one
+ * freshly allocated VRF register.
+ */
+static inline struct toy_dst
+tc_alloc_tmp(struct toy_compiler *tc)
+{
+   const int vrf = tc_alloc_vrf(tc, 1);
+
+   return tdst(TOY_FILE_VRF, vrf, 0);
+}
+
+/**
+ * Allocate four temporary registers and store them in tmp[0..3].
+ */
+static inline void
+tc_alloc_tmp4(struct toy_compiler *tc, struct toy_dst *tmp)
+{
+   int i;
+
+   for (i = 0; i < 4; i++)
+      tmp[i] = tc_alloc_tmp(tc);
+}
+
+/**
+ * Duplicate an instruction at the current location.
+ *
+ * The copy is allocated from the compiler's slab pool and linked in with
+ * list_addtail() relative to tc->iter_next.  Returns NULL when the
+ * allocation fails.
+ */
+static inline struct toy_inst *
+tc_duplicate_inst(struct toy_compiler *tc, const struct toy_inst *inst)
+{
+   struct toy_inst *copy = util_slab_alloc(&tc->mempool);
+
+   if (copy) {
+      /* copies the list links too; they are overwritten right below */
+      *copy = *inst;
+      list_addtail(&copy->list, tc->iter_next);
+   }
+
+   return copy;
+}
+
+/**
+ * Move an instruction to the current location: unlink it from wherever it
+ * is on the list and link it back in relative to tc->iter_next.
+ */
+static inline void
+tc_move_inst(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   struct list_head *link = &inst->list;
+
+   list_del(link);
+   list_addtail(link, tc->iter_next);
+}
+
+/**
+ * Discard an instruction: unlink it from the list and return its memory to
+ * the compiler's slab pool.  \p inst must not be used afterwards.
+ */
+static inline void
+tc_discard_inst(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   struct list_head *link = &inst->list;
+
+   list_del(link);
+   util_slab_free(&tc->mempool, inst);
+}
+
+/**
+ * Add a new instruction at the current location.  The new instruction is a
+ * copy of tc->templ.  Returns NULL when the instruction cannot be allocated.
+ */
+static inline struct toy_inst *
+tc_add(struct toy_compiler *tc)
+{
+   const struct toy_inst *templ = &tc->templ;
+
+   return tc_duplicate_inst(tc, templ);
+}
+
+/**
+ * A convenience version of tc_add() for instructions with 3 source operands.
+ *
+ * Returns the new instruction with its opcode, destination, and first three
+ * sources set, or NULL when tc_add() fails.
+ */
+static inline struct toy_inst *
+tc_add3(struct toy_compiler *tc, unsigned opcode,
+        struct toy_dst dst,
+        struct toy_src src0,
+        struct toy_src src1,
+        struct toy_src src2)
+{
+   struct toy_inst *inst = tc_add(tc);
+
+   if (inst) {
+      inst->opcode = opcode;
+      inst->dst = dst;
+      inst->src[0] = src0;
+      inst->src[1] = src1;
+      inst->src[2] = src2;
+   }
+
+   return inst;
+}
+
+/**
+ * A convenience version of tc_add() for instructions with 2 source operands.
+ *
+ * The third source is set to the null source.  Returns NULL when the
+ * instruction cannot be allocated.
+ */
+static inline struct toy_inst *
+tc_add2(struct toy_compiler *tc, unsigned opcode,
+        struct toy_dst dst,
+        struct toy_src src0,
+        struct toy_src src1)
+{
+   return tc_add3(tc, opcode, dst, src0, src1, tsrc_null());
+}
+
+/**
+ * A convenience version of tc_add() for instructions with 1 source operand.
+ * The second source is set to the null source.
+ */
+static inline struct toy_inst *
+tc_add1(struct toy_compiler *tc, unsigned opcode,
+        struct toy_dst dst,
+        struct toy_src src0)
+{
+   const struct toy_src no_src = tsrc_null();
+
+   return tc_add2(tc, opcode, dst, src0, no_src);
+}
+
+/**
+ * A convenience version of tc_add() for instructions without source or
+ * destination operands.  The destination and the source are set to null.
+ */
+static inline struct toy_inst *
+tc_add0(struct toy_compiler *tc, unsigned opcode)
+{
+   const struct toy_dst no_dst = tdst_null();
+   const struct toy_src no_src = tsrc_null();
+
+   return tc_add1(tc, opcode, no_dst, no_src);
+}
+
+/*
+ * TC_ALUn(func, opcode) defines an inline function named func that adds an
+ * instruction with the given opcode and n source operands, by forwarding to
+ * tc_addn().
+ */
+#define TC_ALU0(func, opcode)                     \
+static inline struct toy_inst *                   \
+func(struct toy_compiler *tc)                     \
+{                                                 \
+   return tc_add0(tc, opcode);                    \
+}
+
+#define TC_ALU1(func, opcode)                     \
+static inline struct toy_inst *                   \
+func(struct toy_compiler *tc, struct toy_dst dst, \
+     struct toy_src src)                          \
+{                                                 \
+   return tc_add1(tc, opcode, dst, src);          \
+}
+
+#define TC_ALU2(func, opcode)                     \
+static inline struct toy_inst *                   \
+func(struct toy_compiler *tc, struct toy_dst dst, \
+     struct toy_src src0, struct toy_src src1)    \
+{                                                 \
+   return tc_add2(tc, opcode, dst, src0, src1);   \
+}
+
+#define TC_ALU3(func, opcode)                     \
+static inline struct toy_inst *                   \
+func(struct toy_compiler *tc, struct toy_dst dst, \
+     struct toy_src src0, struct toy_src src1,    \
+     struct toy_src src2)                         \
+{                                                 \
+   return tc_add3(tc, opcode,                     \
+         dst, src0, src1, src2);                  \
+}
+
+/*
+ * TC_CND2(func, opcode) defines an inline function named func that adds a
+ * 2-source instruction and also sets its conditional modifier field.
+ *
+ * The generated function returns NULL, without touching the field, when the
+ * instruction cannot be allocated.
+ */
+#define TC_CND2(func, opcode)             \
+static inline struct toy_inst *           \
+func(struct toy_compiler *tc,             \
+     struct toy_dst dst,                  \
+     struct toy_src src0,                 \
+     struct toy_src src1,                 \
+     unsigned cond_modifier)              \
+{                                         \
+   struct toy_inst *inst;                 \
+   inst = tc_add2(tc, opcode,             \
+         dst, src0, src1);                \
+   if (inst)                              \
+      inst->cond_modifier = cond_modifier;\
+   return inst;                           \
+}
+
+/*
+ * Typed helpers, one per opcode, generated by the macros above.  The
+ * TC_CND2 ones take an extra cond_modifier argument.
+ */
+TC_ALU0(tc_NOP, BRW_OPCODE_NOP)
+TC_ALU0(tc_ELSE, BRW_OPCODE_ELSE)
+TC_ALU0(tc_ENDIF, BRW_OPCODE_ENDIF)
+TC_ALU1(tc_MOV, BRW_OPCODE_MOV)
+TC_ALU1(tc_RNDD, BRW_OPCODE_RNDD)
+TC_ALU1(tc_INV, TOY_OPCODE_INV)
+TC_ALU1(tc_FRC, BRW_OPCODE_FRC)
+TC_ALU1(tc_EXP, TOY_OPCODE_EXP)
+TC_ALU1(tc_LOG, TOY_OPCODE_LOG)
+TC_ALU2(tc_ADD, BRW_OPCODE_ADD)
+TC_ALU2(tc_MUL, BRW_OPCODE_MUL)
+TC_ALU2(tc_AND, BRW_OPCODE_AND)
+TC_ALU2(tc_OR, BRW_OPCODE_OR)
+TC_ALU2(tc_DP2, BRW_OPCODE_DP2)
+TC_ALU2(tc_DP3, BRW_OPCODE_DP3)
+TC_ALU2(tc_DP4, BRW_OPCODE_DP4)
+TC_ALU2(tc_SHL, BRW_OPCODE_SHL)
+TC_ALU2(tc_SHR, BRW_OPCODE_SHR)
+TC_ALU2(tc_POW, TOY_OPCODE_POW)
+TC_ALU3(tc_MAC, BRW_OPCODE_MAC)
+TC_CND2(tc_SEL, BRW_OPCODE_SEL)
+TC_CND2(tc_CMP, BRW_OPCODE_CMP)
+TC_CND2(tc_IF, BRW_OPCODE_IF)
+TC_CND2(tc_SEND, BRW_OPCODE_SEND)
+
+/**
+ * Upcast a list_head to an instruction.
+ *
+ * \p item must be the list member of a struct toy_inst; passing the list
+ * head of the compiler itself is not valid.  \p tc is not used.
+ */
+static inline struct toy_inst *
+tc_list_to_inst(struct toy_compiler *tc, struct list_head *item)
+{
+   /* the NULL pointer only conveys the container type to container_of() */
+   return container_of(item, (struct toy_inst *) NULL, list);
+}
+
+/**
+ * Return the instruction at the current location, or NULL when the current
+ * location is the list head itself (i.e. not on any instruction).
+ */
+static inline struct toy_inst *
+tc_current(struct toy_compiler *tc)
+{
+   if (tc->iter == &tc->instructions)
+      return NULL;
+
+   return tc_list_to_inst(tc, tc->iter);
+}
+
+/**
+ * Set the current location to the head, so that the next call to
+ * tc_next_no_skip() returns the first instruction on the list.
+ */
+static inline void
+tc_head(struct toy_compiler *tc)
+{
+   struct list_head *head = &tc->instructions;
+
+   tc->iter = head;
+   tc->iter_next = head->next;
+}
+
+/**
+ * Set the current location to the tail.  Both iter and iter_next point at
+ * the list head afterwards.
+ */
+static inline void
+tc_tail(struct toy_compiler *tc)
+{
+   struct list_head *head = &tc->instructions;
+
+   tc->iter = head;
+   tc->iter_next = head;
+}
+
+/**
+ * Advance the current location and return the instruction there, markers
+ * included.  Returns NULL once the end of the list is reached.
+ */
+static inline struct toy_inst *
+tc_next_no_skip(struct toy_compiler *tc)
+{
+   struct list_head *next = tc->iter_next;
+
+   if (next != &tc->instructions) {
+      tc->iter = next;
+      tc->iter_next = next->next;
+
+      return tc_list_to_inst(tc, next);
+   }
+
+   /* stay at the tail so that new instructions are added there */
+   tc_tail(tc);
+
+   return NULL;
+}
+
+/**
+ * Advance the current location to the next instruction that is not a
+ * marker.  Returns NULL once the end of the list is reached.
+ */
+static inline struct toy_inst *
+tc_next(struct toy_compiler *tc)
+{
+   struct toy_inst *inst = tc_next_no_skip(tc);
+
+   while (inst && inst->marker)
+      inst = tc_next_no_skip(tc);
+
+   return inst;
+}
+
+/**
+ * Mark the compilation as failed.  Only the first failure is recorded;
+ * later calls leave the stored reason untouched.  The \p reason pointer is
+ * stored as is, not copied.
+ */
+static inline void
+tc_fail(struct toy_compiler *tc, const char *reason)
+{
+   if (tc->fail)
+      return;
+
+   tc->fail = true;
+   tc->reason = reason;
+}
+
+void
+toy_compiler_init(struct toy_compiler *tc, int gen);
+
+void
+toy_compiler_cleanup(struct toy_compiler *tc);
+
+void
+toy_compiler_dump(struct toy_compiler *tc);
+
+void *
+toy_compiler_assemble(struct toy_compiler *tc, int *size);
+
+void
+toy_compiler_disassemble(struct toy_compiler *tc, const void *kernel, int size);
+
+#endif /* TOY_COMPILER_H */
diff --git a/src/gallium/drivers/ilo/shader/toy_compiler_asm.c b/src/gallium/drivers/ilo/shader/toy_compiler_asm.c
new file mode 100644 (file)
index 0000000..09a00dd
--- /dev/null
@@ -0,0 +1,750 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "toy_compiler.h"
+
+/*
+ * An operand "origin" packs a register location as
+ * (RegNum << CG_REG_SHIFT | SubRegNumInBytes).  CG_REG_NUM() recovers the
+ * register number from such a value.
+ */
+#define CG_REG_SHIFT 5
+#define CG_REG_NUM(origin) ((origin) >> CG_REG_SHIFT)
+
+/*
+ * Per-instruction encoder state.  codegen_prepare() fills it in from a
+ * toy_inst; the translate_*() and codegen_inst*() helpers only read it.
+ */
+struct codegen {
+   /* the instruction being encoded, and its index in the kernel */
+   const struct toy_inst *inst;
+   int pc;
+
+   /* codegen_prepare() always sets this to 0 */
+   unsigned flag_sub_reg_num;
+
+   /* destination operand, already translated to hardware enums */
+   struct codegen_dst {
+      unsigned file;
+      unsigned type;
+      bool indirect;
+      unsigned indirect_subreg;
+      unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */
+
+      unsigned horz_stride;
+
+      unsigned writemask;
+   } dst;
+
+   /* source operands, already translated to hardware enums */
+   struct codegen_src {
+      unsigned file;
+      unsigned type;
+      bool indirect;
+      unsigned indirect_subreg;
+      unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */
+
+      /* region description */
+      unsigned vert_stride;
+      unsigned width;
+      unsigned horz_stride;
+
+      /* channel selects for x, y, z, and w, in that order */
+      unsigned swizzle[4];
+      bool absolute;
+      bool negate;
+   } src[3];
+};
+
+/**
+ * Tell whether source operand \p idx is the null register, i.e. it lives
+ * in the architecture register file at register BRW_ARF_NULL, sub-register
+ * zero.
+ */
+static bool
+src_is_null(const struct codegen *cg, int idx)
+{
+   const unsigned null_origin = BRW_ARF_NULL << CG_REG_SHIFT;
+
+   if (cg->src[idx].file != BRW_ARCHITECTURE_REGISTER_FILE)
+      return false;
+
+   return (cg->src[idx].origin == null_origin);
+}
+
+/**
+ * Translate a source operand to DW2 or DW3 of the 1-src/2-src format.
+ *
+ * idx 0 produces DW2 and idx 1 produces DW3 (see codegen_inst()).  For
+ * idx 0 the flag sub-register number is additionally placed at bit 25.
+ *
+ * An immediate value always ends up in the DWord produced for idx 1,
+ * whichever of src[0] and src[1] carries it.
+ */
+static uint32_t
+translate_src(const struct codegen *cg, int idx)
+{
+   const struct codegen_src *src = &cg->src[idx];
+   uint32_t dw;
+
+   /* special treatment may be needed if any of the operand is immediate */
+   if (cg->src[0].file == BRW_IMMEDIATE_VALUE) {
+      assert(!cg->src[0].absolute && !cg->src[0].negate);
+      /* only the last src operand can be an immediate */
+      assert(src_is_null(cg, 1));
+
+      /* DW2 carries only the flag sub-register; the value goes to DW3 */
+      if (idx == 0)
+         return cg->flag_sub_reg_num << 25;
+      else
+         return cg->src[0].origin;
+   }
+   else if (idx && cg->src[1].file == BRW_IMMEDIATE_VALUE) {
+      assert(!cg->src[1].absolute && !cg->src[1].negate);
+      return cg->src[1].origin;
+   }
+
+   assert(src->file != BRW_IMMEDIATE_VALUE);
+
+   if (src->indirect) {
+      /* for indirect operands, origin holds a signed address offset */
+      const int offset = (int) src->origin;
+
+      assert(src->file == BRW_GENERAL_REGISTER_FILE);
+      assert(offset < 512 && offset >= -512);
+
+      if (cg->inst->access_mode == BRW_ALIGN_16) {
+         assert(src->width == BRW_WIDTH_4);
+         assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);
+
+         /* the lower 4 bits are reserved for the swizzle_[xy] */
+         assert(!(src->origin & 0xf));
+
+         /* swizzle z/w take the place of width/horz_stride at bits 16-20 */
+         dw = src->vert_stride << 21 |
+              src->swizzle[3] << 18 |
+              src->swizzle[2] << 16 |
+              BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
+              src->negate << 14 |
+              src->absolute << 13 |
+              src->indirect_subreg << 10 |
+              (src->origin & 0x3f0) |
+              src->swizzle[1] << 2 |
+              src->swizzle[0];
+      }
+      else {
+         /* align1 cannot swizzle; only the identity swizzle is accepted */
+         assert(src->swizzle[0] == TOY_SWIZZLE_X &&
+                src->swizzle[1] == TOY_SWIZZLE_Y &&
+                src->swizzle[2] == TOY_SWIZZLE_Z &&
+                src->swizzle[3] == TOY_SWIZZLE_W);
+
+         /* the offset is truncated to its low 10 bits */
+         dw = src->vert_stride << 21 |
+              src->width << 18 |
+              src->horz_stride << 16 |
+              BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
+              src->negate << 14 |
+              src->absolute << 13 |
+              src->indirect_subreg << 10 |
+              (src->origin & 0x3ff);
+      }
+   }
+   else {
+      /* direct addressing: sanity check the register number per file */
+      switch (src->file) {
+      case BRW_ARCHITECTURE_REGISTER_FILE:
+         break;
+      case BRW_GENERAL_REGISTER_FILE:
+         assert(CG_REG_NUM(src->origin) < 128);
+         break;
+      case BRW_MESSAGE_REGISTER_FILE:
+         assert(cg->inst->opcode == BRW_OPCODE_SEND ||
+                cg->inst->opcode == BRW_OPCODE_SENDC);
+         assert(CG_REG_NUM(src->origin) < 16);
+         break;
+      case BRW_IMMEDIATE_VALUE:
+      default:
+         assert(!"invalid src file");
+         break;
+      }
+
+      if (cg->inst->access_mode == BRW_ALIGN_16) {
+         assert(src->width == BRW_WIDTH_4);
+         assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);
+
+         /* the lower 4 bits are reserved for the swizzle_[xy] */
+         assert(!(src->origin & 0xf));
+
+         dw = src->vert_stride << 21 |
+              src->swizzle[3] << 18 |
+              src->swizzle[2] << 16 |
+              BRW_ADDRESS_DIRECT << 15 |
+              src->negate << 14 |
+              src->absolute << 13 |
+              src->origin |
+              src->swizzle[1] << 2 |
+              src->swizzle[0];
+      }
+      else {
+         /* align1 cannot swizzle; only the identity swizzle is accepted */
+         assert(src->swizzle[0] == TOY_SWIZZLE_X &&
+                src->swizzle[1] == TOY_SWIZZLE_Y &&
+                src->swizzle[2] == TOY_SWIZZLE_Z &&
+                src->swizzle[3] == TOY_SWIZZLE_W);
+
+         dw = src->vert_stride << 21 |
+              src->width << 18 |
+              src->horz_stride << 16 |
+              BRW_ADDRESS_DIRECT << 15 |
+              src->negate << 14 |
+              src->absolute << 13 |
+              src->origin;
+      }
+   }
+
+   /* the flag sub-register number lives in the DWord of src0 only */
+   if (idx == 0)
+      dw |= cg->flag_sub_reg_num << 25;
+
+   return dw;
+}
+
+/**
+ * Translate the destination operand to the higher 16 bits of DW1 of the
+ * 1-src/2-src format.
+ *
+ * The returned value is not shifted; translate_dst() moves it into the
+ * upper half of DW1.  An immediate destination is accepted only for
+ * IF/ELSE/ENDIF/WHILE, in which case the low 16 bits of the immediate are
+ * returned.
+ */
+static uint16_t
+translate_dst_region(const struct codegen *cg)
+{
+   const struct codegen_dst *dst = &cg->dst;
+   uint16_t dw1_region;
+
+   if (dst->file == BRW_IMMEDIATE_VALUE) {
+      /* dst is immediate (JIP) when the opcode is a conditional branch */
+      switch (cg->inst->opcode) {
+      case BRW_OPCODE_IF:
+      case BRW_OPCODE_ELSE:
+      case BRW_OPCODE_ENDIF:
+      case BRW_OPCODE_WHILE:
+         assert(dst->type == BRW_REGISTER_TYPE_W);
+         dw1_region = (dst->origin & 0xffff);
+         break;
+      default:
+         assert(!"dst cannot be immediate");
+         dw1_region = 0;
+         break;
+      }
+
+      return dw1_region;
+   }
+
+   if (dst->indirect) {
+      /* for indirect operands, origin holds a signed address offset */
+      const int offset = (int) dst->origin;
+
+      assert(dst->file == BRW_GENERAL_REGISTER_FILE);
+      assert(offset < 512 && offset >= -512);
+
+      if (cg->inst->access_mode == BRW_ALIGN_16) {
+         /*
+          * From the Sandy Bridge PRM, volume 4 part 2, page 144:
+          *
+          *     "Allthough Dst.HorzStride is a don't care for Align16, HW
+          *      needs this to be programmed as 01."
+          */
+         assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
+         /* the lower 4 bits are reserved for the writemask */
+         assert(!(dst->origin & 0xf));
+
+         dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
+                      dst->horz_stride << 13 |
+                      dst->indirect_subreg << 10 |
+                      (dst->origin & 0x3f0) |
+                      dst->writemask;
+      }
+      else {
+         /* align1 has no writemask field; all channels must be enabled */
+         assert(dst->writemask == TOY_WRITEMASK_XYZW);
+
+         /* the offset is truncated to its low 10 bits */
+         dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
+                      dst->horz_stride << 13 |
+                      dst->indirect_subreg << 10 |
+                      (dst->origin & 0x3ff);
+      }
+   }
+   else {
+      /* direct addressing: sanity check the register number per file */
+      assert((dst->file == BRW_GENERAL_REGISTER_FILE &&
+              CG_REG_NUM(dst->origin) < 128) ||
+             (dst->file == BRW_MESSAGE_REGISTER_FILE &&
+              CG_REG_NUM(dst->origin) < 16) ||
+             (dst->file == BRW_ARCHITECTURE_REGISTER_FILE));
+
+      if (cg->inst->access_mode == BRW_ALIGN_16) {
+         /* similar to the indirect case */
+         assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
+         assert(!(dst->origin & 0xf));
+
+         dw1_region = BRW_ADDRESS_DIRECT << 15 |
+                      dst->horz_stride << 13 |
+                      dst->origin |
+                      dst->writemask;
+      }
+      else {
+         /* align1 has no writemask field; all channels must be enabled */
+         assert(dst->writemask == TOY_WRITEMASK_XYZW);
+
+         dw1_region = BRW_ADDRESS_DIRECT << 15 |
+                      dst->horz_stride << 13 |
+                      dst->origin;
+      }
+   }
+
+   return dw1_region;
+}
+
+/**
+ * Translate the destination operand to DW1 of the 1-src/2-src format.
+ *
+ * The upper 16 bits hold the destination region produced by
+ * translate_dst_region(); the lower 16 bits hold the register file and
+ * type of src1, src0, and dst.
+ */
+static uint32_t
+translate_dst(const struct codegen *cg)
+{
+   /*
+    * translate_dst_region() returns a uint16_t, which would be promoted to
+    * a signed int in a shift expression.  Widen it first so that a region
+    * with bit 15 set (the addressing mode bit, or a 16-bit immediate with
+    * its high bit set) does not overflow an int when shifted.
+    */
+   const uint32_t region = translate_dst_region(cg);
+
+   return region << 16 |
+          cg->src[1].type << 12 |
+          cg->src[1].file << 10 |
+          cg->src[0].type << 7 |
+          cg->src[0].file << 5 |
+          cg->dst.type << 2 |
+          cg->dst.file;
+}
+
+/**
+ * Translate the instruction to DW0 of the 1-src/2-src format.
+ *
+ * DW0 carries the opcode in its lowest bits and the instruction controls
+ * (access mode, masking, predication, execution size, conditional
+ * modifier, saturation, ...) above it.  Debug control and compaction
+ * control are always encoded as zero.
+ */
+static uint32_t
+translate_inst(const struct codegen *cg)
+{
+   const bool debug_ctrl = false;
+   const bool cmpt_ctrl = false;
+
+   assert(cg->inst->opcode < 128);
+
+   /*
+    * Every field is widened to uint32_t before it is shifted.  The fields
+    * of toy_inst are declared elsewhere; should any of them be narrower
+    * than int (a bool or a bitfield), it would otherwise be promoted to a
+    * signed int, and shifting a set bit into position 31 would overflow.
+    */
+   return (uint32_t) cg->inst->saturate << 31 |
+          (uint32_t) debug_ctrl << 30 |
+          (uint32_t) cmpt_ctrl << 29 |
+          (uint32_t) cg->inst->acc_wr_ctrl << 28 |
+          (uint32_t) cg->inst->cond_modifier << 24 |
+          (uint32_t) cg->inst->exec_size << 21 |
+          (uint32_t) cg->inst->pred_inv << 20 |
+          (uint32_t) cg->inst->pred_ctrl << 16 |
+          (uint32_t) cg->inst->thread_ctrl << 14 |
+          (uint32_t) cg->inst->qtr_ctrl << 12 |
+          (uint32_t) cg->inst->dep_ctrl << 10 |
+          (uint32_t) cg->inst->mask_ctrl << 9 |
+          (uint32_t) cg->inst->access_mode << 8 |
+          (uint32_t) cg->inst->opcode;
+}
+
+/**
+ * Emit one instruction in the 1-src/2-src format.
+ *
+ * Four DWords are written to \p code: the instruction header, the
+ * destination, and the two sources.  The third source must be null.
+ */
+static void
+codegen_inst(const struct codegen *cg, uint32_t *code)
+{
+   uint32_t *dw = code;
+
+   *dw++ = translate_inst(cg);
+   *dw++ = translate_dst(cg);
+   *dw++ = translate_src(cg, 0);
+   *dw = translate_src(cg, 1);
+
+   assert(src_is_null(cg, 2));
+}
+
+/**
+ * Codegen an instruction in 3-src format.
+ *
+ * Four DWords are written to \p code.  DW0 is shared with the
+ * 1-src/2-src format; DW1 holds the destination and the source modifiers;
+ * DW2 and DW3 hold the three sources, each packed into 20 bits or less.
+ */
+static void
+codegen_inst_3src(const struct codegen *cg, uint32_t *code)
+{
+   const struct codegen_dst *dst = &cg->dst;
+   uint32_t dw0, dw1, dw_src[3];
+   int i;
+
+   dw0 = translate_inst(cg);
+
+   /*
+    * 3-src instruction restrictions
+    *
+    *  - align16 with direct addressing
+    *  - GRF or MRF dst
+    *  - GRF src
+    *  - sub_reg_num is DWORD aligned
+    *  - no regioning except replication control
+    *    (vert_stride == 0 && horz_stride == 0)
+    */
+   assert(cg->inst->access_mode == BRW_ALIGN_16);
+
+   assert(!dst->indirect);
+   assert((dst->file == BRW_GENERAL_REGISTER_FILE &&
+           CG_REG_NUM(dst->origin) < 128) ||
+          (dst->file == BRW_MESSAGE_REGISTER_FILE &&
+           CG_REG_NUM(dst->origin) < 16));
+   assert(!(dst->origin & 0x3));
+   assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
+
+   /* bit 0 selects between a GRF (0) and an MRF (1) destination */
+   dw1 = dst->origin << 19 |
+         dst->writemask << 17 |
+         cg->src[2].negate << 9 |
+         cg->src[2].absolute << 8 |
+         cg->src[1].negate << 7 |
+         cg->src[1].absolute << 6 |
+         cg->src[0].negate << 5 |
+         cg->src[0].absolute << 4 |
+         cg->flag_sub_reg_num << 1 |
+         (dst->file == BRW_MESSAGE_REGISTER_FILE);
+
+   for (i = 0; i < 3; i++) {
+      const struct codegen_src *src = &cg->src[i];
+
+      assert(!src->indirect);
+      assert(src->file == BRW_GENERAL_REGISTER_FILE &&
+             CG_REG_NUM(src->origin) < 128);
+      assert(!(src->origin & 0x3));
+
+      /* either the plain <4;4,1> region or a replicated scalar */
+      assert((src->vert_stride == BRW_VERTICAL_STRIDE_4 &&
+              src->horz_stride == BRW_HORIZONTAL_STRIDE_1) ||
+             (src->vert_stride == BRW_VERTICAL_STRIDE_0 &&
+              src->horz_stride == BRW_HORIZONTAL_STRIDE_0));
+      assert(src->width == BRW_WIDTH_4);
+
+      /*
+       * Bit 0 is the replication control and bits 1-8 are the swizzle.
+       * The low two bits of origin are zero (asserted above), so the
+       * shifted origin starts at bit 9 and does not overlap the swizzle.
+       */
+      dw_src[i] = src->origin << 7 |
+                  src->swizzle[3] << 7 |
+                  src->swizzle[2] << 5 |
+                  src->swizzle[1] << 3 |
+                  src->swizzle[0] << 1 |
+                  (src->vert_stride == BRW_VERTICAL_STRIDE_0 &&
+                   src->horz_stride == BRW_HORIZONTAL_STRIDE_0);
+
+      /* only the lower 20 bits are used */
+      assert((dw_src[i] & 0xfffff) == dw_src[i]);
+   }
+
+   code[0] = dw0;
+   code[1] = dw1;
+   /* concatenate the bits of dw_src */
+   /* src1 straddles the DWords: its low 11 bits go to DW2, the rest to DW3 */
+   code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0];
+   code[3] = dw_src[2] << 10 | (dw_src[1] >> 11);
+}
+
+/**
+ * Sanity check the region parameters of the operands.
+ *
+ * Every check is an assert(); the function neither modifies \p cg nor
+ * reports a violation in any other way.  The numbers in the comments
+ * refer to the list of restrictions in the PRM section cited below.
+ */
+static void
+codegen_validate_region_restrictions(const struct codegen *cg)
+{
+   /* the maps below turn hardware encodings into element counts */
+   const int exec_size_map[] = {
+      [BRW_EXECUTE_1] = 1,
+      [BRW_EXECUTE_2] = 2,
+      [BRW_EXECUTE_4] = 4,
+      [BRW_EXECUTE_8] = 8,
+      [BRW_EXECUTE_16] = 16,
+      [BRW_EXECUTE_32] = 32,
+   };
+   const int width_map[] = {
+      [BRW_WIDTH_1] = 1,
+      [BRW_WIDTH_2] = 2,
+      [BRW_WIDTH_4] = 4,
+      [BRW_WIDTH_8] = 8,
+      [BRW_WIDTH_16] = 16,
+   };
+   const int horz_stride_map[] = {
+      [BRW_HORIZONTAL_STRIDE_0] = 0,
+      [BRW_HORIZONTAL_STRIDE_1] = 1,
+      [BRW_HORIZONTAL_STRIDE_2] = 2,
+      [BRW_HORIZONTAL_STRIDE_4] = 4,
+   };
+   const int vert_stride_map[] = {
+      [BRW_VERTICAL_STRIDE_0] = 0,
+      [BRW_VERTICAL_STRIDE_1] = 1,
+      [BRW_VERTICAL_STRIDE_2] = 2,
+      [BRW_VERTICAL_STRIDE_4] = 4,
+      [BRW_VERTICAL_STRIDE_8] = 8,
+      [BRW_VERTICAL_STRIDE_16] = 16,
+      [BRW_VERTICAL_STRIDE_32] = 32,
+      [BRW_VERTICAL_STRIDE_64] = 64,
+      [BRW_VERTICAL_STRIDE_128] = 128,
+      [BRW_VERTICAL_STRIDE_256] = 256,
+      [BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL] = 0,
+   };
+   const int exec_size = exec_size_map[cg->inst->exec_size];
+   int i;
+
+   /* Sandy Bridge PRM, volume 4 part 2, page 94 */
+
+   /* 1. (we don't do 32 anyway) */
+   assert(exec_size <= 16);
+
+   for (i = 0; i < Elements(cg->src); i++) {
+      const int width = width_map[cg->src[i].width];
+      const int horz_stride = horz_stride_map[cg->src[i].horz_stride];
+      const int vert_stride = vert_stride_map[cg->src[i].vert_stride];
+
+      /* the first null source ends the check; later ones are not looked at */
+      if (src_is_null(cg, i))
+         break;
+
+      /* 3. */
+      assert(exec_size >= width);
+
+      if (exec_size == width) {
+         /* 4. & 5. */
+         if (horz_stride)
+            assert(vert_stride == width * horz_stride);
+      }
+
+      if (width == 1) {
+         /* 6. */
+         assert(horz_stride == 0);
+
+         /* 7. */
+         if (exec_size == 1)
+            assert(vert_stride == 0);
+      }
+
+      /* 8. */
+      if (!vert_stride && !horz_stride)
+         assert(width == 1);
+   }
+
+   /* derived from 10.1.2. & 10.2. */
+   assert(cg->dst.horz_stride != BRW_HORIZONTAL_STRIDE_0);
+}
+
+/**
+ * Map a toy register file to the hardware register file encoding.
+ *
+ * An unhandled file trips an assertion and falls back to the GRF.
+ */
+static unsigned
+translate_vfile(enum toy_file file)
+{
+   unsigned hw_file;
+
+   switch (file) {
+   case TOY_FILE_ARF:
+      hw_file = BRW_ARCHITECTURE_REGISTER_FILE;
+      break;
+   case TOY_FILE_GRF:
+      hw_file = BRW_GENERAL_REGISTER_FILE;
+      break;
+   case TOY_FILE_MRF:
+      hw_file = BRW_MESSAGE_REGISTER_FILE;
+      break;
+   case TOY_FILE_IMM:
+      hw_file = BRW_IMMEDIATE_VALUE;
+      break;
+   default:
+      assert(!"unhandled toy file");
+      hw_file = BRW_GENERAL_REGISTER_FILE;
+      break;
+   }
+
+   return hw_file;
+}
+
+/**
+ * Map a toy operand type to the hardware register type encoding.
+ *
+ * An unhandled type trips an assertion and falls back to float.
+ */
+static unsigned
+translate_vtype(enum toy_type type)
+{
+   unsigned hw_type;
+
+   switch (type) {
+   case TOY_TYPE_F:
+      hw_type = BRW_REGISTER_TYPE_F;
+      break;
+   case TOY_TYPE_D:
+      hw_type = BRW_REGISTER_TYPE_D;
+      break;
+   case TOY_TYPE_UD:
+      hw_type = BRW_REGISTER_TYPE_UD;
+      break;
+   case TOY_TYPE_W:
+      hw_type = BRW_REGISTER_TYPE_W;
+      break;
+   case TOY_TYPE_UW:
+      hw_type = BRW_REGISTER_TYPE_UW;
+      break;
+   case TOY_TYPE_V:
+      hw_type = BRW_REGISTER_TYPE_V;
+      break;
+   default:
+      assert(!"unhandled toy type");
+      hw_type = BRW_REGISTER_TYPE_F;
+      break;
+   }
+
+   return hw_type;
+}
+
+/**
+ * Convert a toy writemask to the hardware writemask.  The value is passed
+ * through unchanged after asserting that it fits in 4 bits.
+ */
+static unsigned
+translate_writemask(enum toy_writemask writemask)
+{
+   /* TOY_WRITEMASK_* are compatible with the hardware definitions */
+   const unsigned hw_writemask = writemask;
+
+   assert(writemask <= 0xf);
+
+   return hw_writemask;
+}
+
+/**
+ * Convert a toy channel select to the hardware channel select.  The value
+ * is passed through unchanged after asserting that it fits in 2 bits.
+ */
+static unsigned
+translate_swizzle(enum toy_swizzle swizzle)
+{
+   /* TOY_SWIZZLE_* are compatible with the hardware definitions */
+   const unsigned hw_swizzle = swizzle;
+
+   assert(swizzle <= 3);
+
+   return hw_swizzle;
+}
+
+/**
+ * Replicate a 16-bit immediate into both halves of a 32-bit value.
+ *
+ * From the Sandy Bridge PRM, volume 4 part 2, page 81:
+ *
+ *     "For a word or an unsigned word immediate data, software must
+ *      replicate the same 16-bit immediate value to both the lower word
+ *      and the high word of the 32-bit immediate field in an instruction."
+ *
+ * Values of type W or UW are reduced to their low word, which is then
+ * copied into the high word.  Values of any other type are returned as-is.
+ */
+static unsigned
+replicate_imm_word(enum toy_type type, unsigned val32)
+{
+   switch (type) {
+   case TOY_TYPE_W:
+   case TOY_TYPE_UW:
+      val32 &= 0xffff;
+      return val32 | val32 << 16;
+   default:
+      return val32;
+   }
+}
+
+/**
+ * Prepare for generating an instruction.
+ *
+ * Fills \p cg from \p inst: register files, types, and origins are
+ * translated to the hardware encodings, and the toy region (rect) of each
+ * operand is expanded into vert_stride/width/horz_stride.
+ *
+ * \p rect_linear_width gives the width, in elements, used for sources
+ * whose region is TOY_RECT_LINEAR.
+ */
+static void
+codegen_prepare(struct codegen *cg, const struct toy_inst *inst,
+                int pc, int rect_linear_width)
+{
+   int i;
+
+   cg->inst = inst;
+   cg->pc = pc;
+
+   cg->flag_sub_reg_num = 0;
+
+   cg->dst.file = translate_vfile(inst->dst.file);
+   cg->dst.type = translate_vtype(inst->dst.type);
+   cg->dst.indirect = inst->dst.indirect;
+   cg->dst.indirect_subreg = inst->dst.indirect_subreg;
+   cg->dst.origin = inst->dst.val32;
+
+   /* word immediates must be replicated to both halves of the DWord */
+   if (inst->dst.file == TOY_FILE_IMM)
+      cg->dst.origin = replicate_imm_word(inst->dst.type, cg->dst.origin);
+
+   cg->dst.writemask = translate_writemask(inst->dst.writemask);
+
+   switch (inst->dst.rect) {
+   case TOY_RECT_LINEAR:
+      cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1;
+      break;
+   default:
+      assert(!"unsupported dst region");
+      cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1;
+      break;
+   }
+
+   for (i = 0; i < Elements(cg->src); i++) {
+      struct codegen_src *src = &cg->src[i];
+
+      src->file = translate_vfile(inst->src[i].file);
+      src->type = translate_vtype(inst->src[i].type);
+      src->indirect = inst->src[i].indirect;
+      src->indirect_subreg = inst->src[i].indirect_subreg;
+      src->origin = inst->src[i].val32;
+
+      /*
+       * Do the same for src.  The decision must be based on the file of
+       * this source, not on the file of dst: otherwise word immediates in
+       * sources are left unreplicated, and the origin of a W/UW register
+       * source is mangled whenever dst happens to be an immediate.
+       */
+      if (inst->src[i].file == TOY_FILE_IMM)
+         src->origin = replicate_imm_word(inst->src[i].type, src->origin);
+
+      src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x);
+      src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y);
+      src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z);
+      src->swizzle[3] = translate_swizzle(inst->src[i].swizzle_w);
+      src->absolute = inst->src[i].absolute;
+      src->negate = inst->src[i].negate;
+
+      /* expand the toy region into <vert_stride; width, horz_stride> */
+      switch (inst->src[i].rect) {
+      case TOY_RECT_LINEAR:
+         /*
+          * NOTE(review): a width of 1 yields <1;1,1>, which looks like it
+          * would trip the "width == 1 implies horz_stride == 0" assertion
+          * in codegen_validate_region_restrictions() -- confirm whether
+          * that width is ever used.
+          */
+         switch (rect_linear_width) {
+         case 1:
+            src->vert_stride = BRW_VERTICAL_STRIDE_1;
+            src->width = BRW_WIDTH_1;
+            break;
+         case 2:
+            src->vert_stride = BRW_VERTICAL_STRIDE_2;
+            src->width = BRW_WIDTH_2;
+            break;
+         case 4:
+            src->vert_stride = BRW_VERTICAL_STRIDE_4;
+            src->width = BRW_WIDTH_4;
+            break;
+         case 8:
+            src->vert_stride = BRW_VERTICAL_STRIDE_8;
+            src->width = BRW_WIDTH_8;
+            break;
+         case 16:
+            src->vert_stride = BRW_VERTICAL_STRIDE_16;
+            src->width = BRW_WIDTH_16;
+            break;
+         default:
+            assert(!"unsupported TOY_RECT_LINEAR width");
+            src->vert_stride = BRW_VERTICAL_STRIDE_1;
+            src->width = BRW_WIDTH_1;
+            break;
+         }
+         src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
+         break;
+      case TOY_RECT_041:
+         src->vert_stride = BRW_VERTICAL_STRIDE_0;
+         src->width = BRW_WIDTH_4;
+         src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
+         break;
+      case TOY_RECT_010:
+         src->vert_stride = BRW_VERTICAL_STRIDE_0;
+         src->width = BRW_WIDTH_1;
+         src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
+         break;
+      case TOY_RECT_220:
+         src->vert_stride = BRW_VERTICAL_STRIDE_2;
+         src->width = BRW_WIDTH_2;
+         src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
+         break;
+      case TOY_RECT_440:
+         src->vert_stride = BRW_VERTICAL_STRIDE_4;
+         src->width = BRW_WIDTH_4;
+         src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
+         break;
+      case TOY_RECT_240:
+         src->vert_stride = BRW_VERTICAL_STRIDE_2;
+         src->width = BRW_WIDTH_4;
+         src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
+         break;
+      default:
+         assert(!"unsupported src region");
+         src->vert_stride = BRW_VERTICAL_STRIDE_1;
+         src->width = BRW_WIDTH_1;
+         src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
+         break;
+      }
+   }
+}
+
+/**
+ * Generate HW shader code.  The instructions should have been legalized.
+ *
+ * The instruction list is walked from the head, skipping markers, and each
+ * instruction is encoded into four DWords.  MAD uses the 3-src format;
+ * every other opcode uses the 1-src/2-src format.
+ *
+ * Returns the code buffer, allocated with MALLOC(), or NULL when the
+ * allocation fails or when tc is in the failed state after the walk.  On
+ * success, and if \p size is not NULL, the size of the generated code in
+ * bytes is stored in *size.
+ */
+void *
+toy_compiler_assemble(struct toy_compiler *tc, int *size)
+{
+   const struct toy_inst *inst;
+   uint32_t *code;
+   int pc;
+
+   code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t));
+   if (!code)
+      return NULL;
+
+   pc = 0;
+   tc_head(tc);
+   while ((inst = tc_next(tc)) != NULL) {
+      struct codegen cg;
+
+      /* the buffer was sized from num_instructions; never write past it */
+      if (pc >= tc->num_instructions) {
+         tc_fail(tc, "wrong instruction count");
+         break;
+      }
+
+      codegen_prepare(&cg, inst, pc, tc->rect_linear_width);
+      codegen_validate_region_restrictions(&cg);
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_MAD:
+         codegen_inst_3src(&cg, &code[pc * 4]);
+         break;
+      default:
+         codegen_inst(&cg, &code[pc * 4]);
+         break;
+      }
+
+      pc++;
+   }
+
+   /* never return an invalid kernel */
+   if (tc->fail) {
+      FREE(code);
+      return NULL;
+   }
+
+   if (size)
+      *size = pc * 4 * sizeof(uint32_t);
+
+   return code;
+}
diff --git a/src/gallium/drivers/ilo/shader/toy_compiler_disasm.c b/src/gallium/drivers/ilo/shader/toy_compiler_disasm.c
new file mode 100644 (file)
index 0000000..bedbc3d
--- /dev/null
@@ -0,0 +1,1385 @@
+/*
+ * Copyright © 2008 Keith Packard
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <stdarg.h>
+
+typedef short GLshort;
+typedef int GLint;
+typedef unsigned char GLubyte;
+typedef unsigned int GLuint;
+typedef float GLfloat;
+#include <stdint.h>
+#include "brw_defines.h"
+#include "brw_structs.h"
+static int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
+
+#include "toy_compiler.h"
+
+/**
+ * Disassemble \p size bytes of hardware code at \p kernel.
+ *
+ * The code is treated as an array of struct brw_instruction and each one
+ * is handed to brw_disasm(), which writes to stderr.  Trailing bytes that
+ * do not make up a whole instruction are ignored.
+ */
+void
+toy_compiler_disassemble(struct toy_compiler *tc, const void *kernel, int size)
+{
+   /* set this to true to dump the hex */
+   const bool dump_hex = false;
+   const struct brw_instruction *insts = kernel;
+   const size_t count = size / sizeof(*insts);
+   int i;
+
+   for (i = 0; i < count; i++) {
+      const struct brw_instruction *cur = &insts[i];
+
+      if (dump_hex) {
+         const uint32_t *dwords = (const uint32_t *) cur;
+
+         /* most significant DWord first */
+         ilo_printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+               dwords[3], dwords[2], dwords[1], dwords[0]);
+      }
+
+      brw_disasm(stderr, (struct brw_instruction *) cur,
+            ILO_GEN_GET_MAJOR(tc->gen));
+   }
+}
+
+/*
+ * Opcode descriptions, keyed by BRW_OPCODE_*: the mnemonic and the number
+ * of source and destination operands.  Opcodes without an initializer are
+ * zero-filled, so their name is NULL.
+ *
+ * NOTE(review): "name" is a plain char * although it only ever receives
+ * string literals here; const char * would be safer if no user of the
+ * struct needs a mutable pointer -- confirm before changing.
+ */
+static const struct opcode_desc {
+   char    *name;
+   int     nsrc;
+   int     ndst;
+} opcode_descs[128] = {
+    [BRW_OPCODE_MOV] = { .name = "mov", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_FRC] = { .name = "frc", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_RNDU] = { .name = "rndu", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_RNDD] = { .name = "rndd", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_RNDE] = { .name = "rnde", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_RNDZ] = { .name = "rndz", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_NOT] = { .name = "not", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_LZD] = { .name = "lzd", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_F32TO16] = { .name = "f32to16", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_F16TO32] = { .name = "f16to32", .nsrc = 1, .ndst = 1 },
+
+    [BRW_OPCODE_MUL] = { .name = "mul", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_MAC] = { .name = "mac", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_MACH] = { .name = "mach", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_LINE] = { .name = "line", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_PLN] = { .name = "pln", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_MAD] = { .name = "mad", .nsrc = 3, .ndst = 1 },
+    [BRW_OPCODE_LRP] = { .name = "lrp", .nsrc = 3, .ndst = 1 },
+    [BRW_OPCODE_SAD2] = { .name = "sad2", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SADA2] = { .name = "sada2", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_DP4] = { .name = "dp4", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_DPH] = { .name = "dph", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_DP3] = { .name = "dp3", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_DP2] = { .name = "dp2", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_MATH] = { .name = "math", .nsrc = 2, .ndst = 1 },
+
+    [BRW_OPCODE_AVG] = { .name = "avg", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_ADD] = { .name = "add", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SEL] = { .name = "sel", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_AND] = { .name = "and", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_OR] = { .name = "or", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_XOR] = { .name = "xor", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SHR] = { .name = "shr", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_SHL] = { .name = "shl", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_ASR] = { .name = "asr", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_CMP] = { .name = "cmp", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_CMPN] = { .name = "cmpn", .nsrc = 2, .ndst = 1 },
+
+    [BRW_OPCODE_SEND] = { .name = "send", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_SENDC] = { .name = "sendc", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_NOP] = { .name = "nop", .nsrc = 0, .ndst = 0 },
+    [BRW_OPCODE_JMPI] = { .name = "jmpi", .nsrc = 0, .ndst = 0 },
+    [BRW_OPCODE_IF] = { .name = "if", .nsrc = 2, .ndst = 0 },
+    [BRW_OPCODE_IFF] = { .name = "iff", .nsrc = 2, .ndst = 1 },
+    [BRW_OPCODE_WHILE] = { .name = "while", .nsrc = 2, .ndst = 0 },
+    [BRW_OPCODE_ELSE] = { .name = "else", .nsrc = 2, .ndst = 0 },
+    [BRW_OPCODE_BREAK] = { .name = "break", .nsrc = 2, .ndst = 0 },
+    [BRW_OPCODE_CONTINUE] = { .name = "cont", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_HALT] = { .name = "halt", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_MSAVE] = { .name = "msave", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_PUSH] = { .name = "push", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_MRESTORE] = { .name = "mrest", .nsrc = 1, .ndst = 1 },
+    [BRW_OPCODE_POP] = { .name = "pop", .nsrc = 2, .ndst = 0 },
+    [BRW_OPCODE_WAIT] = { .name = "wait", .nsrc = 1, .ndst = 0 },
+    [BRW_OPCODE_DO] = { .name = "do", .nsrc = 0, .ndst = 0 },
+    [BRW_OPCODE_ENDIF] = { .name = "endif", .nsrc = 2, .ndst = 0 },
+};
+/* alias of the table above */
+static const struct opcode_desc *opcode = opcode_descs;
+
+/*
+ * Mnemonic suffixes keyed by BRW_CONDITIONAL_*.  Entries without an
+ * initializer are NULL.
+ */
+static const char * const conditional_modifier[16] = {
+    [BRW_CONDITIONAL_NONE] = "",
+    [BRW_CONDITIONAL_Z] = ".e",
+    [BRW_CONDITIONAL_NZ] = ".ne",
+    [BRW_CONDITIONAL_G] = ".g",
+    [BRW_CONDITIONAL_GE] = ".ge",
+    [BRW_CONDITIONAL_L] = ".l",
+    [BRW_CONDITIONAL_LE] = ".le",
+    [BRW_CONDITIONAL_R] = ".r",
+    [BRW_CONDITIONAL_O] = ".o",
+    [BRW_CONDITIONAL_U] = ".u",
+};
+
+/* prefix for a source operand, keyed by its negate bit */
+static const char * const negate[2] = {
+    [0] = "",
+    [1] = "-",
+};
+
+/* prefix for a source operand, keyed by its absolute-value bit */
+static const char * const _abs[2] = {
+    [0] = "",
+    [1] = "(abs)",
+};
+
+/*
+ * Printable names for the region fields, keyed by the encoded field value.
+ * Entries without an initializer are NULL.
+ */
+
+/* vertical stride; encodings 7 to 14 have no name here */
+static const char * const vert_stride[16] = {
+    [0] = "0",
+    [1] = "1",
+    [2] = "2",
+    [3] = "4",
+    [4] = "8",
+    [5] = "16",
+    [6] = "32",
+    [15] = "VxH",
+};
+
+/* region width */
+static const char * const width[8] = {
+    [0] = "1",
+    [1] = "2",
+    [2] = "4",
+    [3] = "8",
+    [4] = "16",
+};
+
+/* horizontal stride */
+static const char * const horiz_stride[4] = {
+    [0] = "0",
+    [1] = "1",
+    [2] = "2",
+    [3] = "4"
+};
+
+/* channel select (swizzle component) */
+static const char * const chan_sel[4] = {
+    [0] = "x",
+    [1] = "y",
+    [2] = "z",
+    [3] = "w",
+};
+
+/*
+ * Printable names for the instruction control fields, keyed by the encoded
+ * field value.  Entries without an initializer are NULL.
+ */
+
+static const char * const debug_ctrl[2] = {
+    [0] = "",
+    [1] = ".breakpoint"
+};
+
+static const char * const saturate[2] = {
+    [0] = "",
+    [1] = ".sat"
+};
+
+/* accumulator write control */
+static const char * const accwr[2] = {
+    [0] = "",
+    [1] = "AccWrEnable"
+};
+
+/* write-enable control */
+static const char * const wectrl[2] = {
+    [0] = "WE_normal",
+    [1] = "WE_all"
+};
+
+static const char * const exec_size[8] = {
+    [0] = "1",
+    [1] = "2",
+    [2] = "4",
+    [3] = "8",
+    [4] = "16",
+    [5] = "32"
+};
+
+/* predicate inversion */
+static const char * const pred_inv[2] = {
+    [0] = "+",
+    [1] = "-"
+};
+
+/* predicate control in align16 mode; encoding 0 has no name */
+static const char * const pred_ctrl_align16[16] = {
+    [1] = "",
+    [2] = ".x",
+    [3] = ".y",
+    [4] = ".z",
+    [5] = ".w",
+    [6] = ".any4h",
+    [7] = ".all4h",
+};
+
+/* predicate control in align1 mode; encoding 0 has no name */
+static const char * const pred_ctrl_align1[16] = {
+    [1] = "",
+    [2] = ".anyv",
+    [3] = ".allv",
+    [4] = ".any2h",
+    [5] = ".all2h",
+    [6] = ".any4h",
+    [7] = ".all4h",
+    [8] = ".any8h",
+    [9] = ".all8h",
+    [10] = ".any16h",
+    [11] = ".all16h",
+};
+
+static const char * const thread_ctrl[4] = {
+    [0] = "",
+    [2] = "switch"
+};
+
+/* compression control */
+static const char * const compr_ctrl[4] = {
+    [0] = "",
+    [1] = "sechalf",
+    [2] = "compr",
+    [3] = "compr4",
+};
+
+/* dependency control */
+static const char * const dep_ctrl[4] = {
+    [0] = "",
+    [1] = "NoDDClr",
+    [2] = "NoDDChk",
+    [3] = "NoDDClr,NoDDChk",
+};
+
+static const char * const mask_ctrl[4] = {
+    [0] = "",
+    [1] = "nomask",
+};
+
+static const char * const access_mode[2] = {
+    [0] = "align1",
+    [1] = "align16",
+};
+
+/*
+ * Register type names, keyed by the encoded register type.  Encoding 6 has
+ * no name and is NULL.
+ */
+static const char * const reg_encoding[8] = {
+    [0] = "UD",
+    [1] = "D",
+    [2] = "UW",
+    [3] = "W",
+    [4] = "UB",
+    [5] = "B",
+    [7] = "F"
+};
+
+/*
+ * Size in bytes of each register type, keyed like reg_encoding.  Encoding
+ * 6 is zero.
+ *
+ * NOTE(review): unlike the neighbouring tables this one has external
+ * linkage; confirm whether another file references it before making it
+ * static.
+ */
+const int reg_type_size[8] = {
+    [0] = 4,
+    [1] = 4,
+    [2] = 2,
+    [3] = 2,
+    [4] = 1,
+    [5] = 1,
+    [7] = 4
+};
+
+/* register file names, keyed by the encoded register file */
+static const char * const reg_file[4] = {
+    [0] = "A",
+    [1] = "g",
+    [2] = "m",
+    [3] = "imm",
+};
+
+/*
+ * Writemask suffixes, keyed by the 4-bit mask: bit 0 is x, bit 1 is y,
+ * bit 2 is z, and bit 3 is w.  The full mask prints as nothing.
+ */
+static const char * const writemask[16] = {
+    [0x0] = ".",
+    [0x1] = ".x",
+    [0x2] = ".y",
+    [0x3] = ".xy",
+    [0x4] = ".z",
+    [0x5] = ".xz",
+    [0x6] = ".yz",
+    [0x7] = ".xyz",
+    [0x8] = ".w",
+    [0x9] = ".xw",
+    [0xa] = ".yw",
+    [0xb] = ".xyw",
+    [0xc] = ".zw",
+    [0xd] = ".xzw",
+    [0xe] = ".yzw",
+    [0xf] = "",
+};
+
+/* keyed by the end-of-thread bit */
+static const char * const end_of_thread[2] = {
+    [0] = "",
+    [1] = "EOT"
+};
+
+/*
+ * Names of send message targets (shared function IDs), used by brw_disasm()
+ * when gen < 6.  Slots without an initializer are NULL and are reported as
+ * invalid by control().
+ */
+static const char * const target_function[16] = {
+    [BRW_SFID_NULL] = "null",
+    [BRW_SFID_MATH] = "math",
+    [BRW_SFID_SAMPLER] = "sampler",
+    [BRW_SFID_MESSAGE_GATEWAY] = "gateway",
+    [BRW_SFID_DATAPORT_READ] = "read",
+    [BRW_SFID_DATAPORT_WRITE] = "write",
+    [BRW_SFID_URB] = "urb",
+    [BRW_SFID_THREAD_SPAWNER] = "thread_spawner"
+};
+
+/*
+ * Names of send message targets used by brw_disasm() when gen >= 6.  The
+ * generic read/write data ports are replaced by per-cache data port names.
+ */
+static const char * const target_function_gen6[16] = {
+    [BRW_SFID_NULL] = "null",
+    [BRW_SFID_MATH] = "math",
+    [BRW_SFID_SAMPLER] = "sampler",
+    [BRW_SFID_MESSAGE_GATEWAY] = "gateway",
+    [BRW_SFID_URB] = "urb",
+    [BRW_SFID_THREAD_SPAWNER] = "thread_spawner",
+    [GEN6_SFID_DATAPORT_SAMPLER_CACHE] = "sampler",
+    [GEN6_SFID_DATAPORT_RENDER_CACHE] = "render",
+    [GEN6_SFID_DATAPORT_CONSTANT_CACHE] = "const",
+    [GEN7_SFID_DATAPORT_DATA_CACHE] = "data"
+};
+
+/*
+ * Message type names for the gen6 render cache data port (going by the
+ * table name), indexed by message type.  Read and write message constants
+ * share one table, so this relies on their numeric values not colliding.
+ */
+static const char * const dp_rc_msg_type_gen6[16] = {
+    [BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ] = "OWORD block read",
+    [GEN6_DATAPORT_READ_MESSAGE_RENDER_UNORM_READ] = "RT UNORM read",
+    [GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ] = "OWORD dual block read",
+    [GEN6_DATAPORT_READ_MESSAGE_MEDIA_BLOCK_READ] = "media block read",
+    [GEN6_DATAPORT_READ_MESSAGE_OWORD_UNALIGN_BLOCK_READ] = "OWORD unaligned block read",
+    [GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ] = "DWORD scattered read",
+    [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_ATOMIC_WRITE] = "DWORD atomic write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE] = "OWORD block write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE] = "OWORD dual block write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_MEDIA_BLOCK_WRITE] = "media block write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE] = "DWORD scattered write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE] = "RT write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE] = "streamed VB write",
+    [GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE] = "RT UNORMc write",
+};
+
+/*
+ * Math function names.  brw_disasm() indexes this both with the
+ * conditional-modifier field of a MATH instruction and with the function
+ * field of a math send message.
+ */
+static const char * const math_function[16] = {
+    [BRW_MATH_FUNCTION_INV] = "inv",
+    [BRW_MATH_FUNCTION_LOG] = "log",
+    [BRW_MATH_FUNCTION_EXP] = "exp",
+    [BRW_MATH_FUNCTION_SQRT] = "sqrt",
+    [BRW_MATH_FUNCTION_RSQ] = "rsq",
+    [BRW_MATH_FUNCTION_SIN] = "sin",
+    [BRW_MATH_FUNCTION_COS] = "cos",
+    [BRW_MATH_FUNCTION_SINCOS] = "sincos",
+    [BRW_MATH_FUNCTION_FDIV] = "fdiv",
+    [BRW_MATH_FUNCTION_POW] = "pow",
+    [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER] = "intdivmod",
+    [BRW_MATH_FUNCTION_INT_DIV_QUOTIENT] = "intdiv",
+    [BRW_MATH_FUNCTION_INT_DIV_REMAINDER] = "intmod",
+};
+
+/* Math message descriptor flags; each prints a keyword only when set. */
+static const char * const math_saturate[2] = {
+    [0] = "",
+    [1] = "sat"
+};
+
+static const char * const math_signed[2] = {
+    [0] = "",
+    [1] = "signed"
+};
+
+static const char * const math_scalar[2] = {
+    [0] = "",
+    [1] = "scalar"
+};
+
+static const char * const math_precision[2] = {
+    [0] = "",
+    [1] = "partial_precision"
+};
+
+/* URB message opcode names. */
+static const char * const urb_opcode[2] = {
+    [0] = "urb_write",
+    [1] = "ff_sync",
+};
+
+/* URB swizzle control names; the fourth slot is left unset. */
+static const char * const urb_swizzle[4] = {
+    [BRW_URB_SWIZZLE_NONE] = "",
+    [BRW_URB_SWIZZLE_INTERLEAVE] = "interleave",
+    [BRW_URB_SWIZZLE_TRANSPOSE] = "transpose",
+};
+
+/* URB message flags; each prints a keyword only when set. */
+static const char * const urb_allocate[2] = {
+    [0] = "",
+    [1] = "allocate"
+};
+
+static const char * const urb_used[2] = {
+    [0] = "",
+    [1] = "used"
+};
+
+static const char * const urb_complete[2] = {
+    [0] = "",
+    [1] = "complete"
+};
+
+/*
+ * Sampler return format names, used by brw_disasm() for the oldest sampler
+ * message layout.  Value 1 has no name and is reported as invalid.
+ */
+static const char * const sampler_target_format[4] = {
+    [0] = "F",
+    [2] = "UD",
+    [3] = "D"
+};
+
+
+/*
+ * Number of characters written so far on the current output line.
+ * Advanced by string(), reset by newline(), consulted by pad().  File-level
+ * state shared by every call, regardless of which FILE is being written.
+ */
+static int column;
+
+/*
+ * Emit a NUL-terminated string to FILE and account for its length in the
+ * running column counter.  Always returns 0.
+ */
+static int string (FILE *file, const char *string)
+{
+    const size_t written = strlen (string);
+
+    fputs (string, file);
+    column += written;
+    return 0;
+}
+
+/*
+ * printf-style front end for string(): the text is rendered into a fixed
+ * local buffer (longer output is truncated) and then written through
+ * string() so the column counter stays in sync.  Always returns 0.
+ */
+static int format (FILE *f, const char *format, ...)
+{
+    char text[1024];
+    va_list ap;
+
+    va_start (ap, format);
+    vsnprintf (text, sizeof (text) - 1, format, ap);
+    va_end (ap);
+
+    return string (f, text);
+}
+
+/* Terminate the current output line and restart column counting.  Returns 0. */
+static int newline (FILE *f)
+{
+    column = 0;
+    putc ('\n', f);
+    return 0;
+}
+
+/*
+ * Advance the output to column C with blanks.  One blank is written
+ * unconditionally, so adjacent fields stay separated even when the line is
+ * already at or beyond C.  Returns 0.
+ */
+static int pad (FILE *f, int c)
+{
+    string (f, " ");
+    while (column < c)
+        string (f, " ");
+    return 0;
+}
+
+/*
+ * Print the name of a control field value.
+ *
+ *   file   output stream
+ *   name   human-readable field name, used only in the error message
+ *   ctrl   name table for the field; the caller must ensure ID is within
+ *          the table, no bounds check is possible here
+ *   id     raw field value used as the table index
+ *   space  optional separator state: when non-NULL and already non-zero a
+ *          blank is printed before the name; it is set to 1 once a
+ *          non-empty name has been printed
+ *
+ * An empty table entry prints nothing.  A NULL entry prints an
+ * "*** invalid ..." marker and returns 1; otherwise the result is 0.
+ *
+ * The error marker goes through format() like every other piece of output,
+ * so the column counter used by pad() stays correct, and the unsigned ID is
+ * printed with a matching conversion.
+ */
+static int control (FILE *file, const char *name, const char * const ctrl[],
+                    GLuint id, int *space)
+{
+    const char *text = ctrl[id];
+
+    if (!text) {
+        format (file, "*** invalid %s value %u ", name, id);
+        return 1;
+    }
+
+    if (text[0] == '\0')
+        return 0;
+
+    if (space && *space)
+        string (file, " ");
+    string (file, text);
+    if (space)
+        *space = 1;
+    return 0;
+}
+
+/*
+ * Print the mnemonic for opcode ID.  Returns 0 on success; when the opcode
+ * table has no name for ID an "invalid opcode" marker is printed and 1 is
+ * returned.  ID is not range-checked against the table.
+ */
+static int print_opcode (FILE *file, int id)
+{
+    const char *mnemonic = opcode[id].name;
+
+    if (mnemonic) {
+        string (file, mnemonic);
+        return 0;
+    }
+
+    format (file, "*** invalid opcode value %d ", id);
+    return 1;
+}
+
+/*
+ * Print a register name from its file and number.
+ *
+ * Architecture registers are decoded from the high nibble of the number
+ * (register kind) and the low nibble (index).  Other files print the file
+ * prefix from reg_file[] followed by the number; for the message register
+ * file bit 7 (the Compr4 bit) is masked off first.
+ *
+ * Returns -1 for "null" and "ip", which take no region/type suffix so the
+ * caller should stop printing the operand; 1 if the register file has no
+ * name; 0 otherwise.
+ */
+static int reg (FILE *file, GLuint _reg_file, GLuint _reg_nr)
+{
+    const char *arf_prefix;
+
+    if (_reg_file != BRW_ARCHITECTURE_REGISTER_FILE) {
+        int status;
+
+        /* Clear the Compr4 instruction compression bit. */
+        if (_reg_file == BRW_MESSAGE_REGISTER_FILE)
+            _reg_nr &= ~(1 << 7);
+
+        status = control (file, "src reg file", reg_file, _reg_file, NULL);
+        format (file, "%d", _reg_nr);
+        return status;
+    }
+
+    switch (_reg_nr & 0xf0) {
+    case BRW_ARF_NULL:
+        string (file, "null");
+        return -1;
+    case BRW_ARF_IP:
+        string (file, "ip");
+        return -1;
+    case BRW_ARF_ADDRESS:
+        arf_prefix = "a";
+        break;
+    case BRW_ARF_ACCUMULATOR:
+        arf_prefix = "acc";
+        break;
+    case BRW_ARF_FLAG:
+        arf_prefix = "f";
+        break;
+    case BRW_ARF_MASK:
+        arf_prefix = "mask";
+        break;
+    case BRW_ARF_MASK_STACK:
+        arf_prefix = "msd";
+        break;
+    case BRW_ARF_STATE:
+        arf_prefix = "sr";
+        break;
+    case BRW_ARF_CONTROL:
+        arf_prefix = "cr";
+        break;
+    case BRW_ARF_NOTIFICATION_COUNT:
+        arf_prefix = "n";
+        break;
+    default:
+        /* unknown kind: show the whole register number */
+        format (file, "ARF%d", _reg_nr);
+        return 0;
+    }
+
+    format (file, "%s%d", arf_prefix, _reg_nr & 0x0f);
+    return 0;
+}
+
+/*
+ * Print the destination operand of a one- or two-source instruction.
+ *
+ * Handles align1 direct, align1 register-indirect and align16 direct
+ * addressing; align16 indirect is reported as unsupported.
+ *
+ * Returns 0 on success, including the case where reg() printed a register
+ * that takes no suffix ("null"/"ip") and returned -1.  Returns non-zero
+ * when a field holds a value the name tables do not know or the addressing
+ * mode is unsupported, so brw_disasm() can report the failure.
+ */
+static int dest (FILE *file, struct brw_instruction *inst)
+{
+    int err = 0;
+    int elem_size;
+
+    if (inst->header.access_mode == BRW_ALIGN_1)
+    {
+        if (inst->bits1.da1.dest_address_mode == BRW_ADDRESS_DIRECT)
+        {
+            err |= reg (file, inst->bits1.da1.dest_reg_file, inst->bits1.da1.dest_reg_nr);
+            if (err == -1)
+                return 0;
+            if (inst->bits1.da1.dest_subreg_nr) {
+                /* reg_type_size[] has a zero slot for the unnamed type;
+                 * fall back to the raw sub-register number instead of
+                 * dividing by zero (the type lookup below flags the error). */
+                elem_size = reg_type_size[inst->bits1.da1.dest_reg_type];
+                format (file, ".%d", inst->bits1.da1.dest_subreg_nr /
+                                     (elem_size ? elem_size : 1));
+            }
+            string (file, "<");
+            err |= control (file, "horiz stride", horiz_stride, inst->bits1.da1.dest_horiz_stride, NULL);
+            string (file, ">");
+            err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da1.dest_reg_type, NULL);
+        }
+        else
+        {
+            string (file, "g[a0");
+            if (inst->bits1.ia1.dest_subreg_nr) {
+                elem_size = reg_type_size[inst->bits1.ia1.dest_reg_type];
+                format (file, ".%d", inst->bits1.ia1.dest_subreg_nr /
+                                     (elem_size ? elem_size : 1));
+            }
+            if (inst->bits1.ia1.dest_indirect_offset)
+                format (file, " %d", inst->bits1.ia1.dest_indirect_offset);
+            string (file, "]<");
+            err |= control (file, "horiz stride", horiz_stride, inst->bits1.ia1.dest_horiz_stride, NULL);
+            string (file, ">");
+            err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.ia1.dest_reg_type, NULL);
+        }
+    }
+    else
+    {
+        if (inst->bits1.da16.dest_address_mode == BRW_ADDRESS_DIRECT)
+        {
+            err |= reg (file, inst->bits1.da16.dest_reg_file, inst->bits1.da16.dest_reg_nr);
+            if (err == -1)
+                return 0;
+            if (inst->bits1.da16.dest_subreg_nr) {
+                elem_size = reg_type_size[inst->bits1.da16.dest_reg_type];
+                format (file, ".%d", inst->bits1.da16.dest_subreg_nr /
+                                     (elem_size ? elem_size : 1));
+            }
+            string (file, "<1>");
+            err |= control (file, "writemask", writemask, inst->bits1.da16.dest_writemask, NULL);
+            err |= control (file, "dest reg encoding", reg_encoding, inst->bits1.da16.dest_reg_type, NULL);
+        }
+        else
+        {
+            err = 1;
+            string (file, "Indirect align16 address mode not supported");
+        }
+    }
+
+    /* propagate the accumulated status instead of always claiming success */
+    return err;
+}
+
+/*
+ * Print the destination operand of a three-source instruction.
+ *
+ * The destination file is a single bit: set selects the message register
+ * file, clear the general register file.  The type suffix is always the
+ * float encoding.
+ *
+ * Returns 0 on success and non-zero when a field value has no name in the
+ * lookup tables.
+ */
+static int dest_3src (FILE *file, struct brw_instruction *inst)
+{
+    int err = 0;
+    /* named so it does not hide the file-level reg_file[] table */
+    uint32_t dst_file;
+
+    if (inst->bits1.da3src.dest_reg_file)
+        dst_file = BRW_MESSAGE_REGISTER_FILE;
+    else
+        dst_file = BRW_GENERAL_REGISTER_FILE;
+
+    err |= reg (file, dst_file, inst->bits1.da3src.dest_reg_nr);
+    if (err == -1)
+        return 0;
+    if (inst->bits1.da3src.dest_subreg_nr)
+        format (file, ".%d", inst->bits1.da3src.dest_subreg_nr);
+    string (file, "<1>");
+    err |= control (file, "writemask", writemask, inst->bits1.da3src.dest_writemask, NULL);
+    err |= control (file, "dest reg encoding", reg_encoding, BRW_REGISTER_TYPE_F, NULL);
+
+    /* propagate the accumulated status instead of always claiming success */
+    return err;
+}
+
+/*
+ * Print an align1 source region as "<vert,width,horiz>" from the three raw
+ * field encodings.  Returns non-zero if any encoding has no name.
+ */
+static int src_align1_region (FILE *file,
+                              GLuint _vert_stride, GLuint _width, GLuint _horiz_stride)
+{
+    int vs_err, w_err, hs_err;
+
+    string (file, "<");
+    vs_err = control (file, "vert stride", vert_stride, _vert_stride, NULL);
+    string (file, ",");
+    w_err = control (file, "width", width, _width, NULL);
+    string (file, ",");
+    hs_err = control (file, "horiz_stride", horiz_stride, _horiz_stride, NULL);
+    string (file, ">");
+
+    return vs_err | w_err | hs_err;
+}
+
+/*
+ * Print a directly addressed align1 source operand: optional negate/abs
+ * modifiers, register, sub-register (in units of the operand type),
+ * region and type suffix.
+ *
+ * Returns 0 on success, or when reg() printed a register that takes no
+ * suffix ("null"/"ip"); non-zero when any field value has no name.
+ */
+static int src_da1 (FILE *file, GLuint type, GLuint _reg_file,
+                    GLuint _vert_stride, GLuint _width, GLuint _horiz_stride,
+                    GLuint reg_num, GLuint sub_reg_num, GLuint __abs, GLuint _negate)
+{
+    int err = 0;
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    err |= reg (file, _reg_file, reg_num);
+    if (err == -1)
+        return 0;
+    if (sub_reg_num) {
+        /* use formal style like spec; reg_type_size[] has a zero slot for
+         * the unnamed type, so fall back to the raw byte offset rather
+         * than dividing by zero */
+        const int elem_size = reg_type_size[type];
+        format (file, ".%d", sub_reg_num / (elem_size ? elem_size : 1));
+    }
+    /* a bad stride/width encoding must be reported, not dropped */
+    err |= src_align1_region (file, _vert_stride, _width, _horiz_stride);
+    err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+    return err;
+}
+
+/*
+ * Print a register-indirect align1 source operand as
+ * "g[a0.<subreg> <imm>]<region><type>", preceded by optional negate/abs
+ * modifiers.  The address sub-register and immediate offset are only
+ * printed when non-zero.  _addr_mode is accepted for symmetry with the
+ * caller but not used.
+ *
+ * Returns non-zero when any field value has no name in the lookup tables.
+ */
+static int src_ia1 (FILE *file,
+                    GLuint type,
+                    GLuint _reg_file,
+                    GLint _addr_imm,
+                    GLuint _addr_subreg_nr,
+                    GLuint _negate,
+                    GLuint __abs,
+                    GLuint _addr_mode,
+                    GLuint _horiz_stride,
+                    GLuint _width,
+                    GLuint _vert_stride)
+{
+    int err = 0;
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    string (file, "g[a0");
+    if (_addr_subreg_nr)
+        format (file, ".%d", _addr_subreg_nr);
+    if (_addr_imm)
+        format (file, " %d", _addr_imm);
+    string (file, "]");
+    /* a bad stride/width encoding must be reported, not dropped */
+    err |= src_align1_region (file, _vert_stride, _width, _horiz_stride);
+    err |= control (file, "src reg encoding", reg_encoding, type, NULL);
+    return err;
+}
+
+/*
+ * Print a directly addressed align16 source operand: optional negate/abs
+ * modifiers, register, sub-register, "<vert,4,1>" region, swizzle and type
+ * suffix.
+ *
+ * Returns 0 on success, or when reg() printed a register that takes no
+ * suffix ("null"/"ip"); non-zero when any field value has no name.
+ */
+static int src_da16 (FILE *file,
+                     GLuint _reg_type,
+                     GLuint _reg_file,
+                     GLuint _vert_stride,
+                     GLuint _reg_nr,
+                     GLuint _subreg_nr,
+                     GLuint __abs,
+                     GLuint _negate,
+                     GLuint swz_x,
+                     GLuint swz_y,
+                     GLuint swz_z,
+                     GLuint swz_w)
+{
+    int err = 0;
+    err |= control (file, "negate", negate, _negate, NULL);
+    err |= control (file, "abs", _abs, __abs, NULL);
+
+    err |= reg (file, _reg_file, _reg_nr);
+    if (err == -1)
+        return 0;
+    if (_subreg_nr) {
+        /* bit4 for subreg number byte addressing. Make this same meaning as
+           in da1 case, so output looks consistent.  reg_type_size[] has a
+           zero slot for the unnamed type; avoid dividing by it (the type
+           lookup at the end flags the error). */
+        const int elem_size = reg_type_size[_reg_type];
+        format (file, ".%d", 16 / (elem_size ? elem_size : 1));
+    }
+    string (file, "<");
+    err |= control (file, "vert stride", vert_stride, _vert_stride, NULL);
+    string (file, ",4,1>");
+    /*
+     * Three kinds of swizzle display:
+     *  identity - nothing printed
+     *  1->all  - print the single channel
+     *  1->1     - print the mapping
+     */
+    if (swz_x == BRW_CHANNEL_X &&
+        swz_y == BRW_CHANNEL_Y &&
+        swz_z == BRW_CHANNEL_Z &&
+        swz_w == BRW_CHANNEL_W)
+    {
+        ;
+    }
+    else if (swz_x == swz_y && swz_x == swz_z && swz_x == swz_w)
+    {
+        string (file, ".");
+        err |= control (file, "channel select", chan_sel, swz_x, NULL);
+    }
+    else
+    {
+        string (file, ".");
+        err |= control (file, "channel select", chan_sel, swz_x, NULL);
+        err |= control (file, "channel select", chan_sel, swz_y, NULL);
+        err |= control (file, "channel select", chan_sel, swz_z, NULL);
+        err |= control (file, "channel select", chan_sel, swz_w, NULL);
+    }
+    err |= control (file, "src da16 reg type", reg_encoding, _reg_type, NULL);
+    return err;
+}
+
+/*
+ * Print source 0 of a three-source instruction: modifiers, general
+ * register, optional sub-register, fixed "<4,1,1>" region, float type
+ * suffix, then the swizzle.  Returns non-zero when a field value has no
+ * name in the lookup tables.
+ */
+static int src0_3src (FILE *file, struct brw_instruction *inst)
+{
+    GLuint chan[4];
+    int err = 0;
+    int i;
+
+    /* the swizzle packs four 2-bit channel selects, x in the low bits */
+    for (i = 0; i < 4; i++)
+        chan[i] = (inst->bits2.da3src.src0_swizzle >> (2 * i)) & 0x3;
+
+    err |= control (file, "negate", negate, inst->bits1.da3src.src0_negate, NULL);
+    err |= control (file, "abs", _abs, inst->bits1.da3src.src0_abs, NULL);
+
+    err |= reg (file, BRW_GENERAL_REGISTER_FILE, inst->bits2.da3src.src0_reg_nr);
+    if (err == -1)
+        return 0;
+
+    if (inst->bits2.da3src.src0_subreg_nr)
+        format (file, ".%d", inst->bits2.da3src.src0_subreg_nr);
+    string (file, "<4,1,1>");
+    err |= control (file, "src da16 reg type", reg_encoding,
+                    BRW_REGISTER_TYPE_F, NULL);
+
+    /* identity swizzle: nothing to print */
+    if (chan[0] == BRW_CHANNEL_X && chan[1] == BRW_CHANNEL_Y &&
+        chan[2] == BRW_CHANNEL_Z && chan[3] == BRW_CHANNEL_W)
+        return err;
+
+    string (file, ".");
+
+    /* one channel replicated to all four: print it once */
+    if (chan[0] == chan[1] && chan[0] == chan[2] && chan[0] == chan[3]) {
+        err |= control (file, "channel select", chan_sel, chan[0], NULL);
+        return err;
+    }
+
+    /* general case: print the full mapping */
+    for (i = 0; i < 4; i++)
+        err |= control (file, "channel select", chan_sel, chan[i], NULL);
+    return err;
+}
+
+/*
+ * Print source 1 of a three-source instruction.  Same layout as the other
+ * three-source operands, except that the sub-register number is split
+ * across two instruction words and reassembled here (low bits from bits2,
+ * high bits from bits3 shifted up by two).  Returns non-zero when a field
+ * value has no name in the lookup tables.
+ */
+static int src1_3src (FILE *file, struct brw_instruction *inst)
+{
+    GLuint chan[4];
+    GLuint subreg;
+    int err = 0;
+    int i;
+
+    /* the swizzle packs four 2-bit channel selects, x in the low bits */
+    for (i = 0; i < 4; i++)
+        chan[i] = (inst->bits2.da3src.src1_swizzle >> (2 * i)) & 0x3;
+
+    subreg = (inst->bits2.da3src.src1_subreg_nr_low |
+              (inst->bits3.da3src.src1_subreg_nr_high << 2));
+
+    err |= control (file, "negate", negate, inst->bits1.da3src.src1_negate,
+                    NULL);
+    err |= control (file, "abs", _abs, inst->bits1.da3src.src1_abs, NULL);
+
+    err |= reg (file, BRW_GENERAL_REGISTER_FILE,
+                inst->bits3.da3src.src1_reg_nr);
+    if (err == -1)
+        return 0;
+
+    if (subreg)
+        format (file, ".%d", subreg);
+    string (file, "<4,1,1>");
+    err |= control (file, "src da16 reg type", reg_encoding,
+                    BRW_REGISTER_TYPE_F, NULL);
+
+    /* identity swizzle: nothing to print */
+    if (chan[0] == BRW_CHANNEL_X && chan[1] == BRW_CHANNEL_Y &&
+        chan[2] == BRW_CHANNEL_Z && chan[3] == BRW_CHANNEL_W)
+        return err;
+
+    string (file, ".");
+
+    /* one channel replicated to all four: print it once */
+    if (chan[0] == chan[1] && chan[0] == chan[2] && chan[0] == chan[3]) {
+        err |= control (file, "channel select", chan_sel, chan[0], NULL);
+        return err;
+    }
+
+    /* general case: print the full mapping */
+    for (i = 0; i < 4; i++)
+        err |= control (file, "channel select", chan_sel, chan[i], NULL);
+    return err;
+}
+
+
+/*
+ * Print source 2 of a three-source instruction: modifiers, general
+ * register, optional sub-register, fixed "<4,1,1>" region, float type
+ * suffix, then the swizzle.  Returns non-zero when a field value has no
+ * name in the lookup tables.
+ */
+static int src2_3src (FILE *file, struct brw_instruction *inst)
+{
+    GLuint chan[4];
+    int err = 0;
+    int i;
+
+    /* the swizzle packs four 2-bit channel selects, x in the low bits */
+    for (i = 0; i < 4; i++)
+        chan[i] = (inst->bits3.da3src.src2_swizzle >> (2 * i)) & 0x3;
+
+    err |= control (file, "negate", negate, inst->bits1.da3src.src2_negate,
+                    NULL);
+    err |= control (file, "abs", _abs, inst->bits1.da3src.src2_abs, NULL);
+
+    err |= reg (file, BRW_GENERAL_REGISTER_FILE,
+                inst->bits3.da3src.src2_reg_nr);
+    if (err == -1)
+        return 0;
+
+    if (inst->bits3.da3src.src2_subreg_nr)
+        format (file, ".%d", inst->bits3.da3src.src2_subreg_nr);
+    string (file, "<4,1,1>");
+    err |= control (file, "src da16 reg type", reg_encoding,
+                    BRW_REGISTER_TYPE_F, NULL);
+
+    /* identity swizzle: nothing to print */
+    if (chan[0] == BRW_CHANNEL_X && chan[1] == BRW_CHANNEL_Y &&
+        chan[2] == BRW_CHANNEL_Z && chan[3] == BRW_CHANNEL_W)
+        return err;
+
+    string (file, ".");
+
+    /* one channel replicated to all four: print it once */
+    if (chan[0] == chan[1] && chan[0] == chan[2] && chan[0] == chan[3]) {
+        err |= control (file, "channel select", chan_sel, chan[0], NULL);
+        return err;
+    }
+
+    /* general case: print the full mapping */
+    for (i = 0; i < 4; i++)
+        err |= control (file, "channel select", chan_sel, chan[i], NULL);
+    return err;
+}
+
+/*
+ * Print the immediate operand stored in the last instruction word,
+ * interpreted according to TYPE, followed by the type suffix.  Types with
+ * no case here print nothing.  Always returns 0.
+ */
+static int imm (FILE *file, GLuint type, struct brw_instruction *inst) {
+    switch (type) {
+    case BRW_REGISTER_TYPE_UD:
+        format (file, "0x%08xUD", inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_D:
+        format (file, "%dD", inst->bits3.d);
+        break;
+    case BRW_REGISTER_TYPE_UW:
+        format (file, "0x%04xUW", (uint16_t) inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_W:
+        format (file, "%dW", (int16_t) inst->bits3.d);
+        break;
+    case BRW_REGISTER_TYPE_UB:
+        /* unsigned narrowing: a signed 8-bit cast would sign-extend values
+         * >= 0x80 and print them as 0xffffffXX */
+        format (file, "0x%02xUB", (uint8_t) inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_VF:
+        format (file, "Vector Float");
+        break;
+    case BRW_REGISTER_TYPE_V:
+        format (file, "0x%08xV", inst->bits3.ud);
+        break;
+    case BRW_REGISTER_TYPE_F:
+        format (file, "%-gF", inst->bits3.f);
+        break;
+    default:
+        /* no immediate form for this type: print nothing, as before */
+        break;
+    }
+    return 0;
+}
+
+/*
+ * Print source operand 0 of a one- or two-source instruction by picking the
+ * printer that matches its kind: immediate, align1 direct, align1 indirect
+ * or align16 direct.  Align16 indirect addressing is not supported and
+ * yields 1; otherwise the chosen printer's result is returned.
+ */
+static int src0 (FILE *file, struct brw_instruction *inst)
+{
+    if (inst->bits1.da1.src0_reg_file == BRW_IMMEDIATE_VALUE)
+        return imm (file, inst->bits1.da1.src0_reg_type, inst);
+
+    if (inst->header.access_mode == BRW_ALIGN_1) {
+        if (inst->bits2.da1.src0_address_mode != BRW_ADDRESS_DIRECT)
+            return src_ia1 (file,
+                            inst->bits1.ia1.src0_reg_type,
+                            inst->bits1.ia1.src0_reg_file,
+                            inst->bits2.ia1.src0_indirect_offset,
+                            inst->bits2.ia1.src0_subreg_nr,
+                            inst->bits2.ia1.src0_negate,
+                            inst->bits2.ia1.src0_abs,
+                            inst->bits2.ia1.src0_address_mode,
+                            inst->bits2.ia1.src0_horiz_stride,
+                            inst->bits2.ia1.src0_width,
+                            inst->bits2.ia1.src0_vert_stride);
+
+        return src_da1 (file,
+                        inst->bits1.da1.src0_reg_type,
+                        inst->bits1.da1.src0_reg_file,
+                        inst->bits2.da1.src0_vert_stride,
+                        inst->bits2.da1.src0_width,
+                        inst->bits2.da1.src0_horiz_stride,
+                        inst->bits2.da1.src0_reg_nr,
+                        inst->bits2.da1.src0_subreg_nr,
+                        inst->bits2.da1.src0_abs,
+                        inst->bits2.da1.src0_negate);
+    }
+
+    /* align16 from here on */
+    if (inst->bits2.da16.src0_address_mode != BRW_ADDRESS_DIRECT) {
+        string (file, "Indirect align16 address mode not supported");
+        return 1;
+    }
+
+    return src_da16 (file,
+                     inst->bits1.da16.src0_reg_type,
+                     inst->bits1.da16.src0_reg_file,
+                     inst->bits2.da16.src0_vert_stride,
+                     inst->bits2.da16.src0_reg_nr,
+                     inst->bits2.da16.src0_subreg_nr,
+                     inst->bits2.da16.src0_abs,
+                     inst->bits2.da16.src0_negate,
+                     inst->bits2.da16.src0_swz_x,
+                     inst->bits2.da16.src0_swz_y,
+                     inst->bits2.da16.src0_swz_z,
+                     inst->bits2.da16.src0_swz_w);
+}
+
+/*
+ * Print source operand 1 of a two-source instruction by picking the
+ * printer that matches its kind: immediate, align1 direct, align1 indirect
+ * or align16 direct.  Align16 indirect addressing is not supported and
+ * yields 1; otherwise the chosen printer's result is returned.
+ */
+static int src1 (FILE *file, struct brw_instruction *inst)
+{
+    if (inst->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE)
+        return imm (file, inst->bits1.da1.src1_reg_type, inst);
+
+    if (inst->header.access_mode == BRW_ALIGN_1) {
+        if (inst->bits3.da1.src1_address_mode != BRW_ADDRESS_DIRECT)
+            return src_ia1 (file,
+                            inst->bits1.ia1.src1_reg_type,
+                            inst->bits1.ia1.src1_reg_file,
+                            inst->bits3.ia1.src1_indirect_offset,
+                            inst->bits3.ia1.src1_subreg_nr,
+                            inst->bits3.ia1.src1_negate,
+                            inst->bits3.ia1.src1_abs,
+                            inst->bits3.ia1.src1_address_mode,
+                            inst->bits3.ia1.src1_horiz_stride,
+                            inst->bits3.ia1.src1_width,
+                            inst->bits3.ia1.src1_vert_stride);
+
+        return src_da1 (file,
+                        inst->bits1.da1.src1_reg_type,
+                        inst->bits1.da1.src1_reg_file,
+                        inst->bits3.da1.src1_vert_stride,
+                        inst->bits3.da1.src1_width,
+                        inst->bits3.da1.src1_horiz_stride,
+                        inst->bits3.da1.src1_reg_nr,
+                        inst->bits3.da1.src1_subreg_nr,
+                        inst->bits3.da1.src1_abs,
+                        inst->bits3.da1.src1_negate);
+    }
+
+    /* align16 from here on */
+    if (inst->bits3.da16.src1_address_mode != BRW_ADDRESS_DIRECT) {
+        string (file, "Indirect align16 address mode not supported");
+        return 1;
+    }
+
+    return src_da16 (file,
+                     inst->bits1.da16.src1_reg_type,
+                     inst->bits1.da16.src1_reg_file,
+                     inst->bits3.da16.src1_vert_stride,
+                     inst->bits3.da16.src1_reg_nr,
+                     inst->bits3.da16.src1_subreg_nr,
+                     inst->bits3.da16.src1_abs,
+                     inst->bits3.da16.src1_negate,
+                     inst->bits3.da16.src1_swz_x,
+                     inst->bits3.da16.src1_swz_y,
+                     inst->bits3.da16.src1_swz_z,
+                     inst->bits3.da16.src1_swz_w);
+}
+
+/*
+ * Numeric execution size for each encoding of the execution_size header
+ * field (the integer counterpart of the exec_size[] name table).  Only six
+ * entries exist, so indices above 5 are out of bounds.  Not static and not
+ * const: the symbol has external linkage.
+ */
+int esize[6] = {
+       [0] = 1,
+       [1] = 2,
+       [2] = 4,
+       [3] = 8,
+       [4] = 16,
+       [5] = 32,
+};
+
+/*
+ * Print the quarter/half selector implied by the compression control field.
+ *
+ * For an execution size of 8 the field selects a quarter (" 1Q".." 4Q");
+ * for 16 it selects a half (" 1H" for values below 2, " 2H" otherwise).
+ * Other execution sizes print nothing.  Always returns 0.
+ *
+ * The execution-size encoding is range-checked before indexing esize[],
+ * which has fewer entries than the field has encodings; an out-of-range
+ * encoding is treated as "no selector".
+ */
+static int qtr_ctrl(FILE *file, struct brw_instruction *inst)
+{
+    static const char * const quarter[4] = { " 1Q", " 2Q", " 3Q", " 4Q" };
+    const int qtr_ctl = inst->header.compression_control;
+    const unsigned size_code = inst->header.execution_size;
+    int channels = 0;   /* named so it does not hide the exec_size[] table */
+
+    if (size_code < sizeof (esize) / sizeof (esize[0]))
+        channels = esize[size_code];
+
+    if (channels == 8) {
+        if (qtr_ctl >= 0 && qtr_ctl < 4)
+            string (file, quarter[qtr_ctl]);
+    } else if (channels == 16) {
+        string (file, qtr_ctl < 2 ? " 1H" : " 2H");
+    }
+    return 0;
+}
+
+int brw_disasm (FILE *file, struct brw_instruction *inst, int gen)
+{
+    int        err = 0;
+    int space = 0;
+
+    if (inst->header.predicate_control) {
+       string (file, "(");
+       err |= control (file, "predicate inverse", pred_inv, inst->header.predicate_inverse, NULL);
+       format (file, "f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0);
+       if (inst->bits2.da1.flag_subreg_nr)
+           format (file, ".%d", inst->bits2.da1.flag_subreg_nr);
+       if (inst->header.access_mode == BRW_ALIGN_1)
+           err |= control (file, "predicate control align1", pred_ctrl_align1,
+                           inst->header.predicate_control, NULL);
+       else
+           err |= control (file, "predicate control align16", pred_ctrl_align16,
+                           inst->header.predicate_control, NULL);
+       string (file, ") ");
+    }
+
+    err |= print_opcode (file, inst->header.opcode);
+    err |= control (file, "saturate", saturate, inst->header.saturate, NULL);
+    err |= control (file, "debug control", debug_ctrl, inst->header.debug_control, NULL);
+
+    if (inst->header.opcode == BRW_OPCODE_MATH) {
+       string (file, " ");
+       err |= control (file, "function", math_function,
+                       inst->header.destreg__conditionalmod, NULL);
+    } else if (inst->header.opcode != BRW_OPCODE_SEND &&
+              inst->header.opcode != BRW_OPCODE_SENDC) {
+       err |= control (file, "conditional modifier", conditional_modifier,
+                       inst->header.destreg__conditionalmod, NULL);
+
+        /* If we're using the conditional modifier, print which flags reg is
+         * used for it.  Note that on gen6+, the embedded-condition SEL and
+         * control flow doesn't update flags.
+         */
+       if (inst->header.destreg__conditionalmod &&
+            (gen < 6 || (inst->header.opcode != BRW_OPCODE_SEL &&
+                         inst->header.opcode != BRW_OPCODE_IF &&
+                         inst->header.opcode != BRW_OPCODE_WHILE))) {
+           format (file, ".f%d", gen >= 7 ? inst->bits2.da1.flag_reg_nr : 0);
+           if (inst->bits2.da1.flag_subreg_nr)
+               format (file, ".%d", inst->bits2.da1.flag_subreg_nr);
+        }
+    }
+
+    if (inst->header.opcode != BRW_OPCODE_NOP) {
+       string (file, "(");
+       err |= control (file, "execution size", exec_size, inst->header.execution_size, NULL);
+       string (file, ")");
+    }
+
+    if (inst->header.opcode == BRW_OPCODE_SEND && gen < 6)
+       format (file, " %d", inst->header.destreg__conditionalmod);
+
+    if (opcode[inst->header.opcode].nsrc == 3) {
+       pad (file, 16);
+       err |= dest_3src (file, inst);
+
+       pad (file, 32);
+       err |= src0_3src (file, inst);
+
+       pad (file, 48);
+       err |= src1_3src (file, inst);
+
+       pad (file, 64);
+       err |= src2_3src (file, inst);
+    } else {
+       if (opcode[inst->header.opcode].ndst > 0) {
+         pad (file, 16);
+         err |= dest (file, inst);
+       } else if (gen == 7 && (inst->header.opcode == BRW_OPCODE_ELSE ||
+                              inst->header.opcode == BRW_OPCODE_ENDIF ||
+                              inst->header.opcode == BRW_OPCODE_WHILE)) {
+         format (file, " %d", inst->bits3.break_cont.jip);
+       } else if (gen == 6 && (inst->header.opcode == BRW_OPCODE_IF ||
+                              inst->header.opcode == BRW_OPCODE_ELSE ||
+                              inst->header.opcode == BRW_OPCODE_ENDIF ||
+                              inst->header.opcode == BRW_OPCODE_WHILE)) {
+         format (file, " %d", inst->bits1.branch_gen6.jump_count);
+       } else if ((gen >= 6 && (inst->header.opcode == BRW_OPCODE_BREAK ||
+                                inst->header.opcode == BRW_OPCODE_CONTINUE ||
+                                inst->header.opcode == BRW_OPCODE_HALT)) ||
+                  (gen == 7 && inst->header.opcode == BRW_OPCODE_IF)) {
+         format (file, " %d %d", inst->bits3.break_cont.uip, inst->bits3.break_cont.jip);
+       } else if (inst->header.opcode == BRW_OPCODE_JMPI) {
+         format (file, " %d", inst->bits3.d);
+       }
+
+       if (opcode[inst->header.opcode].nsrc > 0) {
+         pad (file, 32);
+         err |= src0 (file, inst);
+       }
+       if (opcode[inst->header.opcode].nsrc > 1) {
+         pad (file, 48);
+         err |= src1 (file, inst);
+       }
+    }
+
+    if (inst->header.opcode == BRW_OPCODE_SEND ||
+       inst->header.opcode == BRW_OPCODE_SENDC) {
+       enum brw_message_target target;
+
+       if (gen >= 6)
+           target = inst->header.destreg__conditionalmod;
+       else if (gen == 5)
+           target = inst->bits2.send_gen5.sfid;
+       else
+           target = inst->bits3.generic.msg_target;
+
+       newline (file);
+       pad (file, 16);
+       space = 0;
+
+       if (gen >= 6) {
+          err |= control (file, "target function", target_function_gen6,
+                          target, &space);
+       } else {
+          err |= control (file, "target function", target_function,
+                          target, &space);
+       }
+
+       switch (target) {
+       case BRW_SFID_MATH:
+           err |= control (file, "math function", math_function,
+                           inst->bits3.math.function, &space);
+           err |= control (file, "math saturate", math_saturate,
+                           inst->bits3.math.saturate, &space);
+           err |= control (file, "math signed", math_signed,
+                           inst->bits3.math.int_type, &space);
+           err |= control (file, "math scalar", math_scalar,
+                           inst->bits3.math.data_type, &space);
+           err |= control (file, "math precision", math_precision,
+                           inst->bits3.math.precision, &space);
+           break;
+       case BRW_SFID_SAMPLER:
+           if (gen >= 7) {
+               format (file, " (%d, %d, %d, %d)",
+                       inst->bits3.sampler_gen7.binding_table_index,
+                       inst->bits3.sampler_gen7.sampler,
+                       inst->bits3.sampler_gen7.msg_type,
+                       inst->bits3.sampler_gen7.simd_mode);
+           } else if (gen >= 5) {
+               format (file, " (%d, %d, %d, %d)",
+                       inst->bits3.sampler_gen5.binding_table_index,
+                       inst->bits3.sampler_gen5.sampler,
+                       inst->bits3.sampler_gen5.msg_type,
+                       inst->bits3.sampler_gen5.simd_mode);
+           } else if (0 /* FINISHME: is_g4x */) {
+               format (file, " (%d, %d)",
+                       inst->bits3.sampler_g4x.binding_table_index,
+                       inst->bits3.sampler_g4x.sampler);
+           } else {
+               format (file, " (%d, %d, ",
+                       inst->bits3.sampler.binding_table_index,
+                       inst->bits3.sampler.sampler);
+               err |= control (file, "sampler target format",
+                               sampler_target_format,
+                               inst->bits3.sampler.return_format, NULL);
+               string (file, ")");
+           }
+           break;
+       case BRW_SFID_DATAPORT_READ:
+           if (gen >= 6) {
+               format (file, " (%d, %d, %d, %d)",
+                       inst->bits3.gen6_dp.binding_table_index,
+                       inst->bits3.gen6_dp.msg_control,
+                       inst->bits3.gen6_dp.msg_type,
+                       inst->bits3.gen6_dp.send_commit_msg);
+           } else if (gen >= 5 /* FINISHME: || is_g4x */) {
+               format (file, " (%d, %d, %d)",
+                       inst->bits3.dp_read_gen5.binding_table_index,
+                       inst->bits3.dp_read_gen5.msg_control,
+                       inst->bits3.dp_read_gen5.msg_type);
+           } else {
+               format (file, " (%d, %d, %d)",
+                       inst->bits3.dp_read.binding_table_index,
+                       inst->bits3.dp_read.msg_control,
+                       inst->bits3.dp_read.msg_type);
+           }
+           break;
+
+       case BRW_SFID_DATAPORT_WRITE:
+           if (gen >= 7) {
+               format (file, " (");
+
+               err |= control (file, "DP rc message type",
+                               dp_rc_msg_type_gen6,
+                               inst->bits3.gen7_dp.msg_type, &space);
+
+               format (file, ", %d, %d, %d)",
+                       inst->bits3.gen7_dp.binding_table_index,
+                       inst->bits3.gen7_dp.msg_control,
+                       inst->bits3.gen7_dp.msg_type);
+           } else if (gen == 6) {
+               format (file, " (");
+
+               err |= control (file, "DP rc message type",
+                               dp_rc_msg_type_gen6,
+                               inst->bits3.gen6_dp.msg_type, &space);
+
+               format (file, ", %d, %d, %d, %d)",
+                       inst->bits3.gen6_dp.binding_table_index,
+                       inst->bits3.gen6_dp.msg_control,
+                       inst->bits3.gen6_dp.msg_type,
+                       inst->bits3.gen6_dp.send_commit_msg);
+           } else {
+               format (file, " (%d, %d, %d, %d)",
+                       inst->bits3.dp_write.binding_table_index,
+                       (inst->bits3.dp_write.last_render_target << 3) |
+                       inst->bits3.dp_write.msg_control,
+                       inst->bits3.dp_write.msg_type,
+                       inst->bits3.dp_write.send_commit_msg);
+           }
+           break;
+
+       case BRW_SFID_URB:
+           if (gen >= 5) {
+               format (file, " %d", inst->bits3.urb_gen5.offset);
+           } else {
+               format (file, " %d", inst->bits3.urb.offset);
+           }
+
+           space = 1;
+           if (gen >= 5) {
+               err |= control (file, "urb opcode", urb_opcode,
+                               inst->bits3.urb_gen5.opcode, &space);
+           }
+           err |= control (file, "urb swizzle", urb_swizzle,
+                           inst->bits3.urb.swizzle_control, &space);
+           err |= control (file, "urb allocate", urb_allocate,
+                           inst->bits3.urb.allocate, &space);
+           err |= control (file, "urb used", urb_used,
+                           inst->bits3.urb.used, &space);
+           err |= control (file, "urb complete", urb_complete,
+                           inst->bits3.urb.complete, &space);
+           break;
+       case BRW_SFID_THREAD_SPAWNER:
+           break;
+       case GEN7_SFID_DATAPORT_DATA_CACHE:
+           format (file, " (%d, %d, %d)",
+                   inst->bits3.gen7_dp.binding_table_index,
+                   inst->bits3.gen7_dp.msg_control,
+                   inst->bits3.gen7_dp.msg_type);
+           break;
+
+
+       default:
+           format (file, "unsupported target %d", target);
+           break;
+       }
+       if (space)
+           string (file, " ");
+       if (gen >= 5) {
+          format (file, "mlen %d",
+                  inst->bits3.generic_gen5.msg_length);
+          format (file, " rlen %d",
+                  inst->bits3.generic_gen5.response_length);
+       } else {
+          format (file, "mlen %d",
+                  inst->bits3.generic.msg_length);
+          format (file, " rlen %d",
+                  inst->bits3.generic.response_length);
+       }
+    }
+    pad (file, 64);
+    if (inst->header.opcode != BRW_OPCODE_NOP) {
+       string (file, "{");
+       space = 1;
+       err |= control(file, "access mode", access_mode, inst->header.access_mode, &space);
+       if (gen >= 6)
+           err |= control (file, "write enable control", wectrl, inst->header.mask_control, &space);
+       else
+           err |= control (file, "mask control", mask_ctrl, inst->header.mask_control, &space);
+       err |= control (file, "dependency control", dep_ctrl, inst->header.dependency_control, &space);
+
+       if (gen >= 6)
+           err |= qtr_ctrl (file, inst);
+       else {
+           if (inst->header.compression_control == BRW_COMPRESSION_COMPRESSED &&
+               opcode[inst->header.opcode].ndst > 0 &&
+               inst->bits1.da1.dest_reg_file == BRW_MESSAGE_REGISTER_FILE &&
+               inst->bits1.da1.dest_reg_nr & (1 << 7)) {
+               format (file, " compr4");
+           } else {
+               err |= control (file, "compression control", compr_ctrl,
+                               inst->header.compression_control, &space);
+           }
+       }
+
+       err |= control (file, "thread control", thread_ctrl, inst->header.thread_control, &space);
+       if (gen >= 6)
+           err |= control (file, "acc write control", accwr, inst->header.acc_wr_control, &space);
+       if (inst->header.opcode == BRW_OPCODE_SEND ||
+           inst->header.opcode == BRW_OPCODE_SENDC)
+           err |= control (file, "end of thread", end_of_thread,
+                           inst->bits3.generic.end_of_thread, &space);
+       if (space)
+           string (file, " ");
+       string (file, "}");
+    }
+    string (file, ";");
+    newline (file);
+    return err;
+}
diff --git a/src/gallium/drivers/ilo/shader/toy_compiler_reg.h b/src/gallium/drivers/ilo/shader/toy_compiler_reg.h
new file mode 100644 (file)
index 0000000..8c11b3a
--- /dev/null
@@ -0,0 +1,800 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef TOY_REG_H
+#define TOY_REG_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h" /* for assert() */
+#include "util/u_math.h" /* for union fi */
+
+/* a toy reg is 256-bit wide; TOY_REG_WIDTH is that width in bytes */
+#define TOY_REG_WIDTH        32
+
+/**
+ * Register files.
+ *
+ * TOY_FILE_VRF is the only virtual file (see toy_file_is_virtual()); every
+ * other file is treated as a hardware one.  TOY_FILE_COUNT is the number of
+ * files and is rejected as a file by the validation helpers.
+ */
+enum toy_file {
+   /* virtual register file */
+   TOY_FILE_VRF,
+
+   TOY_FILE_ARF,
+   TOY_FILE_GRF,
+   TOY_FILE_MRF,
+   TOY_FILE_IMM,
+
+   TOY_FILE_COUNT,
+};
+
+/**
+ * Register types.
+ *
+ * toy_type_size() reports 4 bytes for F, D, and UD, and 2 bytes for W and
+ * UW.  TOY_TYPE_V has no size and is accepted only for immediate source
+ * operands.  TOY_TYPE_COUNT is the number of types.
+ */
+enum toy_type {
+   TOY_TYPE_F,
+   TOY_TYPE_D,
+   TOY_TYPE_UD,
+   TOY_TYPE_W,
+   TOY_TYPE_UW,
+   TOY_TYPE_V, /* only valid for immediates */
+
+   TOY_TYPE_COUNT,
+};
+
+/**
+ * Register rectangles.  The three numbers stand for vertical stride, width,
+ * and horizontal stride respectively.
+ *
+ * Destination operands must use TOY_RECT_LINEAR (asserted by
+ * tdst_validate()); source operands are not restricted by tsrc_validate().
+ * TOY_RECT_COUNT is the number of rectangles.
+ */
+enum toy_rect {
+   TOY_RECT_LINEAR,
+   TOY_RECT_041,
+   TOY_RECT_010,
+   TOY_RECT_220,
+   TOY_RECT_440,
+   TOY_RECT_240,
+
+   TOY_RECT_COUNT,
+};
+
+/**
+ * Source swizzles.  They are compatible with TGSI_SWIZZLE_x and hardware
+ * values.
+ *
+ * The values are 0..3 so that each fits in one of the 2-bit swizzle_x/y/z/w
+ * fields of struct toy_src, and so that (1 << swizzle) gives the matching
+ * TOY_WRITEMASK_x bit.
+ */
+enum toy_swizzle {
+   TOY_SWIZZLE_X = 0,
+   TOY_SWIZZLE_Y = 1,
+   TOY_SWIZZLE_Z = 2,
+   TOY_SWIZZLE_W = 3,
+};
+
+/**
+ * Destination writemasks.  They are compatible with TGSI_WRITEMASK_x and
+ * hardware values.
+ *
+ * Each single-channel mask is (1 << TOY_SWIZZLE_x); the multi-channel names
+ * are the bitwise OR of the channels they list.  All values fit in the 4-bit
+ * writemask field of struct toy_dst.
+ */
+enum toy_writemask {
+   TOY_WRITEMASK_X    = (1 << TOY_SWIZZLE_X),
+   TOY_WRITEMASK_Y    = (1 << TOY_SWIZZLE_Y),
+   TOY_WRITEMASK_Z    = (1 << TOY_SWIZZLE_Z),
+   TOY_WRITEMASK_W    = (1 << TOY_SWIZZLE_W),
+   TOY_WRITEMASK_XY   = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y),
+   TOY_WRITEMASK_XZ   = (TOY_WRITEMASK_X | TOY_WRITEMASK_Z),
+   TOY_WRITEMASK_XW   = (TOY_WRITEMASK_X | TOY_WRITEMASK_W),
+   TOY_WRITEMASK_YZ   = (TOY_WRITEMASK_Y | TOY_WRITEMASK_Z),
+   TOY_WRITEMASK_YW   = (TOY_WRITEMASK_Y | TOY_WRITEMASK_W),
+   TOY_WRITEMASK_ZW   = (TOY_WRITEMASK_Z | TOY_WRITEMASK_W),
+   TOY_WRITEMASK_XYZ  = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y | TOY_WRITEMASK_Z),
+   TOY_WRITEMASK_XYW  = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y | TOY_WRITEMASK_W),
+   TOY_WRITEMASK_XZW  = (TOY_WRITEMASK_X | TOY_WRITEMASK_Z | TOY_WRITEMASK_W),
+   TOY_WRITEMASK_YZW  = (TOY_WRITEMASK_Y | TOY_WRITEMASK_Z | TOY_WRITEMASK_W),
+   TOY_WRITEMASK_XYZW = (TOY_WRITEMASK_X | TOY_WRITEMASK_Y |
+                         TOY_WRITEMASK_Z | TOY_WRITEMASK_W),
+};
+
+/**
+ * Destination operand.
+ *
+ * The bitfields add up to 32 bits and are followed by val32.  For
+ * non-immediate files the helpers in this header store a byte offset in
+ * val32 (reg * TOY_REG_WIDTH + subreg in bytes, see tdst()); for
+ * TOY_FILE_IMM, val32 holds the immediate value (see tdst_imm_w()).
+ */
+struct toy_dst {
+   unsigned file:3;              /* TOY_FILE_x */
+   unsigned type:3;              /* TOY_TYPE_x */
+   unsigned rect:3;              /* TOY_RECT_x */
+   unsigned indirect:1;          /* true or false */
+   unsigned indirect_subreg:6;   /* which subreg of a0? */
+
+   unsigned writemask:4;         /* TOY_WRITEMASK_x */
+   unsigned pad:12;
+
+   uint32_t val32;
+};
+
+/**
+ * Source operand.
+ *
+ * The bitfields add up to 32 bits and are followed by val32.  For
+ * non-immediate files the helpers in this header store a byte offset in
+ * val32 (reg * TOY_REG_WIDTH + subreg in bytes, see tsrc()); for
+ * TOY_FILE_IMM, val32 holds the raw 32-bit immediate (see tsrc_imm()).
+ */
+struct toy_src {
+   unsigned file:3;              /* TOY_FILE_x */
+   unsigned type:3;              /* TOY_TYPE_x */
+   unsigned rect:3;              /* TOY_RECT_x */
+   unsigned indirect:1;          /* true or false */
+   unsigned indirect_subreg:6;   /* which subreg of a0? */
+
+   unsigned swizzle_x:2;         /* TOY_SWIZZLE_x */
+   unsigned swizzle_y:2;         /* TOY_SWIZZLE_x */
+   unsigned swizzle_z:2;         /* TOY_SWIZZLE_x */
+   unsigned swizzle_w:2;         /* TOY_SWIZZLE_x */
+   unsigned absolute:1;          /* true or false */
+   unsigned negate:1;            /* true or false */
+   unsigned pad:6;
+
+   uint32_t val32;
+};
+
+/**
+ * Return true if the file is virtual.  TOY_FILE_VRF is the only virtual
+ * file.
+ */
+static inline bool
+toy_file_is_virtual(enum toy_file file)
+{
+   return (file == TOY_FILE_VRF);
+}
+
+/**
+ * Return true if the file is a hardware one.  This is simply the negation
+ * of toy_file_is_virtual(), so TOY_FILE_IMM counts as a hardware file too.
+ */
+static inline bool
+toy_file_is_hw(enum toy_file file)
+{
+   return !toy_file_is_virtual(file);
+}
+
+/**
+ * Return the size of the file.
+ *
+ * The size is in the same units as the val32 field of an operand, and is
+ * used by the validation helpers as an exclusive upper bound for val32.
+ * Both GRF and MRF report 256 registers of TOY_REG_WIDTH each.  Any other
+ * file trips an assertion and yields 0.
+ */
+static inline uint32_t
+toy_file_size(enum toy_file file)
+{
+   switch (file) {
+   case TOY_FILE_GRF:
+      return 256 * TOY_REG_WIDTH;
+   case TOY_FILE_MRF:
+      /* there is no MRF on GEN7+ */
+      return 256 * TOY_REG_WIDTH;
+   default:
+      assert(!"invalid toy file");
+      return 0;
+   }
+}
+
+/**
+ * Return the size of the type.
+ *
+ * The size is what tdst_offset()/tsrc_offset() multiply a subreg index by to
+ * advance val32: 4 for F, D, and UD; 2 for W and UW.  TOY_TYPE_V and unknown
+ * types trip an assertion and yield 0.
+ */
+static inline int
+toy_type_size(enum toy_type type)
+{
+   switch (type) {
+   case TOY_TYPE_F:
+   case TOY_TYPE_D:
+   case TOY_TYPE_UD:
+      return 4;
+   case TOY_TYPE_W:
+   case TOY_TYPE_UW:
+      return 2;
+   case TOY_TYPE_V:
+   default:
+      assert(!"invalid toy type");
+      return 0;
+   }
+}
+
+/**
+ * Return true if the destination operand is null, that is, it is in
+ * TOY_FILE_ARF with val32 equal to 0.  The other fields are ignored.
+ */
+static inline bool
+tdst_is_null(struct toy_dst dst)
+{
+   /* BRW_ARF_NULL happens to be 0 */
+   return (dst.file == TOY_FILE_ARF && dst.val32 == 0);
+}
+
+/**
+ * Validate the destination operand.
+ *
+ * All checks are assertions; the operand is returned unmodified so that the
+ * call can be used directly in a return statement.  The checks are:
+ *
+ *  - VRF, ARF, MRF, and IMM operands must not be indirect
+ *  - MRF operands, and direct GRF operands, must have val32 within
+ *    toy_file_size()
+ *  - IMM operands must be of TOY_TYPE_W
+ *  - the type must not be TOY_TYPE_V
+ *  - the rectangle must be TOY_RECT_LINEAR
+ *  - for non-IMM operands, val32 must be a multiple of the type size
+ */
+static inline struct toy_dst
+tdst_validate(struct toy_dst dst)
+{
+   switch (dst.file) {
+   case TOY_FILE_VRF:
+   case TOY_FILE_ARF:
+   case TOY_FILE_MRF:
+      assert(!dst.indirect);
+      if (dst.file == TOY_FILE_MRF)
+         assert(dst.val32 < toy_file_size(dst.file));
+      break;
+   case TOY_FILE_GRF:
+      if (!dst.indirect)
+         assert(dst.val32 < toy_file_size(dst.file));
+      break;
+   case TOY_FILE_IMM:
+      /* yes, dst can be IMM of type W (for IF/ELSE/ENDIF/WHILE) */
+      assert(!dst.indirect);
+      assert(dst.type == TOY_TYPE_W);
+      break;
+   default:
+      assert(!"invalid dst file");
+      break;
+   }
+
+   switch (dst.type) {
+   case TOY_TYPE_V:
+      assert(!"invalid dst type");
+      break;
+   default:
+      break;
+   }
+
+   assert(dst.rect == TOY_RECT_LINEAR);
+   if (dst.file != TOY_FILE_IMM)
+      assert(dst.val32 % toy_type_size(dst.type) == 0);
+
+   assert(dst.writemask <= TOY_WRITEMASK_XYZW);
+
+   return dst;
+}
+
+/**
+ * Change the type of the destination operand.  The modified copy is
+ * validated with tdst_validate() and returned; val32 is left as is.
+ */
+static inline struct toy_dst
+tdst_type(struct toy_dst dst, enum toy_type type)
+{
+   dst.type = type;
+   return tdst_validate(dst);
+}
+
+/**
+ * Change the type of the destination operand to TOY_TYPE_D.  This is a
+ * shorthand for tdst_type().
+ */
+static inline struct toy_dst
+tdst_d(struct toy_dst dst)
+{
+   return tdst_type(dst, TOY_TYPE_D);
+}
+
+/**
+ * Change the type of the destination operand to TOY_TYPE_UD.  This is a
+ * shorthand for tdst_type().
+ */
+static inline struct toy_dst
+tdst_ud(struct toy_dst dst)
+{
+   return tdst_type(dst, TOY_TYPE_UD);
+}
+
+/**
+ * Change the type of the destination operand to TOY_TYPE_W.  This is a
+ * shorthand for tdst_type().
+ */
+static inline struct toy_dst
+tdst_w(struct toy_dst dst)
+{
+   return tdst_type(dst, TOY_TYPE_W);
+}
+
+/**
+ * Change the type of the destination operand to TOY_TYPE_UW.  This is a
+ * shorthand for tdst_type().
+ */
+static inline struct toy_dst
+tdst_uw(struct toy_dst dst)
+{
+   return tdst_type(dst, TOY_TYPE_UW);
+}
+
+/**
+ * Change the rectangle of the destination operand.
+ *
+ * Note that tdst_validate() asserts that the rectangle of a destination is
+ * TOY_RECT_LINEAR, so any other value trips that assertion.
+ */
+static inline struct toy_dst
+tdst_rect(struct toy_dst dst, enum toy_rect rect)
+{
+   dst.rect = rect;
+   return tdst_validate(dst);
+}
+
+/**
+ * Apply writemask to the destination operand.  Note that the current
+ * writemask is honored.
+ *
+ * The result is the intersection (bitwise AND) of the current writemask and
+ * the given one; channels can be removed but never added.
+ */
+static inline struct toy_dst
+tdst_writemask(struct toy_dst dst, enum toy_writemask writemask)
+{
+   dst.writemask &= writemask;
+   return tdst_validate(dst);
+}
+
+/**
+ * Offset the destination operand.
+ *
+ * val32 is advanced by reg whole registers (TOY_REG_WIDTH each) plus subreg
+ * elements of the operand's current type.  Both may be negative.
+ */
+static inline struct toy_dst
+tdst_offset(struct toy_dst dst, int reg, int subreg)
+{
+   dst.val32 += reg * TOY_REG_WIDTH + subreg * toy_type_size(dst.type);
+   return tdst_validate(dst);
+}
+
+/**
+ * Construct a destination operand.
+ *
+ * Every field is set from the matching argument, the padding bits are
+ * cleared, and the result is checked with tdst_validate() before it is
+ * returned.
+ */
+static inline struct toy_dst
+tdst_full(enum toy_file file, enum toy_type type, enum toy_rect rect,
+          bool indirect, unsigned indirect_subreg,
+          enum toy_writemask writemask, uint32_t val32)
+{
+   struct toy_dst dst;
+
+   dst.file = file;
+   dst.type = type;
+   dst.rect = rect;
+   dst.indirect = indirect;
+   dst.indirect_subreg = indirect_subreg;
+   dst.writemask = writemask;
+   dst.pad = 0;
+
+   dst.val32 = val32;
+
+   return tdst_validate(dst);
+}
+
+/**
+ * Construct a null destination operand.
+ *
+ * The operand is in TOY_FILE_ARF with val32 of 0, which is what
+ * tdst_is_null() tests for.  It is of TOY_TYPE_F with a full writemask.
+ */
+static inline struct toy_dst
+tdst_null(void)
+{
+   static const struct toy_dst null_dst = {
+      .file = TOY_FILE_ARF,
+      .type = TOY_TYPE_F,
+      .rect = TOY_RECT_LINEAR,
+      .indirect = false,
+      .indirect_subreg = 0,
+      .writemask = TOY_WRITEMASK_XYZW,
+      .pad = 0,
+      .val32 = 0,
+   };
+
+   return null_dst;
+}
+
+/**
+ * Construct a destination operand from a source operand.
+ *
+ * The file, type, rectangle, indirection, and val32 are copied.  The
+ * writemask enables exactly the channels that the source swizzles refer to.
+ * The absolute and negate modifiers have no destination counterpart and are
+ * dropped.
+ */
+static inline struct toy_dst
+tdst_from(struct toy_src src)
+{
+   const enum toy_writemask writemask =
+      (1 << src.swizzle_x) |
+      (1 << src.swizzle_y) |
+      (1 << src.swizzle_z) |
+      (1 << src.swizzle_w);
+
+   return tdst_full(src.file, src.type, src.rect,
+         src.indirect, src.indirect_subreg, writemask, src.val32);
+}
+
+/**
+ * Construct a destination operand, assuming the type is TOY_TYPE_F, the
+ * rectangle is TOY_RECT_LINEAR, and the writemask is TOY_WRITEMASK_XYZW.
+ *
+ * The operand is direct.  val32 is set to reg * TOY_REG_WIDTH +
+ * subreg_in_bytes.
+ */
+static inline struct toy_dst
+tdst(enum toy_file file, unsigned reg, unsigned subreg_in_bytes)
+{
+   const enum toy_type type = TOY_TYPE_F;
+   const enum toy_rect rect = TOY_RECT_LINEAR;
+   const uint32_t val32 = reg * TOY_REG_WIDTH + subreg_in_bytes;
+
+   return tdst_full(file, type, rect,
+         false, 0, TOY_WRITEMASK_XYZW, val32);
+}
+
+/**
+ * Construct an immediate destination operand of type TOY_TYPE_W.
+ *
+ * The value is written through the .i member of union fi and read back
+ * through .ui to obtain the bits stored in val32.
+ */
+static inline struct toy_dst
+tdst_imm_w(int16_t w)
+{
+   const union fi fi = { .i = w };
+
+   return tdst_full(TOY_FILE_IMM, TOY_TYPE_W, TOY_RECT_LINEAR,
+         false, 0, TOY_WRITEMASK_XYZW, fi.ui);
+}
+
+/**
+ * Return true if the source operand is null, that is, it is in
+ * TOY_FILE_ARF with val32 equal to 0.  The other fields are ignored.
+ */
+static inline bool
+tsrc_is_null(struct toy_src src)
+{
+   /* BRW_ARF_NULL happens to be 0 */
+   return (src.file == TOY_FILE_ARF && src.val32 == 0);
+}
+
+/**
+ * Return true if the source operand is swizzled, that is, at least one of
+ * its swizzles differs from the identity swizzle XYZW.
+ */
+static inline bool
+tsrc_is_swizzled(struct toy_src src)
+{
+   return (src.swizzle_x != TOY_SWIZZLE_X ||
+           src.swizzle_y != TOY_SWIZZLE_Y ||
+           src.swizzle_z != TOY_SWIZZLE_Z ||
+           src.swizzle_w != TOY_SWIZZLE_W);
+}
+
+/**
+ * Return true if the source operand is swizzled to the same channel, that
+ * is, all four swizzles are equal (XXXX, YYYY, ZZZZ, or WWWW).
+ */
+static inline bool
+tsrc_is_swizzle1(struct toy_src src)
+{
+   return (src.swizzle_x == src.swizzle_y &&
+           src.swizzle_x == src.swizzle_z &&
+           src.swizzle_x == src.swizzle_w);
+}
+
+/**
+ * Validate the source operand.
+ *
+ * All checks are assertions; the operand is returned unmodified so that the
+ * call can be used directly in a return statement.  The checks are:
+ *
+ *  - VRF, ARF, MRF, and IMM operands must not be indirect
+ *  - MRF operands, and direct GRF operands, must have val32 within
+ *    toy_file_size()
+ *  - TOY_TYPE_V is allowed only for IMM operands
+ *  - for non-IMM operands, val32 must be a multiple of the type size
+ *
+ * Unlike destinations, the rectangle of a source is not restricted.
+ */
+static inline struct toy_src
+tsrc_validate(struct toy_src src)
+{
+   switch (src.file) {
+   case TOY_FILE_VRF:
+   case TOY_FILE_ARF:
+   case TOY_FILE_MRF:
+      assert(!src.indirect);
+      if (src.file == TOY_FILE_MRF)
+         assert(src.val32 < toy_file_size(src.file));
+      break;
+   case TOY_FILE_GRF:
+      if (!src.indirect)
+         assert(src.val32 < toy_file_size(src.file));
+      break;
+   case TOY_FILE_IMM:
+      assert(!src.indirect);
+      break;
+   default:
+      assert(!"invalid src file");
+      break;
+   }
+
+   switch (src.type) {
+   case TOY_TYPE_V:
+      assert(src.file == TOY_FILE_IMM);
+      break;
+   default:
+      break;
+   }
+
+   if (src.file != TOY_FILE_IMM)
+      assert(src.val32 % toy_type_size(src.type) == 0);
+
+   assert(src.swizzle_x < 4 && src.swizzle_y < 4 &&
+          src.swizzle_z < 4 && src.swizzle_w < 4);
+
+   return src;
+}
+
+/**
+ * Change the type of the source operand.  The modified copy is validated
+ * with tsrc_validate() and returned; val32 is left as is.
+ */
+static inline struct toy_src
+tsrc_type(struct toy_src src, enum toy_type type)
+{
+   src.type = type;
+   return tsrc_validate(src);
+}
+
+/**
+ * Change the type of the source operand to TOY_TYPE_D.  This is a shorthand
+ * for tsrc_type().
+ */
+static inline struct toy_src
+tsrc_d(struct toy_src src)
+{
+   return tsrc_type(src, TOY_TYPE_D);
+}
+
+/**
+ * Change the type of the source operand to TOY_TYPE_UD.  This is a
+ * shorthand for tsrc_type().
+ */
+static inline struct toy_src
+tsrc_ud(struct toy_src src)
+{
+   return tsrc_type(src, TOY_TYPE_UD);
+}
+
+/**
+ * Change the type of the source operand to TOY_TYPE_W.  This is a shorthand
+ * for tsrc_type().
+ */
+static inline struct toy_src
+tsrc_w(struct toy_src src)
+{
+   return tsrc_type(src, TOY_TYPE_W);
+}
+
+/**
+ * Change the type of the source operand to TOY_TYPE_UW.  This is a
+ * shorthand for tsrc_type().
+ */
+static inline struct toy_src
+tsrc_uw(struct toy_src src)
+{
+   return tsrc_type(src, TOY_TYPE_UW);
+}
+
+/**
+ * Change the rectangle of the source operand.  The modified copy is
+ * validated with tsrc_validate() and returned.
+ */
+static inline struct toy_src
+tsrc_rect(struct toy_src src, enum toy_rect rect)
+{
+   src.rect = rect;
+   return tsrc_validate(src);
+}
+
+/**
+ * Swizzle the source operand.  Note that the current swizzles are honored.
+ *
+ * The new swizzles are composed with the current ones: each argument picks
+ * one of the current four swizzles rather than a raw channel.  For example,
+ * applying (Y, Y, Y, Y) to a source that is currently swizzled WZYX gives
+ * ZZZZ.
+ */
+static inline struct toy_src
+tsrc_swizzle(struct toy_src src,
+             enum toy_swizzle swizzle_x, enum toy_swizzle swizzle_y,
+             enum toy_swizzle swizzle_z, enum toy_swizzle swizzle_w)
+{
+   const enum toy_swizzle current[4] = {
+      src.swizzle_x, src.swizzle_y,
+      src.swizzle_z, src.swizzle_w,
+   };
+
+   src.swizzle_x = current[swizzle_x];
+   src.swizzle_y = current[swizzle_y];
+   src.swizzle_z = current[swizzle_z];
+   src.swizzle_w = current[swizzle_w];
+
+   return tsrc_validate(src);
+}
+
+/**
+ * Swizzle the source operand to the same channel.  Note that the current
+ * swizzles are honored.
+ *
+ * This is tsrc_swizzle() with the same argument for all four channels.
+ */
+static inline struct toy_src
+tsrc_swizzle1(struct toy_src src, enum toy_swizzle swizzle)
+{
+   return tsrc_swizzle(src, swizzle, swizzle, swizzle, swizzle);
+}
+
+/**
+ * Set absolute and unset negate of the source operand.
+ *
+ * Any negation that was applied before is discarded.
+ */
+static inline struct toy_src
+tsrc_absolute(struct toy_src src)
+{
+   src.absolute = true;
+   src.negate = false;
+   return tsrc_validate(src);
+}
+
+/**
+ * Negate the source operand.
+ *
+ * The negate flag is toggled, so negating twice restores the original
+ * operand.  The absolute flag is left untouched.
+ */
+static inline struct toy_src
+tsrc_negate(struct toy_src src)
+{
+   src.negate = !src.negate;
+   return tsrc_validate(src);
+}
+
+/**
+ * Offset the source operand.
+ *
+ * val32 is advanced by reg whole registers (TOY_REG_WIDTH each) plus subreg
+ * elements of the operand's current type.  Both may be negative.
+ */
+static inline struct toy_src
+tsrc_offset(struct toy_src src, int reg, int subreg)
+{
+   src.val32 += reg * TOY_REG_WIDTH + subreg * toy_type_size(src.type);
+   return tsrc_validate(src);
+}
+
+/**
+ * Construct a source operand.
+ *
+ * Every field is set from the matching argument, the padding bits are
+ * cleared, and the result is checked with tsrc_validate() before it is
+ * returned.
+ */
+static inline struct toy_src
+tsrc_full(enum toy_file file, enum toy_type type,
+          enum toy_rect rect, bool indirect, unsigned indirect_subreg,
+          enum toy_swizzle swizzle_x, enum toy_swizzle swizzle_y,
+          enum toy_swizzle swizzle_z, enum toy_swizzle swizzle_w,
+          bool absolute, bool negate,
+          uint32_t val32)
+{
+   struct toy_src src;
+
+   src.file = file;
+   src.type = type;
+   src.rect = rect;
+   src.indirect = indirect;
+   src.indirect_subreg = indirect_subreg;
+   src.swizzle_x = swizzle_x;
+   src.swizzle_y = swizzle_y;
+   src.swizzle_z = swizzle_z;
+   src.swizzle_w = swizzle_w;
+   src.absolute = absolute;
+   src.negate = negate;
+   src.pad = 0;
+
+   src.val32 = val32;
+
+   return tsrc_validate(src);
+}
+
+/**
+ * Construct a null source operand.
+ *
+ * The operand is in TOY_FILE_ARF with val32 of 0, which is what
+ * tsrc_is_null() tests for.  It is of TOY_TYPE_F with the identity swizzle
+ * and no modifiers.
+ */
+static inline struct toy_src
+tsrc_null(void)
+{
+   static const struct toy_src null_src = {
+      .file = TOY_FILE_ARF,
+      .type = TOY_TYPE_F,
+      .rect = TOY_RECT_LINEAR,
+      .indirect = false,
+      .indirect_subreg = 0,
+      .swizzle_x = TOY_SWIZZLE_X,
+      .swizzle_y = TOY_SWIZZLE_Y,
+      .swizzle_z = TOY_SWIZZLE_Z,
+      .swizzle_w = TOY_SWIZZLE_W,
+      .absolute = false,
+      .negate = false,
+      .pad = 0,
+      .val32 = 0,
+   };
+
+   return null_src;
+}
+
+/**
+ * Construct a source operand from a destination operand.
+ *
+ * The file, type, rectangle, indirection, and val32 are copied; absolute
+ * and negate are cleared.  A full writemask maps to the identity swizzle.
+ * Otherwise, each channel enabled in the writemask reads itself, and each
+ * disabled channel reads the first enabled channel (X when the writemask is
+ * empty), so that the swizzles refer only to enabled channels.
+ */
+static inline struct toy_src
+tsrc_from(struct toy_dst dst)
+{
+   enum toy_swizzle swizzle[4];
+
+   if (dst.writemask == TOY_WRITEMASK_XYZW) {
+      swizzle[0] = TOY_SWIZZLE_X;
+      swizzle[1] = TOY_SWIZZLE_Y;
+      swizzle[2] = TOY_SWIZZLE_Z;
+      swizzle[3] = TOY_SWIZZLE_W;
+   }
+   else {
+      const enum toy_swizzle first =
+         (dst.writemask & TOY_WRITEMASK_X) ? TOY_SWIZZLE_X :
+         (dst.writemask & TOY_WRITEMASK_Y) ? TOY_SWIZZLE_Y :
+         (dst.writemask & TOY_WRITEMASK_Z) ? TOY_SWIZZLE_Z :
+         (dst.writemask & TOY_WRITEMASK_W) ? TOY_SWIZZLE_W :
+         TOY_SWIZZLE_X;
+
+      swizzle[0] = (dst.writemask & TOY_WRITEMASK_X) ? TOY_SWIZZLE_X : first;
+      swizzle[1] = (dst.writemask & TOY_WRITEMASK_Y) ? TOY_SWIZZLE_Y : first;
+      swizzle[2] = (dst.writemask & TOY_WRITEMASK_Z) ? TOY_SWIZZLE_Z : first;
+      swizzle[3] = (dst.writemask & TOY_WRITEMASK_W) ? TOY_SWIZZLE_W : first;
+   }
+
+   return tsrc_full(dst.file, dst.type, dst.rect,
+                    dst.indirect, dst.indirect_subreg,
+                    swizzle[0], swizzle[1], swizzle[2], swizzle[3],
+                    false, false, dst.val32);
+}
+
+/**
+ * Construct a source operand, assuming the type is TOY_TYPE_F, the
+ * rectangle is TOY_RECT_LINEAR, and no swizzles/absolute/negate.
+ *
+ * The operand is direct.  val32 is set to reg * TOY_REG_WIDTH +
+ * subreg_in_bytes.
+ */
+static inline struct toy_src
+tsrc(enum toy_file file, unsigned reg, unsigned subreg_in_bytes)
+{
+   const enum toy_type type = TOY_TYPE_F;
+   const enum toy_rect rect = TOY_RECT_LINEAR;
+   const uint32_t val32 = reg * TOY_REG_WIDTH + subreg_in_bytes;
+
+   return tsrc_full(file, type, rect, false, 0,
+                    TOY_SWIZZLE_X, TOY_SWIZZLE_Y,
+                    TOY_SWIZZLE_Z, TOY_SWIZZLE_W,
+                    false, false, val32);
+}
+
+/**
+ * Construct an immediate source operand.
+ *
+ * val32 is stored as is and is not converted according to type; callers
+ * pass the raw 32-bit pattern of the immediate.  The operand has a linear
+ * rectangle, the identity swizzle, and no modifiers.
+ */
+static inline struct toy_src
+tsrc_imm(enum toy_type type, uint32_t val32)
+{
+   return tsrc_full(TOY_FILE_IMM, type, TOY_RECT_LINEAR, false, 0,
+                    TOY_SWIZZLE_X, TOY_SWIZZLE_Y,
+                    TOY_SWIZZLE_Z, TOY_SWIZZLE_W,
+                    false, false, val32);
+}
+
+/**
+ * Construct an immediate source operand of type TOY_TYPE_F.
+ *
+ * The float is reinterpreted, not converted: union fi is used to obtain its
+ * bit pattern, which becomes val32.
+ */
+static inline struct toy_src
+tsrc_imm_f(float f)
+{
+   const union fi fi = { .f = f };
+   return tsrc_imm(TOY_TYPE_F, fi.ui);
+}
+
+/**
+ * Construct an immediate source operand of type TOY_TYPE_D.
+ *
+ * The value is written through the .i member of union fi and read back
+ * through .ui to obtain the bits stored in val32.
+ */
+static inline struct toy_src
+tsrc_imm_d(int32_t d)
+{
+   const union fi fi = { .i = d };
+   return tsrc_imm(TOY_TYPE_D, fi.ui);
+}
+
+/**
+ * Construct an immediate source operand of type TOY_TYPE_UD.  The value is
+ * stored in val32 unchanged.
+ */
+static inline struct toy_src
+tsrc_imm_ud(uint32_t ud)
+{
+   const union fi fi = { .ui = ud };
+   return tsrc_imm(TOY_TYPE_UD, fi.ui);
+}
+
+/**
+ * Construct an immediate source operand of type TOY_TYPE_W.
+ *
+ * The value is written through the .i member of union fi and read back
+ * through .ui to obtain the bits stored in val32.
+ */
+static inline struct toy_src
+tsrc_imm_w(int16_t w)
+{
+   const union fi fi = { .i = w };
+   return tsrc_imm(TOY_TYPE_W, fi.ui);
+}
+
+/**
+ * Construct an immediate source operand of type TOY_TYPE_UW.  The value is
+ * widened to 32 bits and stored in val32.
+ */
+static inline struct toy_src
+tsrc_imm_uw(uint16_t uw)
+{
+   const union fi fi = { .ui = uw };
+   return tsrc_imm(TOY_TYPE_UW, fi.ui);
+}
+
+/**
+ * Construct an immediate source operand of type TOY_TYPE_V.  The packed
+ * value is stored in val32 unchanged.
+ */
+static inline struct toy_src
+tsrc_imm_v(uint32_t v)
+{
+   return tsrc_imm(TOY_TYPE_V, v);
+}
+
+#endif /* TOY_REG_H */
diff --git a/src/gallium/drivers/ilo/shader/toy_helpers.h b/src/gallium/drivers/ilo/shader/toy_helpers.h
new file mode 100644 (file)
index 0000000..dca9fd7
--- /dev/null
@@ -0,0 +1,289 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef TOY_HELPERS_H
+#define TOY_HELPERS_H
+
+#include "toy_compiler.h"
+
+/**
+ * Transpose a dst operand.
+ *
+ * Instead of processing a single vertex with each of its attributes in one
+ * register, such as
+ *
+ *   r0 = [x0, y0, z0, w0]
+ *
+ * we want to process four vertices at a time
+ *
+ *   r0 = [x0, y0, z0, w0]
+ *   r1 = [x1, y1, z1, w1]
+ *   r2 = [x2, y2, z2, w2]
+ *   r3 = [x3, y3, z3, w3]
+ *
+ * but with the attribute data "transposed"
+ *
+ *   r0 = [x0, x1, x2, x3]
+ *   r1 = [y0, y1, y2, y3]
+ *   r2 = [z0, z1, z2, z3]
+ *   r3 = [w0, w1, w2, w3]
+ *
+ * This is also known as the SoA form.
+ *
+ * \p trans must point to an array of four operands, one per channel.  For a
+ * VRF operand, trans[i] is \p dst offset by i registers with a full
+ * writemask when channel i is enabled in dst.writemask, and a null operand
+ * otherwise.  A null ARF operand is copied to all four entries.  Any other
+ * file trips an assertion and yields four null operands.
+ */
+static inline void
+tdst_transpose(struct toy_dst dst, struct toy_dst *trans)
+{
+   int i;
+
+   switch (dst.file) {
+   case TOY_FILE_VRF:
+      assert(!dst.indirect);
+      for (i = 0; i < 4; i++) {
+         if (dst.writemask & (1 << i)) {
+            trans[i] = tdst_offset(dst, i, 0);
+            trans[i].writemask = TOY_WRITEMASK_XYZW;
+         }
+         else {
+            trans[i] = tdst_null();
+         }
+      }
+      break;
+   case TOY_FILE_ARF:
+      assert(tdst_is_null(dst));
+      for (i = 0; i < 4; i++)
+         trans[i] = dst;
+      break;
+   case TOY_FILE_GRF:
+   case TOY_FILE_MRF:
+   case TOY_FILE_IMM:
+   default:
+      assert(!"unexpected file in dst transposition");
+      for (i = 0; i < 4; i++)
+         trans[i] = tdst_null();
+      break;
+   }
+}
+
+/**
+ * Transpose a src operand.
+ *
+ * \p trans must point to an array of four operands, one per channel.  For a
+ * VRF operand, trans[i] is \p src offset by as many registers as the value
+ * of its i-th swizzle, with the swizzles reset to the identity; the other
+ * fields, including absolute and negate, are kept.  A null ARF operand or an
+ * immediate is copied to all four entries.  Any other file trips an
+ * assertion and yields four null operands.
+ */
+static inline void
+tsrc_transpose(struct toy_src src, struct toy_src *trans)
+{
+   const enum toy_swizzle swizzle[4] = {
+      src.swizzle_x, src.swizzle_y,
+      src.swizzle_z, src.swizzle_w,
+   };
+   int i;
+
+   switch (src.file) {
+   case TOY_FILE_VRF:
+      assert(!src.indirect);
+      for (i = 0; i < 4; i++) {
+         trans[i] = tsrc_offset(src, swizzle[i], 0);
+         trans[i].swizzle_x = TOY_SWIZZLE_X;
+         trans[i].swizzle_y = TOY_SWIZZLE_Y;
+         trans[i].swizzle_z = TOY_SWIZZLE_Z;
+         trans[i].swizzle_w = TOY_SWIZZLE_W;
+      }
+      break;
+   case TOY_FILE_ARF:
+      assert(tsrc_is_null(src));
+      /* fall through */
+   case TOY_FILE_IMM:
+      for (i = 0; i < 4; i++)
+         trans[i] = src;
+      break;
+   case TOY_FILE_GRF:
+   case TOY_FILE_MRF:
+   default:
+      assert(!"unexpected file in src transposition");
+      for (i = 0; i < 4; i++)
+         trans[i] = tsrc_null();
+      break;
+   }
+}
+
+/**
+ * Construct an immediate source operand of type TOY_TYPE_UD that holds a
+ * message descriptor.
+ *
+ * The descriptor is packed as
+ *
+ *   bit  31     eot
+ *   bits 28:25  message_length (1..15)
+ *   bits 24:20  response_length (0..16)
+ *   bit  19     header_present
+ *   bits 18:0   function_control
+ *
+ * \p tc is currently unused.  Out-of-range arguments trip an assertion.
+ */
+static inline struct toy_src
+tsrc_imm_mdesc(const struct toy_compiler *tc,
+               bool eot,
+               unsigned message_length,
+               unsigned response_length,
+               bool header_present,
+               uint32_t function_control)
+{
+   uint32_t desc;
+
+   assert(message_length >= 1 && message_length <= 15);
+   /* response_length is unsigned; only the upper bound needs checking */
+   assert(response_length <= 16);
+   assert(function_control < 1u << 19);
+
+   /*
+    * The bools are promoted to (signed) int; cast them so that shifting eot
+    * into bit 31 is a well-defined unsigned shift.
+    */
+   desc = (uint32_t) eot << 31 |
+          message_length << 25 |
+          response_length << 20 |
+          (uint32_t) header_present << 19 |
+          function_control;
+
+   return tsrc_imm_ud(desc);
+}
+
+/**
+ * Construct an immediate message descriptor for the sampler.
+ *
+ * The function control is packed as
+ *
+ *   GEN7+:   simd_mode at bits 18:17, message_type at bits 16:12
+ *   earlier: simd_mode at bits 17:16, message_type at bits 15:12
+ *
+ * with sampler_index at bits 11:8 and binding_table_index at bits 7:0 in
+ * both cases.  It is then combined with the message and response lengths
+ * by tsrc_imm_mdesc(), with the end-of-thread bit cleared.  Out-of-range
+ * arguments trip an assertion.
+ */
+static inline struct toy_src
+tsrc_imm_mdesc_sampler(const struct toy_compiler *tc,
+                       unsigned message_length,
+                       unsigned response_length,
+                       bool header_present,
+                       unsigned simd_mode,
+                       unsigned message_type,
+                       unsigned sampler_index,
+                       unsigned binding_table_index)
+{
+   const bool eot = false;
+   uint32_t ctrl;
+
+   assert(simd_mode < 4);
+   assert(sampler_index < 16);
+   assert(binding_table_index < 256);
+
+   if (tc->gen >= ILO_GEN(7)) {
+      /* five bits are available before simd_mode starts at bit 17 */
+      assert(message_type < 32);
+
+      ctrl = simd_mode << 17 |
+             message_type << 12 |
+             sampler_index << 8 |
+             binding_table_index;
+   }
+   else {
+      /* four bits are available before simd_mode starts at bit 16 */
+      assert(message_type < 16);
+
+      ctrl = simd_mode << 16 |
+             message_type << 12 |
+             sampler_index << 8 |
+             binding_table_index;
+   }
+
+   return tsrc_imm_mdesc(tc, eot, message_length,
+         response_length, header_present, ctrl);
+}
+
+/**
+ * Build the message descriptor immediate for a data port message.
+ *
+ * Before ILO_GEN(7) the function control is
+ *
+ *   send_write_commit_message << 17 | message_type << 13 |
+ *   message_specific_control (bits 12:8) | binding_table_index
+ *
+ * and the write-commit bit is accepted only together with
+ * GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE.  From ILO_GEN(7) on it is
+ *
+ *   message_type << 14 | message_specific_control (bits 13:8) |
+ *   binding_table_index
+ *
+ * and the write-commit bit must not be requested.
+ *
+ * Note that message_specific_control is expected to be already shifted into
+ * place by the caller.
+ */
+static inline struct toy_src
+tsrc_imm_mdesc_data_port(const struct toy_compiler *tc,
+                         bool eot,
+                         unsigned message_length,
+                         unsigned response_length,
+                         bool header_present,
+                         bool send_write_commit_message,
+                         unsigned message_type,
+                         unsigned message_specific_control,
+                         unsigned binding_table_index)
+{
+   uint32_t func_ctrl;
+
+   if (tc->gen < ILO_GEN(7)) {
+      assert(!send_write_commit_message ||
+             message_type == GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE);
+      assert((message_specific_control & 0x1f00) == message_specific_control);
+
+      func_ctrl = send_write_commit_message << 17 |
+                  message_type << 13 |
+                  (message_specific_control & 0x1f00) |
+                  binding_table_index;
+   }
+   else {
+      assert(!send_write_commit_message);
+      assert((message_specific_control & 0x3f00) == message_specific_control);
+
+      func_ctrl = message_type << 14 |
+                  (message_specific_control & 0x3f00) |
+                  binding_table_index;
+   }
+
+   return tsrc_imm_mdesc(tc, eot, message_length,
+         response_length, header_present, func_ctrl);
+}
+
+/**
+ * Build the message descriptor immediate for a data port scratch message.
+ * Only valid when tc->gen >= ILO_GEN(7).
+ *
+ * The function control is packed as
+ *
+ *   bit  18     always set
+ *   bit  17     write_type
+ *   bit  16     dword_mode
+ *   bit  15     invalidate_after_read
+ *   bits 13:12  num_registers - 1 (num_registers must be 1, 2, or 4)
+ *   bits 11:0   hword_offset
+ *
+ * The message always carries a header and never sets EOT.
+ */
+static inline struct toy_src
+tsrc_imm_mdesc_data_port_scratch(const struct toy_compiler *tc,
+                                 unsigned message_length,
+                                 unsigned response_length,
+                                 bool write_type,
+                                 bool dword_mode,
+                                 bool invalidate_after_read,
+                                 int num_registers,
+                                 int hword_offset)
+{
+   const bool eot = false;
+   const bool header_present = true;
+   uint32_t ctrl;
+
+   assert(tc->gen >= ILO_GEN(7));
+   assert(num_registers == 1 || num_registers == 2 || num_registers == 4);
+   /*
+    * hword_offset is signed and is OR-ed in unshifted: a negative or too
+    * large value would silently overwrite the fields at bit 12 and above.
+    */
+   assert(hword_offset >= 0 && hword_offset < (1 << 12));
+
+   ctrl = 1 << 18 |
+          write_type << 17 |
+          dword_mode << 16 |
+          invalidate_after_read << 15 |
+          (num_registers - 1) << 12 |
+          hword_offset;
+
+   return tsrc_imm_mdesc(tc, eot, message_length,
+         response_length, header_present, ctrl);
+}
+
+/**
+ * Build the message descriptor immediate for a URB message.  The message
+ * always carries a header.
+ *
+ * Before ILO_GEN(7) the function control is
+ *
+ *   complete << 15 | used << 14 | allocate << 13 |
+ *   swizzle_control << 10 | global_offset << 4 | urb_opcode
+ *
+ * From ILO_GEN(7) on, \p used and \p allocate are ignored, the per-slot
+ * offset bit (bit 16) is always clear, and the layout is
+ *
+ *   complete << 15 | swizzle_control << 14 | global_offset << 3 | urb_opcode
+ */
+static inline struct toy_src
+tsrc_imm_mdesc_urb(const struct toy_compiler *tc,
+                   bool eot,
+                   unsigned message_length,
+                   unsigned response_length,
+                   bool complete,
+                   bool used,
+                   bool allocate,
+                   unsigned swizzle_control,
+                   unsigned global_offset,
+                   unsigned urb_opcode)
+{
+   uint32_t func_ctrl;
+
+   if (tc->gen < ILO_GEN(7)) {
+      func_ctrl = complete << 15 |
+                  used << 14 |
+                  allocate << 13 |
+                  swizzle_control << 10 |
+                  global_offset << 4 |
+                  urb_opcode;
+   }
+   else {
+      const bool per_slot_offset = false;
+
+      func_ctrl = per_slot_offset << 16 |
+                  complete << 15 |
+                  swizzle_control << 14 |
+                  global_offset << 3 |
+                  urb_opcode;
+   }
+
+   return tsrc_imm_mdesc(tc, eot, message_length,
+         response_length, true, func_ctrl);
+}
+
+#endif /* TOY_HELPERS_H */
diff --git a/src/gallium/drivers/ilo/shader/toy_legalize.c b/src/gallium/drivers/ilo/shader/toy_legalize.c
new file mode 100644 (file)
index 0000000..04f2a25
--- /dev/null
@@ -0,0 +1,632 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "pipe/p_shader_tokens.h"
+#include "toy_compiler.h"
+#include "toy_tgsi.h"
+#include "toy_helpers.h"
+#include "toy_legalize.h"
+
+/**
+ * Turn a virtual instruction into BRW_OPCODE_SEND or BRW_OPCODE_SENDC.
+ *
+ * \param tc     the compiler (not touched by this function)
+ * \param inst   the instruction to rewrite; its opcode must be >= 128, its
+ *               thread control must be 0, and it must not have a condition
+ *               modifier yet
+ * \param sendc  pick BRW_OPCODE_SENDC instead of BRW_OPCODE_SEND
+ * \param sfid   value stored in the cond_modifier field of the instruction
+ */
+void
+toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst,
+                           bool sendc, unsigned sfid)
+{
+   /* check all preconditions up front */
+   assert(inst->opcode >= 128);
+   /* thread control is reserved */
+   assert(inst->thread_ctrl == 0);
+   assert(inst->cond_modifier == BRW_CONDITIONAL_NONE);
+
+   if (sendc)
+      inst->opcode = BRW_OPCODE_SENDC;
+   else
+      inst->opcode = BRW_OPCODE_SEND;
+
+   /* the SFID is carried in the condition modifier field */
+   inst->cond_modifier = sfid;
+}
+
+/**
+ * Map a virtual math opcode (TOY_OPCODE_*) to the matching
+ * BRW_MATH_FUNCTION_* value.  An opcode that is not a math opcode asserts
+ * and yields -1.
+ */
+static int
+math_op_to_func(unsigned opcode)
+{
+   int func;
+
+   switch (opcode) {
+   case TOY_OPCODE_INV:
+      func = BRW_MATH_FUNCTION_INV;
+      break;
+   case TOY_OPCODE_LOG:
+      func = BRW_MATH_FUNCTION_LOG;
+      break;
+   case TOY_OPCODE_EXP:
+      func = BRW_MATH_FUNCTION_EXP;
+      break;
+   case TOY_OPCODE_SQRT:
+      func = BRW_MATH_FUNCTION_SQRT;
+      break;
+   case TOY_OPCODE_RSQ:
+      func = BRW_MATH_FUNCTION_RSQ;
+      break;
+   case TOY_OPCODE_SIN:
+      func = BRW_MATH_FUNCTION_SIN;
+      break;
+   case TOY_OPCODE_COS:
+      func = BRW_MATH_FUNCTION_COS;
+      break;
+   case TOY_OPCODE_FDIV:
+      func = BRW_MATH_FUNCTION_FDIV;
+      break;
+   case TOY_OPCODE_POW:
+      func = BRW_MATH_FUNCTION_POW;
+      break;
+   case TOY_OPCODE_INT_DIV_QUOTIENT:
+      func = BRW_MATH_FUNCTION_INT_DIV_QUOTIENT;
+      break;
+   case TOY_OPCODE_INT_DIV_REMAINDER:
+      func = BRW_MATH_FUNCTION_INT_DIV_REMAINDER;
+      break;
+   default:
+      assert(!"unknown math opcode");
+      func = -1;
+      break;
+   }
+
+   return func;
+}
+
+/**
+ * Lower virtual math opcodes to BRW_OPCODE_MATH.
+ *
+ * Three things happen, in this order:
+ *
+ *  1. Every non-null source that is not a plain virtual register (i.e. it
+ *     is swizzled, has a source modifier, or lives in a non-virtual file) is
+ *     first copied to a fresh temporary with a MOV, and the instruction is
+ *     made to read the temporary instead.
+ *  2. The math function selected by the virtual opcode is stored in
+ *     inst->cond_modifier, the opcode becomes BRW_OPCODE_MATH, and
+ *     tc_move_inst() is called on the instruction (presumably so that it
+ *     ends up after the MOVs just emitted -- confirm against tc_move_inst()).
+ *  3. If the destination has a partial writemask, the MATH writes to a
+ *     temporary and a trailing MOV, carrying the same predicate control,
+ *     copies the temporary to the original destination.
+ *
+ * \param tc    the compiler, used to allocate temporaries and emit MOVs
+ * \param inst  the instruction to lower; it must have no condition modifier
+ *              and a zero thread control
+ */
+void
+toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   struct toy_dst tmp;
+   int i;
+
+   /* see commit 250770b74d33bb8625c780a74a89477af033d13a */
+   for (i = 0; i < Elements(inst->src); i++) {
+      /* operands are packed: the first null one ends the list */
+      if (tsrc_is_null(inst->src[i]))
+         break;
+
+      /* no swizzling in align1 */
+      /* XXX how about source modifiers? */
+      if (toy_file_is_virtual(inst->src[i].file) &&
+          !tsrc_is_swizzled(inst->src[i]) &&
+          !inst->src[i].absolute &&
+          !inst->src[i].negate)
+         continue;
+
+      /* resolve swizzle/modifiers with a MOV into a same-typed temporary */
+      tmp = tdst_type(tc_alloc_tmp(tc), inst->src[i].type);
+      tc_MOV(tc, tmp, inst->src[i]);
+      inst->src[i] = tsrc_from(tmp);
+   }
+
+   /* FC[0:3] */
+   assert(inst->cond_modifier == BRW_CONDITIONAL_NONE);
+   /* must be read before inst->opcode is overwritten below */
+   inst->cond_modifier = math_op_to_func(inst->opcode);
+   /* FC[4:5] */
+   assert(inst->thread_ctrl == 0);
+   inst->thread_ctrl = 0;
+
+   inst->opcode = BRW_OPCODE_MATH;
+   tc_move_inst(tc, inst);
+
+   /* no writemask in align1 */
+   if (inst->dst.writemask != TOY_WRITEMASK_XYZW) {
+      struct toy_dst dst = inst->dst;
+      struct toy_inst *inst2;
+
+      /* let MATH write a full temporary of the destination's type ... */
+      tmp = tc_alloc_tmp(tc);
+      tmp.type = inst->dst.type;
+      inst->dst = tmp;
+
+      /* ... and apply the writemask (and predicate) with a MOV */
+      inst2 = tc_MOV(tc, dst, tsrc_from(tmp));
+      inst2->pred_ctrl = inst->pred_ctrl;
+   }
+}
+
+/**
+ * Return the 32-bit pattern of an immediate after taking its absolute
+ * value.
+ *
+ * \param imm32  raw 32-bit pattern of the immediate
+ * \param type   type the pattern is to be interpreted as
+ *
+ * TOY_TYPE_F, TOY_TYPE_D, and TOY_TYPE_W are handled; TOY_TYPE_V asserts;
+ * every other type is returned unchanged.
+ */
+static uint32_t
+absolute_imm(uint32_t imm32, enum toy_type type)
+{
+   union fi val = { .ui = imm32 };
+
+   switch (type) {
+   case TOY_TYPE_F:
+      val.f = fabsf(val.f);
+      break;
+   case TOY_TYPE_D:
+      /*
+       * negate in the unsigned domain: -INT32_MIN overflows as a signed
+       * operation, while the unsigned form wraps to the same bit pattern
+       */
+      if (val.i < 0)
+         val.ui = 0u - val.ui;
+      break;
+   case TOY_TYPE_W:
+      /* only the low 16 bits hold the value; int16_t promotes to int */
+      if ((int16_t) (val.ui & 0xffff) < 0)
+         val.i = -((int16_t) (val.ui & 0xffff));
+      break;
+   case TOY_TYPE_V:
+      assert(!"cannot take absoulte of immediates of type V");
+      break;
+   default:
+      break;
+   }
+
+   return val.ui;
+}
+
+/**
+ * Return the 32-bit pattern of an immediate after negating it.
+ *
+ * \param imm32  raw 32-bit pattern of the immediate
+ * \param type   type the pattern is to be interpreted as
+ *
+ * TOY_TYPE_F, TOY_TYPE_D/UD, and TOY_TYPE_W/UW are handled; any other type
+ * asserts and the pattern is returned unchanged.
+ */
+static uint32_t
+negate_imm(uint32_t imm32, enum toy_type type)
+{
+   union fi val = { .ui = imm32 };
+
+   switch (type) {
+   case TOY_TYPE_F:
+      val.f = -val.f;
+      break;
+   case TOY_TYPE_D:
+   case TOY_TYPE_UD:
+      /*
+       * two's complement negation done on the unsigned view: same bit
+       * pattern as the signed negation, but well-defined for 0x80000000
+       */
+      val.ui = 0u - val.ui;
+      break;
+   case TOY_TYPE_W:
+   case TOY_TYPE_UW:
+      /* only the low 16 bits hold the value; int16_t promotes to int */
+      val.i = -((int16_t) (val.ui & 0xffff));
+      break;
+   default:
+      assert(!"negate immediate of unknown type");
+      break;
+   }
+
+   return val.ui;
+}
+
+/**
+ * Make the immediate operands of an instruction acceptable.
+ *
+ * For each immediate source, the absolute and negate modifiers are folded
+ * into the value itself.  An immediate that is not the last non-null
+ * operand is additionally copied to a temporary with a MOV, and the
+ * instruction reads the temporary instead.  When at least one such MOV was
+ * emitted, tc_move_inst() is called on the instruction (presumably to place
+ * it after the MOVs -- confirm against tc_move_inst()).
+ */
+static void
+validate_imm(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   bool move_inst = false;
+   int i;
+
+   for (i = 0; i < Elements(inst->src); i++) {
+      struct toy_dst tmp;
+
+      /* operands are packed: the first null one ends the list */
+      if (tsrc_is_null(inst->src[i]))
+         break;
+
+      if (inst->src[i].file != TOY_FILE_IMM)
+         continue;
+
+      /* fold |imm| first, then -imm, so that -|imm| is evaluated correctly */
+      if (inst->src[i].absolute) {
+         inst->src[i].val32 =
+            absolute_imm(inst->src[i].val32, inst->src[i].type);
+         inst->src[i].absolute = false;
+      }
+
+      if (inst->src[i].negate) {
+         inst->src[i].val32 =
+            negate_imm(inst->src[i].val32, inst->src[i].type);
+         inst->src[i].negate = false;
+      }
+
+      /* this is the last operand */
+      if (i + 1 == Elements(inst->src) || tsrc_is_null(inst->src[i + 1]))
+         break;
+
+      /* need to use a temp if this imm is not the last operand */
+      /* TODO we should simply swap the operands if the op is commutative */
+      tmp = tc_alloc_tmp(tc);
+      tmp = tdst_type(tmp, inst->src[i].type);
+      tc_MOV(tc, tmp, inst->src[i]);
+      inst->src[i] = tsrc_from(tmp);
+
+      move_inst = true;
+   }
+
+   if (move_inst)
+      tc_move_inst(tc, inst);
+}
+
+/**
+ * Lower an integer (D or UD) MUL to a MUL/MACH/MOV sequence that goes
+ * through the accumulator, and discard the original instruction.  MULs of
+ * any other destination type are left untouched.
+ */
+static void
+lower_opcode_mul(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   const enum toy_type inst_type = inst->dst.type;
+   /* accumulator 0, typed like the destination */
+   const struct toy_dst acc0 =
+      tdst_type(tdst(TOY_FILE_ARF, BRW_ARF_ACCUMULATOR, 0), inst_type);
+   struct toy_inst *inst2;
+
+   /* only need to take care of integer multiplications */
+   if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D)
+      return;
+
+   /* acc0 = (src0 & 0x0000ffff) * src1 */
+   tc_MUL(tc, acc0, inst->src[0], inst->src[1]);
+
+   /* acc0 = (src0 & 0xffff0000) * src1 + acc0 */
+   inst2 = tc_add2(tc, BRW_OPCODE_MACH, tdst_type(tdst_null(), inst_type),
+         inst->src[0], inst->src[1]);
+   /* the MACH result is wanted in the accumulator, not in its (null) dst */
+   inst2->acc_wr_ctrl = true;
+
+   /* dst = acc0 & 0xffffffff */
+   tc_MOV(tc, inst->dst, tsrc_from(acc0));
+
+   /* the three instructions above replace the original MUL */
+   tc_discard_inst(tc, inst);
+}
+
+/**
+ * Lower a three-source MAC (dst = src0 * src1 + src2).
+ *
+ * For non-integer destination types, src2 is first moved to the
+ * accumulator, the third source is cleared, and the MAC itself is kept (it
+ * is passed to tc_move_inst(), presumably to place it after the MOV).
+ *
+ * For D/UD destination types, the MAC is replaced by an integer MUL into a
+ * temporary (itself lowered by lower_opcode_mul()) followed by an ADD of
+ * src2, and the original instruction is discarded.
+ */
+static void
+lower_opcode_mac(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   const enum toy_type inst_type = inst->dst.type;
+
+   if (inst_type != TOY_TYPE_UD && inst_type != TOY_TYPE_D) {
+      const struct toy_dst acc0 = tdst(TOY_FILE_ARF, BRW_ARF_ACCUMULATOR, 0);
+
+      /* load the addend into the accumulator */
+      tc_MOV(tc, acc0, inst->src[2]);
+      inst->src[2] = tsrc_null();
+      tc_move_inst(tc, inst);
+   }
+   else {
+      struct toy_dst tmp = tdst_type(tc_alloc_tmp(tc), inst_type);
+      struct toy_inst *inst2;
+
+      /* tmp = src0 * src1, expanded to the accumulator sequence */
+      inst2 = tc_MUL(tc, tmp, inst->src[0], inst->src[1]);
+      lower_opcode_mul(tc, inst2);
+
+      /* dst = tmp + src2 */
+      tc_ADD(tc, inst->dst, tsrc_from(tmp), inst->src[2]);
+
+      tc_discard_inst(tc, inst);
+   }
+}
+
+/**
+ * Legalize the instructions for register allocation.
+ *
+ * The first walk over the instruction list lowers MAC and MUL, and fails
+ * the compilation when an opcode above TOY_OPCODE_LAST_HW is still present.
+ * The second walk fixes up immediate operands with validate_imm().
+ */
+void
+toy_compiler_legalize_for_ra(struct toy_compiler *tc)
+{
+   struct toy_inst *cur;
+
+   tc_head(tc);
+   for (cur = tc_next(tc); cur != NULL; cur = tc_next(tc)) {
+      const unsigned opcode = cur->opcode;
+
+      if (opcode == BRW_OPCODE_MAC) {
+         lower_opcode_mac(tc, cur);
+      }
+      else if (opcode == BRW_OPCODE_MAD) {
+         /* TODO operands must be floats */
+      }
+      else if (opcode == BRW_OPCODE_MUL) {
+         lower_opcode_mul(tc, cur);
+      }
+      else if (opcode > TOY_OPCODE_LAST_HW) {
+         tc_fail(tc, "internal opcodes not lowered");
+      }
+   }
+
+   /* walk the list again: the lowering above may have added instructions */
+   tc_head(tc);
+   for (cur = tc_next(tc); cur != NULL; cur = tc_next(tc))
+      validate_imm(tc, cur);
+}
+
+/**
+ * Set the jump distance of a WHILE instruction.
+ *
+ * The instruction list is walked backward from the WHILE until the DO
+ * marker of the same nesting level is found.  Every real (non-marker)
+ * instruction passed makes the distance one more negative; nested
+ * WHILE/DO pairs are skipped by tracking the nesting level.  The resulting
+ * distance, in instructions, points at the instruction following the DO
+ * marker.
+ *
+ * The distance is stored doubled, in src[1] when tc->gen >= ILO_GEN(7) and
+ * in dst otherwise.
+ */
+static void
+patch_while_jip(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   struct toy_inst *inst2;
+   int nest_level, dist;
+
+   nest_level = 0;
+   /* the instruction right before the WHILE is at distance -1 */
+   dist = -1;
+
+   /* search backward */
+   LIST_FOR_EACH_ENTRY_FROM_REV(inst2, inst->list.prev,
+         &tc->instructions, list) {
+      /* markers are not counted in the distance */
+      if (inst2->marker) {
+         if (inst2->opcode == BRW_OPCODE_DO) {
+            if (nest_level) {
+               /* the DO of an inner loop */
+               nest_level--;
+            }
+            else {
+               /* the following instruction */
+               dist++;
+               break;
+            }
+         }
+
+         continue;
+      }
+
+      /* the WHILE of an inner loop: its DO must be skipped */
+      if (inst2->opcode == BRW_OPCODE_WHILE)
+         nest_level++;
+
+      dist--;
+   }
+
+   if (tc->gen >= ILO_GEN(7))
+      inst->src[1] = tsrc_imm_w(dist * 2);
+   else
+      inst->dst = tdst_imm_w(dist * 2);
+}
+
+/**
+ * Set JIP (and UIP) of an IF or ELSE instruction.
+ *
+ * The instruction list is walked forward, counting real (non-marker)
+ * instructions and skipping nested IF/ENDIF pairs:
+ *
+ *  - the matching ENDIF gives UIP, and also JIP unless JIP was already set
+ *    by an ELSE;
+ *  - when \p inst is an IF, an ELSE at the same nesting level sets JIP to
+ *    the instruction following the ELSE.  On ILO_GEN(6) the search stops
+ *    there with UIP equal to JIP.
+ *
+ * Distances are stored doubled.  When tc->gen >= ILO_GEN(7), UIP and JIP
+ * are packed into a single D immediate in src[1]; otherwise only JIP is
+ * stored, in dst.  The thread control is set to BRW_THREAD_SWITCH in both
+ * cases.
+ */
+static void
+patch_if_else_jip(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   struct toy_inst *inst2;
+   int nest_level, dist;
+   int jip, uip;
+
+   nest_level = 0;
+   /* the instruction right after inst is at distance 1 */
+   dist = 1;
+   /* 0 means "not found yet" */
+   jip = 0;
+   uip = 0;
+
+   /* search forward */
+   LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
+      /* markers are not counted in the distance */
+      if (inst2->marker)
+         continue;
+
+      if (inst2->opcode == BRW_OPCODE_ENDIF) {
+         if (nest_level) {
+            /* the ENDIF of a nested IF */
+            nest_level--;
+         }
+         else {
+            uip = dist * 2;
+            if (!jip)
+               jip = uip;
+            break;
+         }
+      }
+      else if (inst2->opcode == BRW_OPCODE_ELSE &&
+               inst->opcode == BRW_OPCODE_IF) {
+         if (!nest_level) {
+            /* the following instruction */
+            jip = (dist + 1) * 2;
+
+            if (tc->gen == ILO_GEN(6)) {
+               uip = jip;
+               break;
+            }
+         }
+      }
+      else if (inst2->opcode == BRW_OPCODE_IF) {
+         nest_level++;
+      }
+
+      dist++;
+   }
+
+   if (tc->gen >= ILO_GEN(7)) {
+      /* what should the type be? */
+      inst->dst.type = TOY_TYPE_D;
+      inst->src[0].type = TOY_TYPE_D;
+      /* UIP in the high word, JIP in the low word */
+      inst->src[1] = tsrc_imm_d(uip << 16 | jip);
+   }
+   else {
+      inst->dst = tdst_imm_w(jip);
+   }
+
+   inst->thread_ctrl = BRW_THREAD_SWITCH;
+}
+
+/**
+ * Set the jump distance of an ENDIF instruction.
+ *
+ * The distance is the number of real (non-marker) instructions up to and
+ * including the next ENDIF, ELSE, or WHILE that follows; when there is
+ * none, it is 1.  The distance is stored doubled, in src[1] when
+ * tc->gen >= ILO_GEN(7) and in dst otherwise, and the thread control is set
+ * to BRW_THREAD_SWITCH.
+ */
+static void
+patch_endif_jip(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   struct toy_inst *cur;
+   bool hit = false;
+   int steps = 1;
+
+   /* search forward for instructions that may enable channels */
+   LIST_FOR_EACH_ENTRY_FROM(cur, inst->list.next, &tc->instructions, list) {
+      if (cur->marker)
+         continue;
+
+      if (cur->opcode == BRW_OPCODE_ENDIF ||
+          cur->opcode == BRW_OPCODE_ELSE ||
+          cur->opcode == BRW_OPCODE_WHILE) {
+         hit = true;
+         break;
+      }
+
+      steps++;
+   }
+
+   /* should we set dist to (dist - 1) or 1? */
+   if (!hit)
+      steps = 1;
+
+   if (tc->gen < ILO_GEN(7))
+      inst->dst = tdst_imm_w(steps * 2);
+   else
+      inst->src[1] = tsrc_imm_w(steps * 2);
+
+   inst->thread_ctrl = BRW_THREAD_SWITCH;
+}
+
+/**
+ * Set JIP and UIP of a BREAK or CONTINUE instruction.
+ *
+ * JIP is the distance to the first ELSE, ENDIF, or WHILE that follows.
+ * UIP is found by continuing from that same instruction until the WHILE of
+ * the enclosing loop; loops opened in between (DO markers) are skipped by
+ * tracking the nesting level.  For a BREAK on ILO_GEN(6), UIP points to the
+ * instruction after the WHILE rather than to the WHILE itself.
+ *
+ * Both default to one instruction ahead when nothing is found.  Distances
+ * are stored doubled, packed into one D immediate in src[1].
+ */
+static void
+patch_break_continue_jip(struct toy_compiler *tc, struct toy_inst *inst)
+{
+   struct toy_inst *inst2, *inst3;
+   int nest_level, dist, jip, uip;
+
+   nest_level = 0;
+   /* the instruction right after inst is at distance 1 */
+   dist = 1;
+   jip = 1 * 2;
+   uip = 1 * 2;
+
+   /* search forward */
+   LIST_FOR_EACH_ENTRY_FROM(inst2, inst->list.next, &tc->instructions, list) {
+      /* markers are not counted in the distance */
+      if (inst2->marker) {
+         if (inst2->opcode == BRW_OPCODE_DO)
+            nest_level++;
+         continue;
+      }
+
+      if (inst2->opcode == BRW_OPCODE_ELSE ||
+          inst2->opcode == BRW_OPCODE_ENDIF ||
+          inst2->opcode == BRW_OPCODE_WHILE) {
+         jip = dist * 2;
+         break;
+      }
+
+      dist++;
+   }
+
+   /* go on to determine uip */
+   /*
+    * resume at the instruction that ended the first search (it has not
+    * been counted in dist yet, and may itself be the WHILE we look for)
+    */
+   inst3 = inst2;
+   LIST_FOR_EACH_ENTRY_FROM(inst2, &inst3->list, &tc->instructions, list) {
+      if (inst2->marker) {
+         if (inst2->opcode == BRW_OPCODE_DO)
+            nest_level++;
+         continue;
+      }
+
+      if (inst2->opcode == BRW_OPCODE_WHILE) {
+         if (nest_level) {
+            /* the WHILE of a nested loop */
+            nest_level--;
+         }
+         else {
+            /* the following instruction */
+            if (tc->gen == ILO_GEN(6) && inst->opcode == BRW_OPCODE_BREAK)
+               dist++;
+
+            uip = dist * 2;
+            break;
+         }
+      }
+
+      dist++;
+   }
+
+   /* should the type be D or W? */
+   inst->dst.type = TOY_TYPE_D;
+   inst->src[0].type = TOY_TYPE_D;
+   /* UIP in the high word, JIP in the low word */
+   inst->src[1] = tsrc_imm_d(uip << 16 | jip);
+}
+
+/**
+ * Legalize the instructions for assembling.
+ *
+ * The first walk over the instruction list
+ *
+ *  - gives a null src1 the type of src0;
+ *  - forces MATH to align1, and splits a SIMD16 MATH into two SIMD8 halves
+ *    when SIMD16 is not usable;
+ *  - on ILO_GEN(7) and later, splits an IF carrying a condition modifier
+ *    into a CMP and a predicated IF;
+ *  - on ILO_GEN(7) and later, rewrites MRF operands to GRF;
+ *  - counts the resulting instructions into tc->num_instructions.
+ *
+ * The second walk sets JIP/UIP of the flow control instructions, which is
+ * only possible once no more instructions are going to be inserted.
+ */
+void
+toy_compiler_legalize_for_asm(struct toy_compiler *tc)
+{
+   struct toy_inst *inst;
+   int pc = 0;
+
+   tc_head(tc);
+   while ((inst = tc_next(tc)) != NULL) {
+      int i;
+
+      pc++;
+
+      /*
+       * From the Sandy Bridge PRM, volume 4 part 2, page 112:
+       *
+       *     "Specifically, for instructions with a single source, it only
+       *      uses the first source operand <src0>. In this case, the second
+       *      source operand <src1> must be set to null and also with the same
+       *      type as the first source operand <src0>.  It is a special case
+       *      when <src0> is an immediate, as an immediate <src0> uses DW3 of
+       *      the instruction word, which is normally used by <src1>.  In this
+       *      case, <src1> must be programmed with register file ARF and the
+       *      same data type as <src0>."
+       *
+       * Since we already fill unused operands with null, we only need to take
+       * care of the type.
+       */
+      if (tsrc_is_null(inst->src[1]))
+         inst->src[1].type = inst->src[0].type;
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_MATH:
+         /* math does not support align16 nor exec_size > 8 */
+         inst->access_mode = BRW_ALIGN_1;
+
+         if (inst->exec_size == BRW_EXECUTE_16) {
+            /*
+             * From the Ivy Bridge PRM, volume 4 part 3, page 192:
+             *
+             *     "INT DIV function does not support SIMD16."
+             */
+            if (tc->gen < ILO_GEN(7) ||
+                inst->cond_modifier == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
+                inst->cond_modifier == BRW_MATH_FUNCTION_INT_DIV_REMAINDER) {
+               struct toy_inst *inst2;
+
+               /* the original instruction handles the first quarter ... */
+               inst->exec_size = BRW_EXECUTE_8;
+               inst->qtr_ctrl = GEN6_COMPRESSION_1Q;
+
+               /* ... and a copy, with operands offset by one, the second */
+               inst2 = tc_duplicate_inst(tc, inst);
+               inst2->qtr_ctrl = GEN6_COMPRESSION_2Q;
+               inst2->dst = tdst_offset(inst2->dst, 1, 0);
+               inst2->src[0] = tsrc_offset(inst2->src[0], 1, 0);
+               if (!tsrc_is_null(inst2->src[1]))
+                  inst2->src[1] = tsrc_offset(inst2->src[1], 1, 0);
+
+               /* account for the copy */
+               pc++;
+            }
+         }
+         break;
+      case BRW_OPCODE_IF:
+         if (tc->gen >= ILO_GEN(7) &&
+             inst->cond_modifier != BRW_CONDITIONAL_NONE) {
+            struct toy_inst *inst2;
+
+            inst2 = tc_duplicate_inst(tc, inst);
+
+            /* replace the original IF by CMP */
+            inst->opcode = BRW_OPCODE_CMP;
+
+            /* predicate control instead of condition modifier */
+            inst2->dst = tdst_null();
+            inst2->src[0] = tsrc_null();
+            inst2->src[1] = tsrc_null();
+            inst2->cond_modifier = BRW_CONDITIONAL_NONE;
+            inst2->pred_ctrl = BRW_PREDICATE_NORMAL;
+
+            /* account for the copy */
+            pc++;
+         }
+         break;
+      default:
+         break;
+      }
+
+      /* MRF to GRF */
+      if (tc->gen >= ILO_GEN(7)) {
+         for (i = 0; i < Elements(inst->src); i++) {
+            /*
+             * Operands are packed, so the first null one ends the list.
+             * This must be tested before the file: a null operand is never
+             * in the MRF file, and testing the file first would make this
+             * early exit unreachable.
+             */
+            if (tsrc_is_null(inst->src[i]))
+               break;
+
+            if (inst->src[i].file != TOY_FILE_MRF)
+               continue;
+
+            inst->src[i].file = TOY_FILE_GRF;
+         }
+
+         if (inst->dst.file == TOY_FILE_MRF)
+            inst->dst.file = TOY_FILE_GRF;
+      }
+   }
+
+   tc->num_instructions = pc;
+
+   /* set JIP/UIP */
+   tc_head(tc);
+   while ((inst = tc_next(tc)) != NULL) {
+      switch (inst->opcode) {
+      case BRW_OPCODE_IF:
+      case BRW_OPCODE_ELSE:
+         patch_if_else_jip(tc, inst);
+         break;
+      case BRW_OPCODE_ENDIF:
+         patch_endif_jip(tc, inst);
+         break;
+      case BRW_OPCODE_WHILE:
+         patch_while_jip(tc, inst);
+         break;
+      case BRW_OPCODE_BREAK:
+      case BRW_OPCODE_CONTINUE:
+         patch_break_continue_jip(tc, inst);
+         break;
+      default:
+         break;
+      }
+   }
+}
diff --git a/src/gallium/drivers/ilo/shader/toy_legalize.h b/src/gallium/drivers/ilo/shader/toy_legalize.h
new file mode 100644 (file)
index 0000000..8e2a120
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef TOY_LEGALIZE_H
+#define TOY_LEGALIZE_H
+
+#include "toy_compiler.h"
+#include "toy_tgsi.h"
+
+/**
+ * Turn \p inst into BRW_OPCODE_SEND, or BRW_OPCODE_SENDC when \p sendc is
+ * set, and store \p sfid in its condition modifier field.
+ */
+void
+toy_compiler_lower_to_send(struct toy_compiler *tc, struct toy_inst *inst,
+                           bool sendc, unsigned sfid);
+
+/**
+ * Lower a virtual math opcode to BRW_OPCODE_MATH.
+ */
+void
+toy_compiler_lower_math(struct toy_compiler *tc, struct toy_inst *inst);
+
+/**
+ * Allocate registers.
+ *
+ * NOTE(review): not defined in toy_legalize.c; presumably implemented in
+ * toy_legalize_ra.c, mapping VRF registers to GRFs in
+ * [start_grf, end_grf] -- confirm against the definition.
+ */
+void
+toy_compiler_allocate_registers(struct toy_compiler *tc,
+                                int start_grf, int end_grf,
+                                int num_grf_per_vrf);
+
+/**
+ * Legalize the instructions for register allocation.
+ */
+void
+toy_compiler_legalize_for_ra(struct toy_compiler *tc);
+
+/**
+ * Legalize the instructions for assembling.
+ */
+void
+toy_compiler_legalize_for_asm(struct toy_compiler *tc);
+
+#endif /* TOY_LEGALIZE_H */
diff --git a/src/gallium/drivers/ilo/shader/toy_legalize_ra.c b/src/gallium/drivers/ilo/shader/toy_legalize_ra.c
new file mode 100644 (file)
index 0000000..e691f12
--- /dev/null
@@ -0,0 +1,628 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include <stdlib.h> /* for qsort() */
+#include "toy_compiler.h"
+#include "toy_legalize.h"
+
+/**
+ * Live interval of a VRF register.
+ */
+struct linear_scan_live_interval {
+   /* the VRF register; 0 marks an entry that has not been added yet */
+   int vrf;
+   /* first and last program counters at which the register is live */
+   int startpoint;
+   int endpoint;
+
+   /*
+    * should this be assigned a consecutive register of the previous
+    * interval's?
+    */
+   bool consecutive;
+
+   /* the register assigned by the linear scan */
+   int reg;
+
+   /* link in linear_scan::active_list */
+   struct list_head list;
+};
+
+/**
+ * Linear scan.
+ */
+struct linear_scan {
+   /* live intervals; indexed by VRF while they are being collected */
+   struct linear_scan_live_interval *intervals;
+   /* the highest VRF seen, and the number of intervals added */
+   int max_vrf, num_vrfs;
+
+   /* NOTE(review): presumably the total number of registers -- confirm */
+   int num_regs;
+
+   /* intervals currently holding a register, sorted by endpoint */
+   struct list_head active_list;
+   /* stack of free registers; the top is at free_regs[num_free_regs - 1] */
+   int *free_regs;
+   int num_free_regs;
+
+   /* the result: vrf_mapping[vrf] is the register assigned to vrf */
+   int *vrf_mapping;
+};
+
+/**
+ * Return the registers [reg, reg + count) to the free register pool.
+ *
+ * The registers are pushed highest first, so that \p reg ends up on top of
+ * the stack and is the first one to be handed out again.
+ */
+static void
+linear_scan_free_regs(struct linear_scan *ls, int reg, int count)
+{
+   int r;
+
+   for (r = reg + count - 1; r >= reg; r--) {
+      ls->free_regs[ls->num_free_regs] = r;
+      ls->num_free_regs++;
+   }
+}
+
+/**
+ * qsort() comparison function that orders register numbers from the
+ * highest to the lowest.
+ *
+ * \return a positive value when *elem2 is greater than *elem1, a negative
+ *         value when it is smaller, and 0 when they are equal
+ */
+static int
+linear_scan_compare_regs(const void *elem1, const void *elem2)
+{
+   const int *reg1 = elem1;
+   const int *reg2 = elem2;
+
+   /*
+    * in reverse order; compare rather than subtract, as a subtraction may
+    * overflow
+    */
+   return (*reg2 > *reg1) - (*reg2 < *reg1);
+}
+
+/**
+ * Allocate a chunk of registers from the free register pool.
+ *
+ * \param ls     the linear scan state
+ * \param count  the number of consecutively-numbered registers wanted
+ * \return the lowest register of the chunk, or -1 when the pool holds no
+ *         run of \p count consecutive registers
+ *
+ * A single register is simply popped off the stack.  For larger chunks the
+ * stack is searched, from the top down, for \p count adjacent entries whose
+ * register numbers decrease by one toward the top.  When the search fails,
+ * the pool is sorted in descending order and searched once more.
+ */
+static int
+linear_scan_allocate_regs(struct linear_scan *ls, int count)
+{
+   bool sorted = false;
+   int reg;
+
+   /* simple cases */
+   if (count > ls->num_free_regs)
+      return -1;
+   else if (count == 1)
+      return ls->free_regs[--ls->num_free_regs];
+
+   /* TODO a free register pool */
+   /* TODO reserve some regs for spilling */
+   while (true) {
+      bool found = false;
+      int start;
+
+      /*
+       * find a chunk of registers that have consecutive register
+       * numbers
+       */
+      for (start = ls->num_free_regs - 1; start >= count - 1; start--) {
+         int i;
+
+         /*
+          * free_regs[start] is the lowest register of the candidate chunk;
+          * the entries below it on the stack must be the next higher ones
+          */
+         for (i = 1; i < count; i++) {
+            if (ls->free_regs[start - i] != ls->free_regs[start] + i)
+               break;
+         }
+
+         if (i >= count) {
+            found = true;
+            break;
+         }
+      }
+
+      if (found) {
+         reg = ls->free_regs[start];
+
+         /*
+          * remove entries [start - count + 1, start] by moving whatever is
+          * above them down; nothing to move when the chunk is on top
+          */
+         if (start != ls->num_free_regs - 1) {
+            start++;
+            memmove(&ls->free_regs[start - count],
+                    &ls->free_regs[start],
+                    sizeof(*ls->free_regs) * (ls->num_free_regs - start));
+         }
+         ls->num_free_regs -= count;
+         break;
+      }
+      else if (!sorted) {
+         /* sort and retry */
+         qsort(ls->free_regs, ls->num_free_regs, sizeof(*ls->free_regs),
+               linear_scan_compare_regs);
+         sorted = true;
+      }
+      else {
+         /* failed */
+         reg = -1;
+         break;
+      }
+   }
+
+   return reg;
+}
+
+/**
+ * Insert an interval into the active list, which is kept sorted by
+ * ascending endpoints.
+ */
+static void
+linear_scan_add_active(struct linear_scan *ls,
+                       struct linear_scan_live_interval *interval)
+{
+   const int endpoint = interval->endpoint;
+   struct linear_scan_live_interval *cursor;
+
+   /* stop at the first active interval that ends no earlier than this one */
+   LIST_FOR_EACH_ENTRY(cursor, &ls->active_list, list) {
+      if (endpoint <= cursor->endpoint)
+         break;
+   }
+
+   /*
+    * insert right before the cursor; when the loop ran to completion, the
+    * cursor's link is the list head and the interval is appended
+    */
+   list_addtail(&interval->list, &cursor->list);
+}
+
+/**
+ * Remove an interval from the active list.
+ *
+ * The interval is only unlinked; its register is not returned to the free
+ * pool here.  \p ls is not used.
+ */
+static void
+linear_scan_remove_active(struct linear_scan *ls,
+                          struct linear_scan_live_interval *interval)
+{
+   list_del(&interval->list);
+}
+
+/**
+ * Drop from the active list every interval that ends before \p pc, and
+ * return its register to the free register pool.
+ */
+static void
+linear_scan_expire_active(struct linear_scan *ls, int pc)
+{
+   struct linear_scan_live_interval *cur, *tmp;
+
+   LIST_FOR_EACH_ENTRY_SAFE(cur, tmp, &ls->active_list, list) {
+      const bool expired = (cur->endpoint < pc);
+
+      /*
+       * the active list is sorted by endpoints: once an interval is found
+       * to be still active, so are all the remaining ones
+       */
+      if (!expired)
+         break;
+
+      linear_scan_remove_active(ls, cur);
+
+      /* recycle the reg */
+      linear_scan_free_regs(ls, cur->reg, 1);
+   }
+}
+
+/**
+ * Spill an interval.
+ *
+ * Spilling is not implemented: this only asserts.  When assertions are
+ * compiled out, it returns without doing anything.
+ */
+static void
+linear_scan_spill(struct linear_scan *ls,
+                  struct linear_scan_live_interval *interval,
+                  bool is_active)
+{
+   assert(!"no spilling support");
+}
+
+/**
+ * Spill the \p count intervals starting at index \p first.  None of them
+ * is considered active.
+ */
+static void
+linear_scan_spill_range(struct linear_scan *ls, int first, int count)
+{
+   const int end = first + count;
+   int idx;
+
+   for (idx = first; idx < end; idx++)
+      linear_scan_spill(ls, &ls->intervals[idx], false);
+}
+
+/**
+ * Perform linear scan to allocate registers for the intervals.
+ *
+ * The first ls->num_vrfs intervals are processed in array order.
+ * Intervals flagged as consecutive, and starting at the same point as the
+ * interval before them, are allocated together as one chunk of
+ * consecutively-numbered registers.
+ *
+ * \return false when no register could be found even after spilling; true
+ *         otherwise, with ls->vrf_mapping filled in
+ */
+static bool
+linear_scan_run(struct linear_scan *ls)
+{
+   int i;
+
+   i = 0;
+   while (i < ls->num_vrfs) {
+      struct linear_scan_live_interval *first = &ls->intervals[i];
+      int reg, count;
+
+      /*
+       * BRW_OPCODE_SEND may write to multiple consecutive registers and we need to
+       * support that
+       */
+      for (count = 1; i + count < ls->num_vrfs; count++) {
+         const struct linear_scan_live_interval *interval =
+            &ls->intervals[i + count];
+
+         if (interval->startpoint != first->startpoint ||
+             !interval->consecutive)
+            break;
+      }
+
+      reg = linear_scan_allocate_regs(ls, count);
+
+      /* expire intervals that are no longer active and try again */
+      if (reg < 0) {
+         linear_scan_expire_active(ls, first->startpoint);
+         reg = linear_scan_allocate_regs(ls, count);
+      }
+
+      /* have to spill some intervals */
+      if (reg < 0) {
+         /*
+          * the active list is sorted by endpoints, so its tail ends last
+          *
+          * NOTE(review): this looks unsafe when the active list is empty,
+          * as the list head itself would be taken for an interval --
+          * confirm that this cannot happen
+          */
+         struct linear_scan_live_interval *last_active =
+            container_of(ls->active_list.prev,
+                  (struct linear_scan_live_interval *) NULL, list);
+
+         /* heuristically spill the interval that ends last */
+         if (count > 1 || last_active->endpoint < first->endpoint) {
+            linear_scan_spill_range(ls, i, count);
+            i += count;
+            continue;
+         }
+
+         /* make some room for the new interval */
+         linear_scan_spill(ls, last_active, true);
+         reg = linear_scan_allocate_regs(ls, count);
+         if (reg < 0) {
+            assert(!"failed to spill any register");
+            return false;
+         }
+      }
+
+      /* hand out the registers of the chunk in ascending order */
+      while (count--) {
+         struct linear_scan_live_interval *interval = &ls->intervals[i++];
+
+         interval->reg = reg++;
+         linear_scan_add_active(ls, interval);
+
+         ls->vrf_mapping[interval->vrf] = interval->reg;
+
+         /*
+          * this should and must be the case because of how we initialized the
+          * intervals
+          */
+         assert(interval->vrf - first->vrf == interval->reg - first->reg);
+      }
+   }
+
+   return true;
+}
+
+/**
+ * Start a live interval for a VRF at the given pc.
+ *
+ * The slot for the VRF is ls->intervals[vrf].  A slot whose vrf field is
+ * already non-zero is taken and is left untouched.  Otherwise the slot is
+ * filled in, and ls->num_vrfs and ls->max_vrf are brought up to date.
+ */
+static void
+linear_scan_add_live_interval(struct linear_scan *ls, int vrf, int pc)
+{
+   struct linear_scan_live_interval *slot = &ls->intervals[vrf];
+
+   /* already recorded */
+   if (slot->vrf)
+      return;
+
+   slot->vrf = vrf;
+   slot->startpoint = pc;
+
+   ls->num_vrfs++;
+   if (ls->max_vrf < vrf)
+      ls->max_vrf = vrf;
+}
+
+/**
+ * Perform (oversimplified?) live variable analysis.
+ *
+ * Instructions are numbered with a running pc; marker instructions do not
+ * consume a pc.  The interval of a VRF starts at its first use and is
+ * extended to every later use.  Any use inside an outermost loop is treated
+ * as if it spanned the whole loop, from do_pc to while_pc.
+ *
+ * ls->intervals is indexed by VRF number here.  ls->num_vrfs and ls->max_vrf
+ * are updated through linear_scan_add_live_interval().
+ */
+static void
+linear_scan_init_live_intervals(struct linear_scan *ls,
+                                struct toy_compiler *tc)
+{
+   const struct toy_inst *inst;
+   int pc, do_pc, while_pc;
+
+   pc = 0;
+   do_pc = -1;
+   while_pc = -1;
+
+   tc_head(tc);
+   while ((inst = tc_next_no_skip(tc)) != NULL) {
+      /* inside a loop, every use is widened to the bounds of the loop */
+      const int startpoint = (pc <= while_pc) ? do_pc : pc;
+      const int endpoint = (pc <= while_pc) ? while_pc : pc;
+      int vrf, i;
+
+      /*
+       * assume all registers used in this outermost loop are live throughout
+       * the whole loop
+       */
+      if (inst->marker) {
+         /* only an outermost loop updates do_pc/while_pc */
+         if (pc > while_pc) {
+            struct toy_inst *inst2;
+            int loop_level = 1;
+
+            assert(inst->opcode == BRW_OPCODE_DO);
+            do_pc = pc;
+            while_pc = pc + 1;
+
+            /* find the matching BRW_OPCODE_WHILE */
+            LIST_FOR_EACH_ENTRY_FROM(inst2, tc->iter_next,
+                  &tc->instructions, list) {
+               if (inst2->marker) {
+                  /* a nested marker must open a nested loop */
+                  assert(inst2->opcode == BRW_OPCODE_DO);
+                  loop_level++;
+                  continue;
+               }
+
+               if (inst2->opcode == BRW_OPCODE_WHILE) {
+                  loop_level--;
+                  if (!loop_level)
+                     break;
+               }
+               while_pc++;
+            }
+         }
+
+         continue;
+      }
+
+      if (inst->dst.file == TOY_FILE_VRF) {
+         int num_dst;
+
+         /* TODO this is a hack */
+         if (inst->opcode == BRW_OPCODE_SEND ||
+             inst->opcode == BRW_OPCODE_SENDC) {
+            /* the response length is read from bits 24:20 of src[1] */
+            const uint32_t mdesc = inst->src[1].val32;
+            int response_length = (mdesc >> 20) & 0x1f;
+
+            num_dst = response_length;
+            if (num_dst > 1 && inst->exec_size == BRW_EXECUTE_16)
+               num_dst /= 2;
+         }
+         else {
+            num_dst = 1;
+         }
+
+         vrf = inst->dst.val32 / TOY_REG_WIDTH;
+
+         /*
+          * every VRF after the first one is flagged consecutive so that
+          * the allocator keeps the group in adjacent registers
+          */
+         for (i = 0; i < num_dst; i++) {
+            /* first use */
+            if (!ls->intervals[vrf].vrf)
+               linear_scan_add_live_interval(ls, vrf, startpoint);
+
+            ls->intervals[vrf].endpoint = endpoint;
+            ls->intervals[vrf].consecutive = (i > 0);
+
+            vrf++;
+         }
+      }
+
+      for (i = 0; i < Elements(inst->src); i++) {
+         if (inst->src[i].file != TOY_FILE_VRF)
+            continue;
+
+         vrf = inst->src[i].val32 / TOY_REG_WIDTH;
+
+         /* first use */
+         if (!ls->intervals[vrf].vrf)
+            linear_scan_add_live_interval(ls, vrf, startpoint);
+
+         ls->intervals[vrf].endpoint = endpoint;
+      }
+
+      pc++;
+   }
+}
+
+/**
+ * Release the arrays owned by the linear scan state.
+ */
+static void
+linear_scan_cleanup(struct linear_scan *ls)
+{
+   /* the three arrays are independent of each other */
+   FREE(ls->intervals);
+   FREE(ls->free_regs);
+   FREE(ls->vrf_mapping);
+}
+
+/**
+ * qsort() comparator for live intervals.
+ *
+ * Unused slots (vrf == 0) sort after every used interval and compare equal
+ * to one another, which keeps the ordering consistent as qsort() requires.
+ * Used intervals are ordered by startpoint, with ties broken by VRF number.
+ */
+static int
+linear_scan_compare_live_intervals(const void *elem1, const void *elem2)
+{
+   const struct linear_scan_live_interval *interval1 = elem1;
+   const struct linear_scan_live_interval *interval2 = elem2;
+
+   /* make unused elements appear at the end */
+   if (!interval1->vrf || !interval2->vrf) {
+      /* two unused slots are indistinguishable */
+      if (!interval1->vrf && !interval2->vrf)
+         return 0;
+
+      return (!interval1->vrf) ? 1 : -1;
+   }
+
+   /* sort by startpoints first, and then by vrf */
+   if (interval1->startpoint != interval2->startpoint)
+      return (interval1->startpoint - interval2->startpoint);
+
+   return (interval1->vrf - interval2->vrf);
+}
+
+/**
+ * Prepare for linear scan.
+ *
+ * This zeroes the state, computes the live intervals and sorts them by
+ * startpoint, and sets up the free register stack and the VRF-to-register
+ * mapping.  Return false when an allocation fails; whatever was allocated up
+ * to that point is freed again.
+ */
+static bool
+linear_scan_init(struct linear_scan *ls, int num_regs,
+                 struct toy_compiler *tc)
+{
+   /* one slot per VRF number; this may be much larger than ls->num_vrfs... */
+   const int num_slots = tc->next_vrf;
+   int idx;
+
+   memset(ls, 0, sizeof(*ls));
+
+   ls->intervals = CALLOC(num_slots, sizeof(ls->intervals[0]));
+   if (!ls->intervals)
+      goto fail;
+
+   linear_scan_init_live_intervals(ls, tc);
+
+   /* sort intervals by startpoints */
+   qsort(ls->intervals, num_slots, sizeof(*ls->intervals),
+         linear_scan_compare_live_intervals);
+
+   ls->num_regs = num_regs;
+   ls->num_free_regs = num_regs;
+
+   ls->free_regs = MALLOC(ls->num_regs * sizeof(*ls->free_regs));
+   if (!ls->free_regs)
+      goto fail_intervals;
+
+   /* add in reverse order as we will allocate from the tail */
+   for (idx = 0; idx < ls->num_regs; idx++)
+      ls->free_regs[idx] = (num_regs - 1) - idx;
+
+   list_inithead(&ls->active_list);
+
+   ls->vrf_mapping = CALLOC(ls->max_vrf + 1, sizeof(*ls->vrf_mapping));
+   if (!ls->vrf_mapping)
+      goto fail_free_regs;
+
+   return true;
+
+fail_free_regs:
+   FREE(ls->free_regs);
+fail_intervals:
+   FREE(ls->intervals);
+fail:
+   return false;
+}
+
+/**
+ * Allocate registers with linear scan.
+ *
+ * The GRFs in [start_grf, end_grf] are handed out in units of
+ * num_grf_per_vrf.  On success, every VRF operand in the program is rewritten
+ * to the GRF that was assigned to it.  When the scan fails, the failure is
+ * reported through tc_fail() and the instructions are left untouched.
+ */
+static void
+linear_scan_allocation(struct toy_compiler *tc,
+                       int start_grf, int end_grf,
+                       int num_grf_per_vrf)
+{
+   const int num_grfs = end_grf - start_grf + 1;
+   struct linear_scan ls;
+   struct toy_inst *inst;
+
+   /* linear_scan_init() frees its own allocations when it fails */
+   if (!linear_scan_init(&ls, num_grfs / num_grf_per_vrf, tc))
+      return;
+
+   if (!linear_scan_run(&ls)) {
+      tc_fail(tc, "failed to allocate registers");
+      /* the state was fully set up by linear_scan_init(); do not leak it */
+      linear_scan_cleanup(&ls);
+      return;
+   }
+
+   tc_head(tc);
+   while ((inst = tc_next(tc)) != NULL) {
+      int i;
+
+      if (inst->dst.file == TOY_FILE_VRF) {
+         const uint32_t val32 = inst->dst.val32;
+         int reg = val32 / TOY_REG_WIDTH;
+         int subreg = val32 % TOY_REG_WIDTH;
+
+         /* map to GRF */
+         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;
+
+         inst->dst.file = TOY_FILE_GRF;
+         inst->dst.val32 = reg * TOY_REG_WIDTH + subreg;
+      }
+
+      for (i = 0; i < Elements(inst->src); i++) {
+         const uint32_t val32 = inst->src[i].val32;
+         int reg, subreg;
+
+         if (inst->src[i].file != TOY_FILE_VRF)
+            continue;
+
+         reg = val32 / TOY_REG_WIDTH;
+         subreg = val32 % TOY_REG_WIDTH;
+
+         /* map to GRF */
+         reg = ls.vrf_mapping[reg] * num_grf_per_vrf + start_grf;
+
+         inst->src[i].file = TOY_FILE_GRF;
+         inst->src[i].val32 = reg * TOY_REG_WIDTH + subreg;
+      }
+   }
+
+   linear_scan_cleanup(&ls);
+}
+
+/**
+ * Trivially allocate registers.
+ *
+ * Each VRF operand is rewritten to a GRF computed directly from the VRF
+ * number; no liveness information is used.  tc_fail() is called when the
+ * highest GRF that was handed out does not fit within end_grf.
+ */
+static void
+trivial_allocation(struct toy_compiler *tc,
+                   int start_grf, int end_grf,
+                   int num_grf_per_vrf)
+{
+   struct toy_inst *inst;
+   int highest_grf = -1;
+
+   tc_head(tc);
+   for (inst = tc_next(tc); inst != NULL; inst = tc_next(tc)) {
+      int i;
+
+      if (inst->dst.file == TOY_FILE_VRF) {
+         const uint32_t vrf_val = inst->dst.val32;
+         const int subreg = vrf_val % TOY_REG_WIDTH;
+         int grf = vrf_val / TOY_REG_WIDTH;
+
+         grf = grf * num_grf_per_vrf + start_grf - 1;
+
+         inst->dst.file = TOY_FILE_GRF;
+         inst->dst.val32 = grf * TOY_REG_WIDTH + subreg;
+
+         if (highest_grf < grf)
+            highest_grf = grf;
+      }
+
+      for (i = 0; i < Elements(inst->src); i++) {
+         uint32_t vrf_val;
+         int grf, subreg;
+
+         /* only VRF sources need rewriting */
+         if (inst->src[i].file != TOY_FILE_VRF)
+            continue;
+
+         vrf_val = inst->src[i].val32;
+         subreg = vrf_val % TOY_REG_WIDTH;
+         grf = vrf_val / TOY_REG_WIDTH;
+
+         grf = grf * num_grf_per_vrf + start_grf - 1;
+
+         inst->src[i].file = TOY_FILE_GRF;
+         inst->src[i].val32 = grf * TOY_REG_WIDTH + subreg;
+
+         if (highest_grf < grf)
+            highest_grf = grf;
+      }
+   }
+
+   /* the last VRF occupies num_grf_per_vrf GRFs starting at highest_grf */
+   if (highest_grf + num_grf_per_vrf - 1 > end_grf)
+      tc_fail(tc, "failed to allocate registers");
+}
+
+/**
+ * Allocate GRF registers to VRF registers.
+ *
+ * Linear scan is always selected; the trivial allocator is compiled in but
+ * never taken.
+ */
+void
+toy_compiler_allocate_registers(struct toy_compiler *tc,
+                                int start_grf, int end_grf,
+                                int num_grf_per_vrf)
+{
+   const bool use_linear_scan = true;
+
+   if (!use_linear_scan) {
+      trivial_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
+      return;
+   }
+
+   linear_scan_allocation(tc, start_grf, end_grf, num_grf_per_vrf);
+}
diff --git a/src/gallium/drivers/ilo/shader/toy_optimize.c b/src/gallium/drivers/ilo/shader/toy_optimize.c
new file mode 100644 (file)
index 0000000..62a663f
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "toy_compiler.h"
+#include "toy_tgsi.h"
+#include "toy_optimize.h"
+
+/**
+ * This just eliminates instructions with null dst so far.
+ *
+ * An instruction is discarded when its destination is null or its writemask
+ * is empty, unless it has a conditional modifier or writes the accumulator.
+ * Flow control, SEND/SENDC and NOP are never discarded.
+ */
+static void
+eliminate_dead_code(struct toy_compiler *tc)
+{
+   struct toy_inst *inst;
+
+   tc_head(tc);
+   while ((inst = tc_next(tc)) != NULL) {
+      switch (inst->opcode) {
+      case BRW_OPCODE_IF:
+      case BRW_OPCODE_ELSE:
+      case BRW_OPCODE_ENDIF:
+      case BRW_OPCODE_WHILE:
+      case BRW_OPCODE_BREAK:
+      case BRW_OPCODE_CONTINUE:
+      case BRW_OPCODE_SEND:
+      case BRW_OPCODE_SENDC:
+      case BRW_OPCODE_NOP:
+         /* never eliminated */
+         continue;
+      default:
+         break;
+      }
+
+      /* the result is written somewhere */
+      if (!tdst_is_null(inst->dst) && inst->dst.writemask)
+         continue;
+
+      /* the accumulator is written as a side effect */
+      if (inst->acc_wr_ctrl)
+         continue;
+
+      /* math is always BRW_CONDITIONAL_NONE */
+      if (inst->opcode != BRW_OPCODE_MATH &&
+          inst->cond_modifier != BRW_CONDITIONAL_NONE)
+         continue;
+
+      tc_discard_inst(tc, inst);
+   }
+}
+
+/**
+ * Run the optimization passes on the instructions of the compiler.  Dead
+ * code elimination is the only pass so far.
+ */
+void
+toy_compiler_optimize(struct toy_compiler *tc)
+{
+   eliminate_dead_code(tc);
+}
diff --git a/src/gallium/drivers/ilo/shader/toy_optimize.h b/src/gallium/drivers/ilo/shader/toy_optimize.h
new file mode 100644 (file)
index 0000000..f65198c
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef TOY_OPTIMIZE_H
+#define TOY_OPTIMIZE_H
+
+#include "toy_compiler.h"
+
+/**
+ * Optimize the instructions held by the compiler.
+ */
+void
+toy_compiler_optimize(struct toy_compiler *tc);
+
+#endif /* TOY_OPTIMIZE_H */
diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.c b/src/gallium/drivers/ilo/shader/toy_tgsi.c
new file mode 100644 (file)
index 0000000..c2b1da5
--- /dev/null
@@ -0,0 +1,2736 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_strings.h"
+#include "util/u_hash_table.h"
+#include "toy_helpers.h"
+#include "toy_tgsi.h"
+
+/*
+ * map TGSI opcode to GEN opcode 1-to-1
+ *
+ * The table is indexed by TGSI opcode.  Opcodes that are not listed are left
+ * zero-initialized, and aos_simple() rejects an entry whose opcode is zero.
+ * num_dst and num_src are the operand counts aos_simple() asserts against
+ * the TGSI instruction.
+ */
+static const struct {
+   int opcode;
+   int num_dst;
+   int num_src;
+} aos_simple_opcode_map[TGSI_OPCODE_LAST] = {
+   [TGSI_OPCODE_ARL]          = { BRW_OPCODE_RNDD,                1, 1 },
+   [TGSI_OPCODE_MOV]          = { BRW_OPCODE_MOV,                 1, 1 },
+   [TGSI_OPCODE_RCP]          = { TOY_OPCODE_INV,                 1, 1 },
+   [TGSI_OPCODE_RSQ]          = { TOY_OPCODE_RSQ,                 1, 1 },
+   [TGSI_OPCODE_MUL]          = { BRW_OPCODE_MUL,                 1, 2 },
+   [TGSI_OPCODE_ADD]          = { BRW_OPCODE_ADD,                 1, 2 },
+   [TGSI_OPCODE_DP3]          = { BRW_OPCODE_DP3,                 1, 2 },
+   [TGSI_OPCODE_DP4]          = { BRW_OPCODE_DP4,                 1, 2 },
+   [TGSI_OPCODE_MIN]          = { BRW_OPCODE_SEL,                 1, 2 },
+   [TGSI_OPCODE_MAX]          = { BRW_OPCODE_SEL,                 1, 2 },
+   /* a later pass will move src[2] to accumulator */
+   [TGSI_OPCODE_MAD]          = { BRW_OPCODE_MAC,                 1, 3 },
+   [TGSI_OPCODE_SUB]          = { BRW_OPCODE_ADD,                 1, 2 },
+   [TGSI_OPCODE_SQRT]         = { TOY_OPCODE_SQRT,                1, 1 },
+   [TGSI_OPCODE_FRC]          = { BRW_OPCODE_FRC,                 1, 1 },
+   [TGSI_OPCODE_FLR]          = { BRW_OPCODE_RNDD,                1, 1 },
+   [TGSI_OPCODE_ROUND]        = { BRW_OPCODE_RNDE,                1, 1 },
+   [TGSI_OPCODE_EX2]          = { TOY_OPCODE_EXP,                 1, 1 },
+   [TGSI_OPCODE_LG2]          = { TOY_OPCODE_LOG,                 1, 1 },
+   [TGSI_OPCODE_POW]          = { TOY_OPCODE_POW,                 1, 2 },
+   [TGSI_OPCODE_ABS]          = { BRW_OPCODE_MOV,                 1, 1 },
+   [TGSI_OPCODE_DPH]          = { BRW_OPCODE_DPH,                 1, 2 },
+   [TGSI_OPCODE_COS]          = { TOY_OPCODE_COS,                 1, 1 },
+   [TGSI_OPCODE_KILP]         = { TOY_OPCODE_KIL,                 0, 0 },
+   [TGSI_OPCODE_SIN]          = { TOY_OPCODE_SIN,                 1, 1 },
+   [TGSI_OPCODE_ARR]          = { BRW_OPCODE_RNDZ,                1, 1 },
+   [TGSI_OPCODE_DP2]          = { BRW_OPCODE_DP2,                 1, 2 },
+   [TGSI_OPCODE_IF]           = { BRW_OPCODE_IF,                  0, 1 },
+   [TGSI_OPCODE_UIF]          = { BRW_OPCODE_IF,                  0, 1 },
+   [TGSI_OPCODE_ELSE]         = { BRW_OPCODE_ELSE,                0, 0 },
+   [TGSI_OPCODE_ENDIF]        = { BRW_OPCODE_ENDIF,               0, 0 },
+   [TGSI_OPCODE_I2F]          = { BRW_OPCODE_MOV,                 1, 1 },
+   [TGSI_OPCODE_NOT]          = { BRW_OPCODE_NOT,                 1, 1 },
+   [TGSI_OPCODE_TRUNC]        = { BRW_OPCODE_RNDZ,                1, 1 },
+   [TGSI_OPCODE_SHL]          = { BRW_OPCODE_SHL,                 1, 2 },
+   [TGSI_OPCODE_AND]          = { BRW_OPCODE_AND,                 1, 2 },
+   [TGSI_OPCODE_OR]           = { BRW_OPCODE_OR,                  1, 2 },
+   [TGSI_OPCODE_MOD]          = { TOY_OPCODE_INT_DIV_REMAINDER,   1, 2 },
+   [TGSI_OPCODE_XOR]          = { BRW_OPCODE_XOR,                 1, 2 },
+   [TGSI_OPCODE_EMIT]         = { TOY_OPCODE_EMIT,                0, 0 },
+   [TGSI_OPCODE_ENDPRIM]      = { TOY_OPCODE_ENDPRIM,             0, 0 },
+   [TGSI_OPCODE_NOP]          = { BRW_OPCODE_NOP,                 0, 0 },
+   [TGSI_OPCODE_KIL]          = { TOY_OPCODE_KIL,                 0, 1 },
+   [TGSI_OPCODE_END]          = { BRW_OPCODE_NOP,                 0, 0 },
+   [TGSI_OPCODE_F2I]          = { BRW_OPCODE_MOV,                 1, 1 },
+   [TGSI_OPCODE_IDIV]         = { TOY_OPCODE_INT_DIV_QUOTIENT,    1, 2 },
+   [TGSI_OPCODE_IMAX]         = { BRW_OPCODE_SEL,                 1, 2 },
+   [TGSI_OPCODE_IMIN]         = { BRW_OPCODE_SEL,                 1, 2 },
+   [TGSI_OPCODE_INEG]         = { BRW_OPCODE_MOV,                 1, 1 },
+   [TGSI_OPCODE_ISHR]         = { BRW_OPCODE_ASR,                 1, 2 },
+   [TGSI_OPCODE_F2U]          = { BRW_OPCODE_MOV,                 1, 1 },
+   [TGSI_OPCODE_U2F]          = { BRW_OPCODE_MOV,                 1, 1 },
+   [TGSI_OPCODE_UADD]         = { BRW_OPCODE_ADD,                 1, 2 },
+   [TGSI_OPCODE_UDIV]         = { TOY_OPCODE_INT_DIV_QUOTIENT,    1, 2 },
+   /* a later pass will move src[2] to accumulator */
+   [TGSI_OPCODE_UMAD]         = { BRW_OPCODE_MAC,                 1, 3 },
+   [TGSI_OPCODE_UMAX]         = { BRW_OPCODE_SEL,                 1, 2 },
+   [TGSI_OPCODE_UMIN]         = { BRW_OPCODE_SEL,                 1, 2 },
+   [TGSI_OPCODE_UMOD]         = { TOY_OPCODE_INT_DIV_REMAINDER,   1, 2 },
+   [TGSI_OPCODE_UMUL]         = { BRW_OPCODE_MUL,                 1, 2 },
+   [TGSI_OPCODE_USHR]         = { BRW_OPCODE_SHR,                 1, 2 },
+   [TGSI_OPCODE_UARL]         = { BRW_OPCODE_MOV,                 1, 1 },
+   [TGSI_OPCODE_IABS]         = { BRW_OPCODE_MOV,                 1, 1 },
+};
+
+/**
+ * Translate a TGSI instruction that has an entry in aos_simple_opcode_map
+ * into a single instruction.
+ *
+ * Some opcodes additionally need a conditional modifier, or a modifier or
+ * swizzle on their sources.  Those adjustments are made to the src array in
+ * place before the operands are copied to the new instruction.
+ */
+static void
+aos_simple(struct toy_compiler *tc,
+           const struct tgsi_full_instruction *tgsi_inst,
+           struct toy_dst *dst,
+           struct toy_src *src)
+{
+   const int tgsi_opcode = tgsi_inst->Instruction.Opcode;
+   const int opcode = aos_simple_opcode_map[tgsi_opcode].opcode;
+   const int num_dst = tgsi_inst->Instruction.NumDstRegs;
+   int num_src = tgsi_inst->Instruction.NumSrcRegs;
+   struct toy_inst *inst;
+   int i;
+
+   assert(num_dst == aos_simple_opcode_map[tgsi_opcode].num_dst);
+   assert(num_src == aos_simple_opcode_map[tgsi_opcode].num_src);
+   if (!opcode) {
+      assert(!"invalid aos_simple() call");
+      return;
+   }
+
+   /* no need to emit nop */
+   if (opcode == BRW_OPCODE_NOP)
+      return;
+
+   inst = tc_add(tc);
+   if (!inst)
+      return;
+
+   inst->opcode = opcode;
+   inst->cond_modifier = BRW_CONDITIONAL_NONE;
+
+   switch (tgsi_opcode) {
+   case TGSI_OPCODE_MIN:
+   case TGSI_OPCODE_IMIN:
+   case TGSI_OPCODE_UMIN:
+      /* SEL with "less than" picks the smaller source */
+      inst->cond_modifier = BRW_CONDITIONAL_L;
+      break;
+   case TGSI_OPCODE_MAX:
+   case TGSI_OPCODE_IMAX:
+   case TGSI_OPCODE_UMAX:
+      /* SEL with "greater or equal" picks the larger source */
+      inst->cond_modifier = BRW_CONDITIONAL_GE;
+      break;
+   case TGSI_OPCODE_SUB:
+      /* a - b is emitted as a + (-b) */
+      src[1] = tsrc_negate(src[1]);
+      break;
+   case TGSI_OPCODE_ABS:
+   case TGSI_OPCODE_IABS:
+      src[0] = tsrc_absolute(src[0]);
+      break;
+   case TGSI_OPCODE_IF:
+      /* compare src0.x against 0.0f */
+      assert(src[0].type == TOY_TYPE_F);
+      inst->cond_modifier = BRW_CONDITIONAL_NEQ;
+      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
+      src[1] = tsrc_imm_f(0.0f);
+      num_src = 2;
+      break;
+   case TGSI_OPCODE_UIF:
+      /* compare src0.x against integer 0 */
+      assert(src[0].type == TOY_TYPE_D);
+      inst->cond_modifier = BRW_CONDITIONAL_NEQ;
+      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
+      src[1] = tsrc_imm_d(0);
+      num_src = 2;
+      break;
+   case TGSI_OPCODE_INEG:
+      src[0] = tsrc_negate(src[0]);
+      break;
+   case TGSI_OPCODE_RCP:
+   case TGSI_OPCODE_RSQ:
+   case TGSI_OPCODE_EX2:
+   case TGSI_OPCODE_LG2:
+   case TGSI_OPCODE_COS:
+   case TGSI_OPCODE_SIN:
+      /* these read only the X channel of their source */
+      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
+      break;
+   case TGSI_OPCODE_POW:
+      src[0] = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
+      src[1] = tsrc_swizzle1(src[1], TOY_SWIZZLE_X);
+      break;
+   }
+
+   if (num_dst) {
+      assert(num_dst == 1);
+      inst->dst = dst[0];
+   }
+
+   assert(num_src <= Elements(inst->src));
+   for (i = 0; i < num_src; i++)
+      inst->src[i] = src[i];
+}
+
+/**
+ * Translate the TGSI set-on-condition opcodes (SLT, SGE, SEQ, ...).
+ *
+ * The destination is cleared first.  A compare of src0 against src1 follows,
+ * and then a predicated MOV that writes the "true" value: 1.0f when the
+ * destination is a float, and all bits set when it is an integer.
+ */
+static void
+aos_set_on_cond(struct toy_compiler *tc,
+                const struct tgsi_full_instruction *tgsi_inst,
+                struct toy_dst *dst,
+                struct toy_src *src)
+{
+   struct toy_src false_val, true_val;
+   struct toy_inst *mov;
+   int cond_modifier;
+
+   switch (tgsi_inst->Instruction.Opcode) {
+   case TGSI_OPCODE_SEQ:
+   case TGSI_OPCODE_USEQ:
+      cond_modifier = BRW_CONDITIONAL_EQ;
+      break;
+   case TGSI_OPCODE_SNE:
+   case TGSI_OPCODE_USNE:
+      cond_modifier = BRW_CONDITIONAL_NEQ;
+      break;
+   case TGSI_OPCODE_SLT:
+   case TGSI_OPCODE_ISLT:
+   case TGSI_OPCODE_USLT:
+      cond_modifier = BRW_CONDITIONAL_L;
+      break;
+   case TGSI_OPCODE_SLE:
+      cond_modifier = BRW_CONDITIONAL_LE;
+      break;
+   case TGSI_OPCODE_SGT:
+      cond_modifier = BRW_CONDITIONAL_G;
+      break;
+   case TGSI_OPCODE_SGE:
+   case TGSI_OPCODE_ISGE:
+   case TGSI_OPCODE_USGE:
+      cond_modifier = BRW_CONDITIONAL_GE;
+      break;
+   default:
+      assert(!"invalid aos_set_on_cond() call");
+      return;
+   }
+
+   /* note that for integer versions, all bits are set */
+   switch (dst[0].type) {
+   case TOY_TYPE_D:
+      false_val = tsrc_imm_d(0);
+      true_val = tsrc_imm_d(-1);
+      break;
+   case TOY_TYPE_UD:
+      false_val = tsrc_imm_ud(0);
+      true_val = tsrc_imm_ud(~0);
+      break;
+   case TOY_TYPE_F:
+   default:
+      false_val = tsrc_imm_f(0.0f);
+      true_val = tsrc_imm_f(1.0f);
+      break;
+   }
+
+   tc_MOV(tc, dst[0], false_val);
+   tc_CMP(tc, tdst_null(), src[0], src[1], cond_modifier);
+
+   /* overwrite only the channels for which the compare passed */
+   mov = tc_MOV(tc, dst[0], true_val);
+   mov->pred_ctrl = BRW_PREDICATE_NORMAL;
+}
+
+/**
+ * Translate TGSI CMP and UCMP into a compare followed by a predicated SEL.
+ *
+ * CMP selects src1 where src0 is negative, and src2 elsewhere.  UCMP selects
+ * src1 where src0 is non-zero, and src2 elsewhere.
+ */
+static void
+aos_compare(struct toy_compiler *tc,
+            const struct tgsi_full_instruction *tgsi_inst,
+            struct toy_dst *dst,
+            struct toy_src *src)
+{
+   struct toy_inst *inst;
+   struct toy_src zero;
+   int cond;
+
+   switch (tgsi_inst->Instruction.Opcode) {
+   case TGSI_OPCODE_CMP:
+      zero = tsrc_imm_f(0.0f);
+      cond = BRW_CONDITIONAL_L;
+      break;
+   case TGSI_OPCODE_UCMP:
+      /*
+       * UCMP tests for non-zero; "less than zero" is never true for an
+       * unsigned value and would always select src2
+       */
+      zero = tsrc_imm_ud(0);
+      cond = BRW_CONDITIONAL_NEQ;
+      break;
+   default:
+      assert(!"invalid aos_compare() call");
+      return;
+   }
+
+   tc_CMP(tc, tdst_null(), src[0], zero, cond);
+
+   /* pick src1 where the compare passed, and src2 elsewhere */
+   inst = tc_SEL(tc, dst[0], src[1], src[2], BRW_CONDITIONAL_NONE);
+   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
+}
+
+/**
+ * Translate TGSI SSG and ISSG: the destination becomes -1, 0 or 1 according
+ * to the sign of src0.
+ *
+ * The destination is set to zero first, and is then overwritten by two
+ * predicated MOVs, one for the positive and one for the negative case.
+ */
+static void
+aos_set_sign(struct toy_compiler *tc,
+             const struct tgsi_full_instruction *tgsi_inst,
+             struct toy_dst *dst,
+             struct toy_src *src)
+{
+   const int tgsi_opcode = tgsi_inst->Instruction.Opcode;
+   struct toy_src zero, pos_one, neg_one;
+   struct toy_inst *mov;
+
+   if (tgsi_opcode == TGSI_OPCODE_SSG) {
+      zero = tsrc_imm_f(0.0f);
+      pos_one = tsrc_imm_f(1.0f);
+      neg_one = tsrc_imm_f(-1.0f);
+   }
+   else if (tgsi_opcode == TGSI_OPCODE_ISSG) {
+      zero = tsrc_imm_d(0);
+      pos_one = tsrc_imm_d(1);
+      neg_one = tsrc_imm_d(-1);
+   }
+   else {
+      assert(!"invalid aos_set_sign() call");
+      return;
+   }
+
+   tc_MOV(tc, dst[0], zero);
+
+   /* src0 > 0 gives 1 */
+   tc_CMP(tc, tdst_null(), src[0], zero, BRW_CONDITIONAL_G);
+   mov = tc_MOV(tc, dst[0], pos_one);
+   mov->pred_ctrl = BRW_PREDICATE_NORMAL;
+
+   /* src0 < 0 gives -1 */
+   tc_CMP(tc, tdst_null(), src[0], zero, BRW_CONDITIONAL_L);
+   mov = tc_MOV(tc, dst[0], neg_one);
+   mov->pred_ctrl = BRW_PREDICATE_NORMAL;
+}
+
+/**
+ * Translate a TGSI texturing opcode into the matching TOY_OPCODE_TGSI_*
+ * instruction.
+ *
+ * The texture target, the destination and all sources are copied to the new
+ * instruction unchanged.  Texel offsets are not supported and make the
+ * compilation fail.
+ */
+static void
+aos_tex(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   struct toy_inst *inst;
+   enum toy_opcode opcode;
+   int i;
+
+   switch (tgsi_inst->Instruction.Opcode) {
+   case TGSI_OPCODE_TEX:
+      opcode = TOY_OPCODE_TGSI_TEX;
+      break;
+   case TGSI_OPCODE_TXD:
+      opcode = TOY_OPCODE_TGSI_TXD;
+      break;
+   case TGSI_OPCODE_TXP:
+      opcode = TOY_OPCODE_TGSI_TXP;
+      break;
+   case TGSI_OPCODE_TXB:
+      opcode = TOY_OPCODE_TGSI_TXB;
+      break;
+   case TGSI_OPCODE_TXL:
+      opcode = TOY_OPCODE_TGSI_TXL;
+      break;
+   case TGSI_OPCODE_TXF:
+      opcode = TOY_OPCODE_TGSI_TXF;
+      break;
+   case TGSI_OPCODE_TXQ:
+      opcode = TOY_OPCODE_TGSI_TXQ;
+      break;
+   case TGSI_OPCODE_TXQ_LZ:
+      opcode = TOY_OPCODE_TGSI_TXQ_LZ;
+      break;
+   case TGSI_OPCODE_TEX2:
+      opcode = TOY_OPCODE_TGSI_TEX2;
+      break;
+   case TGSI_OPCODE_TXB2:
+      opcode = TOY_OPCODE_TGSI_TXB2;
+      break;
+   case TGSI_OPCODE_TXL2:
+      opcode = TOY_OPCODE_TGSI_TXL2;
+      break;
+   default:
+      assert(!"unsupported texturing opcode");
+      return;
+   }
+
+   assert(tgsi_inst->Instruction.Texture);
+
+   /* tc_add() may fail, as aos_simple() also accounts for */
+   inst = tc_add(tc);
+   if (!inst)
+      return;
+
+   inst->opcode = opcode;
+   inst->tex.target = tgsi_inst->Texture.Texture;
+
+   assert(tgsi_inst->Instruction.NumSrcRegs <= Elements(inst->src));
+   assert(tgsi_inst->Instruction.NumDstRegs == 1);
+
+   inst->dst = dst[0];
+   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
+      inst->src[i] = src[i];
+
+   for (i = 0; i < tgsi_inst->Texture.NumOffsets; i++)
+      tc_fail(tc, "texelFetchOffset unsupported");
+}
+
+/**
+ * Translate a TGSI sampling opcode into the matching TOY_OPCODE_TGSI_*
+ * instruction, copying the destination and all sources unchanged.
+ *
+ * This path is untested and asserts unconditionally on entry.
+ */
+static void
+aos_sample(struct toy_compiler *tc,
+           const struct tgsi_full_instruction *tgsi_inst,
+           struct toy_dst *dst,
+           struct toy_src *src)
+{
+   struct toy_inst *inst;
+   enum toy_opcode opcode;
+   int i;
+
+   assert(!"sampling untested");
+
+   switch (tgsi_inst->Instruction.Opcode) {
+   case TGSI_OPCODE_SAMPLE:
+      opcode = TOY_OPCODE_TGSI_SAMPLE;
+      break;
+   case TGSI_OPCODE_SAMPLE_I:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_I;
+      break;
+   case TGSI_OPCODE_SAMPLE_I_MS:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_I_MS;
+      break;
+   case TGSI_OPCODE_SAMPLE_B:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_B;
+      break;
+   case TGSI_OPCODE_SAMPLE_C:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_C;
+      break;
+   case TGSI_OPCODE_SAMPLE_C_LZ:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_C_LZ;
+      break;
+   case TGSI_OPCODE_SAMPLE_D:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_D;
+      break;
+   case TGSI_OPCODE_SAMPLE_L:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_L;
+      break;
+   case TGSI_OPCODE_GATHER4:
+      opcode = TOY_OPCODE_TGSI_GATHER4;
+      break;
+   case TGSI_OPCODE_SVIEWINFO:
+      opcode = TOY_OPCODE_TGSI_SVIEWINFO;
+      break;
+   case TGSI_OPCODE_SAMPLE_POS:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_POS;
+      break;
+   case TGSI_OPCODE_SAMPLE_INFO:
+      opcode = TOY_OPCODE_TGSI_SAMPLE_INFO;
+      break;
+   default:
+      assert(!"unsupported sampling opcode");
+      return;
+   }
+
+   /* tc_add() may fail, as aos_simple() also accounts for */
+   inst = tc_add(tc);
+   if (!inst)
+      return;
+
+   inst->opcode = opcode;
+
+   assert(tgsi_inst->Instruction.NumSrcRegs <= Elements(inst->src));
+   assert(tgsi_inst->Instruction.NumDstRegs == 1);
+
+   inst->dst = dst[0];
+   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
+      inst->src[i] = src[i];
+}
+
+/**
+ * Translate TGSI LIT.
+ *
+ * dst.x and dst.w are set to 1.0f.  dst.y and dst.z default to 0.0f; where
+ * src0.x > 0 they are overwritten with src0.x and pow(src0.y, src0.w)
+ * respectively.
+ */
+static void
+aos_LIT(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_src src0_x = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
+   struct toy_inst *inst;
+
+   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XW), tsrc_imm_f(1.0f));
+
+   /* nothing more to do when neither Y nor Z is written */
+   if ((dst[0].writemask & TOY_WRITEMASK_YZ) == 0)
+      return;
+
+   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_YZ), tsrc_imm_f(0.0f));
+
+   /* the two instructions below are predicated on src0.x > 0 */
+   tc_CMP(tc, tdst_null(), src0_x, tsrc_imm_f(0.0f), BRW_CONDITIONAL_G);
+
+   inst = tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src0_x);
+   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
+
+   /* clamp W to (-128, 128)? */
+   inst = tc_POW(tc,
+         tdst_writemask(dst[0], TOY_WRITEMASK_Z),
+         tsrc_swizzle1(src[0], TOY_SWIZZLE_Y),
+         tsrc_swizzle1(src[0], TOY_SWIZZLE_W));
+   inst->pred_ctrl = BRW_PREDICATE_NORMAL;
+}
+
+/**
+ * Translate TGSI EXP, which works on src0.x only.
+ *
+ * dst.x is assembled by hand from the rounded-down source used as a biased
+ * exponent.  dst.y is the fractional part, dst.z comes from tc_EXP() and
+ * dst.w is 1.0f.
+ */
+static void
+aos_EXP(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_src src0_x = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
+
+   if (dst[0].writemask & TOY_WRITEMASK_X) {
+      struct toy_dst exponent =
+         tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
+
+      tc_RNDD(tc, exponent, src0_x);
+
+      /*
+       * construct the floating point number manually: add 127 to the
+       * integer and shift it left by 23 bits
+       */
+      tc_ADD(tc, exponent, tsrc_from(exponent), tsrc_imm_d(127));
+      tc_SHL(tc, tdst_d(tdst_writemask(dst[0], TOY_WRITEMASK_X)),
+            tsrc_from(exponent), tsrc_imm_d(23));
+   }
+
+   tc_FRC(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Y), src0_x);
+   tc_EXP(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0_x);
+   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
+}
+
+/**
+ * Translate TGSI LOG, which works on src0.x only.
+ *
+ * dst.x and dst.y are derived from the source bits shifted and masked as
+ * integers.  dst.z comes from tc_LOG() and dst.w is 1.0f.
+ */
+static void
+aos_LOG(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_src src0_x = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
+
+   if (dst[0].writemask & TOY_WRITEMASK_XY) {
+      const struct toy_dst bits =
+         tdst_d(tdst_writemask(tc_alloc_tmp(tc), TOY_WRITEMASK_X));
+
+      /* exponent: shift right by 23 bits, then subtract 127 */
+      tc_SHR(tc, bits, tsrc_absolute(tsrc_d(src0_x)), tsrc_imm_d(23));
+      tc_ADD(tc, tdst_writemask(dst[0], TOY_WRITEMASK_X),
+            tsrc_from(bits), tsrc_imm_d(-127));
+
+      /* mantissa: keep the low 23 bits and OR in 127 << 23 */
+      tc_AND(tc, bits, tsrc_d(src0_x), tsrc_imm_d((1 << 23) - 1));
+      tc_OR(tc, tdst_writemask(tdst_d(dst[0]), TOY_WRITEMASK_Y),
+            tsrc_from(bits), tsrc_imm_d(127 << 23));
+   }
+
+   tc_LOG(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), src0_x);
+   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
+}
+
+/**
+ * Translate TGSI DST, one destination channel at a time:
+ * X gets 1.0f, Y gets src0 * src1, Z gets src0 and W gets src1.
+ */
+static void
+aos_DST(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_dst dst_x = tdst_writemask(dst[0], TOY_WRITEMASK_X);
+   const struct toy_dst dst_y = tdst_writemask(dst[0], TOY_WRITEMASK_Y);
+   const struct toy_dst dst_z = tdst_writemask(dst[0], TOY_WRITEMASK_Z);
+   const struct toy_dst dst_w = tdst_writemask(dst[0], TOY_WRITEMASK_W);
+
+   tc_MOV(tc, dst_x, tsrc_imm_f(1.0f));
+   tc_MUL(tc, dst_y, src[0], src[1]);
+   tc_MOV(tc, dst_z, src[0]);
+   tc_MOV(tc, dst_w, src[1]);
+}
+
+/**
+ * Translate TGSI LRP.
+ *
+ * A temporary is set to (1 - src0) * src2 and is then passed as the third
+ * operand of a MAC of src0 and src1.
+ */
+static void
+aos_LRP(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_dst scratch = tc_alloc_tmp(tc);
+   const struct toy_src one = tsrc_imm_f(1.0f);
+
+   /* scratch = (1 - src0) * src2 */
+   tc_ADD(tc, scratch, tsrc_negate(src[0]), one);
+   tc_MUL(tc, scratch, tsrc_from(scratch), src[2]);
+
+   tc_MAC(tc, dst[0], src[0], src[1], tsrc_from(scratch));
+}
+
+/**
+ * Translate TGSI CND in AoS form.  A CMP of src[2] against 0.5 (condition G)
+ * is emitted with a null destination, followed by a SEL between src[0] and
+ * src[1] whose pred_ctrl is set to BRW_PREDICATE_NORMAL.
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+aos_CND(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_src threshold = tsrc_imm_f(0.5f);
+   struct toy_inst *sel;
+
+   assert(!"CND untested");
+
+   /* the comparison value itself goes to the null destination */
+   tc_CMP(tc, tdst_null(), src[2], threshold, BRW_CONDITIONAL_G);
+
+   sel = tc_SEL(tc, dst[0], src[0], src[1], BRW_CONDITIONAL_NONE);
+   sel->pred_ctrl = BRW_PREDICATE_NORMAL;
+}
+
+/**
+ * Translate TGSI DP2A in AoS form: a DP2 of src[0] and src[1] goes to a
+ * scratch register, whose X channel is then added to src[2].
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+aos_DP2A(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst,
+         struct toy_src *src)
+{
+   struct toy_dst dot = tc_alloc_tmp(tc);
+   struct toy_src dot_x;
+
+   assert(!"DP2A untested");
+
+   tc_DP2(tc, dot, src[0], src[1]);
+
+   /* read the X channel of the scratch for every destination channel */
+   dot_x = tsrc_swizzle1(tsrc_from(dot), TOY_SWIZZLE_X);
+   tc_ADD(tc, dst[0], dot_x, src[2]);
+}
+
+/**
+ * Translate TGSI CLAMP in AoS form with two SEL instructions: the first
+ * selects between src[0] and src[1] with condition GE, the second selects
+ * between src[2] and that intermediate result with condition L.
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+aos_CLAMP(struct toy_compiler *tc,
+          const struct tgsi_full_instruction *tgsi_inst,
+          struct toy_dst *dst,
+          struct toy_src *src)
+{
+   const struct toy_dst out = dst[0];
+
+   assert(!"CLAMP untested");
+
+   /* lower bound */
+   tc_SEL(tc, out, src[0], src[1], BRW_CONDITIONAL_GE);
+
+   /* upper bound, applied to the value just written */
+   tc_SEL(tc, out, src[2], tsrc_from(out), BRW_CONDITIONAL_L);
+}
+
+/**
+ * Translate TGSI XPD (cross product) in AoS form using swizzled operands:
+ * dst.xyz = src0.yzx * src1.zxy - src0.zxy * src1.yzx, and dst.w = 1.0.
+ */
+static void
+aos_XPD(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   struct toy_dst cross = tc_alloc_tmp(tc);
+   const struct toy_dst out = dst[0];
+   const struct toy_src a_zxy = tsrc_swizzle(src[0],
+         TOY_SWIZZLE_Z, TOY_SWIZZLE_X, TOY_SWIZZLE_Y, TOY_SWIZZLE_W);
+   const struct toy_src b_yzx = tsrc_swizzle(src[1],
+         TOY_SWIZZLE_Y, TOY_SWIZZLE_Z, TOY_SWIZZLE_X, TOY_SWIZZLE_W);
+   const struct toy_src a_yzx = tsrc_swizzle(src[0],
+         TOY_SWIZZLE_Y, TOY_SWIZZLE_Z, TOY_SWIZZLE_X, TOY_SWIZZLE_W);
+   const struct toy_src b_zxy = tsrc_swizzle(src[1],
+         TOY_SWIZZLE_Z, TOY_SWIZZLE_X, TOY_SWIZZLE_Y, TOY_SWIZZLE_W);
+
+   /* cross.xyz = src0.zxy * src1.yzx */
+   tc_MUL(tc, tdst_writemask(cross, TOY_WRITEMASK_XYZ), a_zxy, b_yzx);
+
+   /* dst.xyz = src0.yzx * src1.zxy - cross.xyz */
+   tc_MAC(tc, tdst_writemask(out, TOY_WRITEMASK_XYZ),
+         a_yzx, b_zxy, tsrc_negate(tsrc_from(cross)));
+
+   tc_MOV(tc, tdst_writemask(out, TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
+}
+
+/**
+ * Translate TGSI PK2H in AoS form.  The X and Y channels of src[0] are
+ * retyped as UD; Y is shifted left by 16 and ORed with X into dst[0].
+ *
+ * NOTE(review): no float-to-half conversion is visible here, the raw 32-bit
+ * values are combined as-is -- confirm this is intended.
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+aos_PK2H(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst,
+         struct toy_src *src)
+{
+   const struct toy_src low = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_X));
+   const struct toy_src high = tsrc_ud(tsrc_swizzle1(src[0], TOY_SWIZZLE_Y));
+   struct toy_dst shifted = tdst_ud(tc_alloc_tmp(tc));
+
+   assert(!"PK2H untested");
+
+   /* shifted = high << 16 */
+   tc_SHL(tc, shifted, high, tsrc_imm_ud(16));
+
+   /* dst = low | shifted */
+   tc_OR(tc, tdst_ud(dst[0]), low, tsrc_from(shifted));
+}
+
+/**
+ * Translate TGSI SFL in AoS form: dst[0] is set to 0.0 and the sources are
+ * not read.
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+aos_SFL(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_src zero = tsrc_imm_f(0.0f);
+
+   assert(!"SFL untested");
+
+   tc_MOV(tc, dst[0], zero);
+}
+
+/**
+ * Translate TGSI STR in AoS form: dst[0] is set to 1.0 and the sources are
+ * not read.
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+aos_STR(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_src one = tsrc_imm_f(1.0f);
+
+   assert(!"STR untested");
+
+   tc_MOV(tc, dst[0], one);
+}
+
+/**
+ * Translate TGSI UP2H in AoS form.  With both operands retyped as UD, the
+ * X and Z channels receive src[0] & 0xffff and the Y and W channels receive
+ * src[0] >> 16.
+ *
+ * NOTE(review): no half-to-float conversion is visible here, and src[0] is
+ * used without a swizzle -- confirm both are intended.
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+aos_UP2H(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst,
+         struct toy_src *src)
+{
+   const struct toy_dst out = tdst_ud(dst[0]);
+   const struct toy_src packed = tsrc_ud(src[0]);
+
+   assert(!"UP2H untested");
+
+   /* low 16 bits go to X and Z */
+   tc_AND(tc, tdst_writemask(out, TOY_WRITEMASK_XZ),
+         packed, tsrc_imm_ud(0xffff));
+
+   /* high 16 bits go to Y and W */
+   tc_SHR(tc, tdst_writemask(out, TOY_WRITEMASK_YW),
+         packed, tsrc_imm_ud(16));
+}
+
+/**
+ * Translate TGSI SCS in AoS form: dst.x = cos(src.x), dst.y = sin(src.x),
+ * dst.z = 0.0 and dst.w = 1.0.
+ *
+ * Both math instructions read the X channel of src[0].  The source is
+ * swizzled explicitly because an instruction restricted to the Y channel by
+ * its writemask would otherwise read src.y, which made the SIN result
+ * depend on the wrong channel.
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+aos_SCS(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   const struct toy_src angle = tsrc_swizzle1(src[0], TOY_SWIZZLE_X);
+
+   assert(!"SCS untested");
+
+   tc_add1(tc, TOY_OPCODE_COS,
+         tdst_writemask(dst[0], TOY_WRITEMASK_X), angle);
+
+   tc_add1(tc, TOY_OPCODE_SIN,
+         tdst_writemask(dst[0], TOY_WRITEMASK_Y), angle);
+
+   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_Z), tsrc_imm_f(0.0f));
+   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
+}
+
+/**
+ * Translate TGSI NRM in AoS form.  A DP3 of src[0] with itself is passed
+ * through INV, the XYZ channels of src[0] are multiplied by that value, and
+ * W is set to 1.0.
+ *
+ * NOTE(review): INV is applied directly to the DP3 result with no square
+ * root in between, so the scale looks like 1 / |src|^2 rather than
+ * 1 / |src| -- confirm against the intended NRM definition.
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+aos_NRM(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   struct toy_dst scale = tc_alloc_tmp(tc);
+   const struct toy_src vec = src[0];
+
+   assert(!"NRM untested");
+
+   /* scale = INV(dot3(vec, vec)) */
+   tc_DP3(tc, scale, vec, vec);
+   tc_INV(tc, scale, tsrc_from(scale));
+
+   tc_MUL(tc, tdst_writemask(dst[0], TOY_WRITEMASK_XYZ),
+         vec, tsrc_from(scale));
+   tc_MOV(tc, tdst_writemask(dst[0], TOY_WRITEMASK_W), tsrc_imm_f(1.0f));
+}
+
+/**
+ * Translate TGSI DIV in AoS form as a multiplication: INV of src[1] goes to
+ * a scratch register, which is then multiplied by src[0].
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+aos_DIV(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   struct toy_dst inverse = tc_alloc_tmp(tc);
+
+   assert(!"DIV untested");
+
+   /* inverse = INV(src[1]) */
+   tc_INV(tc, inverse, src[1]);
+
+   /* dst = src[0] * inverse */
+   tc_MUL(tc, dst[0], src[0], tsrc_from(inverse));
+}
+
+/**
+ * Translate TGSI BRK: emit a BRW_OPCODE_BREAK instruction with no operands.
+ * The TGSI instruction and the operand arrays are not used.
+ */
+static void
+aos_BRK(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   (void) tgsi_inst;
+   (void) dst;
+   (void) src;
+
+   tc_add0(tc, BRW_OPCODE_BREAK);
+}
+
+/**
+ * Translate TGSI CEIL in AoS form through RNDD: the negated source is
+ * rounded into a scratch register and the negated scratch is moved to
+ * dst[0], i.e. ceil(x) is computed as -RNDD(-x).
+ */
+static void
+aos_CEIL(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst,
+         struct toy_src *src)
+{
+   struct toy_dst rounded = tc_alloc_tmp(tc);
+   const struct toy_src neg_src = tsrc_negate(src[0]);
+
+   /* rounded = RNDD(-src) */
+   tc_RNDD(tc, rounded, neg_src);
+
+   /* dst = -rounded */
+   tc_MOV(tc, dst[0], tsrc_negate(tsrc_from(rounded)));
+}
+
+/**
+ * Translate TGSI SAD in AoS form: dst = |src[0] - src[1]| + src[2], using a
+ * scratch register for the difference.
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+aos_SAD(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst,
+        struct toy_src *src)
+{
+   struct toy_dst diff = tc_alloc_tmp(tc);
+
+   assert(!"SAD untested");
+
+   /* diff = src[0] - src[1] */
+   tc_ADD(tc, diff, src[0], tsrc_negate(src[1]));
+
+   /* dst = |diff| + src[2] */
+   tc_ADD(tc, dst[0], tsrc_absolute(tsrc_from(diff)), src[2]);
+}
+
+/**
+ * Translate TGSI CONT: emit a BRW_OPCODE_CONTINUE instruction with no
+ * operands.  The TGSI instruction and the operand arrays are not used.
+ */
+static void
+aos_CONT(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst,
+         struct toy_src *src)
+{
+   (void) tgsi_inst;
+   (void) dst;
+   (void) src;
+
+   tc_add0(tc, BRW_OPCODE_CONTINUE);
+}
+
+/**
+ * Translate TGSI BGNLOOP: emit a BRW_OPCODE_DO instruction and flag it as a
+ * marker.  The TGSI instruction and the operand arrays are not used.
+ */
+static void
+aos_BGNLOOP(struct toy_compiler *tc,
+            const struct tgsi_full_instruction *tgsi_inst,
+            struct toy_dst *dst,
+            struct toy_src *src)
+{
+   struct toy_inst *const loop_head = tc_add0(tc, BRW_OPCODE_DO);
+
+   /* DO only marks where the loop starts */
+   loop_head->marker = true;
+}
+
+/**
+ * Translate TGSI ENDLOOP: emit a BRW_OPCODE_WHILE instruction with no
+ * operands.  The TGSI instruction and the operand arrays are not used.
+ */
+static void
+aos_ENDLOOP(struct toy_compiler *tc,
+            const struct tgsi_full_instruction *tgsi_inst,
+            struct toy_dst *dst,
+            struct toy_src *src)
+{
+   (void) tgsi_inst;
+   (void) dst;
+   (void) src;
+
+   tc_add0(tc, BRW_OPCODE_WHILE);
+}
+
+/**
+ * Translate TGSI NRM4 in AoS form.  A DP4 of src[0] with itself is passed
+ * through INV, and the X channel of src[0] is multiplied by that value into
+ * every written channel of dst[0].
+ *
+ * NOTE(review): INV is applied without a square root, and only src.x is
+ * scaled (replicated) instead of each channel of src -- confirm against the
+ * intended NRM4 definition.
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+aos_NRM4(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst,
+         struct toy_src *src)
+{
+   struct toy_dst scale = tc_alloc_tmp(tc);
+   const struct toy_src vec = src[0];
+
+   assert(!"NRM4 untested");
+
+   /* scale = INV(dot4(vec, vec)) */
+   tc_DP4(tc, scale, vec, vec);
+   tc_INV(tc, scale, tsrc_from(scale));
+
+   /* dst = vec.xxxx * scale */
+   tc_MUL(tc, dst[0], tsrc_swizzle1(vec, TOY_SWIZZLE_X), tsrc_from(scale));
+}
+
+/**
+ * Fallback translator for TGSI opcodes with no AoS translation: log a
+ * warning naming the opcode and mark the compilation as failed.  No
+ * instruction is emitted.
+ */
+static void
+aos_unsupported(struct toy_compiler *tc,
+                const struct tgsi_full_instruction *tgsi_inst,
+                struct toy_dst *dst,
+                struct toy_src *src)
+{
+   const unsigned opcode = tgsi_inst->Instruction.Opcode;
+
+   ilo_warn("unsupported TGSI opcode: TGSI_OPCODE_%s\n",
+         tgsi_get_opcode_name(opcode));
+
+   tc_fail(tc, "unsupported TGSI instruction");
+}
+
+/**
+ * Dispatch table mapping a TGSI opcode to the function that translates it
+ * in AoS form.  It is indexed by tgsi_inst->Instruction.Opcode and is also
+ * consulted by the SoA passthrough and per-channel translators.
+ *
+ * Entries given as bare numbers have no TGSI_OPCODE_ name used here
+ * (presumably unassigned opcode values -- verify against the TGSI headers)
+ * and are routed to aos_unsupported.  Any index below TGSI_OPCODE_LAST that
+ * is not listed is left NULL by the designated initializers.
+ */
+static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = {
+   [TGSI_OPCODE_ARL]          = aos_simple,
+   [TGSI_OPCODE_MOV]          = aos_simple,
+   [TGSI_OPCODE_LIT]          = aos_LIT,
+   [TGSI_OPCODE_RCP]          = aos_simple,
+   [TGSI_OPCODE_RSQ]          = aos_simple,
+   [TGSI_OPCODE_EXP]          = aos_EXP,
+   [TGSI_OPCODE_LOG]          = aos_LOG,
+   [TGSI_OPCODE_MUL]          = aos_simple,
+   [TGSI_OPCODE_ADD]          = aos_simple,
+   [TGSI_OPCODE_DP3]          = aos_simple,
+   [TGSI_OPCODE_DP4]          = aos_simple,
+   [TGSI_OPCODE_DST]          = aos_DST,
+   [TGSI_OPCODE_MIN]          = aos_simple,
+   [TGSI_OPCODE_MAX]          = aos_simple,
+   [TGSI_OPCODE_SLT]          = aos_set_on_cond,
+   [TGSI_OPCODE_SGE]          = aos_set_on_cond,
+   [TGSI_OPCODE_MAD]          = aos_simple,
+   [TGSI_OPCODE_SUB]          = aos_simple,
+   [TGSI_OPCODE_LRP]          = aos_LRP,
+   [TGSI_OPCODE_CND]          = aos_CND,
+   [TGSI_OPCODE_SQRT]         = aos_simple,
+   [TGSI_OPCODE_DP2A]         = aos_DP2A,
+   [22]                       = aos_unsupported,
+   [23]                       = aos_unsupported,
+   [TGSI_OPCODE_FRC]          = aos_simple,
+   [TGSI_OPCODE_CLAMP]        = aos_CLAMP,
+   [TGSI_OPCODE_FLR]          = aos_simple,
+   [TGSI_OPCODE_ROUND]        = aos_simple,
+   [TGSI_OPCODE_EX2]          = aos_simple,
+   [TGSI_OPCODE_LG2]          = aos_simple,
+   [TGSI_OPCODE_POW]          = aos_simple,
+   [TGSI_OPCODE_XPD]          = aos_XPD,
+   [32]                       = aos_unsupported,
+   [TGSI_OPCODE_ABS]          = aos_simple,
+   [TGSI_OPCODE_RCC]          = aos_unsupported,
+   [TGSI_OPCODE_DPH]          = aos_simple,
+   [TGSI_OPCODE_COS]          = aos_simple,
+   [TGSI_OPCODE_DDX]          = aos_unsupported,
+   [TGSI_OPCODE_DDY]          = aos_unsupported,
+   [TGSI_OPCODE_KILP]         = aos_simple,
+   [TGSI_OPCODE_PK2H]         = aos_PK2H,
+   [TGSI_OPCODE_PK2US]        = aos_unsupported,
+   [TGSI_OPCODE_PK4B]         = aos_unsupported,
+   [TGSI_OPCODE_PK4UB]        = aos_unsupported,
+   [TGSI_OPCODE_RFL]          = aos_unsupported,
+   [TGSI_OPCODE_SEQ]          = aos_set_on_cond,
+   [TGSI_OPCODE_SFL]          = aos_SFL,
+   [TGSI_OPCODE_SGT]          = aos_set_on_cond,
+   [TGSI_OPCODE_SIN]          = aos_simple,
+   [TGSI_OPCODE_SLE]          = aos_set_on_cond,
+   [TGSI_OPCODE_SNE]          = aos_set_on_cond,
+   [TGSI_OPCODE_STR]          = aos_STR,
+   [TGSI_OPCODE_TEX]          = aos_tex,
+   [TGSI_OPCODE_TXD]          = aos_tex,
+   [TGSI_OPCODE_TXP]          = aos_tex,
+   [TGSI_OPCODE_UP2H]         = aos_UP2H,
+   [TGSI_OPCODE_UP2US]        = aos_unsupported,
+   [TGSI_OPCODE_UP4B]         = aos_unsupported,
+   [TGSI_OPCODE_UP4UB]        = aos_unsupported,
+   [TGSI_OPCODE_X2D]          = aos_unsupported,
+   [TGSI_OPCODE_ARA]          = aos_unsupported,
+   [TGSI_OPCODE_ARR]          = aos_simple,
+   [TGSI_OPCODE_BRA]          = aos_unsupported,
+   [TGSI_OPCODE_CAL]          = aos_unsupported,
+   [TGSI_OPCODE_RET]          = aos_unsupported,
+   [TGSI_OPCODE_SSG]          = aos_set_sign,
+   [TGSI_OPCODE_CMP]          = aos_compare,
+   [TGSI_OPCODE_SCS]          = aos_SCS,
+   [TGSI_OPCODE_TXB]          = aos_tex,
+   [TGSI_OPCODE_NRM]          = aos_NRM,
+   [TGSI_OPCODE_DIV]          = aos_DIV,
+   [TGSI_OPCODE_DP2]          = aos_simple,
+   [TGSI_OPCODE_TXL]          = aos_tex,
+   [TGSI_OPCODE_BRK]          = aos_BRK,
+   [TGSI_OPCODE_IF]           = aos_simple,
+   [TGSI_OPCODE_UIF]          = aos_simple,
+   [76]                       = aos_unsupported,
+   [TGSI_OPCODE_ELSE]         = aos_simple,
+   [TGSI_OPCODE_ENDIF]        = aos_simple,
+   [79]                       = aos_unsupported,
+   [80]                       = aos_unsupported,
+   [TGSI_OPCODE_PUSHA]        = aos_unsupported,
+   [TGSI_OPCODE_POPA]         = aos_unsupported,
+   [TGSI_OPCODE_CEIL]         = aos_CEIL,
+   [TGSI_OPCODE_I2F]          = aos_simple,
+   [TGSI_OPCODE_NOT]          = aos_simple,
+   [TGSI_OPCODE_TRUNC]        = aos_simple,
+   [TGSI_OPCODE_SHL]          = aos_simple,
+   [88]                       = aos_unsupported,
+   [TGSI_OPCODE_AND]          = aos_simple,
+   [TGSI_OPCODE_OR]           = aos_simple,
+   [TGSI_OPCODE_MOD]          = aos_simple,
+   [TGSI_OPCODE_XOR]          = aos_simple,
+   [TGSI_OPCODE_SAD]          = aos_SAD,
+   [TGSI_OPCODE_TXF]          = aos_tex,
+   [TGSI_OPCODE_TXQ]          = aos_tex,
+   [TGSI_OPCODE_CONT]         = aos_CONT,
+   [TGSI_OPCODE_EMIT]         = aos_simple,
+   [TGSI_OPCODE_ENDPRIM]      = aos_simple,
+   [TGSI_OPCODE_BGNLOOP]      = aos_BGNLOOP,
+   [TGSI_OPCODE_BGNSUB]       = aos_unsupported,
+   [TGSI_OPCODE_ENDLOOP]      = aos_ENDLOOP,
+   [TGSI_OPCODE_ENDSUB]       = aos_unsupported,
+   [TGSI_OPCODE_TXQ_LZ]       = aos_tex,
+   [104]                      = aos_unsupported,
+   [105]                      = aos_unsupported,
+   [106]                      = aos_unsupported,
+   [TGSI_OPCODE_NOP]          = aos_simple,
+   [108]                      = aos_unsupported,
+   [109]                      = aos_unsupported,
+   [110]                      = aos_unsupported,
+   [111]                      = aos_unsupported,
+   [TGSI_OPCODE_NRM4]         = aos_NRM4,
+   [TGSI_OPCODE_CALLNZ]       = aos_unsupported,
+   [TGSI_OPCODE_BREAKC]       = aos_unsupported,
+   [TGSI_OPCODE_KIL]          = aos_simple,
+   [TGSI_OPCODE_END]          = aos_simple,
+   [118]                      = aos_unsupported,
+   [TGSI_OPCODE_F2I]          = aos_simple,
+   [TGSI_OPCODE_IDIV]         = aos_simple,
+   [TGSI_OPCODE_IMAX]         = aos_simple,
+   [TGSI_OPCODE_IMIN]         = aos_simple,
+   [TGSI_OPCODE_INEG]         = aos_simple,
+   [TGSI_OPCODE_ISGE]         = aos_set_on_cond,
+   [TGSI_OPCODE_ISHR]         = aos_simple,
+   [TGSI_OPCODE_ISLT]         = aos_set_on_cond,
+   [TGSI_OPCODE_F2U]          = aos_simple,
+   [TGSI_OPCODE_U2F]          = aos_simple,
+   [TGSI_OPCODE_UADD]         = aos_simple,
+   [TGSI_OPCODE_UDIV]         = aos_simple,
+   [TGSI_OPCODE_UMAD]         = aos_simple,
+   [TGSI_OPCODE_UMAX]         = aos_simple,
+   [TGSI_OPCODE_UMIN]         = aos_simple,
+   [TGSI_OPCODE_UMOD]         = aos_simple,
+   [TGSI_OPCODE_UMUL]         = aos_simple,
+   [TGSI_OPCODE_USEQ]         = aos_set_on_cond,
+   [TGSI_OPCODE_USGE]         = aos_set_on_cond,
+   [TGSI_OPCODE_USHR]         = aos_simple,
+   [TGSI_OPCODE_USLT]         = aos_set_on_cond,
+   [TGSI_OPCODE_USNE]         = aos_set_on_cond,
+   [TGSI_OPCODE_SWITCH]       = aos_unsupported,
+   [TGSI_OPCODE_CASE]         = aos_unsupported,
+   [TGSI_OPCODE_DEFAULT]      = aos_unsupported,
+   [TGSI_OPCODE_ENDSWITCH]    = aos_unsupported,
+   [TGSI_OPCODE_SAMPLE]       = aos_sample,
+   [TGSI_OPCODE_SAMPLE_I]     = aos_sample,
+   [TGSI_OPCODE_SAMPLE_I_MS]  = aos_sample,
+   [TGSI_OPCODE_SAMPLE_B]     = aos_sample,
+   [TGSI_OPCODE_SAMPLE_C]     = aos_sample,
+   [TGSI_OPCODE_SAMPLE_C_LZ]  = aos_sample,
+   [TGSI_OPCODE_SAMPLE_D]     = aos_sample,
+   [TGSI_OPCODE_SAMPLE_L]     = aos_sample,
+   [TGSI_OPCODE_GATHER4]      = aos_sample,
+   [TGSI_OPCODE_SVIEWINFO]    = aos_sample,
+   [TGSI_OPCODE_SAMPLE_POS]   = aos_sample,
+   [TGSI_OPCODE_SAMPLE_INFO]  = aos_sample,
+   [TGSI_OPCODE_UARL]         = aos_simple,
+   [TGSI_OPCODE_UCMP]         = aos_compare,
+   [TGSI_OPCODE_IABS]         = aos_simple,
+   [TGSI_OPCODE_ISSG]         = aos_set_sign,
+   [TGSI_OPCODE_LOAD]         = aos_unsupported,
+   [TGSI_OPCODE_STORE]        = aos_unsupported,
+   [TGSI_OPCODE_MFENCE]       = aos_unsupported,
+   [TGSI_OPCODE_LFENCE]       = aos_unsupported,
+   [TGSI_OPCODE_SFENCE]       = aos_unsupported,
+   [TGSI_OPCODE_BARRIER]      = aos_unsupported,
+   [TGSI_OPCODE_ATOMUADD]     = aos_unsupported,
+   [TGSI_OPCODE_ATOMXCHG]     = aos_unsupported,
+   [TGSI_OPCODE_ATOMCAS]      = aos_unsupported,
+   [TGSI_OPCODE_ATOMAND]      = aos_unsupported,
+   [TGSI_OPCODE_ATOMOR]       = aos_unsupported,
+   [TGSI_OPCODE_ATOMXOR]      = aos_unsupported,
+   [TGSI_OPCODE_ATOMUMIN]     = aos_unsupported,
+   [TGSI_OPCODE_ATOMUMAX]     = aos_unsupported,
+   [TGSI_OPCODE_ATOMIMIN]     = aos_unsupported,
+   [TGSI_OPCODE_ATOMIMAX]     = aos_unsupported,
+   [TGSI_OPCODE_TEX2]         = aos_tex,
+   [TGSI_OPCODE_TXB2]         = aos_tex,
+   [TGSI_OPCODE_TXL2]         = aos_tex,
+};
+
+/**
+ * SoA translator that defers to the AoS translator for the same opcode,
+ * passing the operands through without transposing them.
+ */
+static void
+soa_passthrough(struct toy_compiler *tc,
+                const struct tgsi_full_instruction *tgsi_inst,
+                struct toy_dst *dst_,
+                struct toy_src *src_)
+{
+   const unsigned opcode = tgsi_inst->Instruction.Opcode;
+
+   aos_translate_table[opcode](tc, tgsi_inst, dst_, src_);
+}
+
+/**
+ * SoA translator for channel-wise opcodes.  Every destination and source
+ * operand is transposed into four per-channel operands, and the AoS
+ * translator for the opcode is then invoked once per channel.
+ */
+static void
+soa_per_channel(struct toy_compiler *tc,
+                const struct tgsi_full_instruction *tgsi_inst,
+                struct toy_dst *dst_,
+                struct toy_src *src_)
+{
+   const int num_dst = tgsi_inst->Instruction.NumDstRegs;
+   const int num_src = tgsi_inst->Instruction.NumSrcRegs;
+   const toy_tgsi_translate translate =
+      aos_translate_table[tgsi_inst->Instruction.Opcode];
+   struct toy_dst dst_chans[TGSI_FULL_MAX_DST_REGISTERS][4];
+   struct toy_src src_chans[TGSI_FULL_MAX_SRC_REGISTERS][4];
+   int reg, ch;
+
+   /* split every operand into its four channels */
+   for (reg = 0; reg < num_dst; reg++)
+      tdst_transpose(dst_[reg], dst_chans[reg]);
+   for (reg = 0; reg < num_src; reg++)
+      tsrc_transpose(src_[reg], src_chans[reg]);
+
+   /* one AoS translation per channel, in X, Y, Z, W order */
+   for (ch = 0; ch < 4; ch++) {
+      struct toy_dst ch_dst[TGSI_FULL_MAX_DST_REGISTERS];
+      struct toy_src ch_src[TGSI_FULL_MAX_SRC_REGISTERS];
+
+      for (reg = 0; reg < num_dst; reg++)
+         ch_dst[reg] = dst_chans[reg][ch];
+      for (reg = 0; reg < num_src; reg++)
+         ch_src[reg] = src_chans[reg][ch];
+
+      translate(tc, tgsi_inst, ch_dst, ch_src);
+   }
+}
+
+/**
+ * SoA translator for scalar opcodes that replicate their result (RCP, RSQ,
+ * SQRT, EX2, LG2, COS, SIN and POW).  The operation is computed once from
+ * the X channels of the sources into a scratch register, which is then
+ * moved to each of the four destination channels.
+ *
+ * The hardware opcode is looked up in aos_simple_opcode_map.  Any other
+ * TGSI opcode trips an assert and emits nothing.
+ */
+static void
+soa_scalar_replicate(struct toy_compiler *tc,
+                     const struct tgsi_full_instruction *tgsi_inst,
+                     struct toy_dst *dst_,
+                     struct toy_src *src_)
+{
+   const unsigned tgsi_opcode = tgsi_inst->Instruction.Opcode;
+   struct toy_dst out[4], result;
+   struct toy_src srcx[TGSI_FULL_MAX_SRC_REGISTERS];
+   int opcode, i;
+
+   assert(tgsi_inst->Instruction.NumDstRegs == 1);
+
+   tdst_transpose(dst_[0], out);
+
+   /* only the X channel of each source takes part */
+   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++) {
+      struct toy_src chans[4];
+
+      tsrc_transpose(src_[i], chans);
+      srcx[i] = chans[0];
+   }
+
+   result = tc_alloc_tmp(tc);
+
+   opcode = aos_simple_opcode_map[tgsi_opcode].opcode;
+   assert(opcode);
+
+   switch (tgsi_opcode) {
+   case TGSI_OPCODE_POW:
+      /* the only two-source opcode handled here */
+      tc_add2(tc, opcode, result, srcx[0], srcx[1]);
+      break;
+   case TGSI_OPCODE_RCP:
+   case TGSI_OPCODE_RSQ:
+   case TGSI_OPCODE_SQRT:
+   case TGSI_OPCODE_EX2:
+   case TGSI_OPCODE_LG2:
+   case TGSI_OPCODE_COS:
+   case TGSI_OPCODE_SIN:
+      tc_add1(tc, opcode, result, srcx[0]);
+      break;
+   default:
+      assert(!"invalid soa_scalar_replicate() call");
+      return;
+   }
+
+   /* replicate the result */
+   for (i = 0; i < 4; i++)
+      tc_MOV(tc, out[i], tsrc_from(result));
+}
+
+/**
+ * SoA translator for the dot product family (DP2, DP2A, DP3, DPH, DP4).
+ *
+ * The sum is accumulated in a scratch register starting from the highest
+ * channel involved and working down to X.  DP2A and DPH fold their extra
+ * addend (src2.x and src1.w respectively) into the first instruction by
+ * using MAC instead of MUL.  The result is then moved to all four
+ * destination channels.
+ *
+ * Any other TGSI opcode trips an assert and emits nothing.
+ */
+static void
+soa_dot_product(struct toy_compiler *tc,
+                const struct tgsi_full_instruction *tgsi_inst,
+                struct toy_dst *dst_,
+                struct toy_src *src_)
+{
+   struct toy_dst out[4], sum;
+   struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS][4];
+   const struct toy_src *addend = NULL;
+   int top, i;
+
+   tdst_transpose(dst_[0], out);
+   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
+      tsrc_transpose(src_[i], src[i]);
+
+   sum = tc_alloc_tmp(tc);
+
+   /* pick the highest channel of the product and the optional addend */
+   switch (tgsi_inst->Instruction.Opcode) {
+   case TGSI_OPCODE_DP2:
+      top = 1;
+      break;
+   case TGSI_OPCODE_DP2A:
+      top = 1;
+      addend = &src[2][0];
+      break;
+   case TGSI_OPCODE_DP3:
+      top = 2;
+      break;
+   case TGSI_OPCODE_DPH:
+      top = 2;
+      addend = &src[1][3];
+      break;
+   case TGSI_OPCODE_DP4:
+      top = 3;
+      break;
+   default:
+      assert(!"invalid soa_dot_product() call");
+      return;
+   }
+
+   /* first term, with the addend folded in when there is one */
+   if (addend)
+      tc_MAC(tc, sum, src[0][top], src[1][top], *addend);
+   else
+      tc_MUL(tc, sum, src[0][top], src[1][top]);
+
+   /* remaining terms, from the next lower channel down to X */
+   for (i = top - 1; i >= 0; i--)
+      tc_MAC(tc, sum, src[0][i], src[1][i], tsrc_from(sum));
+
+   for (i = 0; i < 4; i++)
+      tc_MOV(tc, out[i], tsrc_from(sum));
+}
+
+/**
+ * SoA translator for DDX and DDY: emit a single TOY_OPCODE_DDX instruction
+ * for TGSI_OPCODE_DDX, or TOY_OPCODE_DDY for anything else, on the
+ * untransposed operands.
+ */
+static void
+soa_partial_derivative(struct toy_compiler *tc,
+                       const struct tgsi_full_instruction *tgsi_inst,
+                       struct toy_dst *dst_,
+                       struct toy_src *src_)
+{
+   const int opcode =
+      (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_DDX) ?
+      TOY_OPCODE_DDX : TOY_OPCODE_DDY;
+
+   tc_add1(tc, opcode, dst_[0], src_[0]);
+}
+
+/**
+ * SoA translator for IF and UIF.  The first channel of the transposed
+ * condition is compared for inequality against zero: a float zero for IF
+ * and an integer zero otherwise.  The source must use a single-channel
+ * swizzle.
+ */
+static void
+soa_if(struct toy_compiler *tc,
+       const struct tgsi_full_instruction *tgsi_inst,
+       struct toy_dst *dst_,
+       struct toy_src *src_)
+{
+   struct toy_src cond[4];
+   struct toy_src zero;
+
+   assert(tsrc_is_swizzle1(src_[0]));
+   tsrc_transpose(src_[0], cond);
+
+   /* the immediate's type follows the flavor of the IF */
+   zero = (tgsi_inst->Instruction.Opcode == TGSI_OPCODE_IF) ?
+      tsrc_imm_f(0.0f) : tsrc_imm_d(0);
+
+   tc_IF(tc, tdst_null(), cond[0], zero, BRW_CONDITIONAL_NEQ);
+}
+
+/**
+ * SoA translator for LIT.  The unconditional results are written first:
+ * dst.x = 1.0, dst.y = src.x, dst.z = POW(src.y, src.w) and dst.w = 1.0.
+ * A CMP of src.x < 0.0 is then followed by two MOVs of 0.0 into dst.y and
+ * dst.z whose pred_ctrl is set to BRW_PREDICATE_NORMAL.
+ */
+static void
+soa_LIT(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst_,
+        struct toy_src *src_)
+{
+   const struct toy_src zero = tsrc_imm_f(0.0f);
+   const struct toy_src one = tsrc_imm_f(1.0f);
+   struct toy_dst out[4];
+   struct toy_src in[4];
+   struct toy_inst *fixup;
+
+   tdst_transpose(dst_[0], out);
+   tsrc_transpose(src_[0], in);
+
+   tc_MOV(tc, out[0], one);
+   tc_MOV(tc, out[1], in[0]);
+   tc_POW(tc, out[2], in[1], in[3]);
+   tc_MOV(tc, out[3], one);
+
+   /*
+    * POW is calculated first because math with pred_ctrl is broken here.
+    * But, why?
+    */
+   tc_CMP(tc, tdst_null(), in[0], zero, BRW_CONDITIONAL_L);
+
+   /* overwrite Y and Z with 0.0 under predication */
+   fixup = tc_MOV(tc, out[1], zero);
+   fixup->pred_ctrl = BRW_PREDICATE_NORMAL;
+
+   fixup = tc_MOV(tc, out[2], zero);
+   fixup->pred_ctrl = BRW_PREDICATE_NORMAL;
+}
+
+/**
+ * SoA translator for EXP.  Only the X channel of the source is read.
+ * When the X destination is not null, it receives a float built by hand:
+ * RNDD(src.x) plus 127, shifted left by 23 bits.  Y receives FRC(src.x),
+ * Z receives EXP(src.x) and W is set to 1.0.
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+soa_EXP(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst_,
+        struct toy_src *src_)
+{
+   struct toy_dst out[4];
+   struct toy_src in[4];
+
+   assert(!"SoA EXP untested");
+
+   tdst_transpose(dst_[0], out);
+   tsrc_transpose(src_[0], in);
+
+   if (!tdst_is_null(out[0])) {
+      /* integer-typed scratch holding the exponent */
+      struct toy_dst exponent = tdst_d(tc_alloc_tmp(tc));
+
+      tc_RNDD(tc, exponent, in[0]);
+
+      /* add the bias of 127, then move it into the exponent field */
+      tc_ADD(tc, exponent, tsrc_from(exponent), tsrc_imm_d(127));
+      tc_SHL(tc, tdst_d(out[0]), tsrc_from(exponent), tsrc_imm_d(23));
+   }
+
+   tc_FRC(tc, out[1], in[0]);
+   tc_EXP(tc, out[2], in[0]);
+   tc_MOV(tc, out[3], tsrc_imm_f(1.0f));
+}
+
+/**
+ * SoA translator for LOG.  Only the X channel of the source is read.
+ * X receives the unbiased exponent, Y the mantissa bits combined with a
+ * biased exponent of 127, Z the result of the LOG opcode, and W is set to
+ * 1.0.  The bit manipulation is skipped when neither X nor Y is written.
+ *
+ * The mantissa OR writes to a D-typed view of the Y destination, as
+ * aos_LOG and soa_EXP do for their bit-level results: the value is a raw
+ * bit pattern and must not go through an integer-to-float conversion.
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+soa_LOG(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst_,
+        struct toy_src *src_)
+{
+   struct toy_dst dst0[4];
+   struct toy_src src0[4];
+
+   assert(!"SoA LOG untested");
+
+   tdst_transpose(dst_[0], dst0);
+   tsrc_transpose(src_[0], src0);
+
+   if (dst_[0].writemask & TOY_WRITEMASK_XY) {
+      struct toy_dst tmp = tdst_d(tc_alloc_tmp(tc));
+
+      /* exponent: shift out the mantissa, then subtract the bias */
+      tc_SHR(tc, tmp, tsrc_absolute(tsrc_d(src0[0])), tsrc_imm_d(23));
+      tc_ADD(tc, dst0[0], tsrc_from(tmp), tsrc_imm_d(-127));
+
+      /* mantissa: keep the low 23 bits and OR in an exponent field of 127 */
+      tc_AND(tc, tmp, tsrc_d(src0[0]), tsrc_imm_d((1 << 23) - 1));
+      tc_OR(tc, tdst_d(dst0[1]), tsrc_from(tmp), tsrc_imm_d(127 << 23));
+   }
+
+   tc_LOG(tc, dst0[2], src0[0]);
+   tc_MOV(tc, dst0[3], tsrc_imm_f(1.0f));
+}
+
+/**
+ * SoA translator for DST: dst.x = 1.0, dst.y = src0.y * src1.y,
+ * dst.z = src0.z and dst.w = src1.w.
+ */
+static void
+soa_DST(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst_,
+        struct toy_src *src_)
+{
+   struct toy_dst out[4];
+   struct toy_src a[4], b[4];
+
+   tdst_transpose(dst_[0], out);
+   tsrc_transpose(src_[0], a);
+   tsrc_transpose(src_[1], b);
+
+   tc_MOV(tc, out[0], tsrc_imm_f(1.0f));
+   tc_MUL(tc, out[1], a[1], b[1]);
+   tc_MOV(tc, out[2], a[2]);
+   tc_MOV(tc, out[3], b[3]);
+}
+
+/**
+ * SoA translator for XPD (cross product):
+ *
+ *   dst.x = src0.y * src1.z - src1.y * src0.z
+ *   dst.y = src0.z * src1.x - src1.z * src0.x
+ *   dst.z = src0.x * src1.y - src1.x * src0.y
+ *   dst.w = 1.0
+ *
+ * Each of X, Y and Z takes a MUL for the subtracted product followed by a
+ * MAC that adds the other product to its negation.
+ */
+static void
+soa_XPD(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst_,
+        struct toy_src *src_)
+{
+   struct toy_dst out[4];
+   struct toy_src a[4], b[4];
+   int ch;
+
+   tdst_transpose(dst_[0], out);
+   tsrc_transpose(src_[0], a);
+   tsrc_transpose(src_[1], b);
+
+   for (ch = 0; ch < 3; ch++) {
+      /* the two other channels, in cyclic order after ch */
+      const int next = (ch + 1) % 3;
+      const int prev = (ch + 2) % 3;
+
+      /* out[ch] = a[next] * b[prev] - a[prev] * b[next] */
+      tc_MUL(tc, out[ch], a[prev], b[next]);
+      tc_MAC(tc, out[ch], a[next], b[prev],
+            tsrc_negate(tsrc_from(out[ch])));
+   }
+
+   tc_MOV(tc, out[3], tsrc_imm_f(1.0f));
+}
+
+/**
+ * SoA translator for PK2H.  The Y channel of the source is shifted left by
+ * 16 and ORed with the X channel in a UD scratch register, which is then
+ * moved to all four destination channels.
+ *
+ * Every operand is retyped as UD, as aos_PK2H and soa_UP2H do, so that the
+ * shift and OR act on raw bits and the final MOVs copy the packed value
+ * without a type conversion.
+ *
+ * NOTE(review): no float-to-half conversion is visible here, the raw 32-bit
+ * values are combined as-is -- confirm this is intended.
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+soa_PK2H(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst_,
+         struct toy_src *src_)
+{
+   struct toy_dst tmp = tdst_ud(tc_alloc_tmp(tc));
+   struct toy_dst dst0[4];
+   struct toy_src src0[4];
+   int i;
+
+   assert(!"SoA PK2H untested");
+
+   tdst_transpose(dst_[0], dst0);
+   tsrc_transpose(src_[0], src0);
+
+   /* tmp = (src.y << 16) | src.x, on the raw bits */
+   tc_SHL(tc, tmp, tsrc_ud(src0[1]), tsrc_imm_ud(16));
+   tc_OR(tc, tmp, tsrc_ud(src0[0]), tsrc_from(tmp));
+
+   /* replicate the packed bits */
+   for (i = 0; i < 4; i++)
+      tc_MOV(tc, tdst_ud(dst0[i]), tsrc_from(tmp));
+}
+
+/**
+ * SoA translator for UP2H.  With all operands retyped as UD, the X and Z
+ * destination channels receive the matching source channel ANDed with
+ * 0xffff, and the Y and W channels receive the matching source channel
+ * shifted right by 16.
+ *
+ * NOTE(review): each destination channel reads its own source channel and
+ * no half-to-float conversion is visible -- confirm both are intended.
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+soa_UP2H(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst_,
+         struct toy_src *src_)
+{
+   struct toy_dst out[4];
+   struct toy_src in[4];
+   int ch;
+
+   assert(!"SoA UP2H untested");
+
+   tdst_transpose(dst_[0], out);
+   tsrc_transpose(src_[0], in);
+
+   for (ch = 0; ch < 4; ch++) {
+      const struct toy_dst d = tdst_ud(out[ch]);
+      const struct toy_src s = tsrc_ud(in[ch]);
+
+      /* even channels take the low half, odd channels the high half */
+      if (ch % 2 == 0)
+         tc_AND(tc, d, s, tsrc_imm_ud(0xffff));
+      else
+         tc_SHR(tc, d, s, tsrc_imm_ud(16));
+   }
+}
+
+/**
+ * SoA translator for SCS: dst.x = COS(src.x), dst.y = SIN(src.x),
+ * dst.z = 0.0 and dst.w = 1.0.
+ */
+static void
+soa_SCS(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst_,
+        struct toy_src *src_)
+{
+   struct toy_dst out[4];
+   struct toy_src in[4];
+   struct toy_src angle;
+
+   tdst_transpose(dst_[0], out);
+   tsrc_transpose(src_[0], in);
+
+   /* both math instructions read the X channel */
+   angle = in[0];
+
+   tc_add1(tc, TOY_OPCODE_COS, out[0], angle);
+   tc_add1(tc, TOY_OPCODE_SIN, out[1], angle);
+   tc_MOV(tc, out[2], tsrc_imm_f(0.0f));
+   tc_MOV(tc, out[3], tsrc_imm_f(1.0f));
+}
+
+/**
+ * SoA translator for NRM.  The sum of squares of the X, Y and Z channels is
+ * accumulated in a scratch register and passed through INV; each of X, Y
+ * and Z is then multiplied by that value, and W is set to 1.0.
+ *
+ * NOTE(review): INV is applied directly to the sum of squares with no
+ * square root in between, so the scale looks like 1 / |src|^2 rather than
+ * 1 / |src| -- confirm against the intended NRM definition.
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+soa_NRM(struct toy_compiler *tc,
+        const struct tgsi_full_instruction *tgsi_inst,
+        struct toy_dst *dst_,
+        struct toy_src *src_)
+{
+   const struct toy_dst scale = tc_alloc_tmp(tc);
+   struct toy_dst out[4];
+   struct toy_src in[4];
+   int ch;
+
+   assert(!"SoA NRM untested");
+
+   tdst_transpose(dst_[0], out);
+   tsrc_transpose(src_[0], in);
+
+   /* scale = z * z + y * y + x * x, accumulated from Z down to X */
+   tc_MUL(tc, scale, in[2], in[2]);
+   for (ch = 1; ch >= 0; ch--)
+      tc_MAC(tc, scale, in[ch], in[ch], tsrc_from(scale));
+
+   tc_INV(tc, scale, tsrc_from(scale));
+
+   for (ch = 0; ch < 3; ch++)
+      tc_MUL(tc, out[ch], in[ch], tsrc_from(scale));
+   tc_MOV(tc, out[3], tsrc_imm_f(1.0f));
+}
+
+/**
+ * SoA translator for NRM4.  The sum of squares of all four channels is
+ * accumulated in a scratch register and passed through INV; the X channel
+ * of the source multiplied by that value is written to all four
+ * destination channels.
+ *
+ * NOTE(review): INV is applied without a square root, and only src.x is
+ * scaled (replicated) instead of each channel of src -- confirm against the
+ * intended NRM4 definition.
+ *
+ * Marked untested by the author: the assert fires in debug builds.
+ */
+static void
+soa_NRM4(struct toy_compiler *tc,
+         const struct tgsi_full_instruction *tgsi_inst,
+         struct toy_dst *dst_,
+         struct toy_src *src_)
+{
+   const struct toy_dst scale = tc_alloc_tmp(tc);
+   struct toy_dst out[4];
+   struct toy_src in[4];
+   int ch;
+
+   assert(!"SoA NRM4 untested");
+
+   tdst_transpose(dst_[0], out);
+   tsrc_transpose(src_[0], in);
+
+   /* scale = w * w + z * z + y * y + x * x, accumulated from W down to X */
+   tc_MUL(tc, scale, in[3], in[3]);
+   for (ch = 2; ch >= 0; ch--)
+      tc_MAC(tc, scale, in[ch], in[ch], tsrc_from(scale));
+
+   tc_INV(tc, scale, tsrc_from(scale));
+
+   /* every channel gets src.x * scale */
+   for (ch = 0; ch < 4; ch++)
+      tc_MUL(tc, out[ch], in[0], tsrc_from(scale));
+}
+
+/**
+ * Fallback translator for TGSI opcodes with no SoA translation: log a
+ * warning with the opcode mnemonic and mark the compilation as failed.
+ * No instruction is emitted.
+ */
+static void
+soa_unsupported(struct toy_compiler *tc,
+                const struct tgsi_full_instruction *tgsi_inst,
+                struct toy_dst *dst_,
+                struct toy_src *src_)
+{
+   const unsigned opcode = tgsi_inst->Instruction.Opcode;
+   const struct tgsi_opcode_info *const info = tgsi_get_opcode_info(opcode);
+
+   ilo_warn("unsupported TGSI opcode in SoA form: TGSI_OPCODE_%s\n",
+         info->mnemonic);
+
+   tc_fail(tc, "unsupported TGSI instruction in SoA form");
+}
+
+static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = {
+   [TGSI_OPCODE_ARL]          = soa_per_channel,
+   [TGSI_OPCODE_MOV]          = soa_per_channel,
+   [TGSI_OPCODE_LIT]          = soa_LIT,
+   [TGSI_OPCODE_RCP]          = soa_scalar_replicate,
+   [TGSI_OPCODE_RSQ]          = soa_scalar_replicate,
+   [TGSI_OPCODE_EXP]          = soa_EXP,
+   [TGSI_OPCODE_LOG]          = soa_LOG,
+   [TGSI_OPCODE_MUL]          = soa_per_channel,
+   [TGSI_OPCODE_ADD]          = soa_per_channel,
+   [TGSI_OPCODE_DP3]          = soa_dot_product,
+   [TGSI_OPCODE_DP4]          = soa_dot_product,
+   [TGSI_OPCODE_DST]          = soa_DST,
+   [TGSI_OPCODE_MIN]          = soa_per_channel,
+   [TGSI_OPCODE_MAX]          = soa_per_channel,
+   [TGSI_OPCODE_SLT]          = soa_per_channel,
+   [TGSI_OPCODE_SGE]          = soa_per_channel,
+   [TGSI_OPCODE_MAD]          = soa_per_channel,
+   [TGSI_OPCODE_SUB]          = soa_per_channel,
+   [TGSI_OPCODE_LRP]          = soa_per_channel,
+   [TGSI_OPCODE_CND]          = soa_per_channel,
+   [TGSI_OPCODE_SQRT]         = soa_scalar_replicate,
+   [TGSI_OPCODE_DP2A]         = soa_dot_product,
+   [22]                       = soa_unsupported,
+   [23]                       = soa_unsupported,
+   [TGSI_OPCODE_FRC]          = soa_per_channel,
+   [TGSI_OPCODE_CLAMP]        = soa_per_channel,
+   [TGSI_OPCODE_FLR]          = soa_per_channel,
+   [TGSI_OPCODE_ROUND]        = soa_per_channel,
+   [TGSI_OPCODE_EX2]          = soa_scalar_replicate,
+   [TGSI_OPCODE_LG2]          = soa_scalar_replicate,
+   [TGSI_OPCODE_POW]          = soa_scalar_replicate,
+   [TGSI_OPCODE_XPD]          = soa_XPD,
+   [32]                       = soa_unsupported,
+   [TGSI_OPCODE_ABS]          = soa_per_channel,
+   [TGSI_OPCODE_RCC]          = soa_unsupported,
+   [TGSI_OPCODE_DPH]          = soa_dot_product,
+   [TGSI_OPCODE_COS]          = soa_scalar_replicate,
+   [TGSI_OPCODE_DDX]          = soa_partial_derivative,
+   [TGSI_OPCODE_DDY]          = soa_partial_derivative,
+   [TGSI_OPCODE_KILP]         = soa_passthrough,
+   [TGSI_OPCODE_PK2H]         = soa_PK2H,
+   [TGSI_OPCODE_PK2US]        = soa_unsupported,
+   [TGSI_OPCODE_PK4B]         = soa_unsupported,
+   [TGSI_OPCODE_PK4UB]        = soa_unsupported,
+   [TGSI_OPCODE_RFL]          = soa_unsupported,
+   [TGSI_OPCODE_SEQ]          = soa_per_channel,
+   [TGSI_OPCODE_SFL]          = soa_per_channel,
+   [TGSI_OPCODE_SGT]          = soa_per_channel,
+   [TGSI_OPCODE_SIN]          = soa_scalar_replicate,
+   [TGSI_OPCODE_SLE]          = soa_per_channel,
+   [TGSI_OPCODE_SNE]          = soa_per_channel,
+   [TGSI_OPCODE_STR]          = soa_per_channel,
+   [TGSI_OPCODE_TEX]          = soa_passthrough,
+   [TGSI_OPCODE_TXD]          = soa_passthrough,
+   [TGSI_OPCODE_TXP]          = soa_passthrough,
+   [TGSI_OPCODE_UP2H]         = soa_UP2H,
+   [TGSI_OPCODE_UP2US]        = soa_unsupported,
+   [TGSI_OPCODE_UP4B]         = soa_unsupported,
+   [TGSI_OPCODE_UP4UB]        = soa_unsupported,
+   [TGSI_OPCODE_X2D]          = soa_unsupported,
+   [TGSI_OPCODE_ARA]          = soa_unsupported,
+   [TGSI_OPCODE_ARR]          = soa_per_channel,
+   [TGSI_OPCODE_BRA]          = soa_unsupported,
+   [TGSI_OPCODE_CAL]          = soa_unsupported,
+   [TGSI_OPCODE_RET]          = soa_unsupported,
+   [TGSI_OPCODE_SSG]          = soa_per_channel,
+   [TGSI_OPCODE_CMP]          = soa_per_channel,
+   [TGSI_OPCODE_SCS]          = soa_SCS,
+   [TGSI_OPCODE_TXB]          = soa_passthrough,
+   [TGSI_OPCODE_NRM]          = soa_NRM,
+   [TGSI_OPCODE_DIV]          = soa_per_channel,
+   [TGSI_OPCODE_DP2]          = soa_dot_product,
+   [TGSI_OPCODE_TXL]          = soa_passthrough,
+   [TGSI_OPCODE_BRK]          = soa_passthrough,
+   [TGSI_OPCODE_IF]           = soa_if,
+   [TGSI_OPCODE_UIF]          = soa_if,
+   [76]                       = soa_unsupported,
+   [TGSI_OPCODE_ELSE]         = soa_passthrough,
+   [TGSI_OPCODE_ENDIF]        = soa_passthrough,
+   [79]                       = soa_unsupported,
+   [80]                       = soa_unsupported,
+   [TGSI_OPCODE_PUSHA]        = soa_unsupported,
+   [TGSI_OPCODE_POPA]         = soa_unsupported,
+   [TGSI_OPCODE_CEIL]         = soa_per_channel,
+   [TGSI_OPCODE_I2F]          = soa_per_channel,
+   [TGSI_OPCODE_NOT]          = soa_per_channel,
+   [TGSI_OPCODE_TRUNC]        = soa_per_channel,
+   [TGSI_OPCODE_SHL]          = soa_per_channel,
+   [88]                       = soa_unsupported,
+   [TGSI_OPCODE_AND]          = soa_per_channel,
+   [TGSI_OPCODE_OR]           = soa_per_channel,
+   [TGSI_OPCODE_MOD]          = soa_per_channel,
+   [TGSI_OPCODE_XOR]          = soa_per_channel,
+   [TGSI_OPCODE_SAD]          = soa_per_channel,
+   [TGSI_OPCODE_TXF]          = soa_passthrough,
+   [TGSI_OPCODE_TXQ]          = soa_passthrough,
+   [TGSI_OPCODE_CONT]         = soa_passthrough,
+   [TGSI_OPCODE_EMIT]         = soa_unsupported,
+   [TGSI_OPCODE_ENDPRIM]      = soa_unsupported,
+   [TGSI_OPCODE_BGNLOOP]      = soa_passthrough,
+   [TGSI_OPCODE_BGNSUB]       = soa_unsupported,
+   [TGSI_OPCODE_ENDLOOP]      = soa_passthrough,
+   [TGSI_OPCODE_ENDSUB]       = soa_unsupported,
+   [TGSI_OPCODE_TXQ_LZ]       = soa_passthrough,
+   [104]                      = soa_unsupported,
+   [105]                      = soa_unsupported,
+   [106]                      = soa_unsupported,
+   [TGSI_OPCODE_NOP]          = soa_passthrough,
+   [108]                      = soa_unsupported,
+   [109]                      = soa_unsupported,
+   [110]                      = soa_unsupported,
+   [111]                      = soa_unsupported,
+   [TGSI_OPCODE_NRM4]         = soa_NRM4,
+   [TGSI_OPCODE_CALLNZ]       = soa_unsupported,
+   [TGSI_OPCODE_BREAKC]       = soa_unsupported,
+   [TGSI_OPCODE_KIL]          = soa_passthrough,
+   [TGSI_OPCODE_END]          = soa_passthrough,
+   [118]                      = soa_unsupported,
+   [TGSI_OPCODE_F2I]          = soa_per_channel,
+   [TGSI_OPCODE_IDIV]         = soa_per_channel,
+   [TGSI_OPCODE_IMAX]         = soa_per_channel,
+   [TGSI_OPCODE_IMIN]         = soa_per_channel,
+   [TGSI_OPCODE_INEG]         = soa_per_channel,
+   [TGSI_OPCODE_ISGE]         = soa_per_channel,
+   [TGSI_OPCODE_ISHR]         = soa_per_channel,
+   [TGSI_OPCODE_ISLT]         = soa_per_channel,
+   [TGSI_OPCODE_F2U]          = soa_per_channel,
+   [TGSI_OPCODE_U2F]          = soa_per_channel,
+   [TGSI_OPCODE_UADD]         = soa_per_channel,
+   [TGSI_OPCODE_UDIV]         = soa_per_channel,
+   [TGSI_OPCODE_UMAD]         = soa_per_channel,
+   [TGSI_OPCODE_UMAX]         = soa_per_channel,
+   [TGSI_OPCODE_UMIN]         = soa_per_channel,
+   [TGSI_OPCODE_UMOD]         = soa_per_channel,
+   [TGSI_OPCODE_UMUL]         = soa_per_channel,
+   [TGSI_OPCODE_USEQ]         = soa_per_channel,
+   [TGSI_OPCODE_USGE]         = soa_per_channel,
+   [TGSI_OPCODE_USHR]         = soa_per_channel,
+   [TGSI_OPCODE_USLT]         = soa_per_channel,
+   [TGSI_OPCODE_USNE]         = soa_per_channel,
+   [TGSI_OPCODE_SWITCH]       = soa_unsupported,
+   [TGSI_OPCODE_CASE]         = soa_unsupported,
+   [TGSI_OPCODE_DEFAULT]      = soa_unsupported,
+   [TGSI_OPCODE_ENDSWITCH]    = soa_unsupported,
+   [TGSI_OPCODE_SAMPLE]       = soa_passthrough,
+   [TGSI_OPCODE_SAMPLE_I]     = soa_passthrough,
+   [TGSI_OPCODE_SAMPLE_I_MS]  = soa_passthrough,
+   [TGSI_OPCODE_SAMPLE_B]     = soa_passthrough,
+   [TGSI_OPCODE_SAMPLE_C]     = soa_passthrough,
+   [TGSI_OPCODE_SAMPLE_C_LZ]  = soa_passthrough,
+   [TGSI_OPCODE_SAMPLE_D]     = soa_passthrough,
+   [TGSI_OPCODE_SAMPLE_L]     = soa_passthrough,
+   [TGSI_OPCODE_GATHER4]      = soa_passthrough,
+   [TGSI_OPCODE_SVIEWINFO]    = soa_passthrough,
+   [TGSI_OPCODE_SAMPLE_POS]   = soa_passthrough,
+   [TGSI_OPCODE_SAMPLE_INFO]  = soa_passthrough,
+   [TGSI_OPCODE_UARL]         = soa_per_channel,
+   [TGSI_OPCODE_UCMP]         = soa_per_channel,
+   [TGSI_OPCODE_IABS]         = soa_per_channel,
+   [TGSI_OPCODE_ISSG]         = soa_per_channel,
+   [TGSI_OPCODE_LOAD]         = soa_unsupported,
+   [TGSI_OPCODE_STORE]        = soa_unsupported,
+   [TGSI_OPCODE_MFENCE]       = soa_unsupported,
+   [TGSI_OPCODE_LFENCE]       = soa_unsupported,
+   [TGSI_OPCODE_SFENCE]       = soa_unsupported,
+   [TGSI_OPCODE_BARRIER]      = soa_unsupported,
+   [TGSI_OPCODE_ATOMUADD]     = soa_unsupported,
+   [TGSI_OPCODE_ATOMXCHG]     = soa_unsupported,
+   [TGSI_OPCODE_ATOMCAS]      = soa_unsupported,
+   [TGSI_OPCODE_ATOMAND]      = soa_unsupported,
+   [TGSI_OPCODE_ATOMOR]       = soa_unsupported,
+   [TGSI_OPCODE_ATOMXOR]      = soa_unsupported,
+   [TGSI_OPCODE_ATOMUMIN]     = soa_unsupported,
+   [TGSI_OPCODE_ATOMUMAX]     = soa_unsupported,
+   [TGSI_OPCODE_ATOMIMIN]     = soa_unsupported,
+   [TGSI_OPCODE_ATOMIMAX]     = soa_unsupported,
+   [TGSI_OPCODE_TEX2]         = soa_unsupported,
+   [TGSI_OPCODE_TXB2]         = soa_unsupported,
+   [TGSI_OPCODE_TXL2]         = soa_unsupported,
+};
+
+/**
+ * Return true if the destination is addressed indirectly, either through its
+ * register index or through its dimension index.
+ */
+static bool
+ra_dst_is_indirect(const struct tgsi_full_dst_register *d)
+{
+   if (d->Register.Indirect)
+      return true;
+
+   /* the dimension token is only meaningful when Register.Dimension is set */
+   return (d->Register.Dimension && d->Dimension.Indirect);
+}
+
+/**
+ * Return the register index of a destination.  The destination must not be
+ * indirectly indexed.
+ */
+static int
+ra_dst_index(const struct tgsi_full_dst_register *d)
+{
+   const int index = d->Register.Index;
+
+   assert(!d->Register.Indirect);
+
+   return index;
+}
+
+/**
+ * Return the dimension index of a destination, or 0 when the destination has
+ * no dimension.  The dimension must not be indirectly indexed.
+ */
+static int
+ra_dst_dimension(const struct tgsi_full_dst_register *d)
+{
+   if (!d->Register.Dimension)
+      return 0;
+
+   assert(!d->Dimension.Indirect);
+
+   return d->Dimension.Index;
+}
+
+/**
+ * Return true if the source is addressed indirectly, either through its
+ * register index or through its dimension index.
+ */
+static bool
+ra_is_src_indirect(const struct tgsi_full_src_register *s)
+{
+   if (s->Register.Indirect)
+      return true;
+
+   /* the dimension token is only meaningful when Register.Dimension is set */
+   return (s->Register.Dimension && s->Dimension.Indirect);
+}
+
+/**
+ * Return the register index of a source.  The source must not be indirectly
+ * indexed.
+ */
+static int
+ra_src_index(const struct tgsi_full_src_register *s)
+{
+   const int index = s->Register.Index;
+
+   assert(!s->Register.Indirect);
+
+   return index;
+}
+
+/**
+ * Return the dimension index of a source, or 0 when the source has no
+ * dimension.  The dimension must not be indirectly indexed.
+ */
+static int
+ra_src_dimension(const struct tgsi_full_src_register *s)
+{
+   if (!s->Register.Dimension)
+      return 0;
+
+   assert(!s->Dimension.Indirect);
+
+   return s->Dimension.Index;
+}
+
+/**
+ * Infer the type of either the sources or the destination of an opcode.
+ *
+ * When \p is_dst is true, opcodes whose destination type differs from their
+ * source type are looked up first; every other opcode shares one type
+ * between its sources and its destination.  Opcodes that are not listed are
+ * treated as float.
+ */
+static enum toy_type
+ra_infer_opcode_type(int tgsi_opcode, bool is_dst)
+{
+   /* destination-only overrides */
+   if (is_dst) {
+      switch (tgsi_opcode) {
+      case TGSI_OPCODE_I2F:
+      case TGSI_OPCODE_U2F:
+      case TGSI_OPCODE_TXF:
+      case TGSI_OPCODE_TXQ:
+      case TGSI_OPCODE_TXQ_LZ:
+      case TGSI_OPCODE_SAMPLE_I:
+      case TGSI_OPCODE_SAMPLE_I_MS:
+      case TGSI_OPCODE_SAMPLE_POS:
+         return TOY_TYPE_F;
+      case TGSI_OPCODE_ARL:
+      case TGSI_OPCODE_ARR:
+      case TGSI_OPCODE_F2I:
+         return TOY_TYPE_D;
+      case TGSI_OPCODE_F2U:
+         return TOY_TYPE_UD;
+      default:
+         /* no override; use the common table below */
+         break;
+      }
+   }
+
+   /* type shared by the sources and (unless overridden) the destination */
+   switch (tgsi_opcode) {
+   case TGSI_OPCODE_UIF:
+   case TGSI_OPCODE_I2F:
+   case TGSI_OPCODE_NOT:
+   case TGSI_OPCODE_AND:
+   case TGSI_OPCODE_OR:
+   case TGSI_OPCODE_MOD:
+   case TGSI_OPCODE_XOR:
+   case TGSI_OPCODE_SAD: /* why? */
+   case TGSI_OPCODE_TXF:
+   case TGSI_OPCODE_TXQ:
+   case TGSI_OPCODE_TXQ_LZ:
+   case TGSI_OPCODE_IDIV:
+   case TGSI_OPCODE_IMAX:
+   case TGSI_OPCODE_IMIN:
+   case TGSI_OPCODE_INEG:
+   case TGSI_OPCODE_ISGE:
+   case TGSI_OPCODE_ISHR:
+   case TGSI_OPCODE_ISLT:
+   case TGSI_OPCODE_UARL: /* why? */
+   case TGSI_OPCODE_IABS:
+   case TGSI_OPCODE_ISSG:
+   case TGSI_OPCODE_ATOMXCHG:
+   case TGSI_OPCODE_ATOMCAS:
+   case TGSI_OPCODE_ATOMAND:
+   case TGSI_OPCODE_ATOMOR:
+   case TGSI_OPCODE_ATOMXOR:
+   case TGSI_OPCODE_ATOMIMIN:
+   case TGSI_OPCODE_ATOMIMAX:
+      return TOY_TYPE_D;
+   case TGSI_OPCODE_SHL:
+   case TGSI_OPCODE_U2F:
+   case TGSI_OPCODE_UADD:
+   case TGSI_OPCODE_UDIV:
+   case TGSI_OPCODE_UMAD:
+   case TGSI_OPCODE_UMAX:
+   case TGSI_OPCODE_UMIN:
+   case TGSI_OPCODE_UMOD:
+   case TGSI_OPCODE_UMUL:
+   case TGSI_OPCODE_USEQ:
+   case TGSI_OPCODE_USGE:
+   case TGSI_OPCODE_USHR:
+   case TGSI_OPCODE_USLT:
+   case TGSI_OPCODE_USNE:
+   case TGSI_OPCODE_SAMPLE_I:
+   case TGSI_OPCODE_SAMPLE_I_MS:
+   case TGSI_OPCODE_SVIEWINFO:
+   case TGSI_OPCODE_SAMPLE_POS:
+   case TGSI_OPCODE_SAMPLE_INFO:
+   case TGSI_OPCODE_UCMP:
+   case TGSI_OPCODE_LOAD:
+   case TGSI_OPCODE_STORE:
+   case TGSI_OPCODE_ATOMUADD:
+   case TGSI_OPCODE_ATOMUMIN:
+   case TGSI_OPCODE_ATOMUMAX:
+      return TOY_TYPE_UD;
+   default:
+      return TOY_TYPE_F;
+   }
+}
+
+/**
+ * Return the type of an operand of the specified instruction.
+ *
+ * \param operand  index into Dst[] when \p is_dst is true, into Src[]
+ *                 otherwise
+ *
+ * MOV is handled on its own by looking at the files of Dst[0] and Src[0];
+ * for every other opcode the type inferred from the opcode is adjusted
+ * according to the register file of the operand.
+ */
+static enum toy_type
+ra_get_type(struct toy_tgsi *tgsi, const struct tgsi_full_instruction *tgsi_inst,
+            int operand, bool is_dst)
+{
+   const int opcode = tgsi_inst->Instruction.Opcode;
+   enum tgsi_file_type file;
+   enum toy_type type;
+
+   /* we need to look at both src and dst for MOV */
+   /* XXX it should not be this complex */
+   if (opcode == TGSI_OPCODE_MOV) {
+      const struct tgsi_full_src_register *mov_src = &tgsi_inst->Src[0];
+      const enum tgsi_file_type dst_file = tgsi_inst->Dst[0].Register.File;
+      const enum tgsi_file_type src_file = mov_src->Register.File;
+
+      /* moves to or from the address file are integer moves */
+      if (dst_file == TGSI_FILE_ADDRESS || src_file == TGSI_FILE_ADDRESS)
+         return TOY_TYPE_D;
+
+      /* a directly indexed immediate carries its own recorded type */
+      if (src_file == TGSI_FILE_IMMEDIATE && !mov_src->Register.Indirect)
+         return tgsi->imm_data.types[mov_src->Register.Index];
+
+      /* this is the best we can do */
+      return TOY_TYPE_F;
+   }
+
+   type = ra_infer_opcode_type(opcode, is_dst);
+
+   /* fix the type */
+   if (is_dst)
+      file = tgsi_inst->Dst[operand].Register.File;
+   else
+      file = tgsi_inst->Src[operand].Register.File;
+
+   switch (file) {
+   case TGSI_FILE_SAMPLER:
+   case TGSI_FILE_RESOURCE:
+   case TGSI_FILE_SAMPLER_VIEW:
+      /* these operands are always typed as signed integers */
+      type = TOY_TYPE_D;
+      break;
+   case TGSI_FILE_ADDRESS:
+      /* the opcode is expected to have yielded an integer type already */
+      assert(type == TOY_TYPE_D);
+      break;
+   default:
+      break;
+   }
+
+   return type;
+}
+
+/**
+ * Allocate a VRF register.
+ *
+ * One register is requested from tc_alloc_vrf() when tgsi->aos is set, and
+ * four otherwise.  \p file is currently unused.
+ */
+static int
+ra_alloc_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file)
+{
+   int count = 4;
+
+   if (tgsi->aos)
+      count = 1;
+
+   return tc_alloc_vrf(tgsi->tc, count);
+}
+
+/**
+ * Construct the key for VRF mapping look-up.
+ *
+ * The key packs the TGSI file into bits 31:28, the dimension into bits
+ * 27:16, and the index into bits 15:0.  dump_reg_mapping() decodes the same
+ * layout.
+ */
+static void *
+ra_get_map_key(enum tgsi_file_type file, unsigned dim, unsigned index)
+{
+   uint32_t sig;
+
+   /* this is ugly... */
+   assert(file  < 1 << 4);
+   assert(dim   < 1 << 12);
+   assert(index < 1 << 16);
+
+   /*
+    * Do the packing in an unsigned 32-bit type.  The enum operand may be
+    * promoted to a signed int, and shifting a value >= 8 left by 28 bits
+    * would then overflow into the sign bit, which is undefined behavior.
+    */
+   sig = ((uint32_t) file << 28) | ((uint32_t) dim << 16) | (uint32_t) index;
+
+   return intptr_to_pointer((intptr_t) sig);
+}
+
+/**
+ * Map a TGSI register to a VRF register.
+ *
+ * The (file, dim, index) triple is looked up in tgsi->reg_mapping.  When no
+ * mapping exists yet, a VRF register is allocated and recorded.
+ *
+ * \param is_new  optional; set to true when the mapping was created by this
+ *                call and to false when it already existed
+ * \return the VRF register number
+ */
+static int
+ra_map_reg(struct toy_tgsi *tgsi, enum tgsi_file_type file,
+           int dim, int index, bool *is_new)
+{
+   void *const key = ra_get_map_key(file, dim, index);
+   void *mapped;
+   bool found;
+   intptr_t vrf;
+
+   /*
+    * because we allocate vrf from 1 and on, the stored value is never NULL
+    * as long as the key exists
+    */
+   mapped = util_hash_table_get(tgsi->reg_mapping, key);
+   found = (mapped != NULL);
+
+   if (found) {
+      vrf = pointer_to_intptr(mapped);
+   }
+   else {
+      vrf = (intptr_t) ra_alloc_reg(tgsi, file);
+
+      /* add to the mapping */
+      util_hash_table_set(tgsi->reg_mapping, key, intptr_to_pointer(vrf));
+   }
+
+   if (is_new)
+      *is_new = !found;
+
+   return (int) vrf;
+}
+
+/**
+ * Return true if the destination aliases any of the sources.
+ *
+ * The check is conservative: an indirect destination, or an indirect source
+ * in the same file as the destination, is always reported as aliasing.
+ */
+static bool
+ra_dst_is_aliasing(const struct tgsi_full_instruction *tgsi_inst, int dst_index)
+{
+   const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
+   const int num_srcs = tgsi_inst->Instruction.NumSrcRegs;
+   int n;
+
+   /* we need a scratch register for indirect dst anyway */
+   if (ra_dst_is_indirect(d))
+      return true;
+
+   for (n = 0; n < num_srcs; n++) {
+      const struct tgsi_full_src_register *s = &tgsi_inst->Src[n];
+
+      /* only sources in the same file can alias the destination */
+      if (s->Register.File == d->Register.File) {
+         /*
+          * we can go on to check dimension and index respectively, but
+          * keep it simple for now
+          */
+         if (ra_is_src_indirect(s))
+            return true;
+
+         if (ra_src_dimension(s) == ra_dst_dimension(d) &&
+             ra_src_index(s) == ra_dst_index(d))
+            return true;
+      }
+   }
+
+   return false;
+}
+
+/**
+ * Return the toy register for a TGSI destination operand.
+ *
+ * Output, temporary, address, and predicate destinations live in the VRF.
+ * A scratch VRF register is used instead of the mapped one when the
+ * destination is indirect, aliases a source, or the instruction saturates;
+ * the caller is then responsible for copying the scratch to the real
+ * destination.  A NULL-file destination yields tdst_null().
+ *
+ * \param is_scratch  optional; always written when non-NULL.  Set to true
+ *                    when the returned register is a scratch register, and
+ *                    to false otherwise (including non-VRF destinations).
+ */
+static struct toy_dst
+ra_get_dst(struct toy_tgsi *tgsi,
+           const struct tgsi_full_instruction *tgsi_inst, int dst_index,
+           bool *is_scratch)
+{
+   const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[dst_index];
+   bool need_vrf = false;
+   struct toy_dst dst;
+
+   /*
+    * Default to "not scratch" so that the caller never reads an
+    * uninitialized flag for destinations that do not live in the VRF.
+    */
+   if (is_scratch)
+      *is_scratch = false;
+
+   switch (d->Register.File) {
+   case TGSI_FILE_NULL:
+      dst = tdst_null();
+      break;
+   case TGSI_FILE_OUTPUT:
+   case TGSI_FILE_TEMPORARY:
+   case TGSI_FILE_ADDRESS:
+   case TGSI_FILE_PREDICATE:
+      need_vrf = true;
+      break;
+   default:
+      assert(!"unhandled dst file");
+      dst = tdst_null();
+      break;
+   }
+
+   if (need_vrf) {
+      /* XXX we do not always need a scratch given the conditions... */
+      const bool need_scratch =
+         (ra_dst_is_indirect(d) || ra_dst_is_aliasing(tgsi_inst, dst_index) ||
+          tgsi_inst->Instruction.Saturate);
+      const enum toy_type type = ra_get_type(tgsi, tgsi_inst, dst_index, true);
+      int vrf;
+
+      if (need_scratch) {
+         vrf = ra_alloc_reg(tgsi, d->Register.File);
+      }
+      else {
+         vrf = ra_map_reg(tgsi, d->Register.File,
+               ra_dst_dimension(d), ra_dst_index(d), NULL);
+      }
+
+      if (is_scratch)
+         *is_scratch = need_scratch;
+
+      dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
+            false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);
+   }
+
+   return dst;
+}
+
+/**
+ * Build a toy source that reads VRF register \p vrf with the swizzle,
+ * absolute, and negate modifiers of the TGSI source \p s.
+ */
+static struct toy_src
+ra_get_src_for_vrf(const struct tgsi_full_src_register *s,
+                   enum toy_type type, int vrf)
+{
+   struct toy_src src;
+
+   src = tsrc_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
+         false, 0,
+         s->Register.SwizzleX, s->Register.SwizzleY,
+         s->Register.SwizzleZ, s->Register.SwizzleW,
+         s->Register.Absolute, s->Register.Negate,
+         vrf * TOY_REG_WIDTH);
+
+   return src;
+}
+
+/**
+ * Fill the sources of \p inst with the description of a TGSI register.
+ *
+ * The sources are, in order: the TGSI file, the dimension index, the
+ * dimension indirection, the register index, and the index indirection.
+ * A missing dimension or indirection is encoded as an immediate zero.
+ *
+ * \param indirect      index indirection, or NULL
+ * \param dimension     dimension, or NULL
+ * \param dim_indirect  dimension indirection, or NULL
+ * \return the number of sources written
+ */
+static int
+init_tgsi_reg(struct toy_tgsi *tgsi, struct toy_inst *inst,
+              enum tgsi_file_type file, int index,
+              const struct tgsi_ind_register *indirect,
+              const struct tgsi_dimension *dimension,
+              const struct tgsi_ind_register *dim_indirect)
+{
+   struct toy_src src;
+   int num_src = 0;
+
+   /* src[0]: TGSI file */
+   inst->src[num_src++] = tsrc_imm_d(file);
+
+   /* src[1]: TGSI dimension */
+   inst->src[num_src++] = tsrc_imm_d((dimension) ? dimension->Index : 0);
+
+   /* src[2]: TGSI dimension indirection */
+   if (dim_indirect) {
+      const int vrf = ra_map_reg(tgsi, dim_indirect->File, 0,
+            dim_indirect->Index, NULL);
+
+      src = tsrc(TOY_FILE_VRF, vrf, 0);
+      /*
+       * use the swizzle of the dimension indirection itself; "indirect" may
+       * be NULL here and describes a different register anyway
+       */
+      src = tsrc_swizzle1(tsrc_d(src), dim_indirect->Swizzle);
+   }
+   else {
+      src = tsrc_imm_d(0);
+   }
+
+   inst->src[num_src++] = src;
+
+   /* src[3]: TGSI index */
+   inst->src[num_src++] = tsrc_imm_d(index);
+
+   /* src[4]: TGSI index indirection */
+   if (indirect) {
+      const int vrf = ra_map_reg(tgsi, indirect->File, 0,
+            indirect->Index, NULL);
+
+      src = tsrc(TOY_FILE_VRF, vrf, 0);
+      src = tsrc_swizzle1(tsrc_d(src), indirect->Swizzle);
+   }
+   else {
+      src = tsrc_imm_d(0);
+   }
+
+   inst->src[num_src++] = src;
+
+   return num_src;
+}
+
+/**
+ * Return the toy register for an indirectly addressed TGSI source operand.
+ *
+ * For sampler, resource, and sampler-view files the returned source reads
+ * the VRF register mapped to the indirection register itself.  For the other
+ * handled files a fresh VRF register is allocated and a
+ * TOY_OPCODE_TGSI_INDIRECT_FETCH instruction is emitted to fill it.
+ */
+static struct toy_src
+ra_get_src_indirect(struct toy_tgsi *tgsi,
+                    const struct tgsi_full_instruction *tgsi_inst,
+                    int src_index)
+{
+   const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
+   bool need_vrf = false, is_resource = false;
+   struct toy_src src;
+
+   switch (s->Register.File) {
+   case TGSI_FILE_NULL:
+      src = tsrc_null();
+      break;
+   case TGSI_FILE_SAMPLER:
+   case TGSI_FILE_RESOURCE:
+   case TGSI_FILE_SAMPLER_VIEW:
+      is_resource = true;
+      /* fall through */
+   case TGSI_FILE_CONSTANT:
+   case TGSI_FILE_INPUT:
+   case TGSI_FILE_SYSTEM_VALUE:
+   case TGSI_FILE_TEMPORARY:
+   case TGSI_FILE_ADDRESS:
+   case TGSI_FILE_IMMEDIATE:
+   case TGSI_FILE_PREDICATE:
+      need_vrf = true;
+      break;
+   default:
+      assert(!"unhandled src file");
+      src = tsrc_null();
+      break;
+   }
+
+   if (need_vrf) {
+      const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
+      int vrf;
+
+      if (is_resource) {
+         assert(!s->Register.Dimension);
+         assert(s->Register.Indirect);
+
+         vrf = ra_map_reg(tgsi, s->Indirect.File, 0, s->Indirect.Index, NULL);
+      }
+      else {
+         vrf = ra_alloc_reg(tgsi, s->Register.File);
+      }
+
+      src = ra_get_src_for_vrf(s, type, vrf);
+
+      /* emit indirect fetch */
+      if (!is_resource) {
+         /*
+          * the dimension indirection only exists when the register has a
+          * dimension; this matches the test in ra_is_src_indirect()
+          */
+         const bool dim_is_indirect =
+            (s->Register.Dimension && s->Dimension.Indirect);
+         struct toy_inst *inst;
+
+         inst = tc_add(tgsi->tc);
+         inst->opcode = TOY_OPCODE_TGSI_INDIRECT_FETCH;
+         inst->dst = tdst_from(src);
+         /* fetch all channels regardless of the swizzle of the source */
+         inst->dst.writemask = TOY_WRITEMASK_XYZW;
+
+         init_tgsi_reg(tgsi, inst, s->Register.File, s->Register.Index,
+               (s->Register.Indirect) ? &s->Indirect : NULL,
+               (s->Register.Dimension) ? &s->Dimension : NULL,
+               (dim_is_indirect) ? &s->DimIndirect : NULL);
+      }
+   }
+
+   return src;
+}
+
+/**
+ * Return the toy register for a TGSI source operand.
+ *
+ * Indirect operands are delegated to ra_get_src_indirect().  Sampler,
+ * resource, and sampler-view operands become an immediate holding the
+ * register index.  An immediate whose four swizzled channels are equal
+ * becomes a scalar immediate; every other handled operand is read from the
+ * VRF register it is mapped to.
+ */
+static struct toy_src
+ra_get_src(struct toy_tgsi *tgsi,
+           const struct tgsi_full_instruction *tgsi_inst,
+           int src_index)
+{
+   const struct tgsi_full_src_register *s = &tgsi_inst->Src[src_index];
+   bool use_vrf = false;
+   struct toy_src src;
+
+   if (ra_is_src_indirect(s))
+      return ra_get_src_indirect(tgsi, tgsi_inst, src_index);
+
+   switch (s->Register.File) {
+   case TGSI_FILE_NULL:
+      src = tsrc_null();
+      break;
+   case TGSI_FILE_CONSTANT:
+   case TGSI_FILE_INPUT:
+   case TGSI_FILE_SYSTEM_VALUE:
+   case TGSI_FILE_TEMPORARY:
+   case TGSI_FILE_ADDRESS:
+   case TGSI_FILE_PREDICATE:
+      use_vrf = true;
+      break;
+   case TGSI_FILE_SAMPLER:
+   case TGSI_FILE_RESOURCE:
+   case TGSI_FILE_SAMPLER_VIEW:
+      assert(!s->Register.Dimension);
+      src = tsrc_imm_d(s->Register.Index);
+      break;
+   case TGSI_FILE_IMMEDIATE:
+      {
+         enum toy_type imm_type;
+         const uint32_t *imm =
+            toy_tgsi_get_imm(tgsi, s->Register.Index, &imm_type);
+         const uint32_t x = imm[s->Register.SwizzleX];
+
+         /* all four swizzled channels equal: use a scalar immediate */
+         if (x == imm[s->Register.SwizzleY] &&
+             x == imm[s->Register.SwizzleZ] &&
+             x == imm[s->Register.SwizzleW]) {
+            /* ignore imm_type */
+            src = tsrc_imm_ud(x);
+            src.type = ra_get_type(tgsi, tgsi_inst, src_index, false);
+            src.absolute = s->Register.Absolute;
+            src.negate = s->Register.Negate;
+         }
+         else {
+            use_vrf = true;
+         }
+      }
+      break;
+   default:
+      assert(!"unhandled src file");
+      src = tsrc_null();
+      break;
+   }
+
+   if (use_vrf) {
+      const enum toy_type type = ra_get_type(tgsi, tgsi_inst, src_index, false);
+      bool first_use;
+      int vrf;
+
+      vrf = ra_map_reg(tgsi, s->Register.File,
+            ra_src_dimension(s), ra_src_index(s), &first_use);
+
+      src = ra_get_src_for_vrf(s, type, vrf);
+
+      /*
+       * Always initialize registers.  Otherwise, if the random value
+       * ends up in a VUE, FS may fail to interpolate correctly.
+       */
+      if (first_use &&
+          (s->Register.File == TGSI_FILE_TEMPORARY ||
+           s->Register.File == TGSI_FILE_ADDRESS ||
+           s->Register.File == TGSI_FILE_PREDICATE)) {
+         struct toy_dst zeroed = tdst_from(src);
+
+         zeroed.writemask = TOY_WRITEMASK_XYZW;
+         tc_MOV(tgsi->tc, zeroed, tsrc_type(tsrc_imm_d(0), type));
+      }
+   }
+
+   return src;
+}
+
+/**
+ * Translate one TGSI instruction.
+ *
+ * The operands are converted to toy registers, the opcode-specific
+ * translate callback is invoked, and results that were written to scratch
+ * registers are then copied (with saturation applied, when requested) to
+ * the real destinations.  Finally, kill usage and the channels written to
+ * outputs are recorded in \p tgsi.
+ */
+static void
+parse_instruction(struct toy_tgsi *tgsi,
+                  const struct tgsi_full_instruction *tgsi_inst)
+{
+   struct toy_dst dst[TGSI_FULL_MAX_DST_REGISTERS];
+   struct toy_src src[TGSI_FULL_MAX_SRC_REGISTERS];
+   /* zero-initialized so that no flag is ever read uninitialized */
+   bool dst_is_scratch[TGSI_FULL_MAX_DST_REGISTERS] = { false };
+   toy_tgsi_translate translate;
+   int i;
+
+   /* convert TGSI registers to toy registers */
+   for (i = 0; i < tgsi_inst->Instruction.NumSrcRegs; i++)
+      src[i] = ra_get_src(tgsi, tgsi_inst, i);
+   for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++)
+      dst[i] = ra_get_dst(tgsi, tgsi_inst, i, &dst_is_scratch[i]);
+
+   /* translate the instruction */
+   translate = tgsi->translate_table[tgsi_inst->Instruction.Opcode];
+   translate(tgsi->tc, tgsi_inst, dst, src);
+
+   /* write the result to the real destinations if needed */
+   for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
+      const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
+
+      if (!dst_is_scratch[i])
+         continue;
+
+      if (tgsi_inst->Instruction.Saturate == TGSI_SAT_MINUS_PLUS_ONE)
+         tc_fail(tgsi->tc, "TGSI_SAT_MINUS_PLUS_ONE unhandled");
+
+      /* saturation is applied by the copy out of the scratch register */
+      tgsi->tc->templ.saturate = tgsi_inst->Instruction.Saturate;
+
+      /* emit indirect store */
+      if (ra_dst_is_indirect(d)) {
+         /*
+          * the dimension indirection only exists when the register has a
+          * dimension; this matches the test in ra_dst_is_indirect()
+          */
+         const bool dim_is_indirect =
+            (d->Register.Dimension && d->Dimension.Indirect);
+         struct toy_inst *inst;
+
+         inst = tc_add(tgsi->tc);
+         inst->opcode = TOY_OPCODE_TGSI_INDIRECT_STORE;
+         inst->dst = dst[i];
+
+         init_tgsi_reg(tgsi, inst, d->Register.File, d->Register.Index,
+               (d->Register.Indirect) ? &d->Indirect : NULL,
+               (d->Register.Dimension) ? &d->Dimension : NULL,
+               (dim_is_indirect) ? &d->DimIndirect : NULL);
+      }
+      else {
+         const enum toy_type type = ra_get_type(tgsi, tgsi_inst, i, true);
+         struct toy_dst real_dst;
+         int vrf;
+
+         vrf = ra_map_reg(tgsi, d->Register.File,
+               ra_dst_dimension(d), ra_dst_index(d), NULL);
+         real_dst = tdst_full(TOY_FILE_VRF, type, TOY_RECT_LINEAR,
+               false, 0, d->Register.WriteMask, vrf * TOY_REG_WIDTH);
+
+         if (tgsi->aos) {
+            tc_MOV(tgsi->tc, real_dst, tsrc_from(dst[i]));
+         }
+         else {
+            struct toy_dst tdst[4];
+            struct toy_src tsrc[4];
+            int j;
+
+            tdst_transpose(real_dst, tdst);
+            tsrc_transpose(tsrc_from(dst[i]), tsrc);
+
+            for (j = 0; j < 4; j++)
+               tc_MOV(tgsi->tc, tdst[j], tsrc[j]);
+         }
+      }
+
+      tgsi->tc->templ.saturate = false;
+   }
+
+   switch (tgsi_inst->Instruction.Opcode) {
+   case TGSI_OPCODE_KIL:
+   case TGSI_OPCODE_KILP:
+      tgsi->uses_kill = true;
+      break;
+   }
+
+   /* remember channels written */
+   for (i = 0; i < tgsi_inst->Instruction.NumDstRegs; i++) {
+      const struct tgsi_full_dst_register *d = &tgsi_inst->Dst[i];
+      int j;
+
+      if (d->Register.File != TGSI_FILE_OUTPUT)
+         continue;
+
+      /*
+       * use a separate index for the output search; reusing "i" would
+       * clobber the destination loop and could skip destinations or loop
+       * forever
+       */
+      for (j = 0; j < tgsi->num_outputs; j++) {
+         if (tgsi->outputs[j].index == d->Register.Index) {
+            tgsi->outputs[j].undefined_mask &= ~d->Register.WriteMask;
+            break;
+         }
+      }
+   }
+}
+
+/**
+ * Record the inputs covered by a TGSI input declaration.
+ *
+ * One slot of tgsi->inputs is appended for each index in the declared range.
+ * Declarations without a semantic are recorded as TGSI_SEMANTIC_GENERIC with
+ * the register index as the semantic index; declarations without an
+ * interpolation token use perspective interpolation without centroid.
+ */
+static void
+decl_add_in(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
+{
+   static const struct tgsi_declaration_interp default_interp = {
+      TGSI_INTERPOLATE_PERSPECTIVE, false, 0,
+   };
+   const struct tgsi_declaration_interp *interp = &default_interp;
+   int index;
+
+   if (decl->Declaration.Interpolate)
+      interp = &decl->Interp;
+
+   if (decl->Range.Last >= Elements(tgsi->inputs)) {
+      assert(!"invalid IN");
+      return;
+   }
+
+   index = decl->Range.First;
+   while (index <= decl->Range.Last) {
+      const int slot = tgsi->num_inputs++;
+
+      tgsi->inputs[slot].index = index;
+      tgsi->inputs[slot].usage_mask = decl->Declaration.UsageMask;
+
+      if (!decl->Declaration.Semantic) {
+         tgsi->inputs[slot].semantic_name = TGSI_SEMANTIC_GENERIC;
+         tgsi->inputs[slot].semantic_index = index;
+      }
+      else {
+         tgsi->inputs[slot].semantic_name = decl->Semantic.Name;
+         tgsi->inputs[slot].semantic_index = decl->Semantic.Index;
+      }
+
+      tgsi->inputs[slot].interp = interp->Interpolate;
+      tgsi->inputs[slot].centroid = interp->Centroid;
+
+      index++;
+   }
+}
+
+/**
+ * Record the outputs covered by a TGSI output declaration.
+ *
+ * One slot of tgsi->outputs is appended for each index in the declared
+ * range, with all four channels initially marked as undefined.  The
+ * declaration is expected to carry a semantic.
+ */
+static void
+decl_add_out(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
+{
+   int index;
+
+   if (decl->Range.Last >= Elements(tgsi->outputs)) {
+      assert(!"invalid OUT");
+      return;
+   }
+
+   assert(decl->Declaration.Semantic);
+
+   index = decl->Range.First;
+   while (index <= decl->Range.Last) {
+      const int slot = tgsi->num_outputs++;
+
+      tgsi->outputs[slot].index = index;
+      /* nothing has been written yet */
+      tgsi->outputs[slot].undefined_mask = TOY_WRITEMASK_XYZW;
+      tgsi->outputs[slot].usage_mask = decl->Declaration.UsageMask;
+      tgsi->outputs[slot].semantic_name = decl->Semantic.Name;
+      tgsi->outputs[slot].semantic_index = decl->Semantic.Index;
+
+      index++;
+   }
+}
+
+/**
+ * Record the system values covered by a TGSI system value declaration.
+ *
+ * One slot of tgsi->system_values is appended for each index in the declared
+ * range.  Declarations without a semantic are recorded as
+ * TGSI_SEMANTIC_GENERIC with the register index as the semantic index.
+ */
+static void
+decl_add_sv(struct toy_tgsi *tgsi, const struct tgsi_full_declaration *decl)
+{
+   int index;
+
+   if (decl->Range.Last >= Elements(tgsi->system_values)) {
+      assert(!"invalid SV");
+      return;
+   }
+
+   index = decl->Range.First;
+   while (index <= decl->Range.Last) {
+      const int slot = tgsi->num_system_values++;
+
+      tgsi->system_values[slot].index = index;
+
+      if (!decl->Declaration.Semantic) {
+         tgsi->system_values[slot].semantic_name = TGSI_SEMANTIC_GENERIC;
+         tgsi->system_values[slot].semantic_index = index;
+      }
+      else {
+         tgsi->system_values[slot].semantic_name = decl->Semantic.Name;
+         tgsi->system_values[slot].semantic_index = decl->Semantic.Index;
+      }
+
+      index++;
+   }
+}
+
+/**
+ * Emit an instruction to fetch the value of a TGSI register.
+ *
+ * Only inputs, constants, system values, and immediates are fetched; for any
+ * other file nothing is emitted.  The fetched value is typed as float,
+ * except for immediates, which use the type reported by toy_tgsi_get_imm().
+ */
+static void
+fetch_source(struct toy_tgsi *tgsi, enum tgsi_file_type file, int dim, int idx)
+{
+   enum toy_type type = TOY_TYPE_F;
+   enum toy_opcode opcode;
+   struct toy_dst dst;
+   int vrf;
+
+   if (file == TGSI_FILE_INPUT) {
+      opcode = TOY_OPCODE_TGSI_IN;
+   }
+   else if (file == TGSI_FILE_CONSTANT) {
+      opcode = TOY_OPCODE_TGSI_CONST;
+   }
+   else if (file == TGSI_FILE_SYSTEM_VALUE) {
+      opcode = TOY_OPCODE_TGSI_SV;
+   }
+   else if (file == TGSI_FILE_IMMEDIATE) {
+      opcode = TOY_OPCODE_TGSI_IMM;
+      /* only the type is needed here */
+      toy_tgsi_get_imm(tgsi, idx, &type);
+   }
+   else {
+      /* no need to fetch */
+      return;
+   }
+
+   vrf = ra_map_reg(tgsi, file, dim, idx, NULL);
+   dst = tdst_type(tdst(TOY_FILE_VRF, vrf, 0), type);
+
+   tc_add2(tgsi->tc, opcode, dst, tsrc_imm_d(dim), tsrc_imm_d(idx));
+}
+
+/**
+ * Handle a TGSI declaration token.
+ *
+ * Input, output, and system value declarations are recorded in \p tgsi.
+ * fetch_source() is then called for every register in the declared range.
+ */
+static void
+parse_declaration(struct toy_tgsi *tgsi,
+                  const struct tgsi_full_declaration *decl)
+{
+   const enum tgsi_file_type file = decl->Declaration.File;
+   const int dim = (decl->Declaration.Dimension) ? decl->Dim.Index2D : 0;
+   int reg;
+
+   switch (file) {
+   case TGSI_FILE_INPUT:
+      decl_add_in(tgsi, decl);
+      break;
+   case TGSI_FILE_OUTPUT:
+      decl_add_out(tgsi, decl);
+      break;
+   case TGSI_FILE_SYSTEM_VALUE:
+      decl_add_sv(tgsi, decl);
+      break;
+   case TGSI_FILE_IMMEDIATE:
+      /* immediates should be declared with TGSI_TOKEN_TYPE_IMMEDIATE */
+      assert(!"unexpected immediate declaration");
+      break;
+   case TGSI_FILE_NULL:
+   case TGSI_FILE_CONSTANT:
+   case TGSI_FILE_TEMPORARY:
+   case TGSI_FILE_SAMPLER:
+   case TGSI_FILE_PREDICATE:
+   case TGSI_FILE_ADDRESS:
+   case TGSI_FILE_RESOURCE:
+   case TGSI_FILE_SAMPLER_VIEW:
+      /* nothing to do */
+      break;
+   default:
+      assert(!"unhandled TGSI file");
+      break;
+   }
+
+   /* fetch the registers now */
+   for (reg = decl->Range.First; reg <= decl->Range.Last; reg++)
+      fetch_source(tgsi, file, dim, reg);
+}
+
+/**
+ * Append a four-component immediate to tgsi->imm_data.
+ *
+ * The value and type arrays are grown geometrically (starting at 16 entries)
+ * when they are full.
+ *
+ * \param type  type recorded for the immediate
+ * \param buf   the four 32-bit components to copy
+ * \return the index of the new immediate, or -1 when the arrays could not be
+ *         grown; previously added immediates remain valid in that case
+ */
+static int
+add_imm(struct toy_tgsi *tgsi, enum toy_type type, const uint32_t *buf)
+{
+   /* reallocate the buffer if necessary */
+   if (tgsi->imm_data.cur >= tgsi->imm_data.size) {
+      const int cur_size = tgsi->imm_data.size;
+      int new_size;
+      enum toy_type *new_types;
+      uint32_t (*new_buf)[4];
+
+      new_size = (cur_size) ? cur_size << 1 : 16;
+      while (new_size <= tgsi->imm_data.cur)
+         new_size <<= 1;
+
+      /*
+       * Commit each array as soon as its reallocation succeeds.  REALLOC is
+       * assumed to behave like realloc(): on success the old pointer must no
+       * longer be used, so freeing the new block on a partial failure would
+       * leave tgsi->imm_data pointing at freed memory.
+       */
+      new_buf = REALLOC(tgsi->imm_data.buf,
+            cur_size * sizeof(new_buf[0]),
+            new_size * sizeof(new_buf[0]));
+      if (new_buf)
+         tgsi->imm_data.buf = new_buf;
+
+      new_types = REALLOC(tgsi->imm_data.types,
+            cur_size * sizeof(new_types[0]),
+            new_size * sizeof(new_types[0]));
+      if (new_types)
+         tgsi->imm_data.types = new_types;
+
+      /* the recorded size is only raised once both arrays have grown */
+      if (!new_buf || !new_types)
+         return -1;
+
+      tgsi->imm_data.size = new_size;
+   }
+
+   tgsi->imm_data.types[tgsi->imm_data.cur] = type;
+   memcpy(&tgsi->imm_data.buf[tgsi->imm_data.cur],
+         buf, sizeof(tgsi->imm_data.buf[0]));
+
+   return tgsi->imm_data.cur++;
+}
+
+/**
+ * Handle a TGSI immediate token.
+ *
+ * The four components are stored as raw 32-bit values together with the toy
+ * type matching the TGSI data type, and a fetch of the new immediate is
+ * emitted.  The compile is marked as failed when the immediate cannot be
+ * added.
+ */
+static void
+parse_immediate(struct toy_tgsi *tgsi, const struct tgsi_full_immediate *imm)
+{
+   uint32_t imm_buf[4];
+   enum toy_type type;
+   int idx, c;
+
+   switch (imm->Immediate.DataType) {
+   case TGSI_IMM_FLOAT32:
+      type = TOY_TYPE_F;
+      for (c = 0; c < 4; c++)
+         imm_buf[c] = fui(imm->u[c].Float);
+      break;
+   case TGSI_IMM_INT32:
+      type = TOY_TYPE_D;
+      for (c = 0; c < 4; c++)
+         imm_buf[c] = (uint32_t) imm->u[c].Int;
+      break;
+   case TGSI_IMM_UINT32:
+      type = TOY_TYPE_UD;
+      for (c = 0; c < 4; c++)
+         imm_buf[c] = imm->u[c].Uint;
+      break;
+   default:
+      assert(!"unhandled TGSI imm type");
+      type = TOY_TYPE_F;
+      memset(imm_buf, 0, sizeof(imm_buf));
+      break;
+   }
+
+   idx = add_imm(tgsi, type, imm_buf);
+   if (idx < 0) {
+      tc_fail(tgsi->tc, "failed to add TGSI imm");
+      return;
+   }
+
+   fetch_source(tgsi, TGSI_FILE_IMMEDIATE, 0, idx);
+}
+
+/**
+ * Handle a TGSI property token by copying its first data word into the
+ * matching field of tgsi->props.
+ */
+static void
+parse_property(struct toy_tgsi *tgsi, const struct tgsi_full_property *prop)
+{
+   const unsigned data = prop->u[0].Data;
+
+   switch (prop->Property.PropertyName) {
+   case TGSI_PROPERTY_VS_PROHIBIT_UCPS:
+      tgsi->props.vs_prohibit_ucps = data;
+      break;
+   case TGSI_PROPERTY_FS_COORD_ORIGIN:
+      tgsi->props.fs_coord_origin = data;
+      break;
+   case TGSI_PROPERTY_FS_COORD_PIXEL_CENTER:
+      tgsi->props.fs_coord_pixel_center = data;
+      break;
+   case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
+      tgsi->props.fs_color0_writes_all_cbufs = data;
+      break;
+   case TGSI_PROPERTY_FS_DEPTH_LAYOUT:
+      tgsi->props.fs_depth_layout = data;
+      break;
+   case TGSI_PROPERTY_GS_INPUT_PRIM:
+      tgsi->props.gs_input_prim = data;
+      break;
+   case TGSI_PROPERTY_GS_OUTPUT_PRIM:
+      tgsi->props.gs_output_prim = data;
+      break;
+   case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES:
+      tgsi->props.gs_max_output_vertices = data;
+      break;
+   default:
+      assert(!"unhandled TGSI property");
+      break;
+   }
+}
+
+/**
+ * Dispatch a parsed TGSI token to the parser of its token type.
+ */
+static void
+parse_token(struct toy_tgsi *tgsi, const union tgsi_full_token *token)
+{
+   const unsigned token_type = token->Token.Type;
+
+   if (token_type == TGSI_TOKEN_TYPE_DECLARATION)
+      parse_declaration(tgsi, &token->FullDeclaration);
+   else if (token_type == TGSI_TOKEN_TYPE_IMMEDIATE)
+      parse_immediate(tgsi, &token->FullImmediate);
+   else if (token_type == TGSI_TOKEN_TYPE_INSTRUCTION)
+      parse_instruction(tgsi, &token->FullInstruction);
+   else if (token_type == TGSI_TOKEN_TYPE_PROPERTY)
+      parse_property(tgsi, &token->FullProperty);
+   else
+      assert(!"unhandled TGSI token type");
+}
+
+/**
+ * Hash table iteration callback that prints one entry of the register
+ * mapping.  The key is the packed TGSI register signature and the value is
+ * the VRF register; \p data is unused.  Always returns PIPE_OK.
+ */
+static enum pipe_error
+dump_reg_mapping(void *key, void *val, void *data)
+{
+   const uint32_t sig = (uint32_t) pointer_to_intptr(key);
+   const uint32_t vrf = (uint32_t) pointer_to_intptr(val);
+
+   /* unpack the signature; see ra_get_map_key() */
+   const int tgsi_file = (sig >> 28) & 0xf;
+   const int tgsi_dim = (sig >> 16) & 0xfff;
+   const int tgsi_index = (sig >> 0) & 0xffff;
+
+   /* the dimension is printed only when it is non-zero */
+   if (!tgsi_dim) {
+      ilo_printf("  v%d:\t%s[%d]\n", vrf,
+            tgsi_file_names[tgsi_file], tgsi_index);
+      return PIPE_OK;
+   }
+
+   ilo_printf("  v%d:\t%s[%d][%d]\n", vrf,
+         tgsi_file_names[tgsi_file], tgsi_dim, tgsi_index);
+
+   return PIPE_OK;
+}
+
+/**
+ * Dump the TGSI translator, currently only the register mapping.
+ *
+ * Every entry of tgsi->reg_mapping is printed through dump_reg_mapping().
+ */
+void
+toy_tgsi_dump(const struct toy_tgsi *tgsi)
+{
+   util_hash_table_foreach(tgsi->reg_mapping, dump_reg_mapping, NULL);
+}
+
+/**
+ * Clean up the TGSI translator.
+ *
+ * Releases the immediate arrays and the register mapping, then resets the
+ * corresponding fields.  This leaves no dangling pointers behind, so calling
+ * the function again, or calling it after a failed initialization where
+ * reg_mapping is still NULL, is harmless.
+ */
+void
+toy_tgsi_cleanup(struct toy_tgsi *tgsi)
+{
+   FREE(tgsi->imm_data.buf);
+   FREE(tgsi->imm_data.types);
+   tgsi->imm_data.buf = NULL;
+   tgsi->imm_data.types = NULL;
+   tgsi->imm_data.cur = 0;
+   tgsi->imm_data.size = 0;
+
+   /* the table does not exist when its creation failed in init_tgsi() */
+   if (tgsi->reg_mapping) {
+      util_hash_table_destroy(tgsi->reg_mapping);
+      tgsi->reg_mapping = NULL;
+   }
+}
+
+/**
+ * Hash callback of the register mapping: the key pointer is converted to an
+ * integer with pointer_to_intptr() and that value, cast to unsigned, is used
+ * as the hash.
+ */
+static unsigned
+reg_mapping_hash(void *key)
+{
+   return (unsigned) pointer_to_intptr(key);
+}
+
+/**
+ * Compare callback of the register mapping.  Keys are compared by pointer
+ * value: the result is 0 when they are equal and 1 otherwise.
+ */
+static int
+reg_mapping_compare(void *key1, void *key2)
+{
+   return (key1 != key2);
+}
+
+/**
+ * Initialize the TGSI translator.
+ *
+ * The translator is zeroed, bound to \p tc, and given the AoS or SoA
+ * translate table depending on \p aos.  Returns false when the register
+ * mapping cannot be created.
+ */
+static bool
+init_tgsi(struct toy_tgsi *tgsi, struct toy_compiler *tc, bool aos)
+{
+   memset(tgsi, 0, sizeof(*tgsi));
+
+   tgsi->tc = tc;
+   tgsi->aos = aos;
+
+   if (aos)
+      tgsi->translate_table = aos_translate_table;
+   else
+      tgsi->translate_table = soa_translate_table;
+
+   /* this table maps TGSI registers to VRF registers */
+   tgsi->reg_mapping =
+      util_hash_table_create(reg_mapping_hash, reg_mapping_compare);
+   if (!tgsi->reg_mapping)
+      return false;
+
+   return true;
+}
+
+/**
+ * Translate TGSI tokens into toy instructions.
+ *
+ * \p tgsi is (re)initialized first, then every token of \p tokens is parsed
+ * and passed to parse_token().  Failures are reported through tc_fail() on
+ * \p tc; the function itself returns nothing.
+ */
+void
+toy_compiler_translate_tgsi(struct toy_compiler *tc,
+                            const struct tgsi_token *tokens, bool aos,
+                            struct toy_tgsi *tgsi)
+{
+   struct tgsi_parse_context parse;
+
+   if (!init_tgsi(tgsi, tc, aos)) {
+      tc_fail(tc, "failed to initialize TGSI translator");
+      return;
+   }
+
+   /*
+    * tgsi_parse_init() can fail; do not walk the tokens with a parse context
+    * that was not set up
+    */
+   if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) {
+      tc_fail(tc, "failed to initialize TGSI parser");
+      return;
+   }
+
+   while (!tgsi_parse_end_of_tokens(&parse)) {
+      tgsi_parse_token(&parse);
+      parse_token(tgsi, &parse.FullToken);
+   }
+   tgsi_parse_free(&parse);
+}
+
+/**
+ * Map the TGSI register to VRF register.
+ *
+ * Returns the VRF register stored in the register mapping for the given
+ * (file, dimension, index), or -1 when the lookup yields nothing.
+ */
+int
+toy_tgsi_get_vrf(const struct toy_tgsi *tgsi,
+                 enum tgsi_file_type file, int dimension, int index)
+{
+   void *map_key = ra_get_map_key(file, dimension, index);
+   void *vrf = util_hash_table_get(tgsi->reg_mapping, map_key);
+
+   if (!vrf)
+      return -1;
+
+   return pointer_to_intptr(vrf);
+}
diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.h b/src/gallium/drivers/ilo/shader/toy_tgsi.h
new file mode 100644 (file)
index 0000000..1bfb57f
--- /dev/null
@@ -0,0 +1,253 @@
+/*
+ * Mesa 3-D graphics library
+ *
+ * Copyright (C) 2012-2013 LunarG, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chia-I Wu <olv@lunarg.com>
+ */
+
+#ifndef TOY_TGSI_H
+#define TOY_TGSI_H
+
+#include "pipe/p_state.h"
+#include "pipe/p_shader_tokens.h"
+#include "toy_compiler.h"
+
+struct tgsi_token;
+struct tgsi_full_instruction;
+struct util_hash_table;
+
+/**
+ * Signature of the callbacks stored in toy_tgsi::translate_table.  A callback
+ * receives the compiler, the TGSI instruction, and pointers to toy
+ * destination and source operands.
+ *
+ * NOTE(review): dst and src presumably point to arrays holding the already
+ * translated operands of tgsi_inst -- confirm against the caller.
+ */
+typedef void (*toy_tgsi_translate)(struct toy_compiler *tc,
+      const struct tgsi_full_instruction *tgsi_inst,
+      struct toy_dst *dst,
+      struct toy_src *src);
+
+/**
+ * State of the TGSI translator.  It is filled by
+ * toy_compiler_translate_tgsi() and released with toy_tgsi_cleanup().
+ */
+struct toy_tgsi {
+   /* compiler this translator reports failures to */
+   struct toy_compiler *tc;
+   /* selects between the AoS and the SoA translate table */
+   bool aos;
+   const toy_tgsi_translate *translate_table;
+
+   /* maps TGSI registers (file, dimension, index) to VRF registers */
+   struct util_hash_table *reg_mapping;
+
+   /* first data word of each recognized TGSI property */
+   struct {
+      bool vs_prohibit_ucps;
+      int fs_coord_origin;
+      int fs_coord_pixel_center;
+      bool fs_color0_writes_all_cbufs;
+      int fs_depth_layout;
+      int gs_input_prim;
+      int gs_output_prim;
+      int gs_max_output_vertices;
+   } props;
+
+   /*
+    * TGSI immediates: types[i] and buf[i] describe immediate i.  cur is the
+    * number of immediates stored and size is the allocated capacity, both in
+    * entries.
+    */
+   struct {
+      enum toy_type *types;
+      uint32_t (*buf)[4];
+      int cur, size;
+   } imm_data;
+
+   /* declared inputs; look up a slot with toy_tgsi_find_input() */
+   struct {
+      int index:16;
+      unsigned usage_mask:4;        /* TGSI_WRITEMASK_x */
+      unsigned semantic_name:8;     /* TGSI_SEMANTIC_x */
+      unsigned semantic_index:8;
+      unsigned interp:4;            /* TGSI_INTERPOLATE_x */
+      unsigned centroid:1;
+   } inputs[PIPE_MAX_SHADER_INPUTS];
+   int num_inputs;
+
+   /* declared outputs */
+   struct {
+      int index:16;
+      unsigned undefined_mask:4;
+      unsigned usage_mask:4;        /* TGSI_WRITEMASK_x */
+      unsigned semantic_name:8;     /* TGSI_SEMANTIC_x */
+      unsigned semantic_index:8;
+   } outputs[PIPE_MAX_SHADER_OUTPUTS];
+   int num_outputs;
+
+   /* declared system values; look up a slot with toy_tgsi_find_system_value() */
+   struct {
+      int index:16;
+      unsigned semantic_name:8;     /* TGSI_SEMANTIC_x */
+      unsigned semantic_index:8;
+   } system_values[8];
+   int num_system_values;
+
+   /* NOTE(review): presumably set when the shader uses a kill instruction -- confirm at the writer */
+   bool uses_kill;
+};
+
+/**
+ * Find the slot of the TGSI input.
+ *
+ * Returns the first slot whose index equals \p index, or -1 when no input
+ * matches.
+ */
+static inline int
+toy_tgsi_find_input(const struct toy_tgsi *tgsi, int index)
+{
+   const int count = tgsi->num_inputs;
+   int i = 0;
+
+   while (i < count) {
+      if (tgsi->inputs[i].index == index)
+         return i;
+      i++;
+   }
+
+   return -1;
+}
+
+/**
+ * Find the slot of the TGSI system value.
+ *
+ * Returns the first slot whose index equals \p index, or -1 when no system
+ * value matches.
+ */
+static inline int
+toy_tgsi_find_system_value(const struct toy_tgsi *tgsi, int index)
+{
+   const int count = tgsi->num_system_values;
+   int i = 0;
+
+   while (i < count) {
+      if (tgsi->system_values[i].index == index)
+         return i;
+      i++;
+   }
+
+   return -1;
+}
+
+/**
+ * Return the immediate data of the TGSI immediate.
+ *
+ * The result points to the four 32-bit words of immediate \p index, or is
+ * NULL when \p index is not below the number of stored immediates.  When
+ * \p type is non-NULL, the type of the immediate is written to it.
+ */
+static inline const uint32_t *
+toy_tgsi_get_imm(const struct toy_tgsi *tgsi, unsigned index,
+                 enum toy_type *type)
+{
+   if (index >= tgsi->imm_data.cur)
+      return NULL;
+
+   if (type)
+      *type = tgsi->imm_data.types[index];
+
+   return tgsi->imm_data.buf[index];
+}
+
+/**
+ * Return the dimension of the texture coordinates, as well as the location of
+ * the shadow reference value or the sample index.
+ *
+ * \p shadow_or_sample may be NULL.  Otherwise it receives the location, or
+ * -1 when the target has neither a shadow reference value nor a sample index.
+ */
+static inline int
+toy_tgsi_get_texture_coord_dim(int tgsi_tex, int *shadow_or_sample)
+{
+   int num_coords;
+   /* location of the shadow reference value or sample index, if any */
+   int extra = -1;
+
+   /*
+    * Depending on the texture target, (src0, src1.x) is interpreted
+    * differently:
+    *
+    *   (s, *, *, *, *),          for 1D
+    *   (s, t, *, *, *),          for 2D, RECT
+    *   (s, t, r, *, *),          for 3D, CUBE
+    *
+    *   (s, layer, *, *, *),      for 1D_ARRAY
+    *   (s, t, layer, *, *),      for 2D_ARRAY
+    *   (s, t, r, layer, *),      for CUBE_ARRAY
+    *
+    *   (s, *, shadow, *, *),     for SHADOW1D
+    *   (s, t, shadow, *, *),     for SHADOW2D, SHADOWRECT
+    *   (s, t, r, shadow, *),     for SHADOWCUBE
+    *
+    *   (s, layer, shadow, *, *), for SHADOW1D_ARRAY
+    *   (s, t, layer, shadow, *), for SHADOW2D_ARRAY
+    *   (s, t, r, layer, shadow), for SHADOWCUBE_ARRAY
+    *
+    *   (s, t, sample, *, *),     for 2D_MSAA
+    *   (s, t, layer, sample, *), for 2D_ARRAY_MSAA
+    */
+   switch (tgsi_tex) {
+   case TGSI_TEXTURE_1D:
+      num_coords = 1;
+      break;
+   case TGSI_TEXTURE_SHADOW1D:
+      num_coords = 1;
+      /* there is a gap */
+      extra = 2;
+      break;
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+   case TGSI_TEXTURE_1D_ARRAY:
+      num_coords = 2;
+      break;
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+   case TGSI_TEXTURE_SHADOW1D_ARRAY:
+   case TGSI_TEXTURE_2D_MSAA:
+      /* the extra value directly follows the coordinates */
+      num_coords = 2;
+      extra = 2;
+      break;
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
+   case TGSI_TEXTURE_2D_ARRAY:
+      num_coords = 3;
+      break;
+   case TGSI_TEXTURE_SHADOWCUBE:
+   case TGSI_TEXTURE_SHADOW2D_ARRAY:
+   case TGSI_TEXTURE_2D_ARRAY_MSAA:
+      num_coords = 3;
+      extra = 3;
+      break;
+   case TGSI_TEXTURE_CUBE_ARRAY:
+      num_coords = 4;
+      break;
+   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+      num_coords = 4;
+      extra = 4;
+      break;
+   default:
+      assert(!"unknown texture target");
+      num_coords = 0;
+      break;
+   }
+
+   if (shadow_or_sample)
+      *shadow_or_sample = extra;
+
+   return num_coords;
+}
+
+/**
+ * Translate TGSI tokens into toy instructions.  \p tgsi is initialized by
+ * this call; failures are reported through \p tc.
+ */
+void
+toy_compiler_translate_tgsi(struct toy_compiler *tc,
+                            const struct tgsi_token *tokens, bool aos,
+                            struct toy_tgsi *tgsi);
+
+/**
+ * Clean up the TGSI translator.
+ */
+void
+toy_tgsi_cleanup(struct toy_tgsi *tgsi);
+
+/**
+ * Map the TGSI register to VRF register; -1 is returned when the register
+ * has no mapping.
+ */
+int
+toy_tgsi_get_vrf(const struct toy_tgsi *tgsi,
+                 enum tgsi_file_type file, int dimension, int index);
+
+/**
+ * Dump the TGSI translator, currently only the register mapping.
+ */
+void
+toy_tgsi_dump(const struct toy_tgsi *tgsi);
+
+#endif /* TOY_TGSI_H */