i965: Start building direct GLSL2 IR to 965 assembly codegen.
author     Eric Anholt <eric@anholt.net>
           Thu, 26 Aug 2010 19:12:00 +0000 (12:12 -0700)
committer  Eric Anholt <eric@anholt.net>
           Thu, 26 Aug 2010 21:55:44 +0000 (14:55 -0700)
Our channel-expression and vector-splitting passes now operate on a
private copy of the IR that we maintain for ourselves.  Uniform
assignment is still handled by core Mesa, so we continue using Mesa IR
generation not just for swrast fallbacks but also for uniform values
(since there's no storage for their contents other than
shader_program->FragmentProgram->Parameters->ParameterValues).  And
most importantly, at the moment no actual codegen is hooked up other
than emitting our favorite color to the framebuffer.
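
The new path is strictly opt-in: brw_link_shader() only builds the private
IR copy when the INTEL_NEW_FS environment variable is set, and do_wm_prog()
falls back to the existing GLSL/non-GLSL emitters whenever brw_wm_fs_emit()
declines.  A minimal sketch of the toggle, assuming the same once-per-process
caching as the driver's static using_new_fs flag in the diff below:

   #include <stdbool.h>
   #include <stdlib.h>

   /* Consult the environment once; -1 means "not checked yet". */
   static bool
   use_new_fs_backend(void)
   {
      static int using_new_fs = -1;

      if (using_new_fs == -1)
         using_new_fs = getenv("INTEL_NEW_FS") != NULL;

      return using_new_fs;
   }

Running an application with INTEL_NEW_FS set (plus INTEL_DEBUG=wm to get the
GLSL IR and native-code dumps printed by brw_wm_fs_emit()) exercises the new
backend; without the variable the driver behaves as before.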

src/mesa/drivers/dri/i965/brw_fs.cpp
src/mesa/drivers/dri/i965/brw_wm.c
src/mesa/drivers/dri/i965/brw_wm.h
src/mesa/drivers/dri/i965/brw_wm_state.c

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 9a6ee7a0100a0e181962546cfeb80ae95082c14f..5dada65909d0dde7a2c070d7ba767232be968817 100644
@@ -39,6 +39,20 @@ extern "C" {
 #include "../glsl/ir_optimization.h"
 #include "../glsl/ir_print_visitor.h"
 
+enum register_file {
+   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
+   GRF = BRW_GENERAL_REGISTER_FILE,
+   MRF = BRW_MESSAGE_REGISTER_FILE,
+   IMM = BRW_IMMEDIATE_VALUE,
+   BAD_FILE
+};
+
+enum fs_opcodes {
+   FS_OPCODE_FB_WRITE = 256,
+};
+
+static int using_new_fs = -1;
+
 struct gl_shader *
 brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
 {
@@ -77,18 +91,31 @@ brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
 GLboolean
 brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
 {
-   static int using_new_fs = -1;
-
    if (using_new_fs == -1)
       using_new_fs = getenv("INTEL_NEW_FS") != NULL;
 
    for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
-      struct gl_shader *shader = prog->_LinkedShaders[i];
+      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];
+
+      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
+        void *mem_ctx = talloc_new(NULL);
+        bool progress;
+
+        shader->ir = new(shader) exec_list;
+        clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
 
-      if (using_new_fs && shader->Type == GL_FRAGMENT_SHADER) {
         do_mat_op_to_vec(shader->ir);
         brw_do_channel_expressions(shader->ir);
         brw_do_vector_splitting(shader->ir);
+
+        do {
+           progress = false;
+
+           progress = do_common_optimization(shader->ir, true) || progress;
+        } while (progress);
+
+        reparent_ir(shader->ir, shader);
+        talloc_free(mem_ctx);
       }
    }
 
@@ -97,3 +124,323 @@ brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
 
    return GL_TRUE;
 }
+
+class fs_reg {
+public:
+   fs_reg()
+   {
+      this->file = BAD_FILE;
+      this->reg = 0;
+      this->hw_reg = -1;
+   }
+
+   fs_reg(float f)
+   {
+      this->file = IMM;
+      this->reg = 0;
+      this->hw_reg = 0;
+      this->type = BRW_REGISTER_TYPE_F;
+      this->imm.f = f;
+   }
+
+   fs_reg(int32_t i)
+   {
+      this->file = IMM;
+      this->reg = 0;
+      this->hw_reg = 0;
+      this->type = BRW_REGISTER_TYPE_D;
+      this->imm.i = i;
+   }
+
+   fs_reg(uint32_t u)
+   {
+      this->file = IMM;
+      this->reg = 0;
+      this->hw_reg = 0;
+      this->type = BRW_REGISTER_TYPE_UD;
+      this->imm.u = u;
+   }
+
+   fs_reg(enum register_file file, int hw_reg)
+   {
+      this->file = file;
+      this->reg = 0;
+      this->hw_reg = hw_reg;
+      this->type = BRW_REGISTER_TYPE_F;
+   }
+
+   /** Register file: ARF, GRF, MRF, IMM. */
+   enum register_file file;
+   /** Abstract register number.  0 = fixed hw reg */
+   int reg;
+   /** HW register number.  Generally unset until register allocation. */
+   int hw_reg;
+   /** Register type.  BRW_REGISTER_TYPE_* */
+   int type;
+
+   /** Value for file == IMM. */
+   union {
+      int32_t i;
+      uint32_t u;
+      float f;
+   } imm;
+};
+
+static const fs_reg reg_undef(BAD_FILE, -1);
+static const fs_reg reg_null(ARF, BRW_ARF_NULL);
+
+class fs_inst : public exec_node {
+public:
+   /* Callers of this talloc-based new need not call delete. It's
+    * easier to just talloc_free 'ctx' (or any of its ancestors). */
+   static void* operator new(size_t size, void *ctx)
+   {
+      void *node;
+
+      node = talloc_size(ctx, size);
+      assert(node != NULL);
+
+      return node;
+   }
+
+   fs_inst()
+   {
+      this->opcode = BRW_OPCODE_NOP;
+      this->dst = reg_undef;
+      this->src[0] = reg_undef;
+      this->src[1] = reg_undef;
+   }
+   fs_inst(int opcode, fs_reg dst, fs_reg src0)
+   {
+      this->opcode = opcode;
+      this->dst = dst;
+      this->src[0] = src0;
+      this->src[1] = reg_undef;
+   }
+   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
+   {
+      this->opcode = opcode;
+      this->dst = dst;
+      this->src[0] = src0;
+      this->src[1] = src1;
+   }
+
+   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
+   fs_reg dst;
+   fs_reg src[2];
+};
+
+class fs_visitor : public ir_hierarchical_visitor
+{
+public:
+
+   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
+   {
+      this->c = c;
+      this->p = &c->func;
+      this->mem_ctx = talloc_new(NULL);
+      this->shader = shader;
+   }
+   ~fs_visitor()
+   {
+      talloc_free(this->mem_ctx);
+   }
+
+   fs_inst *emit(fs_inst inst);
+   void generate_code();
+   void generate_fb_write(fs_inst *inst);
+
+   void emit_dummy_fs();
+
+   struct brw_wm_compile *c;
+   struct brw_compile *p;
+   struct brw_shader *shader;
+   void *mem_ctx;
+   exec_list instructions;
+
+   int grf_used;
+
+};
+
+fs_inst *
+fs_visitor::emit(fs_inst inst)
+{
+   fs_inst *list_inst = new(mem_ctx) fs_inst;
+   *list_inst = inst;
+
+   this->instructions.push_tail(list_inst);
+
+   return list_inst;
+}
+
+/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
+void
+fs_visitor::emit_dummy_fs()
+{
+   /* Everyone's favorite color. */
+   emit(fs_inst(BRW_OPCODE_MOV,
+               fs_reg(MRF, 2),
+               fs_reg(1.0f)));
+   emit(fs_inst(BRW_OPCODE_MOV,
+               fs_reg(MRF, 3),
+               fs_reg(0.0f)));
+   emit(fs_inst(BRW_OPCODE_MOV,
+               fs_reg(MRF, 4),
+               fs_reg(1.0f)));
+   emit(fs_inst(BRW_OPCODE_MOV,
+               fs_reg(MRF, 5),
+               fs_reg(0.0f)));
+
+   fs_inst *write;
+   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
+                       fs_reg(0),
+                       fs_reg(0)));
+}
+
+void
+fs_visitor::generate_fb_write(fs_inst *inst)
+{
+   GLboolean eot = 1; /* FINISHME: MRT */
+   /* FINISHME: AADS */
+
+   /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
+    * move, here's g1.
+    */
+   brw_push_insn_state(p);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+   brw_MOV(p,
+          brw_message_reg(1),
+          brw_vec8_grf(1, 0));
+   brw_pop_insn_state(p);
+
+   int nr = 2 + 4;
+
+   brw_fb_WRITE(p,
+               8, /* dispatch_width */
+               retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
+               0, /* base MRF */
+               retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
+               0, /* FINISHME: MRT target */
+               nr,
+               0,
+               eot);
+}
+
+void
+fs_visitor::generate_code()
+{
+   this->grf_used = 2; /* header */
+
+   foreach_iter(exec_list_iterator, iter, this->instructions) {
+      fs_inst *inst = (fs_inst *)iter.get();
+      struct brw_reg src[2], dst;
+
+      for (unsigned int i = 0; i < 2; i++) {
+        switch (inst->src[i].file) {
+        case GRF:
+        case ARF:
+        case MRF:
+           src[i] = brw_vec8_reg(inst->src[i].file,
+                                 inst->src[i].hw_reg, 0);
+           src[i] = retype(src[i], inst->src[i].type);
+           break;
+        case IMM:
+           switch (inst->src[i].type) {
+           case BRW_REGISTER_TYPE_F:
+              src[i] = brw_imm_f(inst->src[i].imm.f);
+              break;
+           case BRW_REGISTER_TYPE_D:
+              src[i] = brw_imm_d(inst->src[i].imm.i);
+              break;
+           case BRW_REGISTER_TYPE_UD:
+              src[i] = brw_imm_ud(inst->src[i].imm.u);
+              break;
+           default:
+              assert(!"not reached");
+              break;
+           }
+           break;
+        case BAD_FILE:
+           /* Probably unused. */
+           src[i] = brw_null_reg();
+        }
+      }
+      dst = brw_vec8_reg(inst->dst.file, inst->dst.hw_reg, 0);
+
+      switch (inst->opcode) {
+      case BRW_OPCODE_MOV:
+        brw_MOV(p, dst, src[0]);
+        break;
+      case FS_OPCODE_FB_WRITE:
+        generate_fb_write(inst);
+        break;
+      default:
+        assert(!"not reached");
+      }
+   }
+}
+
+GLboolean
+brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   struct intel_context *intel = &brw->intel;
+   GLcontext *ctx = &intel->ctx;
+   struct brw_shader *shader = NULL;
+   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
+
+   if (!prog)
+      return GL_FALSE;
+
+   if (!using_new_fs)
+      return GL_FALSE;
+
+   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
+      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
+        shader = (struct brw_shader *)prog->_LinkedShaders[i];
+        break;
+      }
+   }
+   if (!shader)
+      return GL_FALSE;
+
+   /* We always use 8-wide mode, at least for now.  For one, flow
+    * control only works in 8-wide.  Also, when we're fragment shader
+    * bound, we're almost always under register pressure as well, so
+    * 8-wide would save us from the performance cliff of spilling
+    * regs.
+    */
+   c->dispatch_width = 8;
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
+      _mesa_print_ir(shader->ir, NULL);
+      printf("\n");
+   }
+
+   /* Now the main event: Visit the shader IR and generate our FS IR for it.
+    */
+   fs_visitor v(c, shader);
+   visit_list_elements(&v, shader->ir);
+
+   v.emit_dummy_fs();
+
+   v.generate_code();
+
+   if (INTEL_DEBUG & DEBUG_WM) {
+      printf("Native code for fragment shader %d:\n", prog->Name);
+      for (unsigned int i = 0; i < p->nr_insn; i++)
+        brw_disasm(stdout, &p->store[i], intel->gen);
+      printf("\n");
+   }
+
+   c->prog_data.nr_params = 0; /* FINISHME */
+   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
+   c->prog_data.urb_read_length = 1; /* FINISHME: attrs */
+   c->prog_data.curb_read_length = 0; /* FINISHME */
+   c->prog_data.total_grf = v.grf_used;
+   c->prog_data.total_scratch = 0;
+
+   return GL_TRUE;
+}
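
A usage sketch of the new FS IR (not from the commit itself; a hypothetical
helper that would live in brw_fs.cpp next to the classes above): color
channels are staged in MRF 2..5 and FS_OPCODE_FB_WRITE terminates the
program, after which generate_fb_write() prepends the two-register message
header and issues a SIMD8 brw_fb_WRITE with message length 2 + 4 = 6 and
EOT set.  The dummy shader above does exactly this with (1, 0, 1, 0),
i.e. magenta with zero alpha.

   /* Hypothetical helper: stage a solid color and end with the FB write.
    * MRF 2..5 is the RGBA layout generate_fb_write() expects after its
    * two-register header at MRF 0..1.
    */
   static void
   emit_solid_color(fs_visitor *v, float r, float g, float b, float a)
   {
      v->emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, 2), fs_reg(r)));
      v->emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, 3), fs_reg(g)));
      v->emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, 4), fs_reg(b)));
      v->emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, 5), fs_reg(a)));

      /* The FB write's sources are unused for now. */
      v->emit(fs_inst(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0)));
   }
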
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 34cefeea32a0aa8f568cdc920d956cf34d3b4cfd..899e9b1dfb592d552fe37147a0a9ab6ffacb9e5f 100644
@@ -177,17 +177,19 @@ static void do_wm_prog( struct brw_context *brw,
    /* temporary sanity check assertion */
    ASSERT(fp->isGLSL == brw_wm_is_glsl(&c->fp->program));
 
-   /*
-    * Shaders which use GLSL features such as flow control are handled
-    * differently from "simple" shaders.
-    */
-   if (fp->isGLSL) {
-      c->dispatch_width = 8;
-      brw_wm_glsl_emit(brw, c);
-   }
-   else {
-      c->dispatch_width = 16;
-      brw_wm_non_glsl_emit(brw, c);
+   if (!brw_wm_fs_emit(brw, c)) {
+      /*
+       * Shaders which use GLSL features such as flow control are handled
+       * differently from "simple" shaders.
+       */
+      if (fp->isGLSL) {
+        c->dispatch_width = 8;
+        brw_wm_glsl_emit(brw, c);
+      }
+      else {
+        c->dispatch_width = 16;
+        brw_wm_non_glsl_emit(brw, c);
+      }
    }
 
    if (INTEL_DEBUG & DEBUG_WM)
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h
index 6a761e723b410ee51c5eba9e17076b616457aadf..2639d4f26b392e93a69f75988fa89f2293a4dbfa 100644
@@ -306,6 +306,7 @@ void brw_wm_lookup_iz( GLuint line_aa,
 
 GLboolean brw_wm_is_glsl(const struct gl_fragment_program *fp);
 void brw_wm_glsl_emit(struct brw_context *brw, struct brw_wm_compile *c);
+GLboolean brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c);
 
 /* brw_wm_emit.c */
 void emit_alu1(struct brw_compile *p,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index c1cf4db1caeba957d7a1f010861d60a0c727f376..6699d0a73e6501bb3a688c4fdb7a2dfb0ac21a85 100644
@@ -104,8 +104,22 @@ wm_unit_populate_key(struct brw_context *brw, struct brw_wm_unit_key *key)
    key->uses_kill = fp->UsesKill || ctx->Color.AlphaEnabled;
    key->is_glsl = bfp->isGLSL;
 
-   /* temporary sanity check assertion */
-   ASSERT(bfp->isGLSL == brw_wm_is_glsl(fp));
+   /* If using the fragment shader backend, the program is always
+    * 8-wide.
+    */
+   if (ctx->Shader.CurrentProgram) {
+      int i;
+
+      for (i = 0; i < ctx->Shader.CurrentProgram->_NumLinkedShaders; i++) {
+        struct brw_shader *shader =
+           (struct brw_shader *)ctx->Shader.CurrentProgram->_LinkedShaders[i];
+
+        if (shader->base.Type == GL_FRAGMENT_SHADER &&
+            shader->ir != NULL) {
+           key->is_glsl = GL_TRUE;
+        }
+      }
+   }
 
    /* _NEW_DEPTH */
    key->stats_wm = intel->stats_wm;