i965/fs: Add support for translating ir_triop_fma into MAD.

[mesa.git] / src / mesa / drivers / dri / i965 / brw_program.c
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c

index 94efa79109187bb5a8ff77e1a0717145111cc0e4..9a517be0b3443a544863cf89b933e5e80c14d57a 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -29,6 +29,7 @@
    *   Keith Whitwell <keith@tungstengraphics.com>
    */
    
+#include <pthread.h>
  #include "main/imports.h"
  #include "main/enums.h"
  #include "main/shaderobj.h"
@@ -36,11 +37,21 @@
  #include "program/program.h"
  #include "program/programopt.h"
  #include "tnl/tnl.h"
-#include "talloc.h"
+#include "glsl/ralloc.h"
  
  #include "brw_context.h"
  #include "brw_wm.h"
  
+static unsigned
+get_new_program_id(struct intel_screen *screen)
+{
+   static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
+   pthread_mutex_lock(&m);
+   unsigned id = screen->program_id++;
+   pthread_mutex_unlock(&m);
+   return id;
+}
+
  static void brwBindProgram( struct gl_context *ctx,
                             GLenum target, 
                             struct gl_program *prog )
@@ -51,6 +62,9 @@ static void brwBindProgram( struct gl_context *ctx,
     case GL_VERTEX_PROGRAM_ARB: 
        brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
        break;
+   case MESA_GEOMETRY_PROGRAM:
+      brw->state.dirty.brw |= BRW_NEW_GEOMETRY_PROGRAM;
+      break;
     case GL_FRAGMENT_PROGRAM_ARB:
        brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
        break;
@@ -67,7 +81,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
     case GL_VERTEX_PROGRAM_ARB: {
        struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
        if (prog) {
-        prog->id = brw->program_id++;
+        prog->id = get_new_program_id(brw->intelScreen);
  
          return _mesa_init_vertex_program( ctx, &prog->program,
                                              target, id );
@@ -79,7 +93,7 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
     case GL_FRAGMENT_PROGRAM_ARB: {
        struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
        if (prog) {
-        prog->id = brw->program_id++;
+        prog->id = get_new_program_id(brw->intelScreen);
  
          return _mesa_init_fragment_program( ctx, &prog->program,
                                              target, id );
@@ -88,8 +102,20 @@ static struct gl_program *brwNewProgram( struct gl_context *ctx,
          return NULL;
     }
  
+   case MESA_GEOMETRY_PROGRAM: {
+      struct brw_geometry_program *prog = CALLOC_STRUCT(brw_geometry_program);
+      if (prog) {
+         prog->id = get_new_program_id(brw->intelScreen);
+
+         return _mesa_init_geometry_program(ctx, &prog->program, target, id);
+      } else {
+         return NULL;
+      }
+   }
+
     default:
-      return _mesa_new_program(ctx, target, id);
+      assert(!"Unsupported target in brwNewProgram()");
+      return NULL;
     }
  }
  
@@ -100,59 +126,34 @@ static void brwDeleteProgram( struct gl_context *ctx,
  }
  
  
-static GLboolean brwIsProgramNative( struct gl_context *ctx,
-                                    GLenum target, 
-                                    struct gl_program *prog )
+static GLboolean
+brwIsProgramNative(struct gl_context *ctx,
+                  GLenum target,
+                  struct gl_program *prog)
  {
-   return GL_TRUE;
+   return true;
  }
  
-static void
-shader_error(struct gl_context *ctx, struct gl_program *prog, const char *msg)
-{
-   struct gl_shader_program *shader;
-
-   shader = _mesa_lookup_shader_program(ctx, prog->Id);
-
-   if (shader) {
-      shader->InfoLog = talloc_strdup_append(shader->InfoLog, msg);
-      shader->LinkStatus = GL_FALSE;
-   }
-}
-
-static GLboolean brwProgramStringNotify( struct gl_context *ctx,
-                                         GLenum target,
-                                         struct gl_program *prog )
+static GLboolean
+brwProgramStringNotify(struct gl_context *ctx,
+                      GLenum target,
+                      struct gl_program *prog)
  {
     struct brw_context *brw = brw_context(ctx);
-   int i;
  
-   if (target == GL_FRAGMENT_PROGRAM_ARB) {
+   switch (target) {
+   case GL_FRAGMENT_PROGRAM_ARB: {
        struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog;
        struct brw_fragment_program *newFP = brw_fragment_program(fprog);
        const struct brw_fragment_program *curFP =
           brw_fragment_program_const(brw->fragment_program);
-      struct gl_shader_program *shader_program;
-
-      if (fprog->FogOption) {
-         _mesa_append_fog_code(ctx, fprog);
-         fprog->FogOption = GL_NONE;
-      }
  
        if (newFP == curFP)
          brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM;
-      newFP->id = brw->program_id++;      
-
-      /* Don't reject fragment shaders for their Mesa IR state when we're
-       * using the new FS backend.
-       */
-      shader_program = _mesa_lookup_shader_program(ctx, prog->Id);
-      if (shader_program
-         && shader_program->_LinkedShaders[MESA_SHADER_FRAGMENT]) {
-        return GL_TRUE;
-      }
+      newFP->id = get_new_program_id(brw->intelScreen);
+      break;
     }
-   else if (target == GL_VERTEX_PROGRAM_ARB) {
+   case GL_VERTEX_PROGRAM_ARB: {
        struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog;
        struct brw_vertex_program *newVP = brw_vertex_program(vprog);
        const struct brw_vertex_program *curVP =
@@ -163,72 +164,75 @@ static GLboolean brwProgramStringNotify( struct gl_context *ctx,
        if (newVP->program.IsPositionInvariant) {
          _mesa_insert_mvp_code(ctx, &newVP->program);
        }
-      newVP->id = brw->program_id++;      
+      newVP->id = get_new_program_id(brw->intelScreen);
  
        /* Also tell tnl about it:
         */
        _tnl_program_string(ctx, target, prog);
+      break;
+   }
+   default:
+      /*
+       * driver->ProgramStringNotify is only called for ARB programs, fixed
+       * function vertex programs, and ir_to_mesa (which isn't used by the
+       * i965 back-end).  Therefore, even after geometry shaders are added,
+       * this function should only ever be called with a target of
+       * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
+       */
+      assert(!"Unexpected target in brwProgramStringNotify");
+      break;
     }
  
-   /* Reject programs with subroutines, which are totally broken at the moment
-    * (all program flows return when any program flow returns, and
-    * the VS also hangs if a function call calls a function.
-    *
-    * See piglit glsl-{vs,fs}-functions-[23] tests.
-    */
-   for (i = 0; i < prog->NumInstructions; i++) {
-      struct prog_instruction *inst = prog->Instructions + i;
-      int r;
-
-      if (prog->Instructions[i].Opcode == OPCODE_CAL) {
-        shader_error(ctx, prog,
-                     "i965 driver doesn't yet support uninlined function "
-                     "calls.  Move to using a single return statement at "
-                     "the end of the function to work around it.\n");
-        return GL_FALSE;
-      }
+   brw_add_texrect_params(prog);
  
-      if (prog->Instructions[i].Opcode == OPCODE_RET) {
-        shader_error(ctx, prog,
-                     "i965 driver doesn't yet support \"return\" "
-                     "from main().\n");
-        return GL_FALSE;
-      }
+   return true;
+}
  
-      for (r = 0; r < _mesa_num_inst_src_regs(inst->Opcode); r++) {
-        if (prog->Instructions[i].SrcReg[r].RelAddr &&
-            prog->Instructions[i].SrcReg[r].File == PROGRAM_INPUT) {
-           shader_error(ctx, prog,
-                        "Variable indexing of shader inputs unsupported\n");
-           return GL_FALSE;
-        }
-      }
+void
+brw_add_texrect_params(struct gl_program *prog)
+{
+   for (int texunit = 0; texunit < BRW_MAX_TEX_UNIT; texunit++) {
+      if (!(prog->TexturesUsed[texunit] & (1 << TEXTURE_RECT_INDEX)))
+         continue;
+
+      int tokens[STATE_LENGTH] = {
+         STATE_INTERNAL,
+         STATE_TEXRECT_SCALE,
+         texunit,
+         0,
+         0
+      };
+
+      _mesa_add_state_reference(prog->Parameters, (gl_state_index *)tokens);
+   }
+}
  
-      if (target == GL_FRAGMENT_PROGRAM_ARB &&
-         prog->Instructions[i].DstReg.RelAddr &&
-         prog->Instructions[i].DstReg.File == PROGRAM_OUTPUT) {
-        shader_error(ctx, prog,
-                     "Variable indexing of FS outputs unsupported\n");
-        return GL_FALSE;
-      }
-      if (target == GL_FRAGMENT_PROGRAM_ARB) {
-        if ((prog->Instructions[i].DstReg.RelAddr &&
-             prog->Instructions[i].DstReg.File == PROGRAM_TEMPORARY) ||
-            (prog->Instructions[i].SrcReg[0].RelAddr &&
-             prog->Instructions[i].SrcReg[0].File == PROGRAM_TEMPORARY) ||
-            (prog->Instructions[i].SrcReg[1].RelAddr &&
-             prog->Instructions[i].SrcReg[1].File == PROGRAM_TEMPORARY) ||
-            (prog->Instructions[i].SrcReg[2].RelAddr &&
-             prog->Instructions[i].SrcReg[2].File == PROGRAM_TEMPORARY)) {
-           shader_error(ctx, prog,
-                        "Variable indexing of variable arrays in the FS "
-                        "unsupported\n");
-           return GL_FALSE;
-        }
-      }
+/* Per-thread scratch space is a power-of-two multiple of 1KB. */
+int
+brw_get_scratch_size(int size)
+{
+   int i;
+
+   for (i = 1024; i < size; i *= 2)
+      ;
+
+   return i;
+}
+
+void
+brw_get_scratch_bo(struct brw_context *brw,
+                  drm_intel_bo **scratch_bo, int size)
+{
+   drm_intel_bo *old_bo = *scratch_bo;
+
+   if (old_bo && old_bo->size < size) {
+      drm_intel_bo_unreference(old_bo);
+      old_bo = NULL;
     }
  
-   return GL_TRUE;
+   if (!old_bo) {
+      *scratch_bo = drm_intel_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
+   }
  }
  
  void brwInitFragProgFuncs( struct dd_function_table *functions )
@@ -243,7 +247,281 @@ void brwInitFragProgFuncs( struct dd_function_table *functions )
  
     functions->NewShader = brw_new_shader;
     functions->NewShaderProgram = brw_new_shader_program;
-   functions->CompileShader = brw_compile_shader;
     functions->LinkShader = brw_link_shader;
  }
  
+void
+brw_init_shader_time(struct brw_context *brw)
+{
+   const int max_entries = 4096;
+   brw->shader_time.bo = drm_intel_bo_alloc(brw->bufmgr, "shader time",
+                                            max_entries * SHADER_TIME_STRIDE,
+                                            4096);
+   brw->shader_time.shader_programs = rzalloc_array(brw, struct gl_shader_program *,
+                                                    max_entries);
+   brw->shader_time.programs = rzalloc_array(brw, struct gl_program *,
+                                             max_entries);
+   brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
+                                          max_entries);
+   brw->shader_time.cumulative = rzalloc_array(brw, uint64_t,
+                                               max_entries);
+   brw->shader_time.max_entries = max_entries;
+}
+
+static int
+compare_time(const void *a, const void *b)
+{
+   uint64_t * const *a_val = a;
+   uint64_t * const *b_val = b;
+
+   /* We don't just subtract because we're turning the value to an int. */
+   if (**a_val < **b_val)
+      return -1;
+   else if (**a_val == **b_val)
+      return 0;
+   else
+      return 1;
+}
+
+static void
+get_written_and_reset(struct brw_context *brw, int i,
+                      uint64_t *written, uint64_t *reset)
+{
+   enum shader_time_shader_type type = brw->shader_time.types[i];
+   assert(type == ST_VS || type == ST_FS8 || type == ST_FS16);
+
+   /* Find where we recorded written and reset. */
+   int wi, ri;
+
+   for (wi = i; brw->shader_time.types[wi] != type + 1; wi++)
+      ;
+
+   for (ri = i; brw->shader_time.types[ri] != type + 2; ri++)
+      ;
+
+   *written = brw->shader_time.cumulative[wi];
+   *reset = brw->shader_time.cumulative[ri];
+}
+
+static void
+print_shader_time_line(const char *stage, const char *name,
+                       int shader_num, uint64_t time, uint64_t total)
+{
+   printf("%-6s%-6s", stage, name);
+
+   if (shader_num != -1)
+      printf("%4d: ", shader_num);
+   else
+      printf("    : ");
+
+   printf("%16lld (%7.2f Gcycles)      %4.1f%%\n",
+          (long long)time,
+          (double)time / 1000000000.0,
+          (double)time / total * 100.0);
+}
+
+static void
+brw_report_shader_time(struct brw_context *brw)
+{
+   if (!brw->shader_time.bo || !brw->shader_time.num_entries)
+      return;
+
+   uint64_t scaled[brw->shader_time.num_entries];
+   uint64_t *sorted[brw->shader_time.num_entries];
+   uint64_t total_by_type[ST_FS16 + 1];
+   memset(total_by_type, 0, sizeof(total_by_type));
+   double total = 0;
+   for (int i = 0; i < brw->shader_time.num_entries; i++) {
+      uint64_t written = 0, reset = 0;
+      enum shader_time_shader_type type = brw->shader_time.types[i];
+
+      sorted[i] = &scaled[i];
+
+      switch (type) {
+      case ST_VS_WRITTEN:
+      case ST_VS_RESET:
+      case ST_FS8_WRITTEN:
+      case ST_FS8_RESET:
+      case ST_FS16_WRITTEN:
+      case ST_FS16_RESET:
+         /* We'll handle these when along with the time. */
+         scaled[i] = 0;
+         continue;
+
+      case ST_VS:
+      case ST_FS8:
+      case ST_FS16:
+         get_written_and_reset(brw, i, &written, &reset);
+         break;
+
+      default:
+         /* I sometimes want to print things that aren't the 3 shader times.
+          * Just print the sum in that case.
+          */
+         written = 1;
+         reset = 0;
+         break;
+      }
+
+      uint64_t time = brw->shader_time.cumulative[i];
+      if (written) {
+         scaled[i] = time / written * (written + reset);
+      } else {
+         scaled[i] = time;
+      }
+
+      switch (type) {
+      case ST_VS:
+      case ST_FS8:
+      case ST_FS16:
+         total_by_type[type] += scaled[i];
+         break;
+      default:
+         break;
+      }
+
+      total += scaled[i];
+   }
+
+   if (total == 0) {
+      printf("No shader time collected yet\n");
+      return;
+   }
+
+   qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);
+
+   printf("\n");
+   printf("type          ID      cycles spent                   %% of total\n");
+   for (int s = 0; s < brw->shader_time.num_entries; s++) {
+      const char *shader_name;
+      const char *stage;
+      /* Work back from the sorted pointers times to a time to print. */
+      int i = sorted[s] - scaled;
+
+      if (scaled[i] == 0)
+         continue;
+
+      int shader_num = -1;
+      if (brw->shader_time.shader_programs[i]) {
+         shader_num = brw->shader_time.shader_programs[i]->Name;
+
+         /* The fixed function fragment shader generates GLSL IR with a Name
+          * of 0, and nothing else does.
+          */
+         if (shader_num == 0 &&
+             (brw->shader_time.types[i] == ST_FS8 ||
+              brw->shader_time.types[i] == ST_FS16)) {
+            shader_name = "ff";
+            shader_num = -1;
+         } else {
+            shader_name = "glsl";
+         }
+      } else if (brw->shader_time.programs[i]) {
+         shader_num = brw->shader_time.programs[i]->Id;
+         if (shader_num == 0) {
+            shader_name = "ff";
+            shader_num = -1;
+         } else {
+            shader_name = "prog";
+         }
+      } else {
+         shader_name = "other";
+      }
+
+      switch (brw->shader_time.types[i]) {
+      case ST_VS:
+         stage = "vs";
+         break;
+      case ST_FS8:
+         stage = "fs8";
+         break;
+      case ST_FS16:
+         stage = "fs16";
+         break;
+      default:
+         stage = "other";
+         break;
+      }
+
+      print_shader_time_line(stage, shader_name, shader_num,
+                             scaled[i], total);
+   }
+
+   printf("\n");
+   print_shader_time_line("total", "vs", -1, total_by_type[ST_VS], total);
+   print_shader_time_line("total", "fs8", -1, total_by_type[ST_FS8], total);
+   print_shader_time_line("total", "fs16", -1, total_by_type[ST_FS16], total);
+}
+
+static void
+brw_collect_shader_time(struct brw_context *brw)
+{
+   if (!brw->shader_time.bo)
+      return;
+
+   /* This probably stalls on the last rendering.  We could fix that by
+    * delaying reading the reports, but it doesn't look like it's a big
+    * overhead compared to the cost of tracking the time in the first place.
+    */
+   drm_intel_bo_map(brw->shader_time.bo, true);
+
+   uint32_t *times = brw->shader_time.bo->virtual;
+
+   for (int i = 0; i < brw->shader_time.num_entries; i++) {
+      brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4];
+   }
+
+   /* Zero the BO out to clear it out for our next collection.
+    */
+   memset(times, 0, brw->shader_time.bo->size);
+   drm_intel_bo_unmap(brw->shader_time.bo);
+}
+
+void
+brw_collect_and_report_shader_time(struct brw_context *brw)
+{
+   brw_collect_shader_time(brw);
+
+   if (brw->shader_time.report_time == 0 ||
+       get_time() - brw->shader_time.report_time >= 1.0) {
+      brw_report_shader_time(brw);
+      brw->shader_time.report_time = get_time();
+   }
+}
+
+/**
+ * Chooses an index in the shader_time buffer and sets up tracking information
+ * for our printouts.
+ *
+ * Note that this holds on to references to the underlying programs, which may
+ * change their lifetimes compared to normal operation.
+ */
+int
+brw_get_shader_time_index(struct brw_context *brw,
+                          struct gl_shader_program *shader_prog,
+                          struct gl_program *prog,
+                          enum shader_time_shader_type type)
+{
+   struct gl_context *ctx = &brw->ctx;
+
+   int shader_time_index = brw->shader_time.num_entries++;
+   assert(shader_time_index < brw->shader_time.max_entries);
+   brw->shader_time.types[shader_time_index] = type;
+
+   _mesa_reference_shader_program(ctx,
+                                  &brw->shader_time.shader_programs[shader_time_index],
+                                  shader_prog);
+
+   _mesa_reference_program(ctx,
+                           &brw->shader_time.programs[shader_time_index],
+                           prog);
+
+   return shader_time_index;
+}
+
+void
+brw_destroy_shader_time(struct brw_context *brw)
+{
+   drm_intel_bo_unreference(brw->shader_time.bo);
+   brw->shader_time.bo = NULL;
+}