llvmpipe: fix denorm handling for r11g11b10_float format when blending
[mesa.git] / src / gallium / drivers / llvmpipe / lp_rast.c
index dcd66ab00c1149c774c63d876f1ecbe05571a45b..0ae5976eedc7aaa8e7b83647127d5bbefeec62ee 100644 (file)
@@ -35,6 +35,7 @@
 #include "os/os_time.h"
 
 #include "lp_scene_queue.h"
+#include "lp_context.h"
 #include "lp_debug.h"
 #include "lp_fence.h"
 #include "lp_perf.h"
@@ -61,7 +62,6 @@ static void
 lp_rast_begin( struct lp_rasterizer *rast,
                struct lp_scene *scene )
 {
-
    rast->curr_scene = scene;
 
    LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
@@ -100,12 +100,34 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
    task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ?
                     task->scene->fb.height - y * TILE_SIZE : TILE_SIZE;
 
+   task->thread_data.vis_counter = 0;
+   task->ps_invocations = 0;
+
    /* reset pointers to color and depth tile(s) */
    memset(task->color_tiles, 0, sizeof(task->color_tiles));
    task->depth_tile = NULL;
 }
 
 
+/**
+ * Return TRUE if any non-NULL colorbuffer in the framebuffer state uses a
+ * pure integer format (per util_format_is_pure_integer()), FALSE otherwise.
+ * XXX this could be a gallium utility function if useful elsewhere.
+ */
+static boolean
+is_fb_pure_integer(const struct pipe_framebuffer_state *fb)
+{
+   unsigned i;
+   for (i = 0; i < fb->nr_cbufs; i++) {
+      if (fb->cbufs[i] &&   /* NULL colorbuffer slots are skipped */
+          util_format_is_pure_integer(fb->cbufs[i]->format)) {
+         return TRUE;
+      }
+   }
+   return FALSE;
+}
+
+
 /**
  * Clear the rasterizer's current color tile.
  * This is a bin command called during bin processing.
@@ -121,7 +143,7 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
       unsigned i;
       union util_color uc;
 
-      if (util_format_is_pure_integer(scene->fb.cbufs[0]->format)) {
+      if (is_fb_pure_integer(&scene->fb)) {
          /*
           * We expect int/uint clear values here, though some APIs
           * might disagree (but in any case util_pack_color()
@@ -135,8 +157,6 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
 
          for (i = 0; i < scene->fb.nr_cbufs; i++) {
             enum pipe_format format = scene->fb.cbufs[i]->format;
-            unsigned layer;
-            uint8_t *map_layer = scene->cbufs[i].map;
 
             if (util_format_is_pure_sint(format)) {
                util_format_write_4i(format, arg.clear_color.i, 0, &uc, 0, 0, 0, 1, 1);
@@ -146,17 +166,17 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
                util_format_write_4ui(format, arg.clear_color.ui, 0, &uc, 0, 0, 0, 1, 1);
             }
 
-            for (layer = 0; layer <= scene->fb_max_layer; layer++) {
-               util_fill_rect(map_layer,
-                              scene->fb.cbufs[i]->format,
-                              scene->cbufs[i].stride,
-                              task->x,
-                              task->y,
-                              task->width,
-                              task->height,
-                              &uc);
-               map_layer += scene->cbufs[i].layer_stride;
-            }
+            util_fill_box(scene->cbufs[i].map,
+                          format,
+                          scene->cbufs[i].stride,
+                          scene->cbufs[i].layer_stride,
+                          task->x,
+                          task->y,
+                          0,
+                          task->width,
+                          task->height,
+                          scene->fb_max_layer + 1,
+                          &uc);
          }
       }
       else {
@@ -173,21 +193,21 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
                     clear_color[3]);
 
          for (i = 0; i < scene->fb.nr_cbufs; i++) {
-            unsigned layer;
-            uint8_t *map_layer = scene->cbufs[i].map;
-
-            for (layer = 0; layer <= scene->fb_max_layer; layer++) {
+            if (scene->fb.cbufs[i]) {
                util_pack_color(arg.clear_color.f,
                                scene->fb.cbufs[i]->format, &uc);
-               util_fill_rect(map_layer,
-                              scene->fb.cbufs[i]->format,
-                              scene->cbufs[i].stride,
-                              task->x,
-                              task->y,
-                              task->width,
-                              task->height,
-                              &uc);
-               map_layer += scene->cbufs[i].layer_stride;
+
+               util_fill_box(scene->cbufs[i].map,
+                             scene->fb.cbufs[i]->format,
+                             scene->cbufs[i].stride,
+                             scene->cbufs[i].layer_stride,
+                             task->x,
+                             task->y,
+                             0,
+                             task->width,
+                             task->height,
+                             scene->fb_max_layer + 1,
+                             &uc);
             }
          }
       }
@@ -368,6 +388,9 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
             depth_stride = scene->zsbuf.stride;
          }
 
+         /* Propagate non-interpolated raster state. */
+         task->thread_data.raster_state.viewport_index = inputs->viewport_index;
+
          /* run shader on 4x4 block */
          BEGIN_JIT_CALL(state, task);
          variant->jit_function[RAST_WHOLE]( &state->jit_context,
@@ -442,8 +465,15 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
 
    /* color buffer */
    for (i = 0; i < scene->fb.nr_cbufs; i++) {
-      stride[i] = scene->cbufs[i].stride;
-      color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y, inputs->layer);
+      if (scene->fb.cbufs[i]) {
+         stride[i] = scene->cbufs[i].stride;
+         color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y,
+                                                               inputs->layer);
+      }
+      else {
+         stride[i] = 0;
+         color[i] = NULL;
+      }
    }
 
    /* depth buffer */
@@ -459,6 +489,13 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
     * allocated 4x4 blocks hence need to filter them out here.
     */
    if ((x % TILE_SIZE) < task->width && (y % TILE_SIZE) < task->height) {
+      /* Not very accurate: an exact count would need a popcount on the mask. */
+      /* Always count this -- not worth bothering? */
+      task->ps_invocations += 1 * variant->ps_inv_multiplier;
+
+      /* Propagate non-interpolated raster state. */
+      task->thread_data.raster_state.viewport_index = inputs->viewport_index;
+
       /* run shader on 4x4 block */
       BEGIN_JIT_CALL(state, task);
       variant->jit_function[RAST_EDGE_TEST](&state->jit_context,
@@ -490,24 +527,18 @@ lp_rast_begin_query(struct lp_rasterizer_task *task,
 {
    struct llvmpipe_query *pq = arg.query_obj;
 
-   assert(task->query[pq->type] == NULL);
-
    switch (pq->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
-      task->thread_data.vis_counter = 0;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      pq->start[task->thread_index] = task->thread_data.vis_counter;
       break;
-   case PIPE_QUERY_PRIMITIVES_GENERATED:
-   case PIPE_QUERY_PRIMITIVES_EMITTED:
-   case PIPE_QUERY_SO_STATISTICS:
    case PIPE_QUERY_PIPELINE_STATISTICS:
-   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+      pq->start[task->thread_index] = task->ps_invocations;
       break;
    default:
       assert(0);
       break;
    }
-
-   task->query[pq->type] = pq;
 }
 
 
@@ -521,29 +552,26 @@ lp_rast_end_query(struct lp_rasterizer_task *task,
                   const union lp_rast_cmd_arg arg)
 {
    struct llvmpipe_query *pq = arg.query_obj;
-   assert(task->query[pq->type] == pq || pq->type == PIPE_QUERY_TIMESTAMP);
 
    switch (pq->type) {
    case PIPE_QUERY_OCCLUSION_COUNTER:
-      pq->count[task->thread_index] += task->thread_data.vis_counter;
+   case PIPE_QUERY_OCCLUSION_PREDICATE:
+      pq->end[task->thread_index] +=
+         task->thread_data.vis_counter - pq->start[task->thread_index];
+      pq->start[task->thread_index] = 0;
       break;
    case PIPE_QUERY_TIMESTAMP:
-      pq->count[task->thread_index] = os_time_get_nano();
+      pq->end[task->thread_index] = os_time_get_nano();
       break;
-   case PIPE_QUERY_PRIMITIVES_GENERATED:
-   case PIPE_QUERY_PRIMITIVES_EMITTED:
-   case PIPE_QUERY_SO_STATISTICS:
    case PIPE_QUERY_PIPELINE_STATISTICS:
-   case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+      pq->end[task->thread_index] +=
+         task->ps_invocations - pq->start[task->thread_index];
+      pq->start[task->thread_index] = 0;
       break;
    default:
       assert(0);
       break;
    }
-
-   if (task->query[pq->type] == pq) {
-      task->query[pq->type] = NULL;
-   }
 }
 
 
@@ -564,10 +592,8 @@ lp_rast_tile_end(struct lp_rasterizer_task *task)
 {
    unsigned i;
 
-   for (i = 0; i < PIPE_QUERY_TYPES; ++i) {
-      if (task->query[i]) {
-         lp_rast_end_query(task, lp_rast_arg_query(task->query[i]));
-      }
+   for (i = 0; i < task->scene->num_active_queries; ++i) {
+      lp_rast_end_query(task, lp_rast_arg_query(task->scene->active_queries[i]));
    }
 
    /* debug */
@@ -597,6 +623,17 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] =
    lp_rast_begin_query,
    lp_rast_end_query,
    lp_rast_set_state,
+   lp_rast_triangle_32_1,
+   lp_rast_triangle_32_2,
+   lp_rast_triangle_32_3,
+   lp_rast_triangle_32_4,
+   lp_rast_triangle_32_5,
+   lp_rast_triangle_32_6,
+   lp_rast_triangle_32_7,
+   lp_rast_triangle_32_8,
+   lp_rast_triangle_32_3_4,
+   lp_rast_triangle_32_3_16,
+   lp_rast_triangle_32_4_16
 };
 
 
@@ -706,6 +743,12 @@ lp_rast_queue_scene( struct lp_rasterizer *rast,
 
    if (rast->num_threads == 0) {
       /* no threading */
+      unsigned fpstate = util_fpstate_get();
+
+      /* Make sure that denorms are treated like zeros. This is
+       * the behavior required by D3D10. OpenGL doesn't care.
+       */
+      util_fpstate_set_denorms_to_zero(fpstate);
 
       lp_rast_begin( rast, scene );
 
@@ -713,6 +756,8 @@ lp_rast_queue_scene( struct lp_rasterizer *rast,
 
       lp_rast_end( rast );
 
+      util_fpstate_set(fpstate);
+
       rast->curr_scene = NULL;
    }
    else {
@@ -760,6 +805,12 @@ static PIPE_THREAD_ROUTINE( thread_function, init_data )
    struct lp_rasterizer_task *task = (struct lp_rasterizer_task *) init_data;
    struct lp_rasterizer *rast = task->rast;
    boolean debug = false;
+   unsigned fpstate = util_fpstate_get();
+
+   /* Make sure that denorms are treated like zeros. This is
+    * the behavior required by D3D10. OpenGL doesn't care.
+    */
+   util_fpstate_set_denorms_to_zero(fpstate);
 
    while (1) {
       /* wait for work */
@@ -807,7 +858,7 @@ static PIPE_THREAD_ROUTINE( thread_function, init_data )
       pipe_semaphore_signal(&task->work_done);
    }
 
-   return NULL;
+   return 0;
 }
 
 
@@ -912,11 +963,3 @@ void lp_rast_destroy( struct lp_rasterizer *rast )
 }
 
 
-/** Return number of rasterization threads */
-unsigned
-lp_rast_get_num_threads( struct lp_rasterizer *rast )
-{
-   return rast->num_threads;
-}
-
-