i965g: update to similar gen stuff as i965
[mesa.git] / src / gallium / drivers / i965 / brw_sf_state.c
index 648a16a038cd42a6d439c94e72b8bcdde57b5437..eec024650ce1362dc7b6c610faccedb4381aca10 100644 (file)
   *   Keith Whitwell <keith@tungstengraphics.com>
   */
    
+#include "util/u_math.h"
 
+#include "pipe/p_state.h"
 
 #include "brw_context.h"
 #include "brw_state.h"
 #include "brw_defines.h"
+#include "brw_debug.h"
+#include "brw_pipe_rast.h"
 
-static void upload_sf_vp(struct brw_context *brw)
+static enum pipe_error upload_sf_vp(struct brw_context *brw)
 {
-   const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
+   const struct pipe_viewport_state *vp = &brw->curr.viewport;
+   const struct pipe_scissor_state *scissor = &brw->curr.scissor;
    struct brw_sf_viewport sfv;
-   GLfloat y_scale, y_bias;
-   const GLfloat *v = ctx->Viewport._WindowMap.m;
+   enum pipe_error ret;
 
    memset(&sfv, 0, sizeof(sfv));
 
-   y_scale = 1.0;
-   y_bias = 0;
+   /* PIPE_NEW_VIEWPORT, PIPE_NEW_SCISSOR */
 
-   /* _NEW_VIEWPORT */
+   sfv.viewport.m00 = vp->scale[0];
+   sfv.viewport.m11 = vp->scale[1];
+   sfv.viewport.m22 = vp->scale[2];
+   sfv.viewport.m30 = vp->translate[0];
+   sfv.viewport.m31 = vp->translate[1];
+   sfv.viewport.m32 = vp->translate[2];
 
-   sfv.viewport.m00 = v[MAT_SX];
-   sfv.viewport.m11 = v[MAT_SY] * y_scale;
-   sfv.viewport.m22 = v[MAT_SZ] * depth_scale;
-   sfv.viewport.m30 = v[MAT_TX];
-   sfv.viewport.m31 = v[MAT_TY] * y_scale + y_bias;
-   sfv.viewport.m32 = v[MAT_TZ] * depth_scale;
+   sfv.scissor.xmin = scissor->minx;
+   sfv.scissor.xmax = scissor->maxx - 1; /* ? */
+   sfv.scissor.ymin = scissor->miny;
+   sfv.scissor.ymax = scissor->maxy - 1; /* ? */
 
-   /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT
-    * for DrawBuffer->_[XY]{min,max}
-    */
+   ret = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0,
+                         &brw->sf.vp_bo );
+   if (ret)
+      return ret;
 
-   /* The scissor only needs to handle the intersection of drawable and
-    * scissor rect.
-    *
-    * Note that the hardware's coordinates are inclusive, while Mesa's min is
-    * inclusive but max is exclusive.
-    */
-   /* Y=0=bottom */
-   sfv.scissor.xmin = ctx->DrawBuffer->_Xmin;
-   sfv.scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
-   sfv.scissor.ymin = ctx->DrawBuffer->_Ymin;
-   sfv.scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
-
-   brw->sws->bo_unreference(brw->sf.vp_bo);
-   brw->sf.vp_bo = brw_cache_data( &brw->cache, BRW_SF_VP, &sfv, NULL, 0 );
+   return PIPE_OK;
 }
 
 const struct brw_tracked_state brw_sf_vp = {
    .dirty = {
-      .mesa  = (_NEW_VIEWPORT | 
-               _NEW_SCISSOR |
-               _NEW_BUFFERS),
+      .mesa  = (PIPE_NEW_VIEWPORT | 
+               PIPE_NEW_SCISSOR),
       .brw   = 0,
       .cache = 0
    },
@@ -90,15 +83,17 @@ const struct brw_tracked_state brw_sf_vp = {
 struct brw_sf_unit_key {
    unsigned int total_grf;
    unsigned int urb_entry_read_length;
-
    unsigned int nr_urb_entries, urb_size, sfsize;
-
-   GLenum front_face, cull_face, provoking_vertex;
+   
    unsigned scissor:1;
    unsigned line_smooth:1;
    unsigned point_sprite:1;
    unsigned point_attenuated:1;
-   unsigned render_to_fbo:1;
+   unsigned front_ccw:1;
+   unsigned cull_face:2;
+   unsigned flatshade_first:1;
+   unsigned gl_rasterization_rules:1;
+   unsigned line_last_pixel_enable:1;
    float line_width;
    float point_size;
 };
@@ -106,6 +101,7 @@ struct brw_sf_unit_key {
 static void
 sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
 {
+   const struct pipe_rasterizer_state *rast = &brw->curr.rast->templ;
    memset(key, 0, sizeof(*key));
 
    /* CACHE_NEW_SF_PROG */
@@ -117,44 +113,42 @@ sf_unit_populate_key(struct brw_context *brw, struct brw_sf_unit_key *key)
    key->urb_size = brw->urb.vsize;
    key->sfsize = brw->urb.sfsize;
 
-   key->scissor = ctx->Scissor.Enabled;
-   key->front_face = ctx->Polygon.FrontFace;
-
-   if (ctx->Polygon.CullFlag)
-      key->cull_face = ctx->Polygon.CullFaceMode;
-   else
-      key->cull_face = GL_NONE;
-
-   key->line_width = ctx->Line.Width;
-   key->line_smooth = ctx->Line.SmoothFlag;
-
-   key->point_sprite = ctx->Point.PointSprite;
-   key->point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
-   key->point_attenuated = ctx->Point._Attenuated;
+   /* PIPE_NEW_RAST */
+   key->scissor = rast->scissor;
+   key->front_ccw = rast->front_ccw;
+   key->cull_face = rast->cull_face;
+   key->line_smooth = rast->line_smooth;
+   key->line_width = rast->line_width;
+   key->flatshade_first = rast->flatshade_first;
+   key->line_last_pixel_enable = rast->line_last_pixel;
+   key->gl_rasterization_rules = rast->gl_rasterization_rules;
 
-   /* _NEW_LIGHT */
-   key->provoking_vertex = ctx->Light.ProvokingVertex;
+   key->point_sprite = rast->sprite_coord_enable ? 1 : 0;
+   key->point_attenuated = rast->point_size_per_vertex;
 
-   key->render_to_fbo = 1;
+   key->point_size = rast->point_size;
 }
 
-static struct brw_winsys_buffer *
-sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
-                       struct brw_winsys_buffer **reloc_bufs)
+static enum pipe_error
+sf_unit_create_from_key(struct brw_context *brw,
+                        struct brw_sf_unit_key *key,
+                        struct brw_winsys_reloc *reloc,
+                        struct brw_winsys_buffer **bo_out)
 {
    struct brw_sf_unit_state sf;
-   struct brw_winsys_buffer *bo;
+   enum pipe_error ret;
    int chipset_max_threads;
    memset(&sf, 0, sizeof(sf));
 
-   sf.thread0.grf_reg_count = ALIGN(key->total_grf, 16) / 16 - 1;
-   sf.thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */
+   sf.thread0.grf_reg_count = align(key->total_grf, 16) / 16 - 1;
+   /* reloc */
+   sf.thread0.kernel_start_pointer = 0;
 
    sf.thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
 
    sf.thread3.dispatch_grf_start_reg = 3;
 
-   if (BRW_IS_IGDNG(brw))
+   if (brw->gen == 5)
        sf.thread3.urb_entry_read_offset = 3;
    else
        sf.thread3.urb_entry_read_offset = 1;
@@ -167,49 +161,49 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
    /* Each SF thread produces 1 PUE, and there can be up to 24(Pre-IGDNG) or 
     * 48(IGDNG) threads 
     */
-   if (BRW_IS_IGDNG(brw))
+   if (brw->gen == 5)
       chipset_max_threads = 48;
    else
       chipset_max_threads = 24;
 
    sf.thread4.max_threads = MIN2(chipset_max_threads, key->nr_urb_entries) - 1;
 
-   if (INTEL_DEBUG & DEBUG_SINGLE_THREAD)
+   if (BRW_DEBUG & DEBUG_SINGLE_THREAD)
       sf.thread4.max_threads = 0;
 
-   if (INTEL_DEBUG & DEBUG_STATS)
+   if (BRW_DEBUG & DEBUG_STATS)
       sf.thread4.stats_enable = 1;
 
    /* CACHE_NEW_SF_VP */
-   sf.sf5.sf_viewport_state_offset = brw->sf.vp_bo->offset >> 5; /* reloc */
+   /* reloc */
+   sf.sf5.sf_viewport_state_offset = 0;
 
    sf.sf5.viewport_transform = 1;
 
-   /* _NEW_SCISSOR */
    if (key->scissor)
       sf.sf6.scissor = 1;
 
-   /* _NEW_POLYGON */
-   if (key->front_face == GL_CCW)
+   if (key->front_ccw)
       sf.sf5.front_winding = BRW_FRONTWINDING_CCW;
    else
       sf.sf5.front_winding = BRW_FRONTWINDING_CW;
 
    switch (key->cull_face) {
-   case GL_FRONT:
+   case PIPE_FACE_FRONT:
       sf.sf6.cull_mode = BRW_CULLMODE_FRONT;
       break;
-   case GL_BACK:
+   case PIPE_FACE_BACK:
       sf.sf6.cull_mode = BRW_CULLMODE_BACK;
       break;
-   case GL_FRONT_AND_BACK:
+   case PIPE_FACE_FRONT_AND_BACK:
       sf.sf6.cull_mode = BRW_CULLMODE_BOTH;
       break;
-   case GL_NONE:
+   case PIPE_FACE_NONE:
       sf.sf6.cull_mode = BRW_CULLMODE_NONE;
       break;
    default:
       assert(0);
+      sf.sf6.cull_mode = BRW_CULLMODE_NONE;
       break;
    }
 
@@ -223,34 +217,12 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
    else if (sf.sf6.line_width <= 0x2)
        sf.sf6.line_width = 0;
 
-   /* _NEW_BUFFERS */
-   key->render_to_fbo = 1;
-   if (!key->render_to_fbo) {
-      /* Rendering to an OpenGL window */
-      sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
-   }
-   else {
-      /* If rendering to an FBO, the pixel coordinate system is
-       * inverted with respect to the normal OpenGL coordinate
-       * system, so BRW_RASTRULE_LOWER_RIGHT is correct.
-       * But this value is listed as "Reserved, but not seen as useful"
-       * in Intel documentation (page 212, "Point Rasterization Rule",
-       * section 7.4 "SF Pipeline State Summary", of document
-       * "Intel® 965 Express Chipset Family and Intel® G35 Express
-       * Chipset Graphics Controller Programmer's Reference Manual,
-       * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
-       * available at 
-       *     http://intellinuxgraphics.org/documentation.html
-       * at the time of this writing).
-       *
-       * It does work on at least some devices, if not all;
-       * if devices that don't support it can be identified,
-       * the likely failure case is that points are rasterized
-       * incorrectly, which is no worse than occurs without
-       * the value, so we're using it here.
-       */
-      sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT;
-   }
+   /* XXX: gl_rasterization_rules?  something else?
+    */
+   sf.sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
+   sf.sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT;
+   sf.sf6.point_rast_rule = 1;
+
    /* XXX clamp max depends on AA vs. non-AA */
 
    /* _NEW_POINT */
@@ -261,7 +233,7 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
 
    /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
     */
-   if (key->provoking_vertex == GL_LAST_VERTEX_CONVENTION) {
+   if (!key->flatshade_first) {
       sf.sf7.trifan_pv = 2;
       sf.sf7.linestrip_pv = 1;
       sf.sf7.tristrip_pv = 2;
@@ -270,67 +242,87 @@ sf_unit_create_from_key(struct brw_context *brw, struct brw_sf_unit_key *key,
       sf.sf7.linestrip_pv = 0;
       sf.sf7.tristrip_pv = 0;
    }
-   sf.sf7.line_last_pixel_enable = 0;
+
+   sf.sf7.line_last_pixel_enable = key->line_last_pixel_enable;
 
    /* Set bias for OpenGL rasterization rules:
     */
-   sf.sf6.dest_org_vbias = 0x8;
-   sf.sf6.dest_org_hbias = 0x8;
+   if (key->gl_rasterization_rules) {
+      sf.sf6.dest_org_vbias = 0x8;
+      sf.sf6.dest_org_hbias = 0x8;
+   }
+   else {
+      sf.sf6.dest_org_vbias = 0x0;
+      sf.sf6.dest_org_hbias = 0x0;
+   }
+
+   ret = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
+                          key, sizeof(*key),
+                          reloc, 2,
+                          &sf, sizeof(sf),
+                          NULL, NULL,
+                          bo_out);
+   if (ret)
+      return ret;
+
+   
+   return PIPE_OK;
+}
 
-   bo = brw_upload_cache(&brw->cache, BRW_SF_UNIT,
-                        key, sizeof(*key),
-                        reloc_bufs, 2,
-                        &sf, sizeof(sf),
-                        NULL, NULL);
+static enum pipe_error upload_sf_unit( struct brw_context *brw )
+{
+   struct brw_sf_unit_key key;
+   struct brw_winsys_reloc reloc[2];
+   unsigned total_grf;
+   unsigned viewport_transform;
+   unsigned front_winding;
+   enum pipe_error ret;
 
-   /* STATE_PREFETCH command description describes this state as being
-    * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
+   sf_unit_populate_key(brw, &key);
+   
+   /* XXX: cut this crap and pre calculate the key:
     */
+   total_grf = (align(key.total_grf, 16) / 16 - 1);
+   viewport_transform = 1;
+   front_winding = (key.front_ccw ?
+                    BRW_FRONTWINDING_CCW :
+                    BRW_FRONTWINDING_CW);
+
    /* Emit SF program relocation */
-   dri_bo_emit_reloc(bo,
-                    I915_GEM_DOMAIN_INSTRUCTION, 0,
-                    sf.thread0.grf_reg_count << 1,
-                    offsetof(struct brw_sf_unit_state, thread0),
-                    brw->sf.prog_bo);
+   make_reloc(&reloc[0],
+              BRW_USAGE_STATE,
+              total_grf << 1,
+              offsetof(struct brw_sf_unit_state, thread0),
+              brw->sf.prog_bo);
 
    /* Emit SF viewport relocation */
-   dri_bo_emit_reloc(bo,
-                    I915_GEM_DOMAIN_INSTRUCTION, 0,
-                    sf.sf5.front_winding | (sf.sf5.viewport_transform << 1),
-                    offsetof(struct brw_sf_unit_state, sf5),
-                    brw->sf.vp_bo);
+   make_reloc(&reloc[1],
+              BRW_USAGE_STATE,
+              front_winding | (viewport_transform << 1),
+              offsetof(struct brw_sf_unit_state, sf5),
+              brw->sf.vp_bo);
 
-   return bo;
-}
 
-static void upload_sf_unit( struct brw_context *brw )
-{
-   struct brw_sf_unit_key key;
-   struct brw_winsys_buffer *reloc_bufs[2];
+   if (brw_search_cache(&brw->cache, BRW_SF_UNIT,
+                        &key, sizeof(key),
+                        reloc, 2,
+                        NULL,
+                        &brw->sf.state_bo))
+      return PIPE_OK;
 
-   sf_unit_populate_key(brw, &key);
 
-   reloc_bufs[0] = brw->sf.prog_bo;
-   reloc_bufs[1] = brw->sf.vp_bo;
+   ret = sf_unit_create_from_key(brw, &key,
+                                 reloc,
+                                 &brw->sf.state_bo);
+   if (ret)
+      return ret;
 
-   brw->sws->bo_unreference(brw->sf.state_bo);
-   brw->sf.state_bo = brw_search_cache(&brw->cache, BRW_SF_UNIT,
-                                      &key, sizeof(key),
-                                      reloc_bufs, 2,
-                                      NULL);
-   if (brw->sf.state_bo == NULL) {
-      brw->sf.state_bo = sf_unit_create_from_key(brw, &key, reloc_bufs);
-   }
+   return PIPE_OK;
 }
 
 const struct brw_tracked_state brw_sf_unit = {
    .dirty = {
-      .mesa  = (_NEW_POLYGON | 
-               _NEW_LIGHT |
-               _NEW_LINE | 
-               _NEW_POINT | 
-               _NEW_SCISSOR |
-               _NEW_BUFFERS),
+      .mesa  = (PIPE_NEW_RAST),
       .brw   = BRW_NEW_URB_FENCE,
       .cache = (CACHE_NEW_SF_VP |
                CACHE_NEW_SF_PROG)