src/mesa/drivers/dri/i965/brw_sf_state.c

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33
  34 #include "brw_context.h"
  35 #include "brw_state.h"
  36 #include "brw_defines.h"
  37 #include "main/macros.h"
  38
  39 static void upload_sf_vp(struct brw_context *brw)
  40 {
  41    struct intel_context *intel = &brw->intel;
  42    struct gl_context *ctx = &intel->ctx;
  43    const GLfloat depth_scale = 1.0F / ctx->DrawBuffer->_DepthMaxF;
  44    struct brw_sf_viewport *sfv;
  45    GLfloat y_scale, y_bias;
  46    const GLboolean render_to_fbo = (ctx->DrawBuffer->Name != 0);
  47    const GLfloat *v = ctx->Viewport._WindowMap.m;
  48
  49    sfv = brw_state_batch(brw, sizeof(*sfv), 32, &brw->sf.vp_offset);
  50    memset(sfv, 0, sizeof(*sfv));
  51
  52    if (render_to_fbo) {
  53       y_scale = 1.0;
  54       y_bias = 0;
  55    }
  56    else {
  57       y_scale = -1.0;
  58       y_bias = ctx->DrawBuffer->Height;
  59    }
  60
  61    /* _NEW_VIEWPORT */
  62
  63    sfv->viewport.m00 = v[MAT_SX];
  64    sfv->viewport.m11 = v[MAT_SY] * y_scale;
  65    sfv->viewport.m22 = v[MAT_SZ] * depth_scale;
  66    sfv->viewport.m30 = v[MAT_TX];
  67    sfv->viewport.m31 = v[MAT_TY] * y_scale + y_bias;
  68    sfv->viewport.m32 = v[MAT_TZ] * depth_scale;
  69
  70    /* _NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT
  71     * for DrawBuffer->_[XY]{min,max}
  72     */
  73
  74    /* The scissor only needs to handle the intersection of drawable
  75     * and scissor rect, since there are no longer cliprects for shared
  76     * buffers with DRI2.
  77     *
  78     * Note that the hardware's coordinates are inclusive, while Mesa's min is
  79     * inclusive but max is exclusive.
  80     */
  81
  82    if (ctx->DrawBuffer->_Xmin == ctx->DrawBuffer->_Xmax ||
  83        ctx->DrawBuffer->_Ymin == ctx->DrawBuffer->_Ymax) {
  84       /* If the scissor was out of bounds and got clamped to 0
  85        * width/height at the bounds, the subtraction of 1 from
  86        * maximums could produce a negative number and thus not clip
  87        * anything.  Instead, just provide a min > max scissor inside
  88        * the bounds, which produces the expected no rendering.
  89        */
  90       sfv->scissor.xmin = 1;
  91       sfv->scissor.xmax = 0;
  92       sfv->scissor.ymin = 1;
  93       sfv->scissor.ymax = 0;
  94    } else if (render_to_fbo) {
  95       /* texmemory: Y=0=bottom */
  96       sfv->scissor.xmin = ctx->DrawBuffer->_Xmin;
  97       sfv->scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
  98       sfv->scissor.ymin = ctx->DrawBuffer->_Ymin;
  99       sfv->scissor.ymax = ctx->DrawBuffer->_Ymax - 1;
 100    }
 101    else {
 102       /* memory: Y=0=top */
 103       sfv->scissor.xmin = ctx->DrawBuffer->_Xmin;
 104       sfv->scissor.xmax = ctx->DrawBuffer->_Xmax - 1;
 105       sfv->scissor.ymin = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymax;
 106       sfv->scissor.ymax = ctx->DrawBuffer->Height - ctx->DrawBuffer->_Ymin - 1;
 107    }
 108
 109    brw->state.dirty.cache |= CACHE_NEW_SF_VP;
 110 }
 111
 112 const struct brw_tracked_state brw_sf_vp = {
 113    .dirty = {
 114       .mesa  = (_NEW_VIEWPORT |
 115                 _NEW_SCISSOR |
 116                 _NEW_BUFFERS),
 117       .brw   = BRW_NEW_BATCH,
 118       .cache = 0
 119    },
 120    .prepare = upload_sf_vp
 121 };
 122
 123 static void upload_sf_unit( struct brw_context *brw )
 124 {
 125    struct intel_context *intel = &brw->intel;
 126    struct gl_context *ctx = &intel->ctx;
 127    struct brw_sf_unit_state *sf;
 128    drm_intel_bo *bo = intel->batch.bo;
 129    int chipset_max_threads;
 130    bool render_to_fbo = brw->intel.ctx.DrawBuffer->Name != 0;
 131
 132    sf = brw_state_batch(brw, sizeof(*sf), 64, &brw->sf.state_offset);
 133
 134    memset(sf, 0, sizeof(*sf));
 135
 136    /* CACHE_NEW_SF_PROG */
 137    sf->thread0.grf_reg_count = ALIGN(brw->sf.prog_data->total_grf, 16) / 16 - 1;
 138    sf->thread0.kernel_start_pointer = brw->sf.prog_bo->offset >> 6; /* reloc */
 139
 140    sf->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
 141
 142    sf->thread3.dispatch_grf_start_reg = 3;
 143
 144    if (intel->gen == 5)
 145        sf->thread3.urb_entry_read_offset = 3;
 146    else
 147        sf->thread3.urb_entry_read_offset = 1;
 148
 149    /* CACHE_NEW_SF_PROG */
 150    sf->thread3.urb_entry_read_length = brw->sf.prog_data->urb_read_length;
 151
 152    /* BRW_NEW_URB_FENCE */
 153    sf->thread4.nr_urb_entries = brw->urb.nr_sf_entries;
 154    sf->thread4.urb_entry_allocation_size = brw->urb.sfsize - 1;
 155
 156    /* Each SF thread produces 1 PUE, and there can be up to 24 (Pre-Ironlake) or
 157     * 48 (Ironlake) threads.
 158     */
 159    if (intel->gen == 5)
 160       chipset_max_threads = 48;
 161    else
 162       chipset_max_threads = 24;
 163
 164    /* BRW_NEW_URB_FENCE */
 165    sf->thread4.max_threads = MIN2(chipset_max_threads,
 166                                   brw->urb.nr_sf_entries) - 1;
 167
 168    if (unlikely(INTEL_DEBUG & DEBUG_SINGLE_THREAD))
 169       sf->thread4.max_threads = 0;
 170
 171    if (unlikely(INTEL_DEBUG & DEBUG_STATS))
 172       sf->thread4.stats_enable = 1;
 173
 174    /* CACHE_NEW_SF_VP */
 175    sf->sf5.sf_viewport_state_offset = (intel->batch.bo->offset +
 176                                        brw->sf.vp_offset) >> 5; /* reloc */
 177
 178    sf->sf5.viewport_transform = 1;
 179
 180    /* _NEW_SCISSOR */
 181    if (ctx->Scissor.Enabled)
 182       sf->sf6.scissor = 1;
 183
 184    /* _NEW_POLYGON */
 185    if (ctx->Polygon.FrontFace == GL_CCW)
 186       sf->sf5.front_winding = BRW_FRONTWINDING_CCW;
 187    else
 188       sf->sf5.front_winding = BRW_FRONTWINDING_CW;
 189
 190    /* _NEW_BUFFERS
 191     * The viewport is inverted for rendering to a FBO, and that inverts
 192     * polygon front/back orientation.
 193     */
 194    sf->sf5.front_winding ^= render_to_fbo;
 195
 196    /* _NEW_POLYGON */
 197    switch (ctx->Polygon.CullFlag ? ctx->Polygon.CullFaceMode : GL_NONE) {
 198    case GL_FRONT:
 199       sf->sf6.cull_mode = BRW_CULLMODE_FRONT;
 200       break;
 201    case GL_BACK:
 202       sf->sf6.cull_mode = BRW_CULLMODE_BACK;
 203       break;
 204    case GL_FRONT_AND_BACK:
 205       sf->sf6.cull_mode = BRW_CULLMODE_BOTH;
 206       break;
 207    case GL_NONE:
 208       sf->sf6.cull_mode = BRW_CULLMODE_NONE;
 209       break;
 210    default:
 211       assert(0);
 212       break;
 213    }
 214
 215    /* _NEW_LINE */
 216    /* XXX use ctx->Const.Min/MaxLineWidth here */
 217    sf->sf6.line_width = CLAMP(ctx->Line.Width, 1.0, 5.0) * (1<<1);
 218
 219    sf->sf6.line_endcap_aa_region_width = 1;
 220    if (ctx->Line.SmoothFlag)
 221       sf->sf6.aa_enable = 1;
 222    else if (sf->sf6.line_width <= 0x2)
 223        sf->sf6.line_width = 0;
 224
 225    /* _NEW_BUFFERS */
 226    if (!render_to_fbo) {
 227       /* Rendering to an OpenGL window */
 228       sf->sf6.point_rast_rule = BRW_RASTRULE_UPPER_RIGHT;
 229    }
 230    else {
 231       /* If rendering to an FBO, the pixel coordinate system is
 232        * inverted with respect to the normal OpenGL coordinate
 233        * system, so BRW_RASTRULE_LOWER_RIGHT is correct.
 234        * But this value is listed as "Reserved, but not seen as useful"
 235        * in Intel documentation (page 212, "Point Rasterization Rule",
 236        * section 7.4 "SF Pipeline State Summary", of document
 237        * "Intel® 965 Express Chipset Family and Intel® G35 Express
 238        * Chipset Graphics Controller Programmer's Reference Manual,
 239        * Volume 2: 3D/Media", Revision 1.0b as of January 2008,
 240        * available at
 241        *     http://intellinuxgraphics.org/documentation.html
 242        * at the time of this writing).
 243        *
 244        * It does work on at least some devices, if not all;
 245        * if devices that don't support it can be identified,
 246        * the likely failure case is that points are rasterized
 247        * incorrectly, which is no worse than occurs without
 248        * the value, so we're using it here.
 249        */
 250       sf->sf6.point_rast_rule = BRW_RASTRULE_LOWER_RIGHT;
 251    }
 252    /* XXX clamp max depends on AA vs. non-AA */
 253
 254    /* _NEW_POINT */
 255    sf->sf7.sprite_point = ctx->Point.PointSprite;
 256    sf->sf7.point_size = CLAMP(rint(CLAMP(ctx->Point.Size,
 257                                          ctx->Point.MinSize,
 258                                          ctx->Point.MaxSize)), 1, 255) * (1<<3);
 259    sf->sf7.use_point_size_state = !(ctx->VertexProgram.PointSizeEnabled ||
 260                                     ctx->Point._Attenuated);
 261    sf->sf7.aa_line_distance_mode = 0;
 262
 263    /* might be BRW_NEW_PRIMITIVE if we have to adjust pv for polygons:
 264     * _NEW_LIGHT
 265     */
 266    if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) {
 267       sf->sf7.trifan_pv = 2;
 268       sf->sf7.linestrip_pv = 1;
 269       sf->sf7.tristrip_pv = 2;
 270    } else {
 271       sf->sf7.trifan_pv = 1;
 272       sf->sf7.linestrip_pv = 0;
 273       sf->sf7.tristrip_pv = 0;
 274    }
 275    sf->sf7.line_last_pixel_enable = 0;
 276
 277    /* Set bias for OpenGL rasterization rules:
 278     */
 279    sf->sf6.dest_org_vbias = 0x8;
 280    sf->sf6.dest_org_hbias = 0x8;
 281
 282    /* STATE_PREFETCH command description describes this state as being
 283     * something loaded through the GPE (L2 ISC), so it's INSTRUCTION domain.
 284     */
 285    /* Emit SF program relocation */
 286    drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset +
 287                                 offsetof(struct brw_sf_unit_state, thread0)),
 288                            brw->sf.prog_bo, sf->thread0.grf_reg_count << 1,
 289                            I915_GEM_DOMAIN_INSTRUCTION, 0);
 290
 291    /* Emit SF viewport relocation */
 292    drm_intel_bo_emit_reloc(bo, (brw->sf.state_offset +
 293                                 offsetof(struct brw_sf_unit_state, sf5)),
 294                            intel->batch.bo, (brw->sf.vp_offset |
 295                                              sf->sf5.front_winding |
 296                                              (sf->sf5.viewport_transform << 1)),
 297                            I915_GEM_DOMAIN_INSTRUCTION, 0);
 298
 299    brw->state.dirty.cache |= CACHE_NEW_SF_UNIT;
 300 }
 301
 302 const struct brw_tracked_state brw_sf_unit = {
 303    .dirty = {
 304       .mesa  = (_NEW_POLYGON |
 305                 _NEW_LIGHT |
 306                 _NEW_LINE |
 307                 _NEW_POINT |
 308                 _NEW_SCISSOR |
 309                 _NEW_BUFFERS),
 310       .brw   = (BRW_NEW_BATCH |
 311                 BRW_NEW_URB_FENCE),
 312       .cache = (CACHE_NEW_SF_VP |
 313                 CACHE_NEW_SF_PROG)
 314    },
 315    .prepare = upload_sf_unit,
 316 };