src/mesa/drivers/dri/i965/brw_context.h

   1 /*
   2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
   3  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
   4  develop this 3D driver.
   5
   6  Permission is hereby granted, free of charge, to any person obtaining
   7  a copy of this software and associated documentation files (the
   8  "Software"), to deal in the Software without restriction, including
   9  without limitation the rights to use, copy, modify, merge, publish,
  10  distribute, sublicense, and/or sell copies of the Software, and to
  11  permit persons to whom the Software is furnished to do so, subject to
  12  the following conditions:
  13
  14  The above copyright notice and this permission notice (including the
  15  next paragraph) shall be included in all copies or substantial
  16  portions of the Software.
  17
  18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25
  26  **********************************************************************/
  27  /*
  28   * Authors:
  29   *   Keith Whitwell <keith@tungstengraphics.com>
  30   */
  31
  32
  33 #ifndef BRWCONTEXT_INC
  34 #define BRWCONTEXT_INC
  35
  36 #include "intel_context.h"
  37 #include "brw_structs.h"
  38 #include "main/imports.h"
  39
  40
  41 /* Glossary:
  42  *
  43  * URB - uniform resource buffer.  A mid-sized buffer which is
  44  * partitioned between the fixed function units and used for passing
  45  * values (vertices, primitives, constants) between them.
  46  *
  47  * CURBE - constant URB entry.  An urb region (entry) used to hold
  48  * constant values which the fixed function units can be instructed to
  49  * preload into the GRF when spawning a thread.
  50  *
  51  * VUE - vertex URB entry.  An urb entry holding a vertex and usually
  52  * a vertex header.  The header contains control information and
  53  * things like primitive type, Begin/end flags and clip codes.
  54  *
  55  * PUE - primitive URB entry.  An urb entry produced by the setup (SF)
  56  * unit holding rasterization and interpolation parameters.
  57  *
  58  * GRF - general register file.  One of several register files
  59  * addressable by programmed threads.  The inputs (r0, payload, curbe,
  60  * urb) of the thread are preloaded to this area before the thread is
  61  * spawned.  The registers are individually 8 dwords wide and suitable
  62  * for general usage.  Registers holding thread input values are not
  63  * special and may be overwritten.
  64  *
  65  * MRF - message register file.  Threads communicate (and terminate)
  66  * by sending messages.  Message parameters are placed in contiguous
  67  * MRF registers.  All program output is via these messages.  URB
  68  * entries are populated by sending a message to the shared URB
  69  * function containing the new data, together with a control word,
  70  * often an unmodified copy of R0.
  71  *
  72  * R0 - GRF register 0.  Typically holds control information used when
  73  * sending messages to other threads.
  74  *
  75  * EU or GEN4 EU: The name of the programmable subsystem of the
  76  * i965 hardware.  Threads are executed by the EU, the registers
  77  * described above are part of the EU architecture.
  78  *
  79  * Fixed function units:
  80  *
  81  * CS - Command streamer.  Notional first unit, little software
  82  * interaction.  Holds the URB entries used for constant data, ie the
  83  * CURBEs.
  84  *
  85  * VF/VS - Vertex Fetch / Vertex Shader.  The fixed function part of
  86  * this unit is responsible for pulling vertices out of vertex buffers
  87  * in vram and injecting them into the processing pipe as VUEs.  If
  88  * enabled, it first passes them to a VS thread which is a good place
  89  * for the driver to implement any active vertex shader.
  90  *
  91  * GS - Geometry Shader.  This corresponds to a new DX10 concept.  If
  92  * enabled, incoming strips etc are passed to GS threads in individual
  93  * line/triangle/point units.  The GS thread may perform arbitrary
  94  * computation and emit whatever primitives with whatever vertices it
  95  * chooses.  This makes GS an excellent place to implement GL's
  96  * unfilled polygon modes, though of course it is capable of much
  97  * more.  Additionally, GS is used to translate away primitives not
  98  * handled by later units, including Quads and Lineloops.
  99  *
 100  * CLIP - Clipper.  Mesa's clipping algorithms are imported to run on
 101  * this unit.  The fixed function part performs clip testing against
 102  * the 6 fixed clip planes and makes decisions on whether or not the
 103  * incoming primitive needs to be passed to a thread for clipping.
 104  * User clip planes are handled via cooperation with the VS thread.
 105  *
 106  * SF - Strips Fans or Setup: Triangles are prepared for
 107  * rasterization.  Interpolation coefficients are calculated.
 108  * Flatshading and two-side lighting usually performed here.
 109  *
 110  * WM - Windower.  Interpolation of vertex attributes performed here.
 111  * Fragment shader implemented here.  SIMD aspects of EU taken full
 112  * advantage of, as pixels are processed in blocks of 16.
 113  *
 114  * CC - Color Calculator.  No EU threads associated with this unit.
 115  * Handles blending and (presumably) depth and stencil testing.
 116  */
 117
 118
 119 #define BRW_MAX_CURBE                    (32*16)
 120
 121 struct brw_context;
 122
 123 enum brw_state_id {
 124    BRW_STATE_URB_FENCE,
 125    BRW_STATE_FRAGMENT_PROGRAM,
 126    BRW_STATE_VERTEX_PROGRAM,
 127    BRW_STATE_INPUT_DIMENSIONS,
 128    BRW_STATE_CURBE_OFFSETS,
 129    BRW_STATE_REDUCED_PRIMITIVE,
 130    BRW_STATE_PRIMITIVE,
 131    BRW_STATE_CONTEXT,
 132    BRW_STATE_WM_INPUT_DIMENSIONS,
 133    BRW_STATE_PSP,
 134    BRW_STATE_WM_SURFACES,
 135    BRW_STATE_VS_BINDING_TABLE,
 136    BRW_STATE_GS_BINDING_TABLE,
 137    BRW_STATE_PS_BINDING_TABLE,
 138    BRW_STATE_INDICES,
 139    BRW_STATE_VERTICES,
 140    BRW_STATE_BATCH,
 141    BRW_STATE_NR_WM_SURFACES,
 142    BRW_STATE_NR_VS_SURFACES,
 143    BRW_STATE_INDEX_BUFFER,
 144    BRW_STATE_VS_CONSTBUF,
 145    BRW_STATE_WM_CONSTBUF,
 146    BRW_STATE_PROGRAM_CACHE,
 147    BRW_STATE_STATE_BASE_ADDRESS,
 148 };
 149
 150 #define BRW_NEW_URB_FENCE               (1 << BRW_STATE_URB_FENCE)
 151 #define BRW_NEW_FRAGMENT_PROGRAM        (1 << BRW_STATE_FRAGMENT_PROGRAM)
 152 #define BRW_NEW_VERTEX_PROGRAM          (1 << BRW_STATE_VERTEX_PROGRAM)
 153 #define BRW_NEW_INPUT_DIMENSIONS        (1 << BRW_STATE_INPUT_DIMENSIONS)
 154 #define BRW_NEW_CURBE_OFFSETS           (1 << BRW_STATE_CURBE_OFFSETS)
 155 #define BRW_NEW_REDUCED_PRIMITIVE       (1 << BRW_STATE_REDUCED_PRIMITIVE)
 156 #define BRW_NEW_PRIMITIVE               (1 << BRW_STATE_PRIMITIVE)
 157 #define BRW_NEW_CONTEXT                 (1 << BRW_STATE_CONTEXT)
 158 #define BRW_NEW_WM_INPUT_DIMENSIONS     (1 << BRW_STATE_WM_INPUT_DIMENSIONS)
 159 #define BRW_NEW_PSP                     (1 << BRW_STATE_PSP)
 160 #define BRW_NEW_WM_SURFACES             (1 << BRW_STATE_WM_SURFACES)
 161 #define BRW_NEW_VS_BINDING_TABLE        (1 << BRW_STATE_VS_BINDING_TABLE)
 162 #define BRW_NEW_GS_BINDING_TABLE        (1 << BRW_STATE_GS_BINDING_TABLE)
 163 #define BRW_NEW_PS_BINDING_TABLE        (1 << BRW_STATE_PS_BINDING_TABLE)
 164 #define BRW_NEW_INDICES                 (1 << BRW_STATE_INDICES)
 165 #define BRW_NEW_VERTICES                (1 << BRW_STATE_VERTICES)
 166 /**
 167  * Used for any batch entry with a relocated pointer that will be used
 168  * by any 3D rendering.
 169  */
 170 #define BRW_NEW_BATCH                  (1 << BRW_STATE_BATCH)
 171 /** \see brw.state.depth_region */
 172 #define BRW_NEW_NR_WM_SURFACES         (1 << BRW_STATE_NR_WM_SURFACES)
 173 #define BRW_NEW_NR_VS_SURFACES         (1 << BRW_STATE_NR_VS_SURFACES)
 174 #define BRW_NEW_INDEX_BUFFER           (1 << BRW_STATE_INDEX_BUFFER)
 175 #define BRW_NEW_VS_CONSTBUF            (1 << BRW_STATE_VS_CONSTBUF)
 176 #define BRW_NEW_WM_CONSTBUF            (1 << BRW_STATE_WM_CONSTBUF)
 177 #define BRW_NEW_PROGRAM_CACHE           (1 << BRW_STATE_PROGRAM_CACHE)
 178 #define BRW_NEW_STATE_BASE_ADDRESS      (1 << BRW_STATE_STATE_BASE_ADDRESS)
 179
 180 struct brw_state_flags {
 181    /** State update flags signalled by mesa internals */
 182    GLuint mesa;
 183    /**
 184     * State update flags signalled as the result of brw_tracked_state updates
 185     */
 186    GLuint brw;
 187    /** State update flags signalled by brw_state_cache.c searches */
 188    GLuint cache;
 189 };
 190
 191 enum state_struct_type {
 192    AUB_TRACE_VS_STATE =                 1,
 193    AUB_TRACE_GS_STATE =                 2,
 194    AUB_TRACE_CLIP_STATE =               3,
 195    AUB_TRACE_SF_STATE =                 4,
 196    AUB_TRACE_WM_STATE =                 5,
 197    AUB_TRACE_CC_STATE =                 6,
 198    AUB_TRACE_CLIP_VP_STATE =            7,
 199    AUB_TRACE_SF_VP_STATE =              8,
 200    AUB_TRACE_CC_VP_STATE =              0x9,
 201    AUB_TRACE_SAMPLER_STATE =            0xa,
 202    AUB_TRACE_KERNEL_INSTRUCTIONS =      0xb,
 203    AUB_TRACE_SCRATCH_SPACE =            0xc,
 204    AUB_TRACE_SAMPLER_DEFAULT_COLOR =    0xd,
 205
 206    AUB_TRACE_SCISSOR_STATE =            0x15,
 207    AUB_TRACE_BLEND_STATE =              0x16,
 208    AUB_TRACE_DEPTH_STENCIL_STATE =      0x17,
 209
 210    /* Not written to .aub files the same way the structures above are. */
 211    AUB_TRACE_NO_TYPE =                  0x100,
 212    AUB_TRACE_BINDING_TABLE =            0x101,
 213    AUB_TRACE_SURFACE_STATE =            0x102,
 214    AUB_TRACE_VS_CONSTANTS =             0x103,
 215    AUB_TRACE_WM_CONSTANTS =             0x104,
 216 };
 217
 218 /** Subclass of Mesa vertex program */
 219 struct brw_vertex_program {
 220    struct gl_vertex_program program;
 221    GLuint id;
 222    GLboolean use_const_buffer;
 223 };
 224
 225
 226 /** Subclass of Mesa fragment program */
 227 struct brw_fragment_program {
 228    struct gl_fragment_program program;
 229    GLuint id;  /**< serial no. to identify frag progs, never re-used */
 230
 231    /** for debugging, which texture units are referenced */
 232    GLbitfield tex_units_used;
 233 };
 234
 235 struct brw_shader {
 236    struct gl_shader base;
 237
 238    /** Shader IR transformed for native compile, at link time. */
 239    struct exec_list *ir;
 240 };
 241
 242 struct brw_shader_program {
 243    struct gl_shader_program base;
 244 };
 245
 246 enum param_conversion {
 247    PARAM_NO_CONVERT,
 248    PARAM_CONVERT_F2I,
 249    PARAM_CONVERT_F2U,
 250    PARAM_CONVERT_F2B,
 251    PARAM_CONVERT_ZERO,
 252 };
 253
 254 /* Data about a particular attempt to compile a program.  Note that
 255  * there can be many of these, each in a different GL state
 256  * corresponding to a different brw_wm_prog_key struct, with different
 257  * compiled programs:
 258  */
 259 struct brw_wm_prog_data {
 260    GLuint curb_read_length;
 261    GLuint urb_read_length;
 262
 263    GLuint first_curbe_grf;
 264    GLuint first_curbe_grf_16;
 265    GLuint reg_blocks;
 266    GLuint reg_blocks_16;
 267    GLuint total_scratch;
 268
 269    GLuint nr_params;       /**< number of float params/constants */
 270    GLuint nr_pull_params;
 271    GLboolean error;
 272    int dispatch_width;
 273    uint32_t prog_offset_16;
 274
 275    /* Pointer to tracked values (only valid once
 276     * _mesa_load_state_parameters has been called at runtime).
 277     */
 278    const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
 279    enum param_conversion param_convert[MAX_UNIFORMS * 4];
 280    const float *pull_param[MAX_UNIFORMS * 4];
 281    enum param_conversion pull_param_convert[MAX_UNIFORMS * 4];
 282 };
 283
 284 struct brw_sf_prog_data {
 285    GLuint urb_read_length;
 286    GLuint total_grf;
 287
 288    /* Each vertex may have upto 12 attributes, 4 components each,
 289     * except WPOS which requires only 2.  (11*4 + 2) == 44 ==> 11
 290     * rows.
 291     *
 292     * Actually we use 4 for each, so call it 12 rows.
 293     */
 294    GLuint urb_entry_size;
 295 };
 296
 297 struct brw_clip_prog_data {
 298    GLuint curb_read_length;     /* user planes? */
 299    GLuint clip_mode;
 300    GLuint urb_read_length;
 301    GLuint total_grf;
 302 };
 303
 304 struct brw_gs_prog_data {
 305    GLuint urb_read_length;
 306    GLuint total_grf;
 307 };
 308
 309 struct brw_vs_prog_data {
 310    GLuint curb_read_length;
 311    GLuint urb_read_length;
 312    GLuint total_grf;
 313    GLbitfield64 outputs_written;
 314    GLuint nr_params;       /**< number of float params/constants */
 315    GLuint total_scratch;
 316
 317    GLuint inputs_read;
 318
 319    /* Used for calculating urb partitions:
 320     */
 321    GLuint urb_entry_size;
 322
 323    const float *param[MAX_UNIFORMS * 4]; /* should be: BRW_MAX_CURBE */
 324    const float *pull_param[MAX_UNIFORMS * 4];
 325
 326    bool uses_new_param_layout;
 327 };
 328
 329
 330 /* Size == 0 if output either not written, or always [0,0,0,1]
 331  */
 332 struct brw_vs_ouput_sizes {
 333    GLubyte output_size[VERT_RESULT_MAX];
 334 };
 335
 336
 337 /** Number of texture sampler units */
 338 #define BRW_MAX_TEX_UNIT 16
 339
 340 /** Max number of render targets in a shader */
 341 #define BRW_MAX_DRAW_BUFFERS 8
 342
 343 /**
 344  * Size of our surface binding table for the WM.
 345  * This contains pointers to the drawing surfaces and current texture
 346  * objects and shader constant buffers (+2).
 347  */
 348 #define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
 349
 350 /**
 351  * Helpers to convert drawing buffers, textures and constant buffers
 352  * to surface binding table indexes, for WM.
 353  */
 354 #define SURF_INDEX_DRAW(d)           (d)
 355 #define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS)
 356 #define SURF_INDEX_TEXTURE(t)        (BRW_MAX_DRAW_BUFFERS + 1 + (t))
 357
 358 /**
 359  * Size of surface binding table for the VS.
 360  * Only one constant buffer for now.
 361  */
 362 #define BRW_VS_MAX_SURF 1
 363
 364 /**
 365  * Only a VS constant buffer
 366  */
 367 #define SURF_INDEX_VERT_CONST_BUFFER 0
 368
 369
 370 enum brw_cache_id {
 371    BRW_BLEND_STATE,
 372    BRW_DEPTH_STENCIL_STATE,
 373    BRW_COLOR_CALC_STATE,
 374    BRW_CC_VP,
 375    BRW_CC_UNIT,
 376    BRW_WM_PROG,
 377    BRW_SAMPLER,
 378    BRW_WM_UNIT,
 379    BRW_SF_PROG,
 380    BRW_SF_VP,
 381    BRW_SF_UNIT, /* scissor state on gen6 */
 382    BRW_VS_UNIT,
 383    BRW_VS_PROG,
 384    BRW_GS_UNIT,
 385    BRW_GS_PROG,
 386    BRW_CLIP_VP,
 387    BRW_CLIP_UNIT,
 388    BRW_CLIP_PROG,
 389
 390    BRW_MAX_CACHE
 391 };
 392
 393 struct brw_cache_item {
 394    /**
 395     * Effectively part of the key, cache_id identifies what kind of state
 396     * buffer is involved, and also which brw->state.dirty.cache flag should
 397     * be set when this cache item is chosen.
 398     */
 399    enum brw_cache_id cache_id;
 400    /** 32-bit hash of the key data */
 401    GLuint hash;
 402    GLuint key_size;             /* for variable-sized keys */
 403    GLuint aux_size;
 404    const void *key;
 405
 406    uint32_t offset;
 407    uint32_t size;
 408
 409    struct brw_cache_item *next;
 410 };
 411
 412
 413
 414 struct brw_cache {
 415    struct brw_context *brw;
 416
 417    struct brw_cache_item **items;
 418    drm_intel_bo *bo;
 419    GLuint size, n_items;
 420
 421    uint32_t next_offset;
 422    bool bo_used_by_gpu;
 423 };
 424
 425
 426 /* Considered adding a member to this struct to document which flags
 427  * an update might raise so that ordering of the state atoms can be
 428  * checked or derived at runtime.  Dropped the idea in favor of having
 429  * a debug mode where the state is monitored for flags which are
 430  * raised that have already been tested against.
 431  */
 432 struct brw_tracked_state {
 433    struct brw_state_flags dirty;
 434    void (*prepare)( struct brw_context *brw );
 435    void (*emit)( struct brw_context *brw );
 436 };
 437
 438 /* Flags for brw->state.cache.
 439  */
 440 #define CACHE_NEW_BLEND_STATE            (1<<BRW_BLEND_STATE)
 441 #define CACHE_NEW_DEPTH_STENCIL_STATE    (1<<BRW_DEPTH_STENCIL_STATE)
 442 #define CACHE_NEW_COLOR_CALC_STATE       (1<<BRW_COLOR_CALC_STATE)
 443 #define CACHE_NEW_CC_VP                  (1<<BRW_CC_VP)
 444 #define CACHE_NEW_CC_UNIT                (1<<BRW_CC_UNIT)
 445 #define CACHE_NEW_WM_PROG                (1<<BRW_WM_PROG)
 446 #define CACHE_NEW_SAMPLER                (1<<BRW_SAMPLER)
 447 #define CACHE_NEW_WM_UNIT                (1<<BRW_WM_UNIT)
 448 #define CACHE_NEW_SF_PROG                (1<<BRW_SF_PROG)
 449 #define CACHE_NEW_SF_VP                  (1<<BRW_SF_VP)
 450 #define CACHE_NEW_SF_UNIT                (1<<BRW_SF_UNIT)
 451 #define CACHE_NEW_VS_UNIT                (1<<BRW_VS_UNIT)
 452 #define CACHE_NEW_VS_PROG                (1<<BRW_VS_PROG)
 453 #define CACHE_NEW_GS_UNIT                (1<<BRW_GS_UNIT)
 454 #define CACHE_NEW_GS_PROG                (1<<BRW_GS_PROG)
 455 #define CACHE_NEW_CLIP_VP                (1<<BRW_CLIP_VP)
 456 #define CACHE_NEW_CLIP_UNIT              (1<<BRW_CLIP_UNIT)
 457 #define CACHE_NEW_CLIP_PROG              (1<<BRW_CLIP_PROG)
 458
 459 struct brw_cached_batch_item {
 460    struct header *header;
 461    GLuint sz;
 462    struct brw_cached_batch_item *next;
 463 };
 464
 465
 466
 467 /* Protect against a future where VERT_ATTRIB_MAX > 32.  Wouldn't life
 468  * be easier if C allowed arrays of packed elements?
 469  */
 470 #define ATTRIB_BIT_DWORDS  ((VERT_ATTRIB_MAX+31)/32)
 471
 472 struct brw_vertex_buffer {
 473    /** Buffer object containing the uploaded vertex data */
 474    drm_intel_bo *bo;
 475    uint32_t offset;
 476    /** Byte stride between elements in the uploaded array */
 477    GLuint stride;
 478 };
 479 struct brw_vertex_element {
 480    const struct gl_client_array *glarray;
 481
 482    int buffer;
 483
 484    /** The corresponding Mesa vertex attribute */
 485    gl_vert_attrib attrib;
 486    /** Size of a complete element */
 487    GLuint element_size;
 488    /** Offset of the first element within the buffer object */
 489    unsigned int offset;
 490 };
 491
 492
 493
 494 struct brw_vertex_info {
 495    GLuint sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[VERT_ATTRIB_MAX] */
 496 };
 497
 498 struct brw_query_object {
 499    struct gl_query_object Base;
 500
 501    /** Last query BO associated with this query. */
 502    drm_intel_bo *bo;
 503    /** First index in bo with query data for this object. */
 504    int first_index;
 505    /** Last index in bo with query data for this object. */
 506    int last_index;
 507 };
 508
 509
 510 /**
 511  * brw_context is derived from intel_context.
 512  */
 513 struct brw_context
 514 {
 515    struct intel_context intel;  /**< base class, must be first field */
 516    GLuint primitive;
 517
 518    GLboolean emit_state_always;
 519    GLboolean has_surface_tile_offset;
 520    GLboolean has_compr4;
 521    GLboolean has_negative_rhw_bug;
 522    GLboolean has_aa_line_parameters;
 523    GLboolean has_pln;
 524    GLboolean new_vs_backend;
 525
 526    struct {
 527       struct brw_state_flags dirty;
 528       /**
 529        * List of buffers accumulated in brw_validate_state to receive
 530        * drm_intel_bo_check_aperture treatment before exec, so we can
 531        * know if we should flush the batch and try again before
 532        * emitting primitives.
 533        *
 534        * This can be a fixed number as we only have a limited number of
 535        * objects referenced from the batchbuffer in a primitive emit,
 536        * consisting of the vertex buffers, pipelined state pointers,
 537        * the CURBE, the depth buffer, and a query BO.
 538        */
 539       drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16];
 540       unsigned int validated_bo_count;
 541    } state;
 542
 543    struct brw_cache cache;
 544    struct brw_cached_batch_item *cached_batch_items;
 545
 546    struct {
 547       struct brw_vertex_element inputs[VERT_ATTRIB_MAX];
 548       struct brw_vertex_buffer buffers[VERT_ATTRIB_MAX];
 549       struct {
 550               uint32_t handle;
 551               uint32_t offset;
 552               uint32_t stride;
 553       } current_buffers[VERT_ATTRIB_MAX];
 554
 555       struct brw_vertex_element *enabled[VERT_ATTRIB_MAX];
 556       GLuint nr_enabled;
 557       GLuint nr_buffers, nr_current_buffers;
 558
 559       /* Summary of size and varying of active arrays, so we can check
 560        * for changes to this state:
 561        */
 562       struct brw_vertex_info info;
 563       unsigned int min_index, max_index;
 564
 565       /* Offset from start of vertex buffer so we can avoid redefining
 566        * the same VB packed over and over again.
 567        */
 568       unsigned int start_vertex_bias;
 569    } vb;
 570
 571    struct {
 572       /**
 573        * Index buffer for this draw_prims call.
 574        *
 575        * Updates are signaled by BRW_NEW_INDICES.
 576        */
 577       const struct _mesa_index_buffer *ib;
 578
 579       /* Updates are signaled by BRW_NEW_INDEX_BUFFER. */
 580       drm_intel_bo *bo;
 581       GLuint type;
 582
 583       /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
 584        * avoid re-uploading the IB packet over and over if we're actually
 585        * referencing the same index buffer.
 586        */
 587       unsigned int start_vertex_offset;
 588    } ib;
 589
 590    /* Active vertex program:
 591     */
 592    const struct gl_vertex_program *vertex_program;
 593    const struct gl_fragment_program *fragment_program;
 594
 595    /* hw-dependent 3DSTATE_VF_STATISTICS opcode */
 596    uint32_t CMD_VF_STATISTICS;
 597    /* hw-dependent 3DSTATE_PIPELINE_SELECT opcode */
 598    uint32_t CMD_PIPELINE_SELECT;
 599    int vs_max_threads;
 600    int wm_max_threads;
 601
 602    /* BRW_NEW_URB_ALLOCATIONS:
 603     */
 604    struct {
 605       GLuint vsize;             /* vertex size plus header in urb registers */
 606       GLuint csize;             /* constant buffer size in urb registers */
 607       GLuint sfsize;            /* setup data size in urb registers */
 608
 609       GLboolean constrained;
 610
 611       GLuint max_vs_entries;    /* Maximum number of VS entries */
 612       GLuint max_gs_entries;    /* Maximum number of GS entries */
 613
 614       GLuint nr_vs_entries;
 615       GLuint nr_gs_entries;
 616       GLuint nr_clip_entries;
 617       GLuint nr_sf_entries;
 618       GLuint nr_cs_entries;
 619
 620       /* gen6:
 621        * The length of each URB entry owned by the VS (or GS), as
 622        * a number of 1024-bit (128-byte) rows.  Should be >= 1.
 623        *
 624        * gen7: Same meaning, but in 512-bit (64-byte) rows.
 625        */
 626       GLuint vs_size;
 627       GLuint gs_size;
 628
 629       GLuint vs_start;
 630       GLuint gs_start;
 631       GLuint clip_start;
 632       GLuint sf_start;
 633       GLuint cs_start;
 634       GLuint size; /* Hardware URB size, in KB. */
 635    } urb;
 636
 637
 638    /* BRW_NEW_CURBE_OFFSETS:
 639     */
 640    struct {
 641       GLuint wm_start;  /**< pos of first wm const in CURBE buffer */
 642       GLuint wm_size;   /**< number of float[4] consts, multiple of 16 */
 643       GLuint clip_start;
 644       GLuint clip_size;
 645       GLuint vs_start;
 646       GLuint vs_size;
 647       GLuint total_size;
 648
 649       drm_intel_bo *curbe_bo;
 650       /** Offset within curbe_bo of space for current curbe entry */
 651       GLuint curbe_offset;
 652       /** Offset within curbe_bo of space for next curbe entry */
 653       GLuint curbe_next_offset;
 654
 655       /**
 656        * Copy of the last set of CURBEs uploaded.  Frequently we'll end up
 657        * in brw_curbe.c with the same set of constant data to be uploaded,
 658        * so we'd rather not upload new constants in that case (it can cause
 659        * a pipeline bubble since only up to 4 can be pipelined at a time).
 660        */
 661       GLfloat *last_buf;
 662       /**
 663        * Allocation for where to calculate the next set of CURBEs.
 664        * It's a hot enough path that malloc/free of that data matters.
 665        */
 666       GLfloat *next_buf;
 667       GLuint last_bufsz;
 668    } curbe;
 669
 670    struct {
 671       struct brw_vs_prog_data *prog_data;
 672       int8_t *constant_map; /* variable array following prog_data */
 673
 674       drm_intel_bo *scratch_bo;
 675       drm_intel_bo *const_bo;
 676       /** Offset in the program cache to the VS program */
 677       uint32_t prog_offset;
 678       uint32_t state_offset;
 679
 680       /** Binding table of pointers to surf_bo entries */
 681       uint32_t bind_bo_offset;
 682       uint32_t surf_offset[BRW_VS_MAX_SURF];
 683       GLuint nr_surfaces;
 684
 685       uint32_t push_const_offset; /* Offset in the batchbuffer */
 686       int push_const_size; /* in 256-bit register increments */
 687
 688       /** @{ register allocator */
 689
 690       struct ra_regs *regs;
 691
 692       /**
 693        * Array of the ra classes for the unaligned contiguous register
 694        * block sizes used.
 695        */
 696       int *classes;
 697
 698       /**
 699        * Mapping for register-allocated objects in *regs to the first
 700        * GRF for that object.
 701       */
 702       uint8_t *ra_reg_to_grf;
 703       /** @} */
 704    } vs;
 705
 706    struct {
 707       struct brw_gs_prog_data *prog_data;
 708
 709       GLboolean prog_active;
 710       /** Offset in the program cache to the CLIP program pre-gen6 */
 711       uint32_t prog_offset;
 712       uint32_t state_offset;
 713    } gs;
 714
 715    struct {
 716       struct brw_clip_prog_data *prog_data;
 717
 718       /** Offset in the program cache to the CLIP program pre-gen6 */
 719       uint32_t prog_offset;
 720
 721       /* Offset in the batch to the CLIP state on pre-gen6. */
 722       uint32_t state_offset;
 723
 724       /* As of gen6, this is the offset in the batch to the CLIP VP,
 725        * instead of vp_bo.
 726        */
 727       uint32_t vp_offset;
 728    } clip;
 729
 730
 731    struct {
 732       struct brw_sf_prog_data *prog_data;
 733
 734       /** Offset in the program cache to the CLIP program pre-gen6 */
 735       uint32_t prog_offset;
 736       uint32_t state_offset;
 737       uint32_t vp_offset;
 738    } sf;
 739
 740    struct {
 741       struct brw_wm_prog_data *prog_data;
 742       struct brw_wm_compile *compile_data;
 743
 744       /** Input sizes, calculated from active vertex program.
 745        * One bit per fragment program input attribute.
 746        */
 747       GLbitfield input_size_masks[4];
 748
 749       /** offsets in the batch to sampler default colors (texture border color)
 750        */
 751       uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
 752
 753       GLuint render_surf;
 754       GLuint nr_surfaces;
 755
 756       drm_intel_bo *scratch_bo;
 757
 758       GLuint sampler_count;
 759       uint32_t sampler_offset;
 760
 761       /** Offset in the program cache to the WM program */
 762       uint32_t prog_offset;
 763
 764       /** Binding table of pointers to surf_bo entries */
 765       uint32_t bind_bo_offset;
 766       uint32_t surf_offset[BRW_WM_MAX_SURF];
 767       uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */
 768
 769       drm_intel_bo *const_bo; /* pull constant buffer. */
 770       /**
 771        * This is offset in the batch to the push constants on gen6.
 772        *
 773        * Pre-gen6, push constants live in the CURBE.
 774        */
 775       uint32_t push_const_offset;
 776
 777       /** @{ register allocator */
 778
 779       struct ra_regs *regs;
 780
 781       /** Array of the ra classes for the unaligned contiguous
 782        * register block sizes used.
 783        */
 784       int *classes;
 785
 786       /**
 787        * Mapping for register-allocated objects in *regs to the first
 788        * GRF for that object.
 789       */
 790       uint8_t *ra_reg_to_grf;
 791
 792       /**
 793        * ra class for the aligned pairs we use for PLN, which doesn't
 794        * appear in *classes.
 795        */
 796       int aligned_pairs_class;
 797
 798       /** @} */
 799    } wm;
 800
 801
 802    struct {
 803       uint32_t state_offset;
 804       uint32_t blend_state_offset;
 805       uint32_t depth_stencil_state_offset;
 806       uint32_t vp_offset;
 807    } cc;
 808
 809    struct {
 810       struct brw_query_object *obj;
 811       drm_intel_bo *bo;
 812       int index;
 813       GLboolean active;
 814    } query;
 815    /* Used to give every program string a unique id
 816     */
 817    GLuint program_id;
 818
 819    int num_prepare_atoms, num_emit_atoms;
 820    struct brw_tracked_state prepare_atoms[64], emit_atoms[64];
 821
 822    /* If (INTEL_DEBUG & DEBUG_BATCH) */
 823    struct {
 824       uint32_t offset;
 825       uint32_t size;
 826       enum state_struct_type type;
 827    } *state_batch_list;
 828    int state_batch_count;
 829 };
 830
 831
 832 #define BRW_PACKCOLOR8888(r,g,b,a)  ((r<<24) | (g<<16) | (b<<8) | a)
 833
 834 struct brw_instruction_info {
 835     char    *name;
 836     int     nsrc;
 837     int     ndst;
 838     GLboolean is_arith;
 839 };
 840 extern const struct brw_instruction_info brw_opcodes[128];
 841
 842 /*======================================================================
 843  * brw_vtbl.c
 844  */
 845 void brwInitVtbl( struct brw_context *brw );
 846
 847 /*======================================================================
 848  * brw_context.c
 849  */
 850 GLboolean brwCreateContext( int api,
 851                             const struct gl_config *mesaVis,
 852                             __DRIcontext *driContextPriv,
 853                             void *sharedContextPrivate);
 854
 855 /*======================================================================
 856  * brw_queryobj.c
 857  */
 858 void brw_init_queryobj_functions(struct dd_function_table *functions);
 859 void brw_prepare_query_begin(struct brw_context *brw);
 860 void brw_emit_query_begin(struct brw_context *brw);
 861 void brw_emit_query_end(struct brw_context *brw);
 862
 863 /*======================================================================
 864  * brw_state_dump.c
 865  */
 866 void brw_debug_batch(struct intel_context *intel);
 867
 868 /*======================================================================
 869  * brw_tex.c
 870  */
 871 void brw_validate_textures( struct brw_context *brw );
 872
 873
 874 /*======================================================================
 875  * brw_program.c
 876  */
 877 void brwInitFragProgFuncs( struct dd_function_table *functions );
 878
 879 int brw_get_scratch_size(int size);
 880 void brw_get_scratch_bo(struct intel_context *intel,
 881                         drm_intel_bo **scratch_bo, int size);
 882
 883
 884 /* brw_urb.c
 885  */
 886 void brw_upload_urb_fence(struct brw_context *brw);
 887
 888 /* brw_curbe.c
 889  */
 890 void brw_upload_cs_urb_state(struct brw_context *brw);
 891
 892 /* brw_disasm.c */
 893 int brw_disasm (FILE *file, struct brw_instruction *inst, int gen);
 894
 895 /*======================================================================
 896  * Inline conversion functions.  These are better-typed than the
 897  * macros used previously:
 898  */
 899 static INLINE struct brw_context *
 900 brw_context( struct gl_context *ctx )
 901 {
 902    return (struct brw_context *)ctx;
 903 }
 904
 905 static INLINE struct brw_vertex_program *
 906 brw_vertex_program(struct gl_vertex_program *p)
 907 {
 908    return (struct brw_vertex_program *) p;
 909 }
 910
 911 static INLINE const struct brw_vertex_program *
 912 brw_vertex_program_const(const struct gl_vertex_program *p)
 913 {
 914    return (const struct brw_vertex_program *) p;
 915 }
 916
 917 static INLINE struct brw_fragment_program *
 918 brw_fragment_program(struct gl_fragment_program *p)
 919 {
 920    return (struct brw_fragment_program *) p;
 921 }
 922
 923 static INLINE const struct brw_fragment_program *
 924 brw_fragment_program_const(const struct gl_fragment_program *p)
 925 {
 926    return (const struct brw_fragment_program *) p;
 927 }
 928
 929 static inline
 930 float convert_param(enum param_conversion conversion, const float *param)
 931 {
 932    union {
 933       float f;
 934       uint32_t u;
 935       int32_t i;
 936    } fi;
 937
 938    switch (conversion) {
 939    case PARAM_NO_CONVERT:
 940       return *param;
 941    case PARAM_CONVERT_F2I:
 942       fi.i = *param;
 943       return fi.f;
 944    case PARAM_CONVERT_F2U:
 945       fi.u = *param;
 946       return fi.f;
 947    case PARAM_CONVERT_F2B:
 948       if (*param != 0.0)
 949          fi.i = 1;
 950       else
 951          fi.i = 0;
 952       return fi.f;
 953    case PARAM_CONVERT_ZERO:
 954       return 0.0;
 955    default:
 956       return *param;
 957    }
 958 }
 959
 960 /**
 961  * Pre-gen6, the register file of the EUs was shared between threads,
 962  * and each thread used some subset allocated on a 16-register block
 963  * granularity.  The unit states wanted these block counts.
 964  */
 965 static inline int
 966 brw_register_blocks(int reg_count)
 967 {
 968    return ALIGN(reg_count, 16) / 16 - 1;
 969 }
 970
 971 static inline uint32_t
 972 brw_program_reloc(struct brw_context *brw, uint32_t state_offset,
 973                   uint32_t prog_offset)
 974 {
 975    struct intel_context *intel = &brw->intel;
 976
 977    if (intel->gen >= 5) {
 978       /* Using state base address. */
 979       return prog_offset;
 980    }
 981
 982    drm_intel_bo_emit_reloc(intel->batch.bo,
 983                            state_offset,
 984                            brw->cache.bo,
 985                            prog_offset,
 986                            I915_GEM_DOMAIN_INSTRUCTION, 0);
 987
 988    return brw->cache.bo->offset + prog_offset;
 989 }
 990
 991 GLboolean brw_do_cubemap_normalize(struct exec_list *instructions);
 992
 993 #endif