2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #ifndef BRWCONTEXT_INC
34 #define BRWCONTEXT_INC
36 #include "brw_structs.h"
37 #include "brw_winsys.h"
39 #include "pipe/p_state.h"
40 #include "pipe/p_context.h"
41 #include "tgsi/tgsi_scan.h"
46 * URB - uniform resource buffer. A mid-sized buffer which is
47 * partitioned between the fixed function units and used for passing
48 * values (vertices, primitives, constants) between them.
50 * CURBE - constant URB entry. An urb region (entry) used to hold
51 * constant values which the fixed function units can be instructed to
52 * preload into the GRF when spawning a thread.
54 * VUE - vertex URB entry. An urb entry holding a vertex and usually
55 * a vertex header. The header contains control information and
56 * things like primitive type, Begin/end flags and clip codes.
58 * PUE - primitive URB entry. An urb entry produced by the setup (SF)
59 * unit holding rasterization and interpolation parameters.
61 * GRF - general register file. One of several register files
62 * addressable by programmed threads. The inputs (r0, payload, curbe,
63 * urb) of the thread are preloaded to this area before the thread is
64 * spawned. The registers are individually 8 dwords wide and suitable
65 * for general usage. Registers holding thread input values are not
66 * special and may be overwritten.
68 * MRF - message register file. Threads communicate (and terminate)
69 * by sending messages. Message parameters are placed in contiguous
70 * MRF registers. All program output is via these messages. URB
71 * entries are populated by sending a message to the shared URB
72 * function containing the new data, together with a control word,
73 * often an unmodified copy of R0.
75 * R0 - GRF register 0. Typically holds control information used when
76 * sending messages to other threads.
78 * EU or GEN4 EU: The name of the programmable subsystem of the
79 * i965 hardware. Threads are executed by the EU, the registers
80 * described above are part of the EU architecture.
82 * Fixed function units:
84 * CS - Command streamer. Notional first unit, little software
85 * interaction. Holds the URB entries used for constant data, i.e. the
86 * CURBEs.
88 * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of
89 * this unit is responsible for pulling vertices out of vertex buffers
90 * in vram and injecting them into the processing pipe as VUEs. If
91 * enabled, it first passes them to a VS thread which is a good place
92 * for the driver to implement any active vertex shader.
94 * GS - Geometry Shader. This corresponds to a new DX10 concept. If
95 * enabled, incoming strips etc are passed to GS threads in individual
96 * line/triangle/point units. The GS thread may perform arbitrary
97 * computation and emit whatever primitives with whatever vertices it
98 * chooses. This makes GS an excellent place to implement GL's
99 * unfilled polygon modes, though of course it is capable of much
100 * more. Additionally, GS is used to translate away primitives not
101 * handled by later units, including Quads and Lineloops.
103 * CLIP - Clipper. Mesa's clipping algorithms are imported to run on
104 * this unit. The fixed function part performs cliptesting against
105 * the 6 fixed clipplanes and makes decisions on whether or not the
106 * incoming primitive needs to be passed to a thread for clipping.
107 * User clip planes are handled via cooperation with the VS thread.
109 * SF - Strips and Fans, or Setup: Triangles are prepared for
110 * rasterization. Interpolation coefficients are calculated.
111 * Flatshading and two-sided lighting are usually performed here.
113 * WM - Windower. Interpolation of vertex attributes performed here.
114 * Fragment shader implemented here. SIMD aspects of EU taken full
115 * advantage of, as pixels are processed in blocks of 16.
117 * CC - Color Calculator. No EU threads associated with this unit.
118 * Handles blending and (presumably) depth and stencil testing.
121 #define BRW_MAX_CURBE (32*16)
125 struct brw_depth_stencil_state
{
126 //struct pipe_depth_stencil_alpha_state templ; /* for draw module */
128 /* Precalculated hardware state:
138 struct brw_blend_state
{
139 //struct pipe_depth_stencil_alpha_state templ; /* for draw module */
141 /* Precalculated hardware state:
150 struct brw_rasterizer_state
;
153 struct brw_vertex_shader
{
154 const struct tgsi_token
*tokens
;
155 struct tgsi_shader_info info
;
157 struct brw_winsys_buffer
*const_buffer
; /** Program constant buffer/surface */
158 GLboolean use_const_buffer
;
162 struct brw_fragment_shader
{
163 const struct tgsi_token
*tokens
;
164 struct tgsi_shader_info info
;
168 struct brw_winsys_buffer
*const_buffer
; /** Program constant buffer/surface */
169 GLboolean use_const_buffer
;
174 #define PIPE_NEW_DEPTH_STENCIL_ALPHA 0x1
175 #define PIPE_NEW_RAST 0x2
176 #define PIPE_NEW_BLEND 0x4
177 #define PIPE_NEW_VIEWPORT 0x8
178 #define PIPE_NEW_SAMPLERS 0x10
179 #define PIPE_NEW_VERTEX_BUFFER 0x20
180 #define PIPE_NEW_VERTEX_ELEMENT 0x40
181 #define PIPE_NEW_FRAGMENT_SHADER 0x80
182 #define PIPE_NEW_VERTEX_SHADER 0x100
183 #define PIPE_NEW_FRAGMENT_CONSTANTS 0x200
184 #define PIPE_NEW_VERTEX_CONSTANTS 0x400
185 #define PIPE_NEW_CLIP 0x800
186 #define PIPE_NEW_INDEX_BUFFER 0x1000
187 #define PIPE_NEW_INDEX_RANGE 0x2000
188 #define PIPE_NEW_BLEND_COLOR 0x4000
189 #define PIPE_NEW_POLYGON_STIPPLE 0x8000
190 #define PIPE_NEW_FRAMEBUFFER_DIMENSIONS 0x10000
191 #define PIPE_NEW_DEPTH_BUFFER 0x20000
192 #define PIPE_NEW_COLOR_BUFFERS 0x40000
196 #define BRW_NEW_URB_FENCE 0x1
197 #define BRW_NEW_FRAGMENT_PROGRAM 0x2
198 #define BRW_NEW_VERTEX_PROGRAM 0x4
199 #define BRW_NEW_INPUT_DIMENSIONS 0x8
200 #define BRW_NEW_CURBE_OFFSETS 0x10
201 #define BRW_NEW_REDUCED_PRIMITIVE 0x20
202 #define BRW_NEW_PRIMITIVE 0x40
203 #define BRW_NEW_CONTEXT 0x80
204 #define BRW_NEW_WM_INPUT_DIMENSIONS 0x100
205 #define BRW_NEW_PSP 0x800
206 #define BRW_NEW_WM_SURFACES 0x1000
207 #define BRW_NEW_FENCE 0x2000
208 #define BRW_NEW_INDICES 0x4000
209 #define BRW_NEW_VERTICES 0x8000
211 * Used for any batch entry with a relocated pointer that will be used
212 * by any 3D rendering. Need to re-emit these fresh in each
213 * batchbuffer as the referenced buffers may be relocated in the
214 * meantime.
216 #define BRW_NEW_BATCH 0x10000
217 #define BRW_NEW_NR_WM_SURFACES 0x40000
218 #define BRW_NEW_NR_VS_SURFACES 0x80000
219 #define BRW_NEW_INDEX_BUFFER 0x100000
221 struct brw_state_flags
{
222 /** State update flags signalled by mesa internals */
225 * State update flags signalled as the result of brw_tracked_state updates
228 /** State update flags signalled by brw_state_cache.c searches */
234 /* Data about a particular attempt to compile a program. Note that
235 * there can be many of these, each in a different GL state
236 * corresponding to a different brw_wm_prog_key struct, with different
239 struct brw_wm_prog_data
{
240 GLuint curb_read_length
;
241 GLuint urb_read_length
;
243 GLuint first_curbe_grf
;
245 GLuint total_scratch
;
247 GLuint nr_params
; /**< number of float params/constants */
250 /* Pointer to tracked values (only valid once
251 * _mesa_load_state_parameters has been called at runtime).
253 const GLfloat
*param
[BRW_MAX_CURBE
];
256 struct brw_sf_prog_data
{
257 GLuint urb_read_length
;
260 /* Each vertex may have up to 12 attributes, 4 components each,
261 * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11
264 * Actually we use 4 for each, so call it 12 rows.
266 GLuint urb_entry_size
;
270 struct brw_clip_prog_data
;
272 struct brw_gs_prog_data
{
273 GLuint urb_read_length
;
277 struct brw_vs_prog_data
{
278 GLuint curb_read_length
;
279 GLuint urb_read_length
;
281 GLuint nr_outputs_written
;
282 GLuint nr_params
; /**< number of float params/constants */
286 /* Used for calculating urb partitions:
288 GLuint urb_entry_size
;
292 /* Size == 0 if output either not written, or always [0,0,0,1]
294 struct brw_vs_ouput_sizes
{
295 GLubyte output_size
[PIPE_MAX_SHADER_OUTPUTS
];
299 /** Number of texture sampler units */
300 #define BRW_MAX_TEX_UNIT 16
303 * Size of our surface binding table for the WM.
304 * This contains pointers to the drawing surfaces and current texture
305 * objects and shader constant buffers (+2).
307 #define BRW_WM_MAX_SURF (PIPE_MAX_COLOR_BUFS + BRW_MAX_TEX_UNIT + 1)
310 * Helpers to convert drawing buffers, textures and constant buffers
311 * to surface binding table indexes, for WM.
313 #define SURF_INDEX_DRAW(d) (d)
314 #define SURF_INDEX_FRAG_CONST_BUFFER (PIPE_MAX_COLOR_BUFS)
315 #define SURF_INDEX_TEXTURE(t) (PIPE_MAX_COLOR_BUFS + 1 + (t))
318 * Size of surface binding table for the VS.
319 * Only one constant buffer for now.
321 #define BRW_VS_MAX_SURF 1
324 * Only a VS constant buffer
326 #define SURF_INDEX_VERT_CONST_BUFFER 0
333 BRW_SAMPLER_DEFAULT_COLOR
,
352 struct brw_cache_item
{
354 * Effectively part of the key, cache_id identifies what kind of state
355 * buffer is involved, and also which brw->state.dirty.cache flag should
356 * be set when this cache item is chosen.
358 enum brw_cache_id cache_id
;
359 /** 32-bit hash of the key data */
361 GLuint key_size
; /* for variable-sized keys */
363 struct brw_winsys_buffer
**reloc_bufs
;
364 GLuint nr_reloc_bufs
;
366 struct brw_winsys_buffer
*bo
;
369 struct brw_cache_item
*next
;
375 struct brw_context
*brw
;
377 struct brw_cache_item
**items
;
378 GLuint size
, n_items
;
380 GLuint key_size
[BRW_MAX_CACHE
]; /* for fixed-size keys */
381 GLuint aux_size
[BRW_MAX_CACHE
];
382 char *name
[BRW_MAX_CACHE
];
384 /* Record of the last BOs chosen for each cache_id. Used to set
385 * brw->state.dirty.cache when a new cache item is chosen.
387 struct brw_winsys_buffer
*last_bo
[BRW_MAX_CACHE
];
391 struct brw_tracked_state
{
392 struct brw_state_flags dirty
;
393 int (*prepare
)( struct brw_context
*brw
);
394 int (*emit
)( struct brw_context
*brw
);
397 /* Flags for brw->state.cache.
399 #define CACHE_NEW_CC_VP (1<<BRW_CC_VP)
400 #define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT)
401 #define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG)
402 #define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR)
403 #define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER)
404 #define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT)
405 #define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG)
406 #define CACHE_NEW_SF_VP (1<<BRW_SF_VP)
407 #define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT)
408 #define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT)
409 #define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG)
410 #define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT)
411 #define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG)
412 #define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP)
413 #define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT)
414 #define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG)
415 #define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE)
416 #define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND)
418 struct brw_cached_batch_item
{
419 struct header
*header
;
421 struct brw_cached_batch_item
*next
;
426 /* Protect against a future where VERT_ATTRIB_MAX > 32. Wouldn't life
427 * be easier if C allowed arrays of packed elements?
429 #define VS_INPUT_BITMASK_DWORDS ((PIPE_MAX_SHADER_INPUTS+31)/32)
434 struct brw_vertex_info
{
435 GLuint sizes
[VS_INPUT_BITMASK_DWORDS
* 2]; /* sizes:2[VERT_ATTRIB_MAX] */
439 struct brw_query_object
{
440 /** Doubly linked list of active query objects in the context. */
441 struct brw_query_object
*prev
, *next
;
443 /** Last query BO associated with this query. */
444 struct brw_winsys_buffer
*bo
;
445 /** First index in bo with query data for this object. */
447 /** Last index in bo with query data for this object. */
450 /* Total count of pixels from previous BOs */
456 * brw_context is derived from pipe_context
460 struct pipe_context base
;
461 struct brw_chipset chipset
;
463 struct brw_screen
*brw_screen
;
464 struct brw_winsys_screen
*sws
;
466 struct brw_batchbuffer
*batch
;
469 GLuint reduced_primitive
;
471 /* Active state from the state tracker:
474 const struct brw_vertex_shader
*vertex_shader
;
475 const struct brw_fragment_shader
*fragment_shader
;
476 const struct brw_blend_state
*blend
;
477 const struct brw_rasterizer_state
*rast
;
478 const struct brw_depth_stencil_state
*zstencil
;
480 struct pipe_vertex_element vertex_element
[PIPE_MAX_ATTRIBS
];
481 struct pipe_vertex_buffer vertex_buffer
[PIPE_MAX_ATTRIBS
];
482 unsigned num_vertex_elements
;
483 unsigned num_vertex_buffers
;
485 struct pipe_framebuffer_state fb
;
486 struct pipe_viewport_state vp
;
487 struct pipe_clip_state ucp
;
488 struct pipe_buffer
*vertex_constants
;
489 struct pipe_buffer
*fragment_constants
;
491 struct pipe_viewport_state viewport
;
492 struct brw_blend_constant_color bcc
;
493 struct brw_polygon_stipple bps
;
496 * Index buffer for this draw_prims call.
498 * Updates are signaled by PIPE_NEW_INDEX_BUFFER.
500 struct pipe_buffer
*index_buffer
;
503 /* Updates are signalled by PIPE_NEW_INDEX_RANGE:
511 struct brw_state_flags dirty
;
514 * List of buffers accumulated in brw_validate_state to receive
515 * dri_bo_check_aperture treatment before exec, so we can know if we
516 * should flush the batch and try again before emitting primitives.
518 * This can be a fixed number as we only have a limited number of
519 * objects referenced from the batchbuffer in a primitive emit,
520 * consisting of the vertex buffers, pipelined state pointers,
521 * the CURBE, the depth buffer, and a query BO.
523 struct brw_winsys_buffer
*validated_bos
[PIPE_MAX_SHADER_INPUTS
+ 16];
524 int validated_bo_count
;
527 struct brw_cache cache
; /** non-surface items */
528 struct brw_cache surface_cache
; /* surface items */
529 struct brw_cached_batch_item
*cached_batch_items
;
532 struct u_upload_mgr
*upload_vertex
;
533 struct u_upload_mgr
*upload_index
;
535 /* Information on uploaded vertex buffers:
538 unsigned stride
; /* in bytes between successive vertices */
539 unsigned offset
; /* in bytes, of first vertex in bo */
540 unsigned vertex_count
; /* count of valid vertices which may be accessed */
541 struct brw_winsys_buffer
*bo
;
542 } vb
[PIPE_MAX_ATTRIBS
];
545 } ve
[PIPE_MAX_ATTRIBS
];
547 unsigned nr_vb
; /* currently the same as curr.num_vertex_buffers */
548 unsigned nr_ve
; /* currently the same as curr.num_vertex_elements */
552 /* Updates to these fields are signaled by BRW_NEW_INDEX_BUFFER. */
553 struct brw_winsys_buffer
*bo
;
556 /* Offset to index buffer index to use in CMD_3D_PRIM so that we can
557 * avoid re-uploading the IB packet over and over if we're actually
558 * referencing the same index buffer.
560 unsigned int start_vertex_offset
;
564 /* BRW_NEW_URB_ALLOCATIONS:
567 GLuint vsize
; /* vertex size plus header in urb registers */
568 GLuint csize
; /* constant buffer size in urb registers */
569 GLuint sfsize
; /* setup data size in urb registers */
571 GLboolean constrained
;
573 GLuint nr_vs_entries
;
574 GLuint nr_gs_entries
;
575 GLuint nr_clip_entries
;
576 GLuint nr_sf_entries
;
577 GLuint nr_cs_entries
;
587 /* BRW_NEW_CURBE_OFFSETS:
590 GLuint wm_start
; /**< pos of first wm const in CURBE buffer */
591 GLuint wm_size
; /**< number of float[4] consts, multiple of 16 */
598 struct brw_winsys_buffer
*curbe_bo
;
599 /** Offset within curbe_bo of space for current curbe entry */
601 /** Offset within curbe_bo of space for next curbe entry */
602 GLuint curbe_next_offset
;
607 * Whether we should create a new bo instead of reusing the old one
608 * (if we just dispatched the batch pointing at the old one).
610 GLboolean need_new_bo
;
614 struct brw_vs_prog_data
*prog_data
;
616 struct brw_winsys_buffer
*prog_bo
;
617 struct brw_winsys_buffer
*state_bo
;
619 /** Binding table of pointers to surf_bo entries */
620 struct brw_winsys_buffer
*bind_bo
;
621 struct brw_winsys_buffer
*surf_bo
[BRW_VS_MAX_SURF
];
626 struct brw_gs_prog_data
*prog_data
;
628 GLboolean prog_active
;
629 struct brw_winsys_buffer
*prog_bo
;
630 struct brw_winsys_buffer
*state_bo
;
634 struct brw_clip_prog_data
*prog_data
;
636 struct brw_winsys_buffer
*prog_bo
;
637 struct brw_winsys_buffer
*state_bo
;
638 struct brw_winsys_buffer
*vp_bo
;
643 struct brw_sf_prog_data
*prog_data
;
645 struct brw_winsys_buffer
*prog_bo
;
646 struct brw_winsys_buffer
*state_bo
;
647 struct brw_winsys_buffer
*vp_bo
;
651 struct brw_wm_prog_data
*prog_data
;
652 struct brw_wm_compile
*compile_data
;
654 /** Input sizes, calculated from active vertex program.
655 * One bit per fragment program input attribute.
657 //GLbitfield input_size_masks[4];
659 /** Array of surface default colors (texture border color) */
660 struct brw_winsys_buffer
*sdc_bo
[BRW_MAX_TEX_UNIT
];
666 struct brw_winsys_buffer
*scratch_bo
;
668 GLuint sampler_count
;
669 struct brw_winsys_buffer
*sampler_bo
;
671 /** Binding table of pointers to surf_bo entries */
672 struct brw_winsys_buffer
*bind_bo
;
673 struct brw_winsys_buffer
*surf_bo
[PIPE_MAX_COLOR_BUFS
];
675 struct brw_winsys_buffer
*prog_bo
;
676 struct brw_winsys_buffer
*state_bo
;
681 struct brw_winsys_buffer
*prog_bo
;
682 struct brw_winsys_buffer
*state_bo
;
683 struct brw_winsys_buffer
*vp_bo
;
687 struct brw_query_object active_head
;
688 struct brw_winsys_buffer
*bo
;
694 unsigned always_emit_state
:1;
695 unsigned always_flush_batch
:1;
696 unsigned force_swtnl
:1;
700 /* Used to give every program string a unique id
707 /*======================================================================
710 void brw_init_query(struct brw_context
*brw
);
711 void brw_prepare_query_begin(struct brw_context
*brw
);
712 void brw_emit_query_begin(struct brw_context
*brw
);
713 void brw_emit_query_end(struct brw_context
*brw
);
715 /*======================================================================
718 void brw_debug_batch(struct brw_context
*intel
);
721 /*======================================================================
724 void brw_pipe_blend_init( struct brw_context
*brw
);
725 void brw_pipe_depth_stencil_init( struct brw_context
*brw
);
726 void brw_pipe_framebuffer_init( struct brw_context
*brw
);
727 void brw_pipe_flush_init( struct brw_context
*brw
);
728 void brw_pipe_misc_init( struct brw_context
*brw
);
729 void brw_pipe_query_init( struct brw_context
*brw
);
730 void brw_pipe_rast_init( struct brw_context
*brw
);
731 void brw_pipe_sampler_init( struct brw_context
*brw
);
732 void brw_pipe_shader_init( struct brw_context
*brw
);
733 void brw_pipe_vertex_init( struct brw_context
*brw
);
735 void brw_pipe_blend_cleanup( struct brw_context
*brw
);
736 void brw_pipe_depth_stencil_cleanup( struct brw_context
*brw
);
737 void brw_pipe_framebuffer_cleanup( struct brw_context
*brw
);
738 void brw_pipe_flush_cleanup( struct brw_context
*brw
);
739 void brw_pipe_misc_cleanup( struct brw_context
*brw
);
740 void brw_pipe_query_cleanup( struct brw_context
*brw
);
741 void brw_pipe_rast_cleanup( struct brw_context
*brw
);
742 void brw_pipe_sampler_cleanup( struct brw_context
*brw
);
743 void brw_pipe_shader_cleanup( struct brw_context
*brw
);
744 void brw_pipe_vertex_cleanup( struct brw_context
*brw
);
749 int brw_upload_urb_fence(struct brw_context
*brw
);
753 int brw_upload_cs_urb_state(struct brw_context
*brw
);
756 int brw_disasm (FILE *file
, struct brw_instruction
*inst
);
758 /*======================================================================
759 * Inline conversion functions. These are better-typed than the
760 * macros used previously:
762 static INLINE
struct brw_context
*
763 brw_context( struct pipe_context
*ctx
)
765 return (struct brw_context
*)ctx
;
769 #define BRW_IS_965(brw) ((brw)->chipset.is_965)
770 #define BRW_IS_IGDNG(brw) ((brw)->chipset.is_igdng)
771 #define BRW_IS_G4X(brw) ((brw)->chipset.is_g4x)