Merge commit 'origin/gallium-master-merge'
[mesa.git] / src / gallium / drivers / i965simple / brw_context.h
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #ifndef BRWCONTEXT_INC
34 #define BRWCONTEXT_INC
35
36
37 #include "pipe/p_context.h"
38 #include "pipe/p_defines.h"
39 #include "pipe/p_state.h"
40
41 #include "tgsi/tgsi_scan.h"
42
43 #include "brw_structs.h"
44 #include "brw_winsys.h"
45
46
47 /* Glossary:
48 *
49 * URB - uniform resource buffer. A mid-sized buffer which is
50 * partitioned between the fixed function units and used for passing
51 * values (vertices, primitives, constants) between them.
52 *
53 * CURBE - constant URB entry. An urb region (entry) used to hold
54 * constant values which the fixed function units can be instructed to
55 * preload into the GRF when spawning a thread.
56 *
57 * VUE - vertex URB entry. An urb entry holding a vertex and usually
58 * a vertex header. The header contains control information and
59 * things like primitive type, Begin/end flags and clip codes.
60 *
61 * PUE - primitive URB entry. An urb entry produced by the setup (SF)
62 * unit holding rasterization and interpolation parameters.
63 *
64 * GRF - general register file. One of several register files
65 * addressable by programmed threads. The inputs (r0, payload, curbe,
66 * urb) of the thread are preloaded to this area before the thread is
67 * spawned. The registers are individually 8 dwords wide and suitable
68 * for general usage. Registers holding thread input values are not
69 * special and may be overwritten.
70 *
71 * MRF - message register file. Threads communicate (and terminate)
72 * by sending messages. Message parameters are placed in contiguous
73 * MRF registers. All program output is via these messages. URB
74 * entries are populated by sending a message to the shared URB
75 * function containing the new data, together with a control word,
76 * often an unmodified copy of R0.
77 *
78 * R0 - GRF register 0. Typically holds control information used when
79 * sending messages to other threads.
80 *
81 * EU or GEN4 EU: The name of the programmable subsystem of the
82 * i965 hardware. Threads are executed by the EU, the registers
83 * described above are part of the EU architecture.
84 *
85 * Fixed function units:
86 *
87 * CS - Command streamer. Notional first unit, little software
88 * interaction. Holds the URB entries used for constant data, ie the
89 * CURBEs.
90 *
91 * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of
92 * this unit is responsible for pulling vertices out of vertex buffers
93 * in vram and injecting them into the processing pipe as VUEs. If
94 * enabled, it first passes them to a VS thread which is a good place
95 * for the driver to implement any active vertex shader.
96 *
97 * GS - Geometry Shader. This corresponds to a new DX10 concept. If
98 * enabled, incoming strips etc are passed to GS threads in individual
99 * line/triangle/point units. The GS thread may perform arbitrary
100 * computation and emit whatever primitives with whatever vertices it
101 * chooses. This makes GS an excellent place to implement GL's
102 * unfilled polygon modes, though of course it is capable of much
103 * more. Additionally, GS is used to translate away primitives not
104 * handled by later units, including Quads and Lineloops.
105 *
106 * CLIP - Clipper. Mesa's clipping algorithms are imported to run on
107 * this unit. The fixed function part performs cliptesting against
108 * the 6 fixed clipplanes and makes decisions on whether or not the
109 * incoming primitive needs to be passed to a thread for clipping.
110 * User clip planes are handled via cooperation with the VS thread.
111 *
112 * SF - Strips Fans or Setup: Triangles are prepared for
113 * rasterization. Interpolation coefficients are calculated.
114 * Flatshading and two-side lighting usually performed here.
115 *
116 * WM - Windower. Interpolation of vertex attributes performed here.
117 * Fragment shader implemented here. SIMD aspects of EU taken full
118 * advantage of, as pixels are processed in blocks of 16.
119 *
120 * CC - Color Calculator. No EU threads associated with this unit.
121 * Handles blending and (presumably) depth and stencil testing.
122 */
123
/* Presumably the upper bound on CURBE size: 32*16 == 512.
 * NOTE(review): the unit (floats, dwords, ...) is not stated in this
 * header -- confirm against the code that uses it.
 */
#define BRW_MAX_CURBE (32*16)

/* Forward declarations so that pointers to these types can be used
 * before the full definitions are seen.
 */
struct brw_context;
struct brw_winsys;
128
129
/* Dirty bits raised when we receive new state across the pipe
 * interface.  One bit per flag, occupying bits 0..14.
 */
#define BRW_NEW_VIEWPORT          (1 << 0)
#define BRW_NEW_RASTERIZER        (1 << 1)
#define BRW_NEW_FS                (1 << 2)
#define BRW_NEW_BLEND             (1 << 3)
#define BRW_NEW_CLIP              (1 << 4)
#define BRW_NEW_SCISSOR           (1 << 5)
#define BRW_NEW_STIPPLE           (1 << 6)
#define BRW_NEW_FRAMEBUFFER       (1 << 7)
#define BRW_NEW_ALPHA_TEST        (1 << 8)
#define BRW_NEW_DEPTH_STENCIL     (1 << 9)
#define BRW_NEW_SAMPLER           (1 << 10)
#define BRW_NEW_TEXTURE           (1 << 11)
#define BRW_NEW_CONSTANTS         (1 << 12)
#define BRW_NEW_VBO               (1 << 13)
#define BRW_NEW_VS                (1 << 14)
147
/* Dirty bits raised for other, driver-internal events.  These start at
 * bit 16, above the pipe-interface flags.
 */
#define BRW_NEW_URB_FENCE           (1 << 16)
#define BRW_NEW_PSP                 (1 << 17)
#define BRW_NEW_CURBE_OFFSETS       (1 << 18)
#define BRW_NEW_REDUCED_PRIMITIVE   (1 << 19)
#define BRW_NEW_PRIMITIVE           (1 << 20)
#define BRW_NEW_SCENE               (1 << 21)
#define BRW_NEW_SF_LINKAGE          (1 << 22)
157
/* Bitmask of enabled debug categories; defined in another translation
 * unit.  Tested by the DBG() macro against the DEBUG_* bits below.
 */
extern int BRW_DEBUG;

/* Debug category bits, one bit each (bits 0..23). */
#define DEBUG_TEXTURE         (1 << 0)
#define DEBUG_STATE           (1 << 1)
#define DEBUG_IOCTL           (1 << 2)
#define DEBUG_PRIMS           (1 << 3)
#define DEBUG_VERTS           (1 << 4)
#define DEBUG_FALLBACKS       (1 << 5)
#define DEBUG_VERBOSE         (1 << 6)
#define DEBUG_DRI             (1 << 7)
#define DEBUG_DMA             (1 << 8)
#define DEBUG_SANITY          (1 << 9)
#define DEBUG_SYNC            (1 << 10)
#define DEBUG_SLEEP           (1 << 11)
#define DEBUG_PIXEL           (1 << 12)
#define DEBUG_STATS           (1 << 13)
#define DEBUG_TILE            (1 << 14)
#define DEBUG_SINGLE_THREAD   (1 << 15)
#define DEBUG_WM              (1 << 16)
#define DEBUG_URB             (1 << 17)
#define DEBUG_VS              (1 << 18)
#define DEBUG_BATCH           (1 << 19)
#define DEBUG_BUFMGR          (1 << 20)
#define DEBUG_BLIT            (1 << 21)
#define DEBUG_REGION          (1 << 22)
#define DEBUG_MIPTREE         (1 << 23)
184
/* Conditional debug print: forwards its arguments to debug_printf()
 * only when the FILE_DEBUG_FLAG bit is set in BRW_DEBUG.
 * FILE_DEBUG_FLAG is not defined in this header; the including file
 * must define it before using DBG().
 */
#define DBG(...)                                   \
   do {                                            \
      if ((BRW_DEBUG & FILE_DEBUG_FLAG) != 0) {    \
         debug_printf(__VA_ARGS__);                \
      }                                            \
   } while (0)

/* Unconditional debug print: always forwards to debug_printf(). */
#define PRINT(...)                  \
   do {                             \
      debug_printf(__VA_ARGS__);    \
   } while (0)
193
/* Pair of dirty-flag bitmasks.  Per the flag definitions in this
 * header, 'cache' takes CACHE_NEW_* bits and 'brw' presumably takes
 * BRW_NEW_* bits.
 */
struct brw_state_flags {
   unsigned cache;
   unsigned brw;
};
198
199
/* Vertex shader as held by the driver: the pipe shader state, the
 * tgsi_shader_info describing it, and an integer id.
 */
struct brw_vertex_program {
   struct pipe_shader_state program;
   struct tgsi_shader_info info;
   int id;
};
205
206
/* Fragment shader as held by the driver: the pipe shader state, the
 * tgsi_shader_info describing it, a depth-usage flag and an integer id.
 */
struct brw_fragment_program {
   struct pipe_shader_state program;
   struct tgsi_shader_info info;

   boolean UsesDepth; /* XXX add this to tgsi_shader_info? */
   int id;
};
214
215
/* Per-fragment-program-input linkage record, stored in the 'sf' member
 * of struct brw_context.  All fields are bitfields: the 5-bit fields
 * hold values 0..31 and interp_mode holds 0..15.
 * NOTE(review): presumably maps each FP input to the VP output (and
 * back-face VP output) feeding it -- confirm against the SF code.
 */
struct pipe_setup_linkage {
   struct {
      unsigned vp_output:5;
      unsigned interp_mode:4;
      unsigned bf_vp_output:5;
   } fp_input[PIPE_MAX_SHADER_INPUTS];

   unsigned fp_input_count:5;
   unsigned max_vp_output:5;
};
226
227
228
/* Driver texture object.  The generic pipe_texture is the first
 * member; the remaining fields hold layout information derived from it
 * plus the buffer that stores the texel data.
 */
struct brw_texture {
   struct pipe_texture base;

   /* Derived from the above:
    */
   unsigned stride;
   unsigned depth_pitch;          /* per-image on i945? */
   unsigned total_nblocksy;

   /* One entry per mipmap level. */
   unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS];

   /* Explicitly store the offset of each image for each cube face or
    * depth value.  Pretty much have to accept that hardware formats
    * are going to be so diverse that there is no unified way to
    * compute the offsets of depth/cube images within a mipmap level,
    * so have to store them as a lookup table:
    */
   unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS];   /**< array [depth] of offsets */

   /* Includes image offset tables:
    */
   unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS];

   /* The data is held here:
    */
   struct pipe_buffer *buffer;
};
256
/* Data about a particular attempt to compile a program.  Note that
 * there can be many of these, each in a different GL state
 * corresponding to a different brw_wm_prog_key struct, with different
 * compiled programs:
 */
struct brw_wm_prog_data {
   unsigned curb_read_length;
   unsigned urb_read_length;

   unsigned first_curbe_grf;
   unsigned total_grf;
   unsigned total_scratch;

   /* Internally generated constants for the CURBE.  These are loaded
    * ahead of the data from the constant buffer.
    *
    * NOTE(review): the array is declared const, so it cannot be
    * assigned through this struct without casting the qualifier away.
    * Confirm how (and whether) it is ever filled in.
    */
   const float internal_const[8];
   unsigned nr_internal_consts;
   unsigned max_const;

   boolean error;
};
285
/* Data about a compiled SF (setup) program. */
struct brw_sf_prog_data {
   unsigned urb_read_length;
   unsigned total_grf;

   /* Each vertex may have up to 12 attributes, 4 components each,
    * except WPOS which requires only 2.  (11*4 + 2) == 46 components,
    * which would need 12 rows of 4.
    *
    * Actually we use 4 for each, so call it 12 rows.
    */
   unsigned urb_entry_size;
};
298
/* Data about a compiled clip program. */
struct brw_clip_prog_data {
   unsigned curb_read_length;     /* user planes? */
   unsigned clip_mode;
   unsigned urb_read_length;
   unsigned total_grf;
};
305
/* Data about a compiled GS program. */
struct brw_gs_prog_data {
   unsigned urb_read_length;
   unsigned total_grf;
};
310
/* Data about a compiled VS program. */
struct brw_vs_prog_data {
   unsigned curb_read_length;
   unsigned urb_read_length;
   unsigned total_grf;
   unsigned outputs_written;

   unsigned inputs_read;

   unsigned max_const;

   /* Storage for up to PIPE_MAX_CONSTANT four-float entries.
    * NOTE(review): presumably num_imm counts the entries in use --
    * confirm against the VS compiler.
    */
   float imm_buf[PIPE_MAX_CONSTANT][4];
   unsigned num_imm;
   unsigned num_consts;

   /* Used for calculating urb partitions:
    */
   unsigned urb_entry_size;
};
329
330
/* Number of texture units handled by the driver. */
#define BRW_MAX_TEX_UNIT 8

/* Number of WM surfaces: one per texture unit plus one more.  The
 * replacement list is parenthesized so the macro behaves as a single
 * value inside larger expressions (e.g. BRW_WM_MAX_SURF * 2).
 */
#define BRW_WM_MAX_SURF (BRW_MAX_TEX_UNIT + 1)
333
/* Create a fixed sized struct for caching binding tables: one offset
 * per WM surface slot.
 */
struct brw_surface_binding_table {
   unsigned surf_ss_offset[BRW_WM_MAX_SURF];
};
339
340
/* Forward declaration; the full definition follows below. */
struct brw_cache;

/* A pool backed by a single pipe_buffer.  'offset' marks the first
 * free byte within the buffer.
 */
struct brw_mem_pool {
   struct pipe_buffer *buffer;

   unsigned size;
   unsigned offset;               /* offset of first free byte */

   struct brw_context *brw;
};
351
/* One cache entry.  Entries are linked through 'next'; struct
 * brw_cache holds an array of pointers to them.
 */
struct brw_cache_item {
   unsigned hash;
   unsigned key_size;             /* for variable-sized keys */
   const void *key;

   unsigned offset;               /* offset within pool's buffer */
   unsigned data_size;

   struct brw_cache_item *next;
};
362
363
364
/* A cache of items whose data lives in a brw_mem_pool.
 * NOTE(review): 'items' looks like a hash-bucket array with 'size'
 * buckets and 'n_items' stored entries -- confirm against the cache
 * implementation.
 */
struct brw_cache {
   unsigned id;

   const char *name;

   struct brw_context *brw;
   struct brw_mem_pool *pool;

   struct brw_cache_item **items;
   unsigned size, n_items;

   unsigned key_size;             /* for fixed-size keys */
   unsigned aux_size;

   unsigned last_addr;            /* offset of active item */
};
381
382
383
384
/* A state atom: a set of dirty flags paired with an update callback
 * that takes the context.
 *
 * Considered adding a member to this struct to document which flags
 * an update might raise so that ordering of the state atoms can be
 * checked or derived at runtime.  Dropped the idea in favor of having
 * a debug mode where the state is monitored for flags which are
 * raised that have already been tested against.
 */
struct brw_tracked_state {
   struct brw_state_flags dirty;
   void (*update)( struct brw_context *brw );
};
395
396
/* Flags for brw->state.cache.
 *
 * Each flag is a single bit whose position is given by a BRW_* cache
 * identifier.  Those identifiers are not defined in the visible part
 * of this header.
 */
#define CACHE_NEW_CC_VP (1<<BRW_CC_VP)
#define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT)
#define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG)
#define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR)
#define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER)
#define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT)
#define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG)
#define CACHE_NEW_SF_VP (1<<BRW_SF_VP)
#define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT)
#define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT)
#define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG)
#define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT)
#define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG)
#define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP)
#define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT)
#define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG)
#define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE)
#define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND)
417
418
419
420
/* Indices into the pool[] array of struct brw_context.  BRW_MAX_POOL
 * is the number of pools, not a pool itself.
 */
enum brw_mempool_id {
   BRW_GS_POOL,
   BRW_SS_POOL,
   BRW_MAX_POOL
};
426
427
/* Node of a singly linked list (see cached_batch_items in struct
 * brw_context).  Each node points at a 'struct header' and records a
 * size.
 */
struct brw_cached_batch_item {
   struct header *header;
   unsigned sz;
   struct brw_cached_batch_item *next;
};
433
434
435
/* Protect against a future where PIPE_MAX_ATTRIBS > 32.  Wouldn't life
 * be easier if C allowed arrays of packed elements?
 *
 * Evaluates to the number of 32-bit words needed to hold one bit per
 * attribute (PIPE_MAX_ATTRIBS / 32, rounded up).
 */
#define ATTRIB_BIT_DWORDS ((PIPE_MAX_ATTRIBS+31)/32)
440
441
442
443
/* Packed per-attribute summary: one 'varying' bit and a two-bit size
 * code per attribute.
 * NOTE(review): 'varying' is a single unsigned rather than
 * ATTRIB_BIT_DWORDS words, so it only has room for as many attributes
 * as an unsigned has bits.
 */
struct brw_vertex_info {
   unsigned varying;                        /* varying:1[PIPE_MAX_ATTRIBS] */
   unsigned sizes[ATTRIB_BIT_DWORDS * 2];   /* sizes:2[PIPE_MAX_ATTRIBS] */
};
448
449
450
451
452
/* Driver context.  The generic pipe_context is the first member, which
 * is what lets the brw_context() helper at the end of this header cast
 * a pipe_context pointer to a brw_context pointer.
 */
struct brw_context
{
   struct pipe_context pipe;
   struct brw_winsys *winsys;

   unsigned primitive;
   unsigned reduced_primitive;

   boolean emit_state_always;

   /* Dirty flags (cache and brw bitmasks). */
   struct {
      struct brw_state_flags dirty;
   } state;


   /* State bound through the pipe interface.  Some items are held as
    * const pointers, others (Clip, BlendColor, Scissor, Viewport,
    * FrameBuffer) are stored by value.
    */
   struct {
      const struct pipe_blend_state *Blend;
      const struct pipe_depth_stencil_alpha_state *DepthStencil;
      const struct pipe_poly_stipple *PolygonStipple;
      const struct pipe_rasterizer_state *Raster;
      const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS];
      const struct brw_vertex_program *VertexProgram;
      const struct brw_fragment_program *FragmentProgram;

      struct pipe_clip_state Clip;
      struct pipe_blend_color BlendColor;
      struct pipe_scissor_state Scissor;
      struct pipe_viewport_state Viewport;
      struct pipe_framebuffer_state FrameBuffer;

      const struct pipe_constant_buffer *Constants[2];
      const struct brw_texture *Texture[PIPE_MAX_SAMPLERS];
   } attribs;

   unsigned num_samplers;
   unsigned num_textures;

   /* Memory pools indexed by enum brw_mempool_id, and caches indexed up
    * to BRW_MAX_CACHE (defined outside the visible part of this header).
    */
   struct brw_mem_pool pool[BRW_MAX_POOL];
   struct brw_cache cache[BRW_MAX_CACHE];
   struct brw_cached_batch_item *cached_batch_items;

   struct {

      /* Arrays with buffer objects to copy non-bufferobj arrays into
       * for upload:
       */
      const struct pipe_vertex_buffer *vbo_array[PIPE_MAX_ATTRIBS];

      struct brw_vertex_element_state inputs[PIPE_MAX_ATTRIBS];

#define BRW_NR_UPLOAD_BUFS 17
#define BRW_UPLOAD_INIT_SIZE (128*1024)

      /* Summary of size and varying of active arrays, so we can check
       * for changes to this state:
       */
      struct brw_vertex_info info;
   } vb;


   unsigned hardware_dirty;
   unsigned dirty;
   unsigned pci_id;
   /* BRW_NEW_URB_ALLOCATIONS:
    *
    * NOTE(review): no flag of that name is defined in the visible part
    * of this header; BRW_NEW_URB_FENCE may be what is meant -- confirm.
    */
   struct {
      unsigned vsize;             /* vertex size plus header in urb registers */
      unsigned csize;             /* constant buffer size in urb registers */
      unsigned sfsize;            /* setup data size in urb registers */

      boolean constrained;

      unsigned nr_vs_entries;
      unsigned nr_gs_entries;
      unsigned nr_clip_entries;
      unsigned nr_sf_entries;
      unsigned nr_cs_entries;

      /* unsigned vs_size; */
      /* unsigned gs_size; */
      /* unsigned clip_size; */
      /* unsigned sf_size; */
      /* unsigned cs_size; */

      unsigned vs_start;
      unsigned gs_start;
      unsigned clip_start;
      unsigned sf_start;
      unsigned cs_start;
   } urb;


   /* BRW_NEW_CURBE_OFFSETS:
    */
   struct {
      unsigned wm_start;
      unsigned wm_size;
      unsigned clip_start;
      unsigned clip_size;
      unsigned vs_start;
      unsigned vs_size;
      unsigned total_size;

      unsigned gs_offset;

      float *last_buf;
      unsigned last_bufsz;
   } curbe;

   /* Per-unit state follows (vs, gs, clip, sf, wm, cc).  Each block
    * records offsets for that unit and, except for cc, a pointer to its
    * compiled-program data.
    * NOTE(review): the *_gs_offset fields are presumably offsets into
    * the BRW_GS_POOL buffer -- confirm.
    */
   struct {
      struct brw_vs_prog_data *prog_data;

      unsigned prog_gs_offset;
      unsigned state_gs_offset;
   } vs;

   struct {
      struct brw_gs_prog_data *prog_data;

      boolean prog_active;
      unsigned prog_gs_offset;
      unsigned state_gs_offset;
   } gs;

   struct {
      struct brw_clip_prog_data *prog_data;

      unsigned prog_gs_offset;
      unsigned vp_gs_offset;
      unsigned state_gs_offset;
   } clip;


   struct {
      struct brw_sf_prog_data *prog_data;

      struct pipe_setup_linkage linkage;

      unsigned prog_gs_offset;
      unsigned vp_gs_offset;
      unsigned state_gs_offset;
   } sf;

   struct {
      struct brw_wm_prog_data *prog_data;

      // struct brw_wm_compiler *compile_data;


      /**
       * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER
       * cache
       */
      struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT];

      unsigned render_surf;
      unsigned nr_surfaces;

      unsigned max_threads;
      struct pipe_buffer *scratch_buffer;
      unsigned scratch_buffer_size;

      unsigned sampler_count;
      unsigned sampler_gs_offset;

      struct brw_surface_binding_table bind;
      unsigned bind_ss_offset;

      unsigned prog_gs_offset;
      unsigned state_gs_offset;
   } wm;


   struct {
      unsigned vp_gs_offset;
      unsigned state_gs_offset;
   } cc;


   /* Used to give every program string a unique id
    */
   unsigned program_id;
};
636
637
/* Pack four 8-bit channel values into one 32-bit word, with 'r' in
 * bits 31..24, 'g' in bits 23..16, 'b' in bits 15..8 and 'a' in bits
 * 7..0.
 *
 * Every argument is parenthesized so that expression arguments (e.g.
 * "x | y") are shifted as a whole, and each is converted to unsigned
 * before shifting so that a top channel value >= 0x80 does not shift
 * into the sign bit of an int.  The result has type unsigned.
 * Arguments are not masked: values wider than 8 bits spill into the
 * neighbouring channels, as before.
 */
#define BRW_PACKCOLOR8888(r,g,b,a) \
   (((unsigned)(r) << 24) | ((unsigned)(g) << 16) | ((unsigned)(b) << 8) | (unsigned)(a))
639
640
/*======================================================================
 * brw_vtbl.c
 */
void brw_do_flush( struct brw_context *brw,
                   unsigned flags );


/*======================================================================
 * brw_state.c
 */
void brw_validate_state(struct brw_context *brw);
void brw_init_state(struct brw_context *brw);
void brw_destroy_state(struct brw_context *brw);


/*======================================================================
 * brw_tex.c
 */
void brwUpdateTextureState( struct brw_context *brw );


/*======================================================================
 * brw_urb.c
 */
void brw_upload_urb_fence(struct brw_context *brw);

/* NOTE(review): the prototypes below carry no source-file heading in
 * the original; they are kept under brw_urb.c only because that is
 * where they appear.  Confirm where each one is defined.
 */
void brw_upload_constant_buffer_state(struct brw_context *brw);

void brw_init_surface_functions(struct brw_context *brw);
void brw_init_state_functions(struct brw_context *brw);
void brw_init_flush_functions(struct brw_context *brw);
void brw_init_string_functions(struct brw_context *brw);
672
673 /*======================================================================
674 * Inline conversion functions. These are better-typed than the
675 * macros used previously:
676 */
/**
 * Downcast a generic pipe_context pointer to the driver's brw_context.
 *
 * This is a plain pointer cast with no checking; it is valid because
 * struct brw_context has its pipe_context as its first member.
 *
 * \param ctx  pipe context pointer (may be NULL, in which case NULL is
 *             returned)
 * \return     the same address, typed as struct brw_context *
 */
static inline struct brw_context *
brw_context( struct pipe_context *ctx )
{
   struct brw_context *brw = (struct brw_context *) ctx;

   return brw;
}
682
683 #endif
684