2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
32 #include "util/u_memory.h"
33 #include "util/u_math.h"
35 #include "brw_batchbuffer.h"
36 #include "brw_context.h"
37 #include "brw_defines.h"
38 #include "brw_state.h"
40 #include "brw_debug.h"
41 #include "brw_screen.h"
45 * Partition the CURBE between the various users of constant values:
46 * Note that vertex and fragment shaders can now fetch constants out
47 of constant buffers. We no longer allocate a block of the GRF for
48 * constants. That greatly reduces the demand for space in the CURBE.
49 * Some of the comments within are dated...
51 static int calculate_curbe_offsets( struct brw_context
*brw
)
53 /* CACHE_NEW_WM_PROG */
54 const GLuint nr_fp_regs
= (brw
->wm
.prog_data
->nr_params
+ 15) / 16;
56 /* BRW_NEW_VERTEX_PROGRAM */
57 const GLuint nr_vp_regs
= (brw
->vs
.prog_data
->nr_params
+ 15) / 16;
58 GLuint nr_clip_regs
= 0;
62 if (brw
->curr
.ucp
.nr
) {
63 GLuint nr_planes
= 6 + brw
->curr
.ucp
.nr
;
64 nr_clip_regs
= (nr_planes
* 4 + 15) / 16;
68 total_regs
= nr_fp_regs
+ nr_vp_regs
+ nr_clip_regs
;
70 /* When this is > 32, want to use a true constant buffer to hold
71 * the extra constants.
73 assert(total_regs
<= 32);
77 if (nr_fp_regs
> brw
->curbe
.wm_size
||
78 nr_vp_regs
> brw
->curbe
.vs_size
||
79 nr_clip_regs
!= brw
->curbe
.clip_size
||
80 (total_regs
< brw
->curbe
.total_size
/ 4 &&
81 brw
->curbe
.total_size
> 16)) {
85 /* Calculate a new layout:
88 brw
->curbe
.wm_start
= reg
;
89 brw
->curbe
.wm_size
= nr_fp_regs
; reg
+= nr_fp_regs
;
90 brw
->curbe
.clip_start
= reg
;
91 brw
->curbe
.clip_size
= nr_clip_regs
; reg
+= nr_clip_regs
;
92 brw
->curbe
.vs_start
= reg
;
93 brw
->curbe
.vs_size
= nr_vp_regs
; reg
+= nr_vp_regs
;
94 brw
->curbe
.total_size
= reg
;
96 if (BRW_DEBUG
& DEBUG_CURBE
)
97 debug_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
100 brw
->curbe
.clip_start
,
101 brw
->curbe
.clip_size
,
103 brw
->curbe
.vs_size
);
105 brw
->state
.dirty
.brw
|= BRW_NEW_CURBE_OFFSETS
;
112 const struct brw_tracked_state brw_curbe_offsets
= {
114 .mesa
= PIPE_NEW_CLIP
,
115 .brw
= BRW_NEW_VERTEX_PROGRAM
,
116 .cache
= CACHE_NEW_WM_PROG
118 .prepare
= calculate_curbe_offsets
124 /* Define the number of curbes within CS's urb allocation. Multiple
125 * urb entries -> multiple curbes. These will be used by
126 * fixed-function hardware in a double-buffering scheme to avoid a
127 * pipeline stall each time the contents of the curbe is changed.
129 int brw_upload_cs_urb_state(struct brw_context
*brw
)
131 struct brw_cs_urb_state cs_urb
;
132 memset(&cs_urb
, 0, sizeof(cs_urb
));
134 /* It appears that this is the state packet for the CS unit, ie. the
135 * urb entries detailed here are housed in the CS range from the
138 cs_urb
.header
.opcode
= CMD_CS_URB_STATE
;
139 cs_urb
.header
.length
= sizeof(cs_urb
)/4 - 2;
141 /* BRW_NEW_URB_FENCE */
142 cs_urb
.bits0
.nr_urb_entries
= brw
->urb
.nr_cs_entries
;
143 cs_urb
.bits0
.urb_entry_size
= brw
->urb
.csize
- 1;
145 assert(brw
->urb
.nr_cs_entries
);
146 BRW_CACHED_BATCH_STRUCT(brw
, &cs_urb
);
150 static GLfloat fixed_plane
[6][4] = {
159 /* Upload a new set of constants. Too much variability to go into the
160 * cache mechanism, but maybe would benefit from a comparison against
161 * the current uploaded set of constants.
163 static enum pipe_error
prepare_curbe_buffer(struct brw_context
*brw
)
165 const GLuint sz
= brw
->curbe
.total_size
;
166 const GLuint bufsz
= sz
* 16 * sizeof(GLfloat
);
172 if (brw
->curbe
.last_buf
) {
173 free(brw
->curbe
.last_buf
);
174 brw
->curbe
.last_buf
= NULL
;
175 brw
->curbe
.last_bufsz
= 0;
180 buf
= (GLfloat
*) CALLOC(bufsz
, 1);
182 /* fragment shader constants */
183 if (brw
->curbe
.wm_size
) {
184 GLuint offset
= brw
->curbe
.wm_start
* 16;
186 /* map fs constant buffer */
188 /* copy float constants */
189 for (i
= 0; i
< brw
->wm
.prog_data
->nr_params
; i
++)
190 buf
[offset
+ i
] = *brw
->wm
.prog_data
->param
[i
];
192 /* unmap fs constant buffer */
196 /* The clipplanes are actually delivered to both CLIP and VS units.
197 * VS uses them to calculate the outcode bitmasks.
199 if (brw
->curbe
.clip_size
) {
200 GLuint offset
= brw
->curbe
.clip_start
* 16;
203 /* If any planes are going this way, send them all this way:
205 for (i
= 0; i
< 6; i
++) {
206 buf
[offset
+ i
* 4 + 0] = fixed_plane
[i
][0];
207 buf
[offset
+ i
* 4 + 1] = fixed_plane
[i
][1];
208 buf
[offset
+ i
* 4 + 2] = fixed_plane
[i
][2];
209 buf
[offset
+ i
* 4 + 3] = fixed_plane
[i
][3];
214 assert(brw
->curr
.ucp
.nr
<= 6);
215 for (j
= 0; j
< brw
->curr
.ucp
.nr
; j
++) {
216 buf
[offset
+ i
* 4 + 0] = brw
->curr
.ucp
.ucp
[j
][0];
217 buf
[offset
+ i
* 4 + 1] = brw
->curr
.ucp
.ucp
[j
][1];
218 buf
[offset
+ i
* 4 + 2] = brw
->curr
.ucp
.ucp
[j
][2];
219 buf
[offset
+ i
* 4 + 3] = brw
->curr
.ucp
.ucp
[j
][3];
224 /* vertex shader constants */
225 if (brw
->curbe
.vs_size
) {
226 GLuint offset
= brw
->curbe
.vs_start
* 16;
227 GLuint nr
= brw
->curr
.vertex_shader
->info
.file_max
[TGSI_FILE_CONSTANT
];
228 struct pipe_screen
*screen
= brw
->base
.screen
;
230 const GLfloat
*value
= screen
->buffer_map( screen
,
231 brw
->curr
.vertex_constants
,
232 PIPE_BUFFER_USAGE_CPU_READ
);
234 /* XXX: what if user's constant buffer is too small?
236 memcpy(&buf
[offset
], value
, nr
* 4 * sizeof(float));
238 screen
->buffer_unmap( screen
, brw
->curr
.vertex_constants
);
241 if (BRW_DEBUG
& DEBUG_CURBE
) {
242 for (i
= 0; i
< sz
*16; i
+=4)
243 debug_printf("curbe %d.%d: %f %f %f %f\n", i
/8, i
&4,
244 buf
[i
+0], buf
[i
+1], buf
[i
+2], buf
[i
+3]);
246 debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
247 (void *)brw
->curbe
.last_buf
, (void *)buf
,
248 bufsz
, brw
->curbe
.last_bufsz
,
249 brw
->curbe
.last_buf
? memcmp(buf
, brw
->curbe
.last_buf
, bufsz
) : -1);
252 if (brw
->curbe
.curbe_bo
!= NULL
&&
253 brw
->curbe
.last_buf
&&
254 bufsz
== brw
->curbe
.last_bufsz
&&
255 memcmp(buf
, brw
->curbe
.last_buf
, bufsz
) == 0) {
256 /* constants have not changed */
260 /* constants have changed */
261 if (brw
->curbe
.last_buf
)
262 FREE(brw
->curbe
.last_buf
);
264 brw
->curbe
.last_buf
= buf
;
265 brw
->curbe
.last_bufsz
= bufsz
;
267 if (brw
->curbe
.curbe_bo
!= NULL
&&
268 (brw
->curbe
.need_new_bo
||
269 brw
->curbe
.curbe_next_offset
+ bufsz
> brw
->curbe
.curbe_bo
->size
))
271 bo_reference(&brw
->curbe
.curbe_bo
, NULL
);
274 if (brw
->curbe
.curbe_bo
== NULL
) {
275 /* Allocate a single page for CURBE entries for this batchbuffer.
276 * They're generally around 64b.
278 ret
= brw
->sws
->bo_alloc(brw
->sws
,
279 BRW_BUFFER_TYPE_CURBE
,
281 &brw
->curbe
.curbe_bo
);
285 brw
->curbe
.curbe_next_offset
= 0;
288 brw
->curbe
.curbe_offset
= brw
->curbe
.curbe_next_offset
;
289 brw
->curbe
.curbe_next_offset
+= bufsz
;
290 brw
->curbe
.curbe_next_offset
= align(brw
->curbe
.curbe_next_offset
, 64);
292 /* Copy data to the buffer:
294 brw
->sws
->bo_subdata(brw
->curbe
.curbe_bo
,
295 brw
->curbe
.curbe_offset
,
301 brw_add_validated_bo(brw
, brw
->curbe
.curbe_bo
);
303 /* Because this provokes an action (ie copy the constants into the
304 * URB), it shouldn't be shortcircuited if identical to the
305 * previous time - because eg. the urb destination may have
306 * changed, or the urb contents different to last time.
308 * Note that the data referred to is actually copied internally,
309 * not just used in place according to passed pointer.
311 * It appears that the CS unit takes care of using each available
312 * URB entry (Const URB Entry == CURBE) in turn, and issuing
313 * flushes as necessary when doublebuffering of CURBEs isn't
320 static enum pipe_error
emit_curbe_buffer(struct brw_context
*brw
)
322 GLuint sz
= brw
->curbe
.total_size
;
324 BEGIN_BATCH(2, IGNORE_CLIPRECTS
);
326 OUT_BATCH((CMD_CONST_BUFFER
<< 16) | (2 - 2));
329 OUT_BATCH((CMD_CONST_BUFFER
<< 16) | (1 << 8) | (2 - 2));
330 OUT_RELOC(brw
->curbe
.curbe_bo
,
332 (sz
- 1) + brw
->curbe
.curbe_offset
);
338 const struct brw_tracked_state brw_curbe_buffer
= {
340 .mesa
= (PIPE_NEW_FRAGMENT_CONSTANTS
|
341 PIPE_NEW_VERTEX_CONSTANTS
|
343 .brw
= (BRW_NEW_FRAGMENT_PROGRAM
|
344 BRW_NEW_VERTEX_PROGRAM
|
345 BRW_NEW_URB_FENCE
| /* Implicit - hardware requires this, not used above */
346 BRW_NEW_PSP
| /* Implicit - hardware requires this, not used above */
347 BRW_NEW_CURBE_OFFSETS
|
349 .cache
= (CACHE_NEW_WM_PROG
)
351 .prepare
= prepare_curbe_buffer
,
352 .emit
= emit_curbe_buffer
,