i965g: add lots of error checks and early returns
[mesa.git] / src / gallium / drivers / i965 / brw_curbe.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32 #include "util/u_memory.h"
33 #include "util/u_math.h"
34
35 #include "brw_batchbuffer.h"
36 #include "brw_context.h"
37 #include "brw_defines.h"
38 #include "brw_state.h"
39 #include "brw_util.h"
40 #include "brw_debug.h"
41 #include "brw_screen.h"
42
43
44 /**
45 * Partition the CURBE between the various users of constant values:
46 * Note that vertex and fragment shaders can now fetch constants out
47 * of constant buffers. We no longer allocatea block of the GRF for
48 * constants. That greatly reduces the demand for space in the CURBE.
49 * Some of the comments within are dated...
50 */
51 static int calculate_curbe_offsets( struct brw_context *brw )
52 {
53 /* CACHE_NEW_WM_PROG */
54 const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16;
55
56 /* BRW_NEW_VERTEX_PROGRAM */
57 const GLuint nr_vp_regs = (brw->vs.prog_data->nr_params + 15) / 16;
58 GLuint nr_clip_regs = 0;
59 GLuint total_regs;
60
61 /* PIPE_NEW_CLIP */
62 if (brw->curr.ucp.nr) {
63 GLuint nr_planes = 6 + brw->curr.ucp.nr;
64 nr_clip_regs = (nr_planes * 4 + 15) / 16;
65 }
66
67
68 total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
69
70 /* When this is > 32, want to use a true constant buffer to hold
71 * the extra constants.
72 */
73 assert(total_regs <= 32);
74
75 /* Lazy resize:
76 */
77 if (nr_fp_regs > brw->curbe.wm_size ||
78 nr_vp_regs > brw->curbe.vs_size ||
79 nr_clip_regs != brw->curbe.clip_size ||
80 (total_regs < brw->curbe.total_size / 4 &&
81 brw->curbe.total_size > 16)) {
82
83 GLuint reg = 0;
84
85 /* Calculate a new layout:
86 */
87 reg = 0;
88 brw->curbe.wm_start = reg;
89 brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
90 brw->curbe.clip_start = reg;
91 brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
92 brw->curbe.vs_start = reg;
93 brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
94 brw->curbe.total_size = reg;
95
96 if (BRW_DEBUG & DEBUG_CURBE)
97 debug_printf("curbe wm %d+%d clip %d+%d vs %d+%d\n",
98 brw->curbe.wm_start,
99 brw->curbe.wm_size,
100 brw->curbe.clip_start,
101 brw->curbe.clip_size,
102 brw->curbe.vs_start,
103 brw->curbe.vs_size );
104
105 brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
106 }
107
108 return 0;
109 }
110
111
112 const struct brw_tracked_state brw_curbe_offsets = {
113 .dirty = {
114 .mesa = PIPE_NEW_CLIP,
115 .brw = BRW_NEW_VERTEX_PROGRAM,
116 .cache = CACHE_NEW_WM_PROG
117 },
118 .prepare = calculate_curbe_offsets
119 };
120
121
122
123
124 /* Define the number of curbes within CS's urb allocation. Multiple
125 * urb entries -> multiple curbes. These will be used by
126 * fixed-function hardware in a double-buffering scheme to avoid a
127 * pipeline stall each time the contents of the curbe is changed.
128 */
129 int brw_upload_cs_urb_state(struct brw_context *brw)
130 {
131 struct brw_cs_urb_state cs_urb;
132 memset(&cs_urb, 0, sizeof(cs_urb));
133
134 /* It appears that this is the state packet for the CS unit, ie. the
135 * urb entries detailed here are housed in the CS range from the
136 * URB_FENCE command.
137 */
138 cs_urb.header.opcode = CMD_CS_URB_STATE;
139 cs_urb.header.length = sizeof(cs_urb)/4 - 2;
140
141 /* BRW_NEW_URB_FENCE */
142 cs_urb.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
143 cs_urb.bits0.urb_entry_size = brw->urb.csize - 1;
144
145 assert(brw->urb.nr_cs_entries);
146 BRW_CACHED_BATCH_STRUCT(brw, &cs_urb);
147 return 0;
148 }
149
150 static GLfloat fixed_plane[6][4] = {
151 { 0, 0, -1, 1 },
152 { 0, 0, 1, 1 },
153 { 0, -1, 0, 1 },
154 { 0, 1, 0, 1 },
155 {-1, 0, 0, 1 },
156 { 1, 0, 0, 1 }
157 };
158
159 /* Upload a new set of constants. Too much variability to go into the
160 * cache mechanism, but maybe would benefit from a comparison against
161 * the current uploaded set of constants.
162 */
163 static enum pipe_error prepare_curbe_buffer(struct brw_context *brw)
164 {
165 const GLuint sz = brw->curbe.total_size;
166 const GLuint bufsz = sz * 16 * sizeof(GLfloat);
167 enum pipe_error ret;
168 GLfloat *buf;
169 GLuint i;
170
171 if (sz == 0) {
172 if (brw->curbe.last_buf) {
173 free(brw->curbe.last_buf);
174 brw->curbe.last_buf = NULL;
175 brw->curbe.last_bufsz = 0;
176 }
177 return 0;
178 }
179
180 buf = (GLfloat *) CALLOC(bufsz, 1);
181
182 /* fragment shader constants */
183 if (brw->curbe.wm_size) {
184 GLuint offset = brw->curbe.wm_start * 16;
185
186 /* map fs constant buffer */
187
188 /* copy float constants */
189 for (i = 0; i < brw->wm.prog_data->nr_params; i++)
190 buf[offset + i] = *brw->wm.prog_data->param[i];
191
192 /* unmap fs constant buffer */
193 }
194
195
196 /* The clipplanes are actually delivered to both CLIP and VS units.
197 * VS uses them to calculate the outcode bitmasks.
198 */
199 if (brw->curbe.clip_size) {
200 GLuint offset = brw->curbe.clip_start * 16;
201 GLuint j;
202
203 /* If any planes are going this way, send them all this way:
204 */
205 for (i = 0; i < 6; i++) {
206 buf[offset + i * 4 + 0] = fixed_plane[i][0];
207 buf[offset + i * 4 + 1] = fixed_plane[i][1];
208 buf[offset + i * 4 + 2] = fixed_plane[i][2];
209 buf[offset + i * 4 + 3] = fixed_plane[i][3];
210 }
211
212 /* Clip planes:
213 */
214 assert(brw->curr.ucp.nr <= 6);
215 for (j = 0; j < brw->curr.ucp.nr; j++) {
216 buf[offset + i * 4 + 0] = brw->curr.ucp.ucp[j][0];
217 buf[offset + i * 4 + 1] = brw->curr.ucp.ucp[j][1];
218 buf[offset + i * 4 + 2] = brw->curr.ucp.ucp[j][2];
219 buf[offset + i * 4 + 3] = brw->curr.ucp.ucp[j][3];
220 i++;
221 }
222 }
223
224 /* vertex shader constants */
225 if (brw->curbe.vs_size) {
226 GLuint offset = brw->curbe.vs_start * 16;
227 GLuint nr = brw->curr.vertex_shader->info.file_max[TGSI_FILE_CONSTANT];
228 struct pipe_screen *screen = brw->base.screen;
229
230 const GLfloat *value = screen->buffer_map( screen,
231 brw->curr.vertex_constants,
232 PIPE_BUFFER_USAGE_CPU_READ);
233
234 /* XXX: what if user's constant buffer is too small?
235 */
236 memcpy(&buf[offset], value, nr * 4 * sizeof(float));
237
238 screen->buffer_unmap( screen, brw->curr.vertex_constants );
239 }
240
241 if (BRW_DEBUG & DEBUG_CURBE) {
242 for (i = 0; i < sz*16; i+=4)
243 debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
244 buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
245
246 debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
247 (void *)brw->curbe.last_buf, (void *)buf,
248 bufsz, brw->curbe.last_bufsz,
249 brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
250 }
251
252 if (brw->curbe.curbe_bo != NULL &&
253 brw->curbe.last_buf &&
254 bufsz == brw->curbe.last_bufsz &&
255 memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
256 /* constants have not changed */
257 FREE(buf);
258 }
259 else {
260 /* constants have changed */
261 if (brw->curbe.last_buf)
262 FREE(brw->curbe.last_buf);
263
264 brw->curbe.last_buf = buf;
265 brw->curbe.last_bufsz = bufsz;
266
267 if (brw->curbe.curbe_bo != NULL &&
268 (brw->curbe.need_new_bo ||
269 brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size))
270 {
271 bo_reference(&brw->curbe.curbe_bo, NULL);
272 }
273
274 if (brw->curbe.curbe_bo == NULL) {
275 /* Allocate a single page for CURBE entries for this batchbuffer.
276 * They're generally around 64b.
277 */
278 ret = brw->sws->bo_alloc(brw->sws,
279 BRW_BUFFER_TYPE_CURBE,
280 4096, 1 << 6,
281 &brw->curbe.curbe_bo);
282 if (ret)
283 return ret;
284
285 brw->curbe.curbe_next_offset = 0;
286 }
287
288 brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
289 brw->curbe.curbe_next_offset += bufsz;
290 brw->curbe.curbe_next_offset = align(brw->curbe.curbe_next_offset, 64);
291
292 /* Copy data to the buffer:
293 */
294 brw->sws->bo_subdata(brw->curbe.curbe_bo,
295 brw->curbe.curbe_offset,
296 BRW_DATA_OTHER,
297 bufsz,
298 buf);
299 }
300
301 brw_add_validated_bo(brw, brw->curbe.curbe_bo);
302
303 /* Because this provokes an action (ie copy the constants into the
304 * URB), it shouldn't be shortcircuited if identical to the
305 * previous time - because eg. the urb destination may have
306 * changed, or the urb contents different to last time.
307 *
308 * Note that the data referred to is actually copied internally,
309 * not just used in place according to passed pointer.
310 *
311 * It appears that the CS unit takes care of using each available
312 * URB entry (Const URB Entry == CURBE) in turn, and issuing
313 * flushes as necessary when doublebuffering of CURBEs isn't
314 * possible.
315 */
316
317 return 0;
318 }
319
320 static enum pipe_error emit_curbe_buffer(struct brw_context *brw)
321 {
322 GLuint sz = brw->curbe.total_size;
323
324 BEGIN_BATCH(2, IGNORE_CLIPRECTS);
325 if (sz == 0) {
326 OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
327 OUT_BATCH(0);
328 } else {
329 OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
330 OUT_RELOC(brw->curbe.curbe_bo,
331 BRW_USAGE_STATE,
332 (sz - 1) + brw->curbe.curbe_offset);
333 }
334 ADVANCE_BATCH();
335 return 0;
336 }
337
338 const struct brw_tracked_state brw_curbe_buffer = {
339 .dirty = {
340 .mesa = (PIPE_NEW_FRAGMENT_CONSTANTS |
341 PIPE_NEW_VERTEX_CONSTANTS |
342 PIPE_NEW_CLIP),
343 .brw = (BRW_NEW_FRAGMENT_PROGRAM |
344 BRW_NEW_VERTEX_PROGRAM |
345 BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
346 BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
347 BRW_NEW_CURBE_OFFSETS |
348 BRW_NEW_BATCH),
349 .cache = (CACHE_NEW_WM_PROG)
350 },
351 .prepare = prepare_curbe_buffer,
352 .emit = emit_curbe_buffer,
353 };
354