i965: Use unreachable() instead of unconditional assert().
[mesa.git] / src / mesa / drivers / dri / i965 / brw_curbe.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33
34 #include "main/glheader.h"
35 #include "main/context.h"
36 #include "main/macros.h"
37 #include "main/enums.h"
38 #include "program/prog_parameter.h"
39 #include "program/prog_print.h"
40 #include "program/prog_statevars.h"
41 #include "intel_batchbuffer.h"
42 #include "brw_context.h"
43 #include "brw_defines.h"
44 #include "brw_state.h"
45 #include "brw_util.h"
46
47
48 /**
49 * Partition the CURBE between the various users of constant values:
50 * Note that vertex and fragment shaders can now fetch constants out
51 * of constant buffers. We no longer allocatea block of the GRF for
52 * constants. That greatly reduces the demand for space in the CURBE.
53 * Some of the comments within are dated...
54 */
55 static void calculate_curbe_offsets( struct brw_context *brw )
56 {
57 struct gl_context *ctx = &brw->ctx;
58 /* CACHE_NEW_WM_PROG */
59 const GLuint nr_fp_regs = (brw->wm.prog_data->base.nr_params + 15) / 16;
60
61 /* BRW_NEW_VERTEX_PROGRAM */
62 const GLuint nr_vp_regs = (brw->vs.prog_data->base.base.nr_params + 15) / 16;
63 GLuint nr_clip_regs = 0;
64 GLuint total_regs;
65
66 /* _NEW_TRANSFORM */
67 if (ctx->Transform.ClipPlanesEnabled) {
68 GLuint nr_planes = 6 + _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled);
69 nr_clip_regs = (nr_planes * 4 + 15) / 16;
70 }
71
72
73 total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
74
75 /* This can happen - what to do? Probably rather than falling
76 * back, the best thing to do is emit programs which code the
77 * constants as immediate values. Could do this either as a static
78 * cap on WM and VS, or adaptively.
79 *
80 * Unfortunately, this is currently dependent on the results of the
81 * program generation process (in the case of wm), so this would
82 * introduce the need to re-generate programs in the event of a
83 * curbe allocation failure.
84 */
85 /* Max size is 32 - just large enough to
86 * hold the 128 parameters allowed by
87 * the fragment and vertex program
88 * api's. It's not clear what happens
89 * when both VP and FP want to use 128
90 * parameters, though.
91 */
92 assert(total_regs <= 32);
93
94 /* Lazy resize:
95 */
96 if (nr_fp_regs > brw->curbe.wm_size ||
97 nr_vp_regs > brw->curbe.vs_size ||
98 nr_clip_regs != brw->curbe.clip_size ||
99 (total_regs < brw->curbe.total_size / 4 &&
100 brw->curbe.total_size > 16)) {
101
102 GLuint reg = 0;
103
104 /* Calculate a new layout:
105 */
106 reg = 0;
107 brw->curbe.wm_start = reg;
108 brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
109 brw->curbe.clip_start = reg;
110 brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
111 brw->curbe.vs_start = reg;
112 brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
113 brw->curbe.total_size = reg;
114
115 if (0)
116 fprintf(stderr, "curbe wm %d+%d clip %d+%d vs %d+%d\n",
117 brw->curbe.wm_start,
118 brw->curbe.wm_size,
119 brw->curbe.clip_start,
120 brw->curbe.clip_size,
121 brw->curbe.vs_start,
122 brw->curbe.vs_size );
123
124 brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
125 }
126 }
127
128
/* State atom: recompute the CURBE partition whenever the bound vertex
 * program, the compiled fragment program, or the enabled clip planes
 * change (each alters one of the three slot sizes).
 */
const struct brw_tracked_state brw_curbe_offsets = {
   .dirty = {
      .mesa = _NEW_TRANSFORM,
      .brw  = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_CONTEXT,
      .cache = CACHE_NEW_WM_PROG
   },
   .emit = calculate_curbe_offsets
};
137
138
139
140
141 /* Define the number of curbes within CS's urb allocation. Multiple
142 * urb entries -> multiple curbes. These will be used by
143 * fixed-function hardware in a double-buffering scheme to avoid a
144 * pipeline stall each time the contents of the curbe is changed.
145 */
146 void brw_upload_cs_urb_state(struct brw_context *brw)
147 {
148 BEGIN_BATCH(2);
149 /* It appears that this is the state packet for the CS unit, ie. the
150 * urb entries detailed here are housed in the CS range from the
151 * URB_FENCE command.
152 */
153 OUT_BATCH(CMD_CS_URB_STATE << 16 | (2-2));
154
155 /* BRW_NEW_URB_FENCE */
156 if (brw->urb.csize == 0) {
157 OUT_BATCH(0);
158 } else {
159 /* BRW_NEW_URB_FENCE */
160 assert(brw->urb.nr_cs_entries);
161 OUT_BATCH((brw->urb.csize - 1) << 4 | brw->urb.nr_cs_entries);
162 }
163 ADVANCE_BATCH();
164 }
165
166 static GLfloat fixed_plane[6][4] = {
167 { 0, 0, -1, 1 },
168 { 0, 0, 1, 1 },
169 { 0, -1, 0, 1 },
170 { 0, 1, 0, 1 },
171 {-1, 0, 0, 1 },
172 { 1, 0, 0, 1 }
173 };
174
/* Upload a new set of constants.  Too much variability to go into the
 * cache mechanism, but maybe would benefit from a comparison against
 * the current uploaded set of constants.
 */
static void
brw_upload_constant_buffer(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* sz is the CURBE size in 16-float registers (laid out by
    * calculate_curbe_offsets()); bufsz is the same size in bytes.
    */
   const GLuint sz = brw->curbe.total_size;
   const GLuint bufsz = sz * 16 * sizeof(GLfloat);
   GLfloat *buf;
   GLuint i;
   gl_clip_plane *clip_planes;

   if (sz == 0) {
      /* No constants at all: skip assembling a buffer, but still emit
       * the CONST_BUFFER packet below with a zero payload.
       */
      brw->curbe.last_bufsz = 0;
      goto emit;
   }

   /* CPU-side scratch buffer assembled first, then compared against /
    * copied into the CURBE bo below.
    */
   buf = brw->curbe.next_buf;

   /* fragment shader constants */
   if (brw->curbe.wm_size) {
      /* Start of the WM slot, in floats (16 floats per register). */
      GLuint offset = brw->curbe.wm_start * 16;

      /* copy float constants: each param[] entry points at one float */
      for (i = 0; i < brw->wm.prog_data->base.nr_params; i++) {
         buf[offset + i] = *brw->wm.prog_data->base.param[i];
      }
   }

   /* clipper constants */
   if (brw->curbe.clip_size) {
      GLuint offset = brw->curbe.clip_start * 16;
      GLuint j;

      /* If any planes are going this way, send them all this way:
       * the six fixed view-volume planes always fill the first six
       * vec4 slots.
       */
      for (i = 0; i < 6; i++) {
         buf[offset + i * 4 + 0] = fixed_plane[i][0];
         buf[offset + i * 4 + 1] = fixed_plane[i][1];
         buf[offset + i * 4 + 2] = fixed_plane[i][2];
         buf[offset + i * 4 + 3] = fixed_plane[i][3];
      }

      /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to
       * clip-space.  Note that i deliberately carries on from 6 above,
       * so enabled user planes are packed densely after the fixed ones.
       */
      clip_planes = brw_select_clip_planes(ctx);
      for (j = 0; j < MAX_CLIP_PLANES; j++) {
         if (ctx->Transform.ClipPlanesEnabled & (1<<j)) {
            buf[offset + i * 4 + 0] = clip_planes[j][0];
            buf[offset + i * 4 + 1] = clip_planes[j][1];
            buf[offset + i * 4 + 2] = clip_planes[j][2];
            buf[offset + i * 4 + 3] = clip_planes[j][3];
            i++;
         }
      }
   }

   /* vertex shader constants */
   if (brw->curbe.vs_size) {
      GLuint offset = brw->curbe.vs_start * 16;

      for (i = 0; i < brw->vs.prog_data->base.base.nr_params; i++) {
         buf[offset + i] = *brw->vs.prog_data->base.base.param[i];
      }
   }

   /* Debug dump of the assembled constants (disabled). */
   if (0) {
      for (i = 0; i < sz*16; i+=4)
         fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
                 buf[i+0], buf[i+1], buf[i+2], buf[i+3]);

      fprintf(stderr, "last_buf %p buf %p sz %d/%d cmp %d\n",
              brw->curbe.last_buf, buf,
              bufsz, brw->curbe.last_bufsz,
              brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
   }

   if (brw->curbe.curbe_bo != NULL &&
       bufsz == brw->curbe.last_bufsz &&
       memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
      /* constants have not changed */
   } else {
      /* Update the record of what our last set of constants was. We
       * don't just flip the pointers because we don't fill in the
       * data in the padding between the entries.
       */
      memcpy(brw->curbe.last_buf, buf, bufsz);
      brw->curbe.last_bufsz = bufsz;

      /* Out of room in the current bo?  Drop it and allocate a fresh
       * one below rather than overwrite entries the GPU may still be
       * reading.
       */
      if (brw->curbe.curbe_bo != NULL &&
          brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size)
      {
         drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo);
         drm_intel_bo_unreference(brw->curbe.curbe_bo);
         brw->curbe.curbe_bo = NULL;
      }

      if (brw->curbe.curbe_bo == NULL) {
         /* Allocate a single page for CURBE entries for this batchbuffer.
          * They're generally around 64b.  (64-byte alignment matches the
          * sub-allocation alignment below.)
          */
         brw->curbe.curbe_bo = drm_intel_bo_alloc(brw->bufmgr, "CURBE",
                                                  4096, 1 << 6);
         brw->curbe.curbe_next_offset = 0;
         drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo);
         assert(bufsz < 4096);
      }

      /* Sub-allocate this upload from the bo, keeping the next offset
       * 64-byte aligned.
       */
      brw->curbe.curbe_offset = brw->curbe.curbe_next_offset;
      brw->curbe.curbe_next_offset += bufsz;
      brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64);

      /* Copy data to the buffer:
       */
      memcpy(brw->curbe.curbe_bo->virtual + brw->curbe.curbe_offset,
             buf,
             bufsz);
   }

   /* Because this provokes an action (ie copy the constants into the
    * URB), it shouldn't be shortcircuited if identical to the
    * previous time - because eg. the urb destination may have
    * changed, or the urb contents different to last time.
    *
    * Note that the data referred to is actually copied internally,
    * not just used in place according to passed pointer.
    *
    * It appears that the CS unit takes care of using each available
    * URB entry (Const URB Entry == CURBE) in turn, and issuing
    * flushes as necessary when doublebuffering of CURBEs isn't
    * possible.
    */

emit:
   BEGIN_BATCH(2);
   if (brw->curbe.total_size == 0) {
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2));
      OUT_BATCH(0);
   } else {
      /* (1 << 8) appears to be the "buffer valid" flag of the packet
       * header; the length field (total_size - 1) rides in the low bits
       * of the relocated address, which is safe because the bo offset
       * is 64-byte aligned.  NOTE(review): verify against the 965 PRM
       * CONSTANT_BUFFER packet description.
       */
      OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2));
      OUT_RELOC(brw->curbe.curbe_bo,
                I915_GEM_DOMAIN_INSTRUCTION, 0,
                (brw->curbe.total_size - 1) + brw->curbe.curbe_offset);
   }
   ADVANCE_BATCH();
}
324
/* State atom: rebuild and re-upload the CURBE contents whenever program
 * constants, the bound programs, the CURBE layout, or the batchbuffer
 * change.
 */
const struct brw_tracked_state brw_constant_buffer = {
   .dirty = {
      .mesa = _NEW_PROGRAM_CONSTANTS,
      .brw  = (BRW_NEW_FRAGMENT_PROGRAM |
               BRW_NEW_VERTEX_PROGRAM |
               BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
               BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
               BRW_NEW_CURBE_OFFSETS |
               BRW_NEW_BATCH),
      .cache = (CACHE_NEW_WM_PROG)
   },
   .emit = brw_upload_constant_buffer,
};
338