i965: Use unreachable() instead of unconditional assert().
[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
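This commit converts always-failing assertions in default switch cases to Mesa's unreachable() macro, which documents that the case cannot happen and lets the compiler assume so. A minimal sketch of the pattern being applied (illustrative, not the actual diff hunks):

   /* Before: an assert that always fires if the default case is hit. */
   default:
      assert(!"not reached");
      return 0;

   /* After: unreachable() marks the case as impossible, so no dummy
    * return value is needed.
    */
   default:
      unreachable("not reached");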
/*
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"

GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      unreachable("not reached");
   }
}

uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
   switch (tiling) {
   case I915_TILING_X:
      return BRW_SURFACE_TILED;
   case I915_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}


uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}

void
brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
                      bool is_render_target,
                      unsigned *width, unsigned *height,
                      unsigned *pitch, uint32_t *tiling, unsigned *format)
{
   static const unsigned halign_stencil = 8;

   /* In Y-tiling a row is twice as wide as in W-tiling, and consequently
    * there are half as many rows.
    * In addition, mip-levels are accessed manually by the program and
    * therefore the surface is set up to cover all the mip-levels for one
    * slice. (Hardware is still used to access individual slices).
    */
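   /* For example, a stencil miptree with a pitch of 128 bytes and 64 rows
    * per slice is programmed below as a Y-tiled surface with a 256-byte
    * pitch and 32 rows.
    */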
   *tiling = I915_TILING_Y;
   *pitch = mt->pitch * 2;
   *width = ALIGN(mt->total_width, halign_stencil) * 2;
   *height = (mt->total_height / mt->physical_depth0) / 2;

   if (is_render_target) {
      *format = BRW_SURFACEFORMAT_R8_UINT;
   }
}


/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }

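   /* Compose the depth-mode / base-format swizzle computed above with the
    * texture object's resolved swizzle state (which already folds in
    * EXT_texture_swizzle).
    */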
   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}

static void
gen4_emit_buffer_surface_state(struct brw_context *brw,
                               uint32_t *out_offset,
                               drm_intel_bo *bo,
                               unsigned buffer_offset,
                               unsigned surface_format,
                               unsigned buffer_size,
                               unsigned pitch,
                               unsigned mocs,
                               bool rw)
{
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                                    6 * 4, 32, out_offset);
   memset(surf, 0, 6 * 4);

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
   surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
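   /* The buffer size is split across the surface state's width (bits 6:0),
    * height (bits 19:7) and depth (bits 26:20) fields.
    */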
   surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
             ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
   surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
             (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;

   /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache. It is mapped in hardware to the sampler cache."
    */
   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
                              bo, buffer_offset,
                              I915_GEM_DOMAIN_SAMPLER,
                              (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
   }
}

void
brw_update_buffer_texture_surface(struct gl_context *ctx,
                                  unsigned unit,
                                  uint32_t *surf_offset)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_buffer_object *intel_obj =
      intel_buffer_object(tObj->BufferObject);
   uint32_t size = tObj->BufferSize;
   drm_intel_bo *bo = NULL;
   mesa_format format = tObj->_BufferObjectFormat;
   uint32_t brw_format = brw_format_for_mesa_format(format);
   int texel_size = _mesa_get_format_bytes(format);

   if (intel_obj) {
      size = MIN2(size, intel_obj->Base.Size);
      bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
   }

   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
                    _mesa_get_format_name(format));
   }

   brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
                                       tObj->BufferOffset,
                                       brw_format,
                                       size / texel_size,
                                       texel_size,
                                       0, /* mocs */
                                       false /* rw */);
}

static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *surf_offset,
                           bool for_gather)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;

   /* BRW_NEW_UNIFORM_BUFFER */
   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, surf_offset);
      return;
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, surf_offset);

   uint32_t tex_format = translate_tex_format(brw, mt->format,
                                              sampler->sRGBDecode);

   if (for_gather) {
      /* Sandybridge's gather4 message is broken for integer formats.
       * To work around this, we pretend the surface is UNORM for
       * 8 or 16-bit formats, and emit shader instructions to recover
       * the real INT/UINT value. For 32-bit formats, we pretend
       * the surface is FLOAT, and simply reinterpret the resulting
       * bits.
       */
      switch (tex_format) {
      case BRW_SURFACEFORMAT_R8_SINT:
      case BRW_SURFACEFORMAT_R8_UINT:
         tex_format = BRW_SURFACEFORMAT_R8_UNORM;
         break;

      case BRW_SURFACEFORMAT_R16_SINT:
      case BRW_SURFACEFORMAT_R16_UINT:
         tex_format = BRW_SURFACEFORMAT_R16_UNORM;
         break;

      case BRW_SURFACEFORMAT_R32_SINT:
      case BRW_SURFACEFORMAT_R32_UINT:
         tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
         break;

      default:
         break;
      }
   }

   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              tex_format << BRW_SURFACE_FORMAT_SHIFT);

   surf[1] = mt->bo->offset64 + mt->offset; /* reloc */

   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
              SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));

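   /* Set the vertical-alignment bit when the miptree was laid out with
    * align_h == 4 so the surface state matches the actual layout.
    */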
   surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;

   /* Emit relocation to surface contents */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *surf_offset + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}

/**
 * Create the constant buffer surface. Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 */
void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset,
                            bool dword_pitch)
{
   uint32_t stride = dword_pitch ? 4 : 16;
   uint32_t elements = ALIGN(size, stride) / stride;

   brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
                                       BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
                                       elements, stride, 0, false);
}

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   uint32_t offset_bytes = 4 * offset_dwords;
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
                                             offset_bytes,
                                             buffer_obj->Size - offset_bytes);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4*stride_dwords - 1;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer. We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow. But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
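   /* Split the entry count across the 7-bit width, 13-bit height and 7-bit
    * depth fields of the surface state.
    */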
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;

   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      unreachable("Invalid vector size for transform feedback output");
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
             BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
             surface_format << BRW_SURFACE_FORMAT_SHIFT |
             BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset64 + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   struct gl_program_parameter_list *params = fp->program.Base.Parameters;
   const int size = brw->wm.prog_data->base.nr_pull_params * sizeof(float);
   const int surf_index =
      brw->wm.prog_data->base.binding_table.pull_constants_start;
   unsigned int i;

   _mesa_load_state_parameters(ctx, params);

   /* CACHE_NEW_WM_PROG */
   if (brw->wm.prog_data->base.nr_pull_params == 0) {
      if (brw->wm.base.surf_offset[surf_index]) {
         brw->wm.base.surf_offset[surf_index] = 0;
         brw->state.dirty.brw |= BRW_NEW_SURFACES;
      }
      return;
   }

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_bo *const_bo = NULL;
   uint32_t const_offset;
   float *constants = intel_upload_space(brw, size, 64,
                                         &const_bo, &const_offset);
   for (i = 0; i < brw->wm.prog_data->base.nr_pull_params; i++) {
      constants[i] = *brw->wm.prog_data->base.pull_param[i];
   }

   brw_create_constant_surface(brw, const_bo, const_offset, size,
                               &brw->wm.base.surf_offset[surf_index],
                               true);
   drm_intel_bo_unreference(const_bo);

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = (_NEW_PROGRAM_CONSTANTS),
      .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_pull_constants,
};

static void
brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
{
   /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *     A null surface will be used in instances where an actual surface is
    *     not bound. When a write message is generated to a null surface, no
    *     actual surface is written to. When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros. Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specifically indicated as
    *     supported. All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions:
    *
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *       depth buffer's corresponding state for all render target surfaces,
    *       including null.
    *
    *     - Surface Format must be R8G8B8A8_UNORM.
    */
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf;
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;
   uint32_t surf_index =
      brw->wm.prog_data->binding_table.render_target_start + unit;

   /* _NEW_BUFFERS */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                          &brw->wm.base.surf_offset[surf_index]);

   if (fb->Visual.samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling. So work around this problem by rendering into a
       * dummy color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile). This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
      unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
      unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
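      /* For example, a 1920x1080 multisampled framebuffer needs
       * (120 + 68 - 1) * 4096 = 765,952 bytes for the workaround buffer.
       */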
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state =
         brw_get_surface_num_multisamples(fb->Visual.samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset64 : 0;
   surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              brw->wm.base.surf_offset[surf_index] + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffers support.
 */
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered,
                                unsigned int unit)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   /* _NEW_BUFFERS */
   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
   uint32_t surf_index =
      brw->wm.prog_data->binding_table.render_target_start + unit;

   assert(!layered);

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually set up your
          * renderbuffer as a miptree and used the fragile
          * lod/array_index/etc. controls to select the image. So, instead,
          * we just make a new single-level miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                          &brw->wm.base.surf_offset[surf_index]);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __FUNCTION__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              mt->bo->offset64);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
              (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the renderbuffer is
       * XRGB (no alpha bits) or when alpha writes are masked off.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           brw->wm.base.surf_offset[surf_index] + 4,
                           mt->bo,
                           surf[1] - mt->bo->offset64,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);
}

/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
static void
brw_update_renderbuffer_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   GLuint i;

   /* _NEW_BUFFERS | _NEW_COLOR */
   /* Update surfaces for drawing buffers */
   if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
         if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
            brw->vtbl.update_renderbuffer_surface(brw,
               ctx->DrawBuffer->_ColorDrawBuffers[i],
               ctx->DrawBuffer->MaxNumLayers > 0, i);
         } else {
            brw->vtbl.update_null_renderbuffer_surface(brw, i);
         }
      }
   } else {
      brw->vtbl.update_null_renderbuffer_surface(brw, 0);
   }
   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = (_NEW_COLOR |
               _NEW_BUFFERS),
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};


static void
update_stage_texture_surfaces(struct brw_context *brw,
                              const struct gl_program *prog,
                              struct brw_stage_state *stage_state,
                              bool for_gather)
{
   if (!prog)
      return;

   struct gl_context *ctx = &brw->ctx;

   uint32_t *surf_offset = stage_state->surf_offset;
   if (for_gather)
      surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
   else
      surf_offset += stage_state->prog_data->binding_table.texture_start;

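   /* _mesa_fls() returns one past the index of the highest set bit, so the
    * loop below visits every sampler slot the program could reference.
    */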
   unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
   for (unsigned s = 0; s < num_samplers; s++) {
      surf_offset[s] = 0;

      if (prog->SamplersUsed & (1 << s)) {
         const unsigned unit = prog->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._Current) {
            brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s,
                                             for_gather);
         }
      }
   }
}


/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   /* BRW_NEW_VERTEX_PROGRAM */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;

   /* BRW_NEW_GEOMETRY_PROGRAM */
   struct gl_program *gs = (struct gl_program *) brw->geometry_program;

   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   /* _NEW_TEXTURE */
   update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
   update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
   update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);

   /* Emit an alternate set of surface state for gather. This allows the
    * surface format to be overridden for only the gather4 messages.
    */
   if (brw->gen < 8) {
      if (vs && vs->UsesGather)
         update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
      if (gs && gs->UsesGather)
         update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
      if (fs && fs->UsesGather)
         update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
   }

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_UNIFORM_BUFFER |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_GEOMETRY_PROGRAM |
             BRW_NEW_FRAGMENT_PROGRAM,
      .cache = 0
   },
   .emit = brw_update_texture_surfaces,
};

void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;

   if (!shader)
      return;

   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.ubo_start];

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding;
      struct intel_buffer_object *intel_bo;

      binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
      intel_bo = intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo =
         intel_bufferobj_buffer(brw, intel_bo,
                                binding->Offset,
                                binding->BufferObject->Size - binding->Offset);

      /* Because behavior for referencing outside of the binding's size in the
       * glBindBufferRange case is undefined, we can just bind the whole buffer
       * that glBindBufferBase wants and still be a correct implementation.
       */
      brw_create_constant_surface(brw, bo, binding->Offset,
                                  bo->size - binding->Offset,
                                  &surf_offsets[i],
                                  shader->Stage == MESA_SHADER_FRAGMENT);
   }

   if (shader->NumUniformBlocks)
      brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;

   if (!prog)
      return;

   /* CACHE_NEW_WM_PROG */
   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.base, &brw->wm.prog_data->base);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

void
brw_upload_abo_surfaces(struct brw_context *brw,
                        struct gl_shader_program *prog,
                        struct brw_stage_state *stage_state,
                        struct brw_stage_prog_data *prog_data)
{
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf_offsets =
      &stage_state->surf_offset[prog_data->binding_table.abo_start];

   for (int i = 0; i < prog->NumAtomicBuffers; i++) {
      struct gl_atomic_buffer_binding *binding =
         &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
      struct intel_buffer_object *intel_bo =
         intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo = intel_bufferobj_buffer(
         brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);

      brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
                                   bo->size - binding->Offset,
                                   &surf_offsets[i], true);
   }

   if (prog->NumAtomicBuffers)
      brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

static void
brw_upload_wm_abo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;

   if (prog) {
      /* CACHE_NEW_WM_PROG */
      brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
                              &brw->wm.prog_data->base);
   }
}

const struct brw_tracked_state brw_wm_abo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH | BRW_NEW_ATOMIC_BUFFER,
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_abo_surfaces,
};

void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
   brw->vtbl.update_null_renderbuffer_surface =
      brw_update_null_renderbuffer_surface;
   brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
}