i965: Move up duplicated fields from stage-specific prog_data to brw_stage_prog_data.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 assert(0);
77 return 0;
78 }
79 }
80
81 uint32_t
82 brw_get_surface_tiling_bits(uint32_t tiling)
83 {
84 switch (tiling) {
85 case I915_TILING_X:
86 return BRW_SURFACE_TILED;
87 case I915_TILING_Y:
88 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
89 default:
90 return 0;
91 }
92 }
93
94
95 uint32_t
96 brw_get_surface_num_multisamples(unsigned num_samples)
97 {
98 if (num_samples > 1)
99 return BRW_SURFACE_MULTISAMPLECOUNT_4;
100 else
101 return BRW_SURFACE_MULTISAMPLECOUNT_1;
102 }
103
104
105 /**
106 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
107 * swizzling.
108 */
109 int
110 brw_get_texture_swizzle(const struct gl_context *ctx,
111 const struct gl_texture_object *t)
112 {
113 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
114
115 int swizzles[SWIZZLE_NIL + 1] = {
116 SWIZZLE_X,
117 SWIZZLE_Y,
118 SWIZZLE_Z,
119 SWIZZLE_W,
120 SWIZZLE_ZERO,
121 SWIZZLE_ONE,
122 SWIZZLE_NIL
123 };
124
125 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
126 img->_BaseFormat == GL_DEPTH_STENCIL) {
127 GLenum depth_mode = t->DepthMode;
128
129 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
130 * with depth component data specified with a sized internal format.
131 * Otherwise, it's left at the old default, GL_LUMINANCE.
132 */
133 if (_mesa_is_gles3(ctx) &&
134 img->InternalFormat != GL_DEPTH_COMPONENT &&
135 img->InternalFormat != GL_DEPTH_STENCIL) {
136 depth_mode = GL_RED;
137 }
138
139 switch (depth_mode) {
140 case GL_ALPHA:
141 swizzles[0] = SWIZZLE_ZERO;
142 swizzles[1] = SWIZZLE_ZERO;
143 swizzles[2] = SWIZZLE_ZERO;
144 swizzles[3] = SWIZZLE_X;
145 break;
146 case GL_LUMINANCE:
147 swizzles[0] = SWIZZLE_X;
148 swizzles[1] = SWIZZLE_X;
149 swizzles[2] = SWIZZLE_X;
150 swizzles[3] = SWIZZLE_ONE;
151 break;
152 case GL_INTENSITY:
153 swizzles[0] = SWIZZLE_X;
154 swizzles[1] = SWIZZLE_X;
155 swizzles[2] = SWIZZLE_X;
156 swizzles[3] = SWIZZLE_X;
157 break;
158 case GL_RED:
159 swizzles[0] = SWIZZLE_X;
160 swizzles[1] = SWIZZLE_ZERO;
161 swizzles[2] = SWIZZLE_ZERO;
162 swizzles[3] = SWIZZLE_ONE;
163 break;
164 }
165 }
166
167 /* If the texture's format is alpha-only, force R, G, and B to
168 * 0.0. Similarly, if the texture's format has no alpha channel,
169 * force the alpha value read to 1.0. This allows for the
170 * implementation to use an RGBA texture for any of these formats
171 * without leaking any unexpected values.
172 */
173 switch (img->_BaseFormat) {
174 case GL_ALPHA:
175 swizzles[0] = SWIZZLE_ZERO;
176 swizzles[1] = SWIZZLE_ZERO;
177 swizzles[2] = SWIZZLE_ZERO;
178 break;
179 case GL_RED:
180 case GL_RG:
181 case GL_RGB:
182 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
183 swizzles[3] = SWIZZLE_ONE;
184 break;
185 }
186
187 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
188 swizzles[GET_SWZ(t->_Swizzle, 1)],
189 swizzles[GET_SWZ(t->_Swizzle, 2)],
190 swizzles[GET_SWZ(t->_Swizzle, 3)]);
191 }
192
193 static void
194 gen4_emit_buffer_surface_state(struct brw_context *brw,
195 uint32_t *out_offset,
196 drm_intel_bo *bo,
197 unsigned buffer_offset,
198 unsigned surface_format,
199 unsigned buffer_size,
200 unsigned pitch,
201 unsigned mocs,
202 bool rw)
203 {
204 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
205 6 * 4, 32, out_offset);
206 memset(surf, 0, 6 * 4);
207
208 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
209 surface_format << BRW_SURFACE_FORMAT_SHIFT |
210 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
211 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
212 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
213 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
214 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
215 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
216
217 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
218 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
219 * physical cache. It is mapped in hardware to the sampler cache."
220 */
221 if (bo) {
222 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
223 bo, buffer_offset,
224 I915_GEM_DOMAIN_SAMPLER,
225 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
226 }
227 }
228
229 void
230 brw_update_buffer_texture_surface(struct gl_context *ctx,
231 unsigned unit,
232 uint32_t *surf_offset)
233 {
234 struct brw_context *brw = brw_context(ctx);
235 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
236 struct intel_buffer_object *intel_obj =
237 intel_buffer_object(tObj->BufferObject);
238 uint32_t size = tObj->BufferSize;
239 drm_intel_bo *bo = NULL;
240 mesa_format format = tObj->_BufferObjectFormat;
241 uint32_t brw_format = brw_format_for_mesa_format(format);
242 int texel_size = _mesa_get_format_bytes(format);
243
244 if (intel_obj) {
245 size = MIN2(size, intel_obj->Base.Size);
246 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
247 }
248
249 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
250 _mesa_problem(NULL, "bad format %s for texture buffer\n",
251 _mesa_get_format_name(format));
252 }
253
254 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
255 tObj->BufferOffset,
256 brw_format,
257 size / texel_size,
258 texel_size,
259 0, /* mocs */
260 false /* rw */);
261 }
262
263 static void
264 brw_update_texture_surface(struct gl_context *ctx,
265 unsigned unit,
266 uint32_t *surf_offset,
267 bool for_gather)
268 {
269 struct brw_context *brw = brw_context(ctx);
270 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
271 struct intel_texture_object *intelObj = intel_texture_object(tObj);
272 struct intel_mipmap_tree *mt = intelObj->mt;
273 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
274 uint32_t *surf;
275
276 /* BRW_NEW_UNIFORM_BUFFER */
277 if (tObj->Target == GL_TEXTURE_BUFFER) {
278 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
279 return;
280 }
281
282 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
283 6 * 4, 32, surf_offset);
284
285 uint32_t tex_format = translate_tex_format(brw, mt->format,
286 sampler->sRGBDecode);
287
288 if (for_gather) {
289 /* Sandybridge's gather4 message is broken for integer formats.
290 * To work around this, we pretend the surface is UNORM for
291 * 8 or 16-bit formats, and emit shader instructions to recover
292 * the real INT/UINT value. For 32-bit formats, we pretend
293 * the surface is FLOAT, and simply reinterpret the resulting
294 * bits.
295 */
296 switch (tex_format) {
297 case BRW_SURFACEFORMAT_R8_SINT:
298 case BRW_SURFACEFORMAT_R8_UINT:
299 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
300 break;
301
302 case BRW_SURFACEFORMAT_R16_SINT:
303 case BRW_SURFACEFORMAT_R16_UINT:
304 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
305 break;
306
307 case BRW_SURFACEFORMAT_R32_SINT:
308 case BRW_SURFACEFORMAT_R32_UINT:
309 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
310 break;
311
312 default:
313 break;
314 }
315 }
316
317 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
318 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
319 BRW_SURFACE_CUBEFACE_ENABLES |
320 tex_format << BRW_SURFACE_FORMAT_SHIFT);
321
322 surf[1] = intelObj->mt->region->bo->offset64 + intelObj->mt->offset; /* reloc */
323
324 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
325 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
326 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
327
328 surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
329 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
330 (intelObj->mt->region->pitch - 1) <<
331 BRW_SURFACE_PITCH_SHIFT);
332
333 surf[4] = (brw_get_surface_num_multisamples(intelObj->mt->num_samples) |
334 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
335
336 surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
337
338 /* Emit relocation to surface contents */
339 drm_intel_bo_emit_reloc(brw->batch.bo,
340 *surf_offset + 4,
341 intelObj->mt->region->bo,
342 surf[1] - intelObj->mt->region->bo->offset64,
343 I915_GEM_DOMAIN_SAMPLER, 0);
344 }
345
346 /**
347 * Create the constant buffer surface. Vertex/fragment shader constants will be
348 * read from this buffer with Data Port Read instructions/messages.
349 */
350 void
351 brw_create_constant_surface(struct brw_context *brw,
352 drm_intel_bo *bo,
353 uint32_t offset,
354 uint32_t size,
355 uint32_t *out_offset,
356 bool dword_pitch)
357 {
358 uint32_t stride = dword_pitch ? 4 : 16;
359 uint32_t elements = ALIGN(size, stride) / stride;
360
361 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
362 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
363 elements, stride, 0, false);
364 }
365
366 /**
367 * Set up a binding table entry for use by stream output logic (transform
368 * feedback).
369 *
370 * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
371 */
372 void
373 brw_update_sol_surface(struct brw_context *brw,
374 struct gl_buffer_object *buffer_obj,
375 uint32_t *out_offset, unsigned num_vector_components,
376 unsigned stride_dwords, unsigned offset_dwords)
377 {
378 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
379 uint32_t offset_bytes = 4 * offset_dwords;
380 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
381 offset_bytes,
382 buffer_obj->Size - offset_bytes);
383 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
384 out_offset);
385 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
386 size_t size_dwords = buffer_obj->Size / 4;
387 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
388
389 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
390 * too big to map using a single binding table entry?
391 */
392 assert((size_dwords - offset_dwords) / stride_dwords
393 <= BRW_MAX_NUM_BUFFER_ENTRIES);
394
395 if (size_dwords > offset_dwords + num_vector_components) {
396 /* There is room for at least 1 transform feedback output in the buffer.
397 * Compute the number of additional transform feedback outputs the
398 * buffer has room for.
399 */
400 buffer_size_minus_1 =
401 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
402 } else {
403 /* There isn't even room for a single transform feedback output in the
404 * buffer. We can't configure the binding table entry to prevent output
405 * entirely; we'll have to rely on the geometry shader to detect
406 * overflow. But to minimize the damage in case of a bug, set up the
407 * binding table entry to just allow a single output.
408 */
409 buffer_size_minus_1 = 0;
410 }
411 width = buffer_size_minus_1 & 0x7f;
412 height = (buffer_size_minus_1 & 0xfff80) >> 7;
413 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
414
415 switch (num_vector_components) {
416 case 1:
417 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
418 break;
419 case 2:
420 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
421 break;
422 case 3:
423 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
424 break;
425 case 4:
426 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
427 break;
428 default:
429 assert(!"Invalid vector size for transform feedback output");
430 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
431 break;
432 }
433
434 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
435 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
436 surface_format << BRW_SURFACE_FORMAT_SHIFT |
437 BRW_SURFACE_RC_READ_WRITE;
438 surf[1] = bo->offset64 + offset_bytes; /* reloc */
439 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
440 height << BRW_SURFACE_HEIGHT_SHIFT);
441 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
442 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
443 surf[4] = 0;
444 surf[5] = 0;
445
446 /* Emit relocation to surface contents. */
447 drm_intel_bo_emit_reloc(brw->batch.bo,
448 *out_offset + 4,
449 bo, offset_bytes,
450 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
451 }
452
453 /* Creates a new WM constant buffer reflecting the current fragment program's
454 * constants, if needed by the fragment program.
455 *
456 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
457 * state atom.
458 */
459 static void
460 brw_upload_wm_pull_constants(struct brw_context *brw)
461 {
462 struct gl_context *ctx = &brw->ctx;
463 /* BRW_NEW_FRAGMENT_PROGRAM */
464 struct brw_fragment_program *fp =
465 (struct brw_fragment_program *) brw->fragment_program;
466 struct gl_program_parameter_list *params = fp->program.Base.Parameters;
467 const int size = brw->wm.prog_data->base.nr_pull_params * sizeof(float);
468 const int surf_index =
469 brw->wm.prog_data->base.binding_table.pull_constants_start;
470 float *constants;
471 unsigned int i;
472
473 _mesa_load_state_parameters(ctx, params);
474
475 /* CACHE_NEW_WM_PROG */
476 if (brw->wm.prog_data->base.nr_pull_params == 0) {
477 if (brw->wm.base.const_bo) {
478 drm_intel_bo_unreference(brw->wm.base.const_bo);
479 brw->wm.base.const_bo = NULL;
480 brw->wm.base.surf_offset[surf_index] = 0;
481 brw->state.dirty.brw |= BRW_NEW_SURFACES;
482 }
483 return;
484 }
485
486 drm_intel_bo_unreference(brw->wm.base.const_bo);
487 brw->wm.base.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
488 size, 64);
489
490 /* _NEW_PROGRAM_CONSTANTS */
491 drm_intel_gem_bo_map_gtt(brw->wm.base.const_bo);
492 constants = brw->wm.base.const_bo->virtual;
493 for (i = 0; i < brw->wm.prog_data->base.nr_pull_params; i++) {
494 constants[i] = *brw->wm.prog_data->base.pull_param[i];
495 }
496 drm_intel_gem_bo_unmap_gtt(brw->wm.base.const_bo);
497
498 brw_create_constant_surface(brw, brw->wm.base.const_bo, 0, size,
499 &brw->wm.base.surf_offset[surf_index],
500 true);
501
502 brw->state.dirty.brw |= BRW_NEW_SURFACES;
503 }
504
505 const struct brw_tracked_state brw_wm_pull_constants = {
506 .dirty = {
507 .mesa = (_NEW_PROGRAM_CONSTANTS),
508 .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
509 .cache = CACHE_NEW_WM_PROG,
510 },
511 .emit = brw_upload_wm_pull_constants,
512 };
513
514 static void
515 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
516 {
517 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
518 * Notes):
519 *
520 * A null surface will be used in instances where an actual surface is
521 * not bound. When a write message is generated to a null surface, no
522 * actual surface is written to. When a read message (including any
523 * sampling engine message) is generated to a null surface, the result
524 * is all zeros. Note that a null surface type is allowed to be used
525 * with all messages, even if it is not specificially indicated as
526 * supported. All of the remaining fields in surface state are ignored
527 * for null surfaces, with the following exceptions:
528 *
529 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
530 * depth buffer’s corresponding state for all render target surfaces,
531 * including null.
532 *
533 * - Surface Format must be R8G8B8A8_UNORM.
534 */
535 struct gl_context *ctx = &brw->ctx;
536 uint32_t *surf;
537 unsigned surface_type = BRW_SURFACE_NULL;
538 drm_intel_bo *bo = NULL;
539 unsigned pitch_minus_1 = 0;
540 uint32_t multisampling_state = 0;
541 uint32_t surf_index =
542 brw->wm.prog_data->binding_table.render_target_start + unit;
543
544 /* _NEW_BUFFERS */
545 const struct gl_framebuffer *fb = ctx->DrawBuffer;
546
547 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
548 &brw->wm.base.surf_offset[surf_index]);
549
550 if (fb->Visual.samples > 1) {
551 /* On Gen6, null render targets seem to cause GPU hangs when
552 * multisampling. So work around this problem by rendering into dummy
553 * color buffer.
554 *
555 * To decrease the amount of memory needed by the workaround buffer, we
556 * set its pitch to 128 bytes (the width of a Y tile). This means that
557 * the amount of memory needed for the workaround buffer is
558 * (width_in_tiles + height_in_tiles - 1) tiles.
559 *
560 * Note that since the workaround buffer will be interpreted by the
561 * hardware as an interleaved multisampled buffer, we need to compute
562 * width_in_tiles and height_in_tiles by dividing the width and height
563 * by 16 rather than the normal Y-tile size of 32.
564 */
565 unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
566 unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
567 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
568 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
569 size_needed);
570 bo = brw->wm.multisampled_null_render_target_bo;
571 surface_type = BRW_SURFACE_2D;
572 pitch_minus_1 = 127;
573 multisampling_state =
574 brw_get_surface_num_multisamples(fb->Visual.samples);
575 }
576
577 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
578 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
579 if (brw->gen < 6) {
580 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
581 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
582 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
583 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
584 }
585 surf[1] = bo ? bo->offset64 : 0;
586 surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
587 (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
588
589 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
590 * Notes):
591 *
592 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
593 */
594 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
595 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
596 surf[4] = multisampling_state;
597 surf[5] = 0;
598
599 if (bo) {
600 drm_intel_bo_emit_reloc(brw->batch.bo,
601 brw->wm.base.surf_offset[surf_index] + 4,
602 bo, 0,
603 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
604 }
605 }
606
607 /**
608 * Sets up a surface state structure to point at the given region.
609 * While it is only used for the front/back buffer currently, it should be
610 * usable for further buffers when doing ARB_draw_buffer support.
611 */
612 static void
613 brw_update_renderbuffer_surface(struct brw_context *brw,
614 struct gl_renderbuffer *rb,
615 bool layered,
616 unsigned int unit)
617 {
618 struct gl_context *ctx = &brw->ctx;
619 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
620 struct intel_mipmap_tree *mt = irb->mt;
621 struct intel_region *region;
622 uint32_t *surf;
623 uint32_t tile_x, tile_y;
624 uint32_t format = 0;
625 /* _NEW_BUFFERS */
626 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
627 uint32_t surf_index =
628 brw->wm.prog_data->binding_table.render_target_start + unit;
629
630 assert(!layered);
631
632 if (rb->TexImage && !brw->has_surface_tile_offset) {
633 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
634
635 if (tile_x != 0 || tile_y != 0) {
636 /* Original gen4 hardware couldn't draw to a non-tile-aligned
637 * destination in a miptree unless you actually setup your renderbuffer
638 * as a miptree and used the fragile lod/array_index/etc. controls to
639 * select the image. So, instead, we just make a new single-level
640 * miptree and render into that.
641 */
642 intel_renderbuffer_move_to_temp(brw, irb, false);
643 mt = irb->mt;
644 }
645 }
646
647 intel_miptree_used_for_rendering(irb->mt);
648
649 region = irb->mt->region;
650
651 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
652 &brw->wm.base.surf_offset[surf_index]);
653
654 format = brw->render_target_format[rb_format];
655 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
656 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
657 __FUNCTION__, _mesa_get_format_name(rb_format));
658 }
659
660 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
661 format << BRW_SURFACE_FORMAT_SHIFT);
662
663 /* reloc */
664 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
665 region->bo->offset64);
666
667 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
668 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
669
670 surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
671 (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
672
673 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
674
675 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
676 /* Note that the low bits of these fields are missing, so
677 * there's the possibility of getting in trouble.
678 */
679 assert(tile_x % 4 == 0);
680 assert(tile_y % 2 == 0);
681 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
682 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
683 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
684
685 if (brw->gen < 6) {
686 /* _NEW_COLOR */
687 if (!ctx->Color.ColorLogicOpEnabled &&
688 (ctx->Color.BlendEnabled & (1 << unit)))
689 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
690
691 if (!ctx->Color.ColorMask[unit][0])
692 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
693 if (!ctx->Color.ColorMask[unit][1])
694 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
695 if (!ctx->Color.ColorMask[unit][2])
696 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
697
698 /* As mentioned above, disable writes to the alpha component when the
699 * renderbuffer is XRGB.
700 */
701 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
702 !ctx->Color.ColorMask[unit][3]) {
703 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
704 }
705 }
706
707 drm_intel_bo_emit_reloc(brw->batch.bo,
708 brw->wm.base.surf_offset[surf_index] + 4,
709 region->bo,
710 surf[1] - region->bo->offset64,
711 I915_GEM_DOMAIN_RENDER,
712 I915_GEM_DOMAIN_RENDER);
713 }
714
715 /**
716 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
717 */
718 static void
719 brw_update_renderbuffer_surfaces(struct brw_context *brw)
720 {
721 struct gl_context *ctx = &brw->ctx;
722 GLuint i;
723
724 /* _NEW_BUFFERS | _NEW_COLOR */
725 /* Update surfaces for drawing buffers */
726 if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
727 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
728 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
729 brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
730 ctx->DrawBuffer->MaxNumLayers > 0, i);
731 } else {
732 brw->vtbl.update_null_renderbuffer_surface(brw, i);
733 }
734 }
735 } else {
736 brw->vtbl.update_null_renderbuffer_surface(brw, 0);
737 }
738 brw->state.dirty.brw |= BRW_NEW_SURFACES;
739 }
740
741 const struct brw_tracked_state brw_renderbuffer_surfaces = {
742 .dirty = {
743 .mesa = (_NEW_COLOR |
744 _NEW_BUFFERS),
745 .brw = BRW_NEW_BATCH,
746 .cache = 0
747 },
748 .emit = brw_update_renderbuffer_surfaces,
749 };
750
751 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
752 .dirty = {
753 .mesa = _NEW_BUFFERS,
754 .brw = BRW_NEW_BATCH,
755 .cache = 0
756 },
757 .emit = brw_update_renderbuffer_surfaces,
758 };
759
760
761 static void
762 update_stage_texture_surfaces(struct brw_context *brw,
763 const struct gl_program *prog,
764 struct brw_stage_state *stage_state,
765 bool for_gather)
766 {
767 if (!prog)
768 return;
769
770 struct gl_context *ctx = &brw->ctx;
771
772 uint32_t *surf_offset = stage_state->surf_offset;
773 if (for_gather)
774 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
775 else
776 surf_offset += stage_state->prog_data->binding_table.texture_start;
777
778 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
779 for (unsigned s = 0; s < num_samplers; s++) {
780 surf_offset[s] = 0;
781
782 if (prog->SamplersUsed & (1 << s)) {
783 const unsigned unit = prog->SamplerUnits[s];
784
785 /* _NEW_TEXTURE */
786 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
787 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
788 }
789 }
790 }
791 }
792
793
794 /**
795 * Construct SURFACE_STATE objects for enabled textures.
796 */
797 static void
798 brw_update_texture_surfaces(struct brw_context *brw)
799 {
800 /* BRW_NEW_VERTEX_PROGRAM */
801 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
802
803 /* BRW_NEW_GEOMETRY_PROGRAM */
804 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
805
806 /* BRW_NEW_FRAGMENT_PROGRAM */
807 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
808
809 /* _NEW_TEXTURE */
810 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
811 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
812 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
813
814 /* emit alternate set of surface state for gather. this
815 * allows the surface format to be overriden for only the
816 * gather4 messages. */
817 if (vs && vs->UsesGather)
818 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
819 if (gs && gs->UsesGather)
820 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
821 if (fs && fs->UsesGather)
822 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
823
824 brw->state.dirty.brw |= BRW_NEW_SURFACES;
825 }
826
827 const struct brw_tracked_state brw_texture_surfaces = {
828 .dirty = {
829 .mesa = _NEW_TEXTURE,
830 .brw = BRW_NEW_BATCH |
831 BRW_NEW_UNIFORM_BUFFER |
832 BRW_NEW_VERTEX_PROGRAM |
833 BRW_NEW_GEOMETRY_PROGRAM |
834 BRW_NEW_FRAGMENT_PROGRAM,
835 .cache = 0
836 },
837 .emit = brw_update_texture_surfaces,
838 };
839
840 void
841 brw_upload_ubo_surfaces(struct brw_context *brw,
842 struct gl_shader *shader,
843 struct brw_stage_state *stage_state,
844 struct brw_stage_prog_data *prog_data)
845 {
846 struct gl_context *ctx = &brw->ctx;
847
848 if (!shader)
849 return;
850
851 uint32_t *surf_offsets =
852 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
853
854 for (int i = 0; i < shader->NumUniformBlocks; i++) {
855 struct gl_uniform_buffer_binding *binding;
856 struct intel_buffer_object *intel_bo;
857
858 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
859 intel_bo = intel_buffer_object(binding->BufferObject);
860 drm_intel_bo *bo =
861 intel_bufferobj_buffer(brw, intel_bo,
862 binding->Offset,
863 binding->BufferObject->Size - binding->Offset);
864
865 /* Because behavior for referencing outside of the binding's size in the
866 * glBindBufferRange case is undefined, we can just bind the whole buffer
867 * glBindBufferBase wants and be a correct implementation.
868 */
869 brw_create_constant_surface(brw, bo, binding->Offset,
870 bo->size - binding->Offset,
871 &surf_offsets[i],
872 shader->Stage == MESA_SHADER_FRAGMENT);
873 }
874
875 if (shader->NumUniformBlocks)
876 brw->state.dirty.brw |= BRW_NEW_SURFACES;
877 }
878
879 static void
880 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
881 {
882 struct gl_context *ctx = &brw->ctx;
883 /* _NEW_PROGRAM */
884 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
885
886 if (!prog)
887 return;
888
889 /* CACHE_NEW_WM_PROG */
890 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
891 &brw->wm.base, &brw->wm.prog_data->base);
892 }
893
894 const struct brw_tracked_state brw_wm_ubo_surfaces = {
895 .dirty = {
896 .mesa = _NEW_PROGRAM,
897 .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
898 .cache = CACHE_NEW_WM_PROG,
899 },
900 .emit = brw_upload_wm_ubo_surfaces,
901 };
902
903 void
904 brw_upload_abo_surfaces(struct brw_context *brw,
905 struct gl_shader_program *prog,
906 struct brw_stage_state *stage_state,
907 struct brw_stage_prog_data *prog_data)
908 {
909 struct gl_context *ctx = &brw->ctx;
910 uint32_t *surf_offsets =
911 &stage_state->surf_offset[prog_data->binding_table.abo_start];
912
913 for (int i = 0; i < prog->NumAtomicBuffers; i++) {
914 struct gl_atomic_buffer_binding *binding =
915 &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
916 struct intel_buffer_object *intel_bo =
917 intel_buffer_object(binding->BufferObject);
918 drm_intel_bo *bo = intel_bufferobj_buffer(
919 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
920
921 brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
922 bo->size - binding->Offset,
923 &surf_offsets[i], true);
924 }
925
926 if (prog->NumUniformBlocks)
927 brw->state.dirty.brw |= BRW_NEW_SURFACES;
928 }
929
930 static void
931 brw_upload_wm_abo_surfaces(struct brw_context *brw)
932 {
933 struct gl_context *ctx = &brw->ctx;
934 /* _NEW_PROGRAM */
935 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
936
937 if (prog) {
938 /* CACHE_NEW_WM_PROG */
939 brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
940 &brw->wm.prog_data->base);
941 }
942 }
943
944 const struct brw_tracked_state brw_wm_abo_surfaces = {
945 .dirty = {
946 .mesa = _NEW_PROGRAM,
947 .brw = BRW_NEW_BATCH | BRW_NEW_ATOMIC_BUFFER,
948 .cache = CACHE_NEW_WM_PROG,
949 },
950 .emit = brw_upload_wm_abo_surfaces,
951 };
952
953 void
954 gen4_init_vtable_surface_functions(struct brw_context *brw)
955 {
956 brw->vtbl.update_texture_surface = brw_update_texture_surface;
957 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
958 brw->vtbl.update_null_renderbuffer_surface =
959 brw_update_null_renderbuffer_surface;
960 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
961 }