i965: Make a brw_stage_prog_data for storing the SURF_INDEX information.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 assert(0);
77 return 0;
78 }
79 }
80
81 uint32_t
82 brw_get_surface_tiling_bits(uint32_t tiling)
83 {
84 switch (tiling) {
85 case I915_TILING_X:
86 return BRW_SURFACE_TILED;
87 case I915_TILING_Y:
88 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
89 default:
90 return 0;
91 }
92 }
93
94
95 uint32_t
96 brw_get_surface_num_multisamples(unsigned num_samples)
97 {
98 if (num_samples > 1)
99 return BRW_SURFACE_MULTISAMPLECOUNT_4;
100 else
101 return BRW_SURFACE_MULTISAMPLECOUNT_1;
102 }
103
104
105 /**
106 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
107 * swizzling.
108 */
109 int
110 brw_get_texture_swizzle(const struct gl_context *ctx,
111 const struct gl_texture_object *t)
112 {
113 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
114
115 int swizzles[SWIZZLE_NIL + 1] = {
116 SWIZZLE_X,
117 SWIZZLE_Y,
118 SWIZZLE_Z,
119 SWIZZLE_W,
120 SWIZZLE_ZERO,
121 SWIZZLE_ONE,
122 SWIZZLE_NIL
123 };
124
125 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
126 img->_BaseFormat == GL_DEPTH_STENCIL) {
127 GLenum depth_mode = t->DepthMode;
128
129 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
130 * with depth component data specified with a sized internal format.
131 * Otherwise, it's left at the old default, GL_LUMINANCE.
132 */
133 if (_mesa_is_gles3(ctx) &&
134 img->InternalFormat != GL_DEPTH_COMPONENT &&
135 img->InternalFormat != GL_DEPTH_STENCIL) {
136 depth_mode = GL_RED;
137 }
138
139 switch (depth_mode) {
140 case GL_ALPHA:
141 swizzles[0] = SWIZZLE_ZERO;
142 swizzles[1] = SWIZZLE_ZERO;
143 swizzles[2] = SWIZZLE_ZERO;
144 swizzles[3] = SWIZZLE_X;
145 break;
146 case GL_LUMINANCE:
147 swizzles[0] = SWIZZLE_X;
148 swizzles[1] = SWIZZLE_X;
149 swizzles[2] = SWIZZLE_X;
150 swizzles[3] = SWIZZLE_ONE;
151 break;
152 case GL_INTENSITY:
153 swizzles[0] = SWIZZLE_X;
154 swizzles[1] = SWIZZLE_X;
155 swizzles[2] = SWIZZLE_X;
156 swizzles[3] = SWIZZLE_X;
157 break;
158 case GL_RED:
159 swizzles[0] = SWIZZLE_X;
160 swizzles[1] = SWIZZLE_ZERO;
161 swizzles[2] = SWIZZLE_ZERO;
162 swizzles[3] = SWIZZLE_ONE;
163 break;
164 }
165 }
166
167 /* If the texture's format is alpha-only, force R, G, and B to
168 * 0.0. Similarly, if the texture's format has no alpha channel,
169 * force the alpha value read to 1.0. This allows for the
170 * implementation to use an RGBA texture for any of these formats
171 * without leaking any unexpected values.
172 */
173 switch (img->_BaseFormat) {
174 case GL_ALPHA:
175 swizzles[0] = SWIZZLE_ZERO;
176 swizzles[1] = SWIZZLE_ZERO;
177 swizzles[2] = SWIZZLE_ZERO;
178 break;
179 case GL_RED:
180 case GL_RG:
181 case GL_RGB:
182 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
183 swizzles[3] = SWIZZLE_ONE;
184 break;
185 }
186
187 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
188 swizzles[GET_SWZ(t->_Swizzle, 1)],
189 swizzles[GET_SWZ(t->_Swizzle, 2)],
190 swizzles[GET_SWZ(t->_Swizzle, 3)]);
191 }
192
193 static void
194 gen4_emit_buffer_surface_state(struct brw_context *brw,
195 uint32_t *out_offset,
196 drm_intel_bo *bo,
197 unsigned buffer_offset,
198 unsigned surface_format,
199 unsigned buffer_size,
200 unsigned pitch)
201 {
202 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
203 6 * 4, 32, out_offset);
204 memset(surf, 0, 6 * 4);
205
206 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
207 surface_format << BRW_SURFACE_FORMAT_SHIFT |
208 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
209 surf[1] = (bo ? bo->offset : 0) + buffer_offset; /* reloc */
210 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
211 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
212 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
213 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
214
215 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
216 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
217 * physical cache. It is mapped in hardware to the sampler cache."
218 */
219 if (bo) {
220 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
221 bo, buffer_offset,
222 I915_GEM_DOMAIN_SAMPLER, 0);
223 }
224 }
225
226 static void
227 brw_update_buffer_texture_surface(struct gl_context *ctx,
228 unsigned unit,
229 uint32_t *surf_offset)
230 {
231 struct brw_context *brw = brw_context(ctx);
232 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
233 struct intel_buffer_object *intel_obj =
234 intel_buffer_object(tObj->BufferObject);
235 drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
236 gl_format format = tObj->_BufferObjectFormat;
237 uint32_t brw_format = brw_format_for_mesa_format(format);
238 int texel_size = _mesa_get_format_bytes(format);
239 int w = intel_obj ? intel_obj->Base.Size / texel_size : 0;
240
241 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
242 _mesa_problem(NULL, "bad format %s for texture buffer\n",
243 _mesa_get_format_name(format));
244 }
245
246 gen4_emit_buffer_surface_state(brw, surf_offset, bo, 0,
247 brw_format,
248 w, texel_size);
249 }
250
251 static void
252 brw_update_texture_surface(struct gl_context *ctx,
253 unsigned unit,
254 uint32_t *surf_offset,
255 bool for_gather)
256 {
257 struct brw_context *brw = brw_context(ctx);
258 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
259 struct intel_texture_object *intelObj = intel_texture_object(tObj);
260 struct intel_mipmap_tree *mt = intelObj->mt;
261 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
262 uint32_t *surf;
263
264 if (tObj->Target == GL_TEXTURE_BUFFER) {
265 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
266 return;
267 }
268
269 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
270 6 * 4, 32, surf_offset);
271
272 (void) for_gather; /* no w/a to apply for this gen */
273
274 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
275 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
276 BRW_SURFACE_CUBEFACE_ENABLES |
277 (translate_tex_format(brw,
278 mt->format,
279 tObj->DepthMode,
280 sampler->sRGBDecode) <<
281 BRW_SURFACE_FORMAT_SHIFT));
282
283 surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
284
285 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
286 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
287 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
288
289 surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
290 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
291 (intelObj->mt->region->pitch - 1) <<
292 BRW_SURFACE_PITCH_SHIFT);
293
294 surf[4] = (brw_get_surface_num_multisamples(intelObj->mt->num_samples) |
295 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
296
297 surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
298
299 /* Emit relocation to surface contents */
300 drm_intel_bo_emit_reloc(brw->batch.bo,
301 *surf_offset + 4,
302 intelObj->mt->region->bo,
303 surf[1] - intelObj->mt->region->bo->offset,
304 I915_GEM_DOMAIN_SAMPLER, 0);
305 }
306
307 /**
308 * Create the constant buffer surface. Vertex/fragment shader constants will be
309 * read from this buffer with Data Port Read instructions/messages.
310 */
311 static void
312 brw_create_constant_surface(struct brw_context *brw,
313 drm_intel_bo *bo,
314 uint32_t offset,
315 uint32_t size,
316 uint32_t *out_offset,
317 bool dword_pitch)
318 {
319 uint32_t stride = dword_pitch ? 4 : 16;
320 uint32_t elements = ALIGN(size, stride) / stride;
321
322 gen4_emit_buffer_surface_state(brw, out_offset, bo, offset,
323 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
324 elements, stride);
325 }
326
327 /**
328 * Set up a binding table entry for use by stream output logic (transform
329 * feedback).
330 *
331 * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
332 */
333 void
334 brw_update_sol_surface(struct brw_context *brw,
335 struct gl_buffer_object *buffer_obj,
336 uint32_t *out_offset, unsigned num_vector_components,
337 unsigned stride_dwords, unsigned offset_dwords)
338 {
339 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
340 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
341 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
342 out_offset);
343 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
344 uint32_t offset_bytes = 4 * offset_dwords;
345 size_t size_dwords = buffer_obj->Size / 4;
346 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
347
348 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
349 * too big to map using a single binding table entry?
350 */
351 assert((size_dwords - offset_dwords) / stride_dwords
352 <= BRW_MAX_NUM_BUFFER_ENTRIES);
353
354 if (size_dwords > offset_dwords + num_vector_components) {
355 /* There is room for at least 1 transform feedback output in the buffer.
356 * Compute the number of additional transform feedback outputs the
357 * buffer has room for.
358 */
359 buffer_size_minus_1 =
360 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
361 } else {
362 /* There isn't even room for a single transform feedback output in the
363 * buffer. We can't configure the binding table entry to prevent output
364 * entirely; we'll have to rely on the geometry shader to detect
365 * overflow. But to minimize the damage in case of a bug, set up the
366 * binding table entry to just allow a single output.
367 */
368 buffer_size_minus_1 = 0;
369 }
370 width = buffer_size_minus_1 & 0x7f;
371 height = (buffer_size_minus_1 & 0xfff80) >> 7;
372 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
373
374 switch (num_vector_components) {
375 case 1:
376 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
377 break;
378 case 2:
379 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
380 break;
381 case 3:
382 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
383 break;
384 case 4:
385 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
386 break;
387 default:
388 assert(!"Invalid vector size for transform feedback output");
389 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
390 break;
391 }
392
393 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
394 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
395 surface_format << BRW_SURFACE_FORMAT_SHIFT |
396 BRW_SURFACE_RC_READ_WRITE;
397 surf[1] = bo->offset + offset_bytes; /* reloc */
398 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
399 height << BRW_SURFACE_HEIGHT_SHIFT);
400 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
401 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
402 surf[4] = 0;
403 surf[5] = 0;
404
405 /* Emit relocation to surface contents. */
406 drm_intel_bo_emit_reloc(brw->batch.bo,
407 *out_offset + 4,
408 bo, offset_bytes,
409 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
410 }
411
412 /* Creates a new WM constant buffer reflecting the current fragment program's
413 * constants, if needed by the fragment program.
414 *
415 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
416 * state atom.
417 */
418 static void
419 brw_upload_wm_pull_constants(struct brw_context *brw)
420 {
421 struct gl_context *ctx = &brw->ctx;
422 /* BRW_NEW_FRAGMENT_PROGRAM */
423 struct brw_fragment_program *fp =
424 (struct brw_fragment_program *) brw->fragment_program;
425 struct gl_program_parameter_list *params = fp->program.Base.Parameters;
426 const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
427 const int surf_index =
428 brw->wm.prog_data->base.binding_table.pull_constants_start;
429 float *constants;
430 unsigned int i;
431
432 _mesa_load_state_parameters(ctx, params);
433
434 /* CACHE_NEW_WM_PROG */
435 if (brw->wm.prog_data->nr_pull_params == 0) {
436 if (brw->wm.base.const_bo) {
437 drm_intel_bo_unreference(brw->wm.base.const_bo);
438 brw->wm.base.const_bo = NULL;
439 brw->wm.base.surf_offset[surf_index] = 0;
440 brw->state.dirty.brw |= BRW_NEW_SURFACES;
441 }
442 return;
443 }
444
445 drm_intel_bo_unreference(brw->wm.base.const_bo);
446 brw->wm.base.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
447 size, 64);
448
449 /* _NEW_PROGRAM_CONSTANTS */
450 drm_intel_gem_bo_map_gtt(brw->wm.base.const_bo);
451 constants = brw->wm.base.const_bo->virtual;
452 for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
453 constants[i] = *brw->wm.prog_data->pull_param[i];
454 }
455 drm_intel_gem_bo_unmap_gtt(brw->wm.base.const_bo);
456
457 brw->vtbl.create_constant_surface(brw, brw->wm.base.const_bo, 0, size,
458 &brw->wm.base.surf_offset[surf_index],
459 true);
460
461 brw->state.dirty.brw |= BRW_NEW_SURFACES;
462 }
463
464 const struct brw_tracked_state brw_wm_pull_constants = {
465 .dirty = {
466 .mesa = (_NEW_PROGRAM_CONSTANTS),
467 .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
468 .cache = CACHE_NEW_WM_PROG,
469 },
470 .emit = brw_upload_wm_pull_constants,
471 };
472
473 static void
474 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
475 {
476 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
477 * Notes):
478 *
479 * A null surface will be used in instances where an actual surface is
480 * not bound. When a write message is generated to a null surface, no
481 * actual surface is written to. When a read message (including any
482 * sampling engine message) is generated to a null surface, the result
483 * is all zeros. Note that a null surface type is allowed to be used
484 * with all messages, even if it is not specificially indicated as
485 * supported. All of the remaining fields in surface state are ignored
486 * for null surfaces, with the following exceptions:
487 *
488 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
489 * depth buffer’s corresponding state for all render target surfaces,
490 * including null.
491 *
492 * - Surface Format must be R8G8B8A8_UNORM.
493 */
494 struct gl_context *ctx = &brw->ctx;
495 uint32_t *surf;
496 unsigned surface_type = BRW_SURFACE_NULL;
497 drm_intel_bo *bo = NULL;
498 unsigned pitch_minus_1 = 0;
499 uint32_t multisampling_state = 0;
500 uint32_t surf_index =
501 brw->wm.prog_data->binding_table.render_target_start + unit;
502
503 /* _NEW_BUFFERS */
504 const struct gl_framebuffer *fb = ctx->DrawBuffer;
505
506 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
507 &brw->wm.base.surf_offset[surf_index]);
508
509 if (fb->Visual.samples > 1) {
510 /* On Gen6, null render targets seem to cause GPU hangs when
511 * multisampling. So work around this problem by rendering into dummy
512 * color buffer.
513 *
514 * To decrease the amount of memory needed by the workaround buffer, we
515 * set its pitch to 128 bytes (the width of a Y tile). This means that
516 * the amount of memory needed for the workaround buffer is
517 * (width_in_tiles + height_in_tiles - 1) tiles.
518 *
519 * Note that since the workaround buffer will be interpreted by the
520 * hardware as an interleaved multisampled buffer, we need to compute
521 * width_in_tiles and height_in_tiles by dividing the width and height
522 * by 16 rather than the normal Y-tile size of 32.
523 */
524 unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
525 unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
526 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
527 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
528 size_needed);
529 bo = brw->wm.multisampled_null_render_target_bo;
530 surface_type = BRW_SURFACE_2D;
531 pitch_minus_1 = 127;
532 multisampling_state =
533 brw_get_surface_num_multisamples(fb->Visual.samples);
534 }
535
536 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
537 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
538 if (brw->gen < 6) {
539 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
540 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
541 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
542 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
543 }
544 surf[1] = bo ? bo->offset : 0;
545 surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
546 (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
547
548 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
549 * Notes):
550 *
551 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
552 */
553 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
554 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
555 surf[4] = multisampling_state;
556 surf[5] = 0;
557
558 if (bo) {
559 drm_intel_bo_emit_reloc(brw->batch.bo,
560 brw->wm.base.surf_offset[surf_index] + 4,
561 bo, 0,
562 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
563 }
564 }
565
566 /**
567 * Sets up a surface state structure to point at the given region.
568 * While it is only used for the front/back buffer currently, it should be
569 * usable for further buffers when doing ARB_draw_buffer support.
570 */
571 static void
572 brw_update_renderbuffer_surface(struct brw_context *brw,
573 struct gl_renderbuffer *rb,
574 bool layered,
575 unsigned int unit)
576 {
577 struct gl_context *ctx = &brw->ctx;
578 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
579 struct intel_mipmap_tree *mt = irb->mt;
580 struct intel_region *region;
581 uint32_t *surf;
582 uint32_t tile_x, tile_y;
583 uint32_t format = 0;
584 /* _NEW_BUFFERS */
585 gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
586 uint32_t surf_index =
587 brw->wm.prog_data->binding_table.render_target_start + unit;
588
589 assert(!layered);
590
591 if (rb->TexImage && !brw->has_surface_tile_offset) {
592 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
593
594 if (tile_x != 0 || tile_y != 0) {
595 /* Original gen4 hardware couldn't draw to a non-tile-aligned
596 * destination in a miptree unless you actually setup your renderbuffer
597 * as a miptree and used the fragile lod/array_index/etc. controls to
598 * select the image. So, instead, we just make a new single-level
599 * miptree and render into that.
600 */
601 intel_renderbuffer_move_to_temp(brw, irb, false);
602 mt = irb->mt;
603 }
604 }
605
606 intel_miptree_used_for_rendering(irb->mt);
607
608 region = irb->mt->region;
609
610 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
611 &brw->wm.base.surf_offset[surf_index]);
612
613 format = brw->render_target_format[rb_format];
614 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
615 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
616 __FUNCTION__, _mesa_get_format_name(rb_format));
617 }
618
619 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
620 format << BRW_SURFACE_FORMAT_SHIFT);
621
622 /* reloc */
623 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
624 region->bo->offset);
625
626 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
627 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
628
629 surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
630 (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
631
632 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
633
634 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
635 /* Note that the low bits of these fields are missing, so
636 * there's the possibility of getting in trouble.
637 */
638 assert(tile_x % 4 == 0);
639 assert(tile_y % 2 == 0);
640 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
641 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
642 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
643
644 if (brw->gen < 6) {
645 /* _NEW_COLOR */
646 if (!ctx->Color.ColorLogicOpEnabled &&
647 (ctx->Color.BlendEnabled & (1 << unit)))
648 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
649
650 if (!ctx->Color.ColorMask[unit][0])
651 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
652 if (!ctx->Color.ColorMask[unit][1])
653 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
654 if (!ctx->Color.ColorMask[unit][2])
655 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
656
657 /* As mentioned above, disable writes to the alpha component when the
658 * renderbuffer is XRGB.
659 */
660 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
661 !ctx->Color.ColorMask[unit][3]) {
662 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
663 }
664 }
665
666 drm_intel_bo_emit_reloc(brw->batch.bo,
667 brw->wm.base.surf_offset[surf_index] + 4,
668 region->bo,
669 surf[1] - region->bo->offset,
670 I915_GEM_DOMAIN_RENDER,
671 I915_GEM_DOMAIN_RENDER);
672 }
673
674 /**
675 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
676 */
677 static void
678 brw_update_renderbuffer_surfaces(struct brw_context *brw)
679 {
680 struct gl_context *ctx = &brw->ctx;
681 GLuint i;
682
683 /* _NEW_BUFFERS | _NEW_COLOR */
684 /* Update surfaces for drawing buffers */
685 if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
686 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
687 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
688 brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
689 ctx->DrawBuffer->Layered, i);
690 } else {
691 brw->vtbl.update_null_renderbuffer_surface(brw, i);
692 }
693 }
694 } else {
695 brw->vtbl.update_null_renderbuffer_surface(brw, 0);
696 }
697 brw->state.dirty.brw |= BRW_NEW_SURFACES;
698 }
699
700 const struct brw_tracked_state brw_renderbuffer_surfaces = {
701 .dirty = {
702 .mesa = (_NEW_COLOR |
703 _NEW_BUFFERS),
704 .brw = BRW_NEW_BATCH,
705 .cache = 0
706 },
707 .emit = brw_update_renderbuffer_surfaces,
708 };
709
710 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
711 .dirty = {
712 .mesa = _NEW_BUFFERS,
713 .brw = BRW_NEW_BATCH,
714 .cache = 0
715 },
716 .emit = brw_update_renderbuffer_surfaces,
717 };
718
719
720 static void
721 update_stage_texture_surfaces(struct brw_context *brw,
722 const struct gl_program *prog,
723 struct brw_stage_state *stage_state,
724 bool for_gather)
725 {
726 if (!prog)
727 return;
728
729 struct gl_context *ctx = &brw->ctx;
730
731 uint32_t *surf_offset = stage_state->surf_offset;
732 if (for_gather)
733 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
734 else
735 surf_offset += stage_state->prog_data->binding_table.texture_start;
736
737 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
738 for (unsigned s = 0; s < num_samplers; s++) {
739 surf_offset[s] = 0;
740
741 if (prog->SamplersUsed & (1 << s)) {
742 const unsigned unit = prog->SamplerUnits[s];
743
744 /* _NEW_TEXTURE */
745 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
746 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
747 }
748 }
749 }
750 }
751
752
753 /**
754 * Construct SURFACE_STATE objects for enabled textures.
755 */
756 static void
757 brw_update_texture_surfaces(struct brw_context *brw)
758 {
759 /* BRW_NEW_VERTEX_PROGRAM */
760 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
761
762 /* BRW_NEW_GEOMETRY_PROGRAM */
763 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
764
765 /* BRW_NEW_FRAGMENT_PROGRAM */
766 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
767
768 /* _NEW_TEXTURE */
769 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
770 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
771 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
772
773 /* emit alternate set of surface state for gather. this
774 * allows the surface format to be overriden for only the
775 * gather4 messages. */
776 if (vs && vs->UsesGather)
777 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
778 if (gs && gs->UsesGather)
779 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
780 if (fs && fs->UsesGather)
781 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
782
783 brw->state.dirty.brw |= BRW_NEW_SURFACES;
784 }
785
786 const struct brw_tracked_state brw_texture_surfaces = {
787 .dirty = {
788 .mesa = _NEW_TEXTURE,
789 .brw = BRW_NEW_BATCH |
790 BRW_NEW_VERTEX_PROGRAM |
791 BRW_NEW_GEOMETRY_PROGRAM |
792 BRW_NEW_FRAGMENT_PROGRAM,
793 .cache = 0
794 },
795 .emit = brw_update_texture_surfaces,
796 };
797
798 void
799 brw_upload_ubo_surfaces(struct brw_context *brw,
800 struct gl_shader *shader,
801 struct brw_stage_state *stage_state,
802 struct brw_stage_prog_data *prog_data)
803 {
804 struct gl_context *ctx = &brw->ctx;
805
806 if (!shader)
807 return;
808
809 uint32_t *surf_offsets =
810 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
811
812 for (int i = 0; i < shader->NumUniformBlocks; i++) {
813 struct gl_uniform_buffer_binding *binding;
814 struct intel_buffer_object *intel_bo;
815
816 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
817 intel_bo = intel_buffer_object(binding->BufferObject);
818 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);
819
820 /* Because behavior for referencing outside of the binding's size in the
821 * glBindBufferRange case is undefined, we can just bind the whole buffer
822 * glBindBufferBase wants and be a correct implementation.
823 */
824 brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
825 bo->size - binding->Offset,
826 &surf_offsets[i],
827 shader->Type == GL_FRAGMENT_SHADER);
828 }
829
830 if (shader->NumUniformBlocks)
831 brw->state.dirty.brw |= BRW_NEW_SURFACES;
832 }
833
834 static void
835 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
836 {
837 struct gl_context *ctx = &brw->ctx;
838 /* _NEW_PROGRAM */
839 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
840
841 if (!prog)
842 return;
843
844 /* CACHE_NEW_WM_PROG */
845 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
846 &brw->wm.base, &brw->wm.prog_data->base);
847 }
848
849 const struct brw_tracked_state brw_wm_ubo_surfaces = {
850 .dirty = {
851 .mesa = _NEW_PROGRAM,
852 .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
853 .cache = CACHE_NEW_WM_PROG,
854 },
855 .emit = brw_upload_wm_ubo_surfaces,
856 };
857
858 void
859 gen4_init_vtable_surface_functions(struct brw_context *brw)
860 {
861 brw->vtbl.update_texture_surface = brw_update_texture_surface;
862 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
863 brw->vtbl.update_null_renderbuffer_surface =
864 brw_update_null_renderbuffer_surface;
865 brw->vtbl.create_constant_surface = brw_create_constant_surface;
866 }