i965: Refactor Gen4-6 SURFACE_STATE setup for buffer surfaces.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 assert(0);
77 return 0;
78 }
79 }
80
81 uint32_t
82 brw_get_surface_tiling_bits(uint32_t tiling)
83 {
84 switch (tiling) {
85 case I915_TILING_X:
86 return BRW_SURFACE_TILED;
87 case I915_TILING_Y:
88 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
89 default:
90 return 0;
91 }
92 }
93
94
95 uint32_t
96 brw_get_surface_num_multisamples(unsigned num_samples)
97 {
98 if (num_samples > 1)
99 return BRW_SURFACE_MULTISAMPLECOUNT_4;
100 else
101 return BRW_SURFACE_MULTISAMPLECOUNT_1;
102 }
103
104
105 /**
106 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
107 * swizzling.
108 */
109 int
110 brw_get_texture_swizzle(const struct gl_context *ctx,
111 const struct gl_texture_object *t)
112 {
113 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
114
115 int swizzles[SWIZZLE_NIL + 1] = {
116 SWIZZLE_X,
117 SWIZZLE_Y,
118 SWIZZLE_Z,
119 SWIZZLE_W,
120 SWIZZLE_ZERO,
121 SWIZZLE_ONE,
122 SWIZZLE_NIL
123 };
124
125 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
126 img->_BaseFormat == GL_DEPTH_STENCIL) {
127 GLenum depth_mode = t->DepthMode;
128
129 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
130 * with depth component data specified with a sized internal format.
131 * Otherwise, it's left at the old default, GL_LUMINANCE.
132 */
133 if (_mesa_is_gles3(ctx) &&
134 img->InternalFormat != GL_DEPTH_COMPONENT &&
135 img->InternalFormat != GL_DEPTH_STENCIL) {
136 depth_mode = GL_RED;
137 }
138
139 switch (depth_mode) {
140 case GL_ALPHA:
141 swizzles[0] = SWIZZLE_ZERO;
142 swizzles[1] = SWIZZLE_ZERO;
143 swizzles[2] = SWIZZLE_ZERO;
144 swizzles[3] = SWIZZLE_X;
145 break;
146 case GL_LUMINANCE:
147 swizzles[0] = SWIZZLE_X;
148 swizzles[1] = SWIZZLE_X;
149 swizzles[2] = SWIZZLE_X;
150 swizzles[3] = SWIZZLE_ONE;
151 break;
152 case GL_INTENSITY:
153 swizzles[0] = SWIZZLE_X;
154 swizzles[1] = SWIZZLE_X;
155 swizzles[2] = SWIZZLE_X;
156 swizzles[3] = SWIZZLE_X;
157 break;
158 case GL_RED:
159 swizzles[0] = SWIZZLE_X;
160 swizzles[1] = SWIZZLE_ZERO;
161 swizzles[2] = SWIZZLE_ZERO;
162 swizzles[3] = SWIZZLE_ONE;
163 break;
164 }
165 }
166
167 /* If the texture's format is alpha-only, force R, G, and B to
168 * 0.0. Similarly, if the texture's format has no alpha channel,
169 * force the alpha value read to 1.0. This allows for the
170 * implementation to use an RGBA texture for any of these formats
171 * without leaking any unexpected values.
172 */
173 switch (img->_BaseFormat) {
174 case GL_ALPHA:
175 swizzles[0] = SWIZZLE_ZERO;
176 swizzles[1] = SWIZZLE_ZERO;
177 swizzles[2] = SWIZZLE_ZERO;
178 break;
179 case GL_RED:
180 case GL_RG:
181 case GL_RGB:
182 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
183 swizzles[3] = SWIZZLE_ONE;
184 break;
185 }
186
187 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
188 swizzles[GET_SWZ(t->_Swizzle, 1)],
189 swizzles[GET_SWZ(t->_Swizzle, 2)],
190 swizzles[GET_SWZ(t->_Swizzle, 3)]);
191 }
192
193 static void
194 gen4_emit_buffer_surface_state(struct brw_context *brw,
195 uint32_t *out_offset,
196 drm_intel_bo *bo,
197 unsigned buffer_offset,
198 unsigned surface_format,
199 unsigned buffer_size,
200 unsigned pitch)
201 {
202 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
203 6 * 4, 32, out_offset);
204 memset(surf, 0, 6 * 4);
205
206 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
207 surface_format << BRW_SURFACE_FORMAT_SHIFT |
208 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
209 surf[1] = (bo ? bo->offset : 0) + buffer_offset; /* reloc */
210 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
211 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
212 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
213 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
214
215 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
216 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
217 * physical cache. It is mapped in hardware to the sampler cache."
218 */
219 if (bo) {
220 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
221 bo, buffer_offset,
222 I915_GEM_DOMAIN_SAMPLER, 0);
223 }
224 }
225
226 static void
227 brw_update_buffer_texture_surface(struct gl_context *ctx,
228 unsigned unit,
229 uint32_t *surf_offset)
230 {
231 struct brw_context *brw = brw_context(ctx);
232 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
233 struct intel_buffer_object *intel_obj =
234 intel_buffer_object(tObj->BufferObject);
235 drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
236 gl_format format = tObj->_BufferObjectFormat;
237 uint32_t brw_format = brw_format_for_mesa_format(format);
238 int texel_size = _mesa_get_format_bytes(format);
239 int w = intel_obj ? intel_obj->Base.Size / texel_size : 0;
240
241 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
242 _mesa_problem(NULL, "bad format %s for texture buffer\n",
243 _mesa_get_format_name(format));
244 }
245
246 gen4_emit_buffer_surface_state(brw, surf_offset, bo, 0,
247 brw_format,
248 w, texel_size);
249 }
250
251 static void
252 brw_update_texture_surface(struct gl_context *ctx,
253 unsigned unit,
254 uint32_t *surf_offset)
255 {
256 struct brw_context *brw = brw_context(ctx);
257 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
258 struct intel_texture_object *intelObj = intel_texture_object(tObj);
259 struct intel_mipmap_tree *mt = intelObj->mt;
260 struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
261 struct intel_texture_image *intel_image = intel_texture_image(firstImage);
262 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
263 uint32_t *surf;
264
265 if (tObj->Target == GL_TEXTURE_BUFFER) {
266 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
267 return;
268 }
269
270 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
271 6 * 4, 32, surf_offset);
272
273 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
274 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
275 BRW_SURFACE_CUBEFACE_ENABLES |
276 (translate_tex_format(brw,
277 mt->format,
278 tObj->DepthMode,
279 sampler->sRGBDecode) <<
280 BRW_SURFACE_FORMAT_SHIFT));
281
282 surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
283
284 surf[2] = ((intelObj->_MaxLevel - intel_image->mt->first_level) << BRW_SURFACE_LOD_SHIFT |
285 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
286 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
287
288 surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
289 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
290 (intelObj->mt->region->pitch - 1) <<
291 BRW_SURFACE_PITCH_SHIFT);
292
293 surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);
294
295 surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
296
297 /* Emit relocation to surface contents */
298 drm_intel_bo_emit_reloc(brw->batch.bo,
299 *surf_offset + 4,
300 intelObj->mt->region->bo,
301 surf[1] - intelObj->mt->region->bo->offset,
302 I915_GEM_DOMAIN_SAMPLER, 0);
303 }
304
305 /**
306 * Create the constant buffer surface. Vertex/fragment shader constants will be
307 * read from this buffer with Data Port Read instructions/messages.
308 */
309 static void
310 brw_create_constant_surface(struct brw_context *brw,
311 drm_intel_bo *bo,
312 uint32_t offset,
313 uint32_t size,
314 uint32_t *out_offset,
315 bool dword_pitch)
316 {
317 uint32_t stride = dword_pitch ? 4 : 16;
318 uint32_t elements = ALIGN(size, stride) / stride;
319
320 gen4_emit_buffer_surface_state(brw, out_offset, bo, offset,
321 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
322 elements, stride);
323 }
324
325 /**
326 * Set up a binding table entry for use by stream output logic (transform
327 * feedback).
328 *
329 * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
330 */
331 void
332 brw_update_sol_surface(struct brw_context *brw,
333 struct gl_buffer_object *buffer_obj,
334 uint32_t *out_offset, unsigned num_vector_components,
335 unsigned stride_dwords, unsigned offset_dwords)
336 {
337 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
338 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
339 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
340 out_offset);
341 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
342 uint32_t offset_bytes = 4 * offset_dwords;
343 size_t size_dwords = buffer_obj->Size / 4;
344 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
345
346 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
347 * too big to map using a single binding table entry?
348 */
349 assert((size_dwords - offset_dwords) / stride_dwords
350 <= BRW_MAX_NUM_BUFFER_ENTRIES);
351
352 if (size_dwords > offset_dwords + num_vector_components) {
353 /* There is room for at least 1 transform feedback output in the buffer.
354 * Compute the number of additional transform feedback outputs the
355 * buffer has room for.
356 */
357 buffer_size_minus_1 =
358 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
359 } else {
360 /* There isn't even room for a single transform feedback output in the
361 * buffer. We can't configure the binding table entry to prevent output
362 * entirely; we'll have to rely on the geometry shader to detect
363 * overflow. But to minimize the damage in case of a bug, set up the
364 * binding table entry to just allow a single output.
365 */
366 buffer_size_minus_1 = 0;
367 }
368 width = buffer_size_minus_1 & 0x7f;
369 height = (buffer_size_minus_1 & 0xfff80) >> 7;
370 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
371
372 switch (num_vector_components) {
373 case 1:
374 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
375 break;
376 case 2:
377 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
378 break;
379 case 3:
380 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
381 break;
382 case 4:
383 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
384 break;
385 default:
386 assert(!"Invalid vector size for transform feedback output");
387 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
388 break;
389 }
390
391 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
392 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
393 surface_format << BRW_SURFACE_FORMAT_SHIFT |
394 BRW_SURFACE_RC_READ_WRITE;
395 surf[1] = bo->offset + offset_bytes; /* reloc */
396 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
397 height << BRW_SURFACE_HEIGHT_SHIFT);
398 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
399 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
400 surf[4] = 0;
401 surf[5] = 0;
402
403 /* Emit relocation to surface contents. */
404 drm_intel_bo_emit_reloc(brw->batch.bo,
405 *out_offset + 4,
406 bo, offset_bytes,
407 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
408 }
409
410 /* Creates a new WM constant buffer reflecting the current fragment program's
411 * constants, if needed by the fragment program.
412 *
413 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
414 * state atom.
415 */
416 static void
417 brw_upload_wm_pull_constants(struct brw_context *brw)
418 {
419 struct gl_context *ctx = &brw->ctx;
420 /* BRW_NEW_FRAGMENT_PROGRAM */
421 struct brw_fragment_program *fp =
422 (struct brw_fragment_program *) brw->fragment_program;
423 struct gl_program_parameter_list *params = fp->program.Base.Parameters;
424 const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
425 const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
426 float *constants;
427 unsigned int i;
428
429 _mesa_load_state_parameters(ctx, params);
430
431 /* CACHE_NEW_WM_PROG */
432 if (brw->wm.prog_data->nr_pull_params == 0) {
433 if (brw->wm.base.const_bo) {
434 drm_intel_bo_unreference(brw->wm.base.const_bo);
435 brw->wm.base.const_bo = NULL;
436 brw->wm.base.surf_offset[surf_index] = 0;
437 brw->state.dirty.brw |= BRW_NEW_SURFACES;
438 }
439 return;
440 }
441
442 drm_intel_bo_unreference(brw->wm.base.const_bo);
443 brw->wm.base.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
444 size, 64);
445
446 /* _NEW_PROGRAM_CONSTANTS */
447 drm_intel_gem_bo_map_gtt(brw->wm.base.const_bo);
448 constants = brw->wm.base.const_bo->virtual;
449 for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
450 constants[i] = *brw->wm.prog_data->pull_param[i];
451 }
452 drm_intel_gem_bo_unmap_gtt(brw->wm.base.const_bo);
453
454 brw->vtbl.create_constant_surface(brw, brw->wm.base.const_bo, 0, size,
455 &brw->wm.base.surf_offset[surf_index],
456 true);
457
458 brw->state.dirty.brw |= BRW_NEW_SURFACES;
459 }
460
461 const struct brw_tracked_state brw_wm_pull_constants = {
462 .dirty = {
463 .mesa = (_NEW_PROGRAM_CONSTANTS),
464 .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
465 .cache = CACHE_NEW_WM_PROG,
466 },
467 .emit = brw_upload_wm_pull_constants,
468 };
469
470 static void
471 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
472 {
473 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
474 * Notes):
475 *
476 * A null surface will be used in instances where an actual surface is
477 * not bound. When a write message is generated to a null surface, no
478 * actual surface is written to. When a read message (including any
479 * sampling engine message) is generated to a null surface, the result
480 * is all zeros. Note that a null surface type is allowed to be used
481 * with all messages, even if it is not specificially indicated as
482 * supported. All of the remaining fields in surface state are ignored
483 * for null surfaces, with the following exceptions:
484 *
485 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
486 * depth buffer’s corresponding state for all render target surfaces,
487 * including null.
488 *
489 * - Surface Format must be R8G8B8A8_UNORM.
490 */
491 struct gl_context *ctx = &brw->ctx;
492 uint32_t *surf;
493 unsigned surface_type = BRW_SURFACE_NULL;
494 drm_intel_bo *bo = NULL;
495 unsigned pitch_minus_1 = 0;
496 uint32_t multisampling_state = 0;
497
498 /* _NEW_BUFFERS */
499 const struct gl_framebuffer *fb = ctx->DrawBuffer;
500
501 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
502 &brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)]);
503
504 if (fb->Visual.samples > 1) {
505 /* On Gen6, null render targets seem to cause GPU hangs when
506 * multisampling. So work around this problem by rendering into dummy
507 * color buffer.
508 *
509 * To decrease the amount of memory needed by the workaround buffer, we
510 * set its pitch to 128 bytes (the width of a Y tile). This means that
511 * the amount of memory needed for the workaround buffer is
512 * (width_in_tiles + height_in_tiles - 1) tiles.
513 *
514 * Note that since the workaround buffer will be interpreted by the
515 * hardware as an interleaved multisampled buffer, we need to compute
516 * width_in_tiles and height_in_tiles by dividing the width and height
517 * by 16 rather than the normal Y-tile size of 32.
518 */
519 unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
520 unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
521 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
522 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
523 size_needed);
524 bo = brw->wm.multisampled_null_render_target_bo;
525 surface_type = BRW_SURFACE_2D;
526 pitch_minus_1 = 127;
527 multisampling_state =
528 brw_get_surface_num_multisamples(fb->Visual.samples);
529 }
530
531 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
532 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
533 if (brw->gen < 6) {
534 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
535 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
536 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
537 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
538 }
539 surf[1] = bo ? bo->offset : 0;
540 surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
541 (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
542
543 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
544 * Notes):
545 *
546 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
547 */
548 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
549 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
550 surf[4] = multisampling_state;
551 surf[5] = 0;
552
553 if (bo) {
554 drm_intel_bo_emit_reloc(brw->batch.bo,
555 brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
556 bo, 0,
557 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
558 }
559 }
560
561 /**
562 * Sets up a surface state structure to point at the given region.
563 * While it is only used for the front/back buffer currently, it should be
564 * usable for further buffers when doing ARB_draw_buffer support.
565 */
566 static void
567 brw_update_renderbuffer_surface(struct brw_context *brw,
568 struct gl_renderbuffer *rb,
569 bool layered,
570 unsigned int unit)
571 {
572 struct gl_context *ctx = &brw->ctx;
573 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
574 struct intel_mipmap_tree *mt = irb->mt;
575 struct intel_region *region;
576 uint32_t *surf;
577 uint32_t tile_x, tile_y;
578 uint32_t format = 0;
579 /* _NEW_BUFFERS */
580 gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
581
582 assert(!layered);
583
584 if (rb->TexImage && !brw->has_surface_tile_offset) {
585 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
586
587 if (tile_x != 0 || tile_y != 0) {
588 /* Original gen4 hardware couldn't draw to a non-tile-aligned
589 * destination in a miptree unless you actually setup your renderbuffer
590 * as a miptree and used the fragile lod/array_index/etc. controls to
591 * select the image. So, instead, we just make a new single-level
592 * miptree and render into that.
593 */
594 intel_renderbuffer_move_to_temp(brw, irb, false);
595 mt = irb->mt;
596 }
597 }
598
599 intel_miptree_used_for_rendering(irb->mt);
600
601 region = irb->mt->region;
602
603 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
604 &brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)]);
605
606 format = brw->render_target_format[rb_format];
607 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
608 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
609 __FUNCTION__, _mesa_get_format_name(rb_format));
610 }
611
612 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
613 format << BRW_SURFACE_FORMAT_SHIFT);
614
615 /* reloc */
616 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
617 region->bo->offset);
618
619 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
620 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
621
622 surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
623 (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
624
625 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
626
627 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
628 /* Note that the low bits of these fields are missing, so
629 * there's the possibility of getting in trouble.
630 */
631 assert(tile_x % 4 == 0);
632 assert(tile_y % 2 == 0);
633 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
634 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
635 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
636
637 if (brw->gen < 6) {
638 /* _NEW_COLOR */
639 if (!ctx->Color.ColorLogicOpEnabled &&
640 (ctx->Color.BlendEnabled & (1 << unit)))
641 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
642
643 if (!ctx->Color.ColorMask[unit][0])
644 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
645 if (!ctx->Color.ColorMask[unit][1])
646 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
647 if (!ctx->Color.ColorMask[unit][2])
648 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
649
650 /* As mentioned above, disable writes to the alpha component when the
651 * renderbuffer is XRGB.
652 */
653 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
654 !ctx->Color.ColorMask[unit][3]) {
655 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
656 }
657 }
658
659 drm_intel_bo_emit_reloc(brw->batch.bo,
660 brw->wm.base.surf_offset[SURF_INDEX_DRAW(unit)] + 4,
661 region->bo,
662 surf[1] - region->bo->offset,
663 I915_GEM_DOMAIN_RENDER,
664 I915_GEM_DOMAIN_RENDER);
665 }
666
667 /**
668 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
669 */
670 static void
671 brw_update_renderbuffer_surfaces(struct brw_context *brw)
672 {
673 struct gl_context *ctx = &brw->ctx;
674 GLuint i;
675
676 /* _NEW_BUFFERS | _NEW_COLOR */
677 /* Update surfaces for drawing buffers */
678 if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
679 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
680 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
681 brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
682 ctx->DrawBuffer->Layered, i);
683 } else {
684 brw->vtbl.update_null_renderbuffer_surface(brw, i);
685 }
686 }
687 } else {
688 brw->vtbl.update_null_renderbuffer_surface(brw, 0);
689 }
690 brw->state.dirty.brw |= BRW_NEW_SURFACES;
691 }
692
693 const struct brw_tracked_state brw_renderbuffer_surfaces = {
694 .dirty = {
695 .mesa = (_NEW_COLOR |
696 _NEW_BUFFERS),
697 .brw = BRW_NEW_BATCH,
698 .cache = 0
699 },
700 .emit = brw_update_renderbuffer_surfaces,
701 };
702
703 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
704 .dirty = {
705 .mesa = _NEW_BUFFERS,
706 .brw = BRW_NEW_BATCH,
707 .cache = 0
708 },
709 .emit = brw_update_renderbuffer_surfaces,
710 };
711
712
713 static void
714 update_stage_texture_surfaces(struct brw_context *brw,
715 const struct gl_program *prog,
716 uint32_t *surf_offset)
717 {
718 if (!prog)
719 return;
720
721 struct gl_context *ctx = &brw->ctx;
722
723 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
724
725 for (unsigned s = 0; s < num_samplers; s++) {
726 surf_offset[s] = 0;
727
728 if (prog->SamplersUsed & (1 << s)) {
729 const unsigned unit = prog->SamplerUnits[s];
730
731 /* _NEW_TEXTURE */
732 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
733 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s);
734 }
735 }
736 }
737 }
738
739
740 /**
741 * Construct SURFACE_STATE objects for enabled textures.
742 */
743 static void
744 brw_update_texture_surfaces(struct brw_context *brw)
745 {
746 /* BRW_NEW_VERTEX_PROGRAM */
747 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
748
749 /* BRW_NEW_GEOMETRY_PROGRAM */
750 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
751
752 /* BRW_NEW_FRAGMENT_PROGRAM */
753 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
754
755 /* _NEW_TEXTURE */
756 update_stage_texture_surfaces(brw, vs,
757 brw->vs.base.surf_offset +
758 SURF_INDEX_VEC4_TEXTURE(0));
759 update_stage_texture_surfaces(brw, gs,
760 brw->gs.base.surf_offset +
761 SURF_INDEX_VEC4_TEXTURE(0));
762 update_stage_texture_surfaces(brw, fs,
763 brw->wm.base.surf_offset +
764 SURF_INDEX_TEXTURE(0));
765
766 brw->state.dirty.brw |= BRW_NEW_SURFACES;
767 }
768
769 const struct brw_tracked_state brw_texture_surfaces = {
770 .dirty = {
771 .mesa = _NEW_TEXTURE,
772 .brw = BRW_NEW_BATCH |
773 BRW_NEW_VERTEX_PROGRAM |
774 BRW_NEW_GEOMETRY_PROGRAM |
775 BRW_NEW_FRAGMENT_PROGRAM,
776 .cache = 0
777 },
778 .emit = brw_update_texture_surfaces,
779 };
780
781 void
782 brw_upload_ubo_surfaces(struct brw_context *brw,
783 struct gl_shader *shader,
784 uint32_t *surf_offsets)
785 {
786 struct gl_context *ctx = &brw->ctx;
787
788 if (!shader)
789 return;
790
791 for (int i = 0; i < shader->NumUniformBlocks; i++) {
792 struct gl_uniform_buffer_binding *binding;
793 struct intel_buffer_object *intel_bo;
794
795 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
796 intel_bo = intel_buffer_object(binding->BufferObject);
797 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);
798
799 /* Because behavior for referencing outside of the binding's size in the
800 * glBindBufferRange case is undefined, we can just bind the whole buffer
801 * glBindBufferBase wants and be a correct implementation.
802 */
803 brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
804 bo->size - binding->Offset,
805 &surf_offsets[i],
806 shader->Type == GL_FRAGMENT_SHADER);
807 }
808
809 if (shader->NumUniformBlocks)
810 brw->state.dirty.brw |= BRW_NEW_SURFACES;
811 }
812
813 static void
814 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
815 {
816 struct gl_context *ctx = &brw->ctx;
817 /* _NEW_PROGRAM */
818 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
819
820 if (!prog)
821 return;
822
823 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
824 &brw->wm.base.surf_offset[SURF_INDEX_WM_UBO(0)]);
825 }
826
827 const struct brw_tracked_state brw_wm_ubo_surfaces = {
828 .dirty = {
829 .mesa = _NEW_PROGRAM,
830 .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
831 .cache = 0,
832 },
833 .emit = brw_upload_wm_ubo_surfaces,
834 };
835
836 void
837 gen4_init_vtable_surface_functions(struct brw_context *brw)
838 {
839 brw->vtbl.update_texture_surface = brw_update_texture_surface;
840 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
841 brw->vtbl.update_null_renderbuffer_surface =
842 brw_update_null_renderbuffer_surface;
843 brw->vtbl.create_constant_surface = brw_create_constant_surface;
844 }