4a3111ac69cb72e3874a32b4454801f51bc96618
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 unreachable("not reached");
77 }
78 }
79
80 uint32_t
81 brw_get_surface_tiling_bits(uint32_t tiling)
82 {
83 switch (tiling) {
84 case I915_TILING_X:
85 return BRW_SURFACE_TILED;
86 case I915_TILING_Y:
87 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
88 default:
89 return 0;
90 }
91 }
92
93
94 uint32_t
95 brw_get_surface_num_multisamples(unsigned num_samples)
96 {
97 if (num_samples > 1)
98 return BRW_SURFACE_MULTISAMPLECOUNT_4;
99 else
100 return BRW_SURFACE_MULTISAMPLECOUNT_1;
101 }
102
103 void
104 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
105 bool is_render_target,
106 unsigned *width, unsigned *height,
107 unsigned *pitch, uint32_t *tiling, unsigned *format)
108 {
109 static const unsigned halign_stencil = 8;
110
111 /* In Y-tiling row is twice as wide as in W-tiling, and subsequently
112 * there are half as many rows.
113 * In addition, mip-levels are accessed manually by the program and
114 * therefore the surface is setup to cover all the mip-levels for one slice.
115 * (Hardware is still used to access individual slices).
116 */
117 *tiling = I915_TILING_Y;
118 *pitch = mt->pitch * 2;
119 *width = ALIGN(mt->total_width, halign_stencil) * 2;
120 *height = (mt->total_height / mt->physical_depth0) / 2;
121
122 if (is_render_target) {
123 *format = BRW_SURFACEFORMAT_R8_UINT;
124 }
125 }
126
127
128 /**
129 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
130 * swizzling.
131 */
132 int
133 brw_get_texture_swizzle(const struct gl_context *ctx,
134 const struct gl_texture_object *t)
135 {
136 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
137
138 int swizzles[SWIZZLE_NIL + 1] = {
139 SWIZZLE_X,
140 SWIZZLE_Y,
141 SWIZZLE_Z,
142 SWIZZLE_W,
143 SWIZZLE_ZERO,
144 SWIZZLE_ONE,
145 SWIZZLE_NIL
146 };
147
148 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
149 img->_BaseFormat == GL_DEPTH_STENCIL) {
150 GLenum depth_mode = t->DepthMode;
151
152 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
153 * with depth component data specified with a sized internal format.
154 * Otherwise, it's left at the old default, GL_LUMINANCE.
155 */
156 if (_mesa_is_gles3(ctx) &&
157 img->InternalFormat != GL_DEPTH_COMPONENT &&
158 img->InternalFormat != GL_DEPTH_STENCIL) {
159 depth_mode = GL_RED;
160 }
161
162 switch (depth_mode) {
163 case GL_ALPHA:
164 swizzles[0] = SWIZZLE_ZERO;
165 swizzles[1] = SWIZZLE_ZERO;
166 swizzles[2] = SWIZZLE_ZERO;
167 swizzles[3] = SWIZZLE_X;
168 break;
169 case GL_LUMINANCE:
170 swizzles[0] = SWIZZLE_X;
171 swizzles[1] = SWIZZLE_X;
172 swizzles[2] = SWIZZLE_X;
173 swizzles[3] = SWIZZLE_ONE;
174 break;
175 case GL_INTENSITY:
176 swizzles[0] = SWIZZLE_X;
177 swizzles[1] = SWIZZLE_X;
178 swizzles[2] = SWIZZLE_X;
179 swizzles[3] = SWIZZLE_X;
180 break;
181 case GL_RED:
182 swizzles[0] = SWIZZLE_X;
183 swizzles[1] = SWIZZLE_ZERO;
184 swizzles[2] = SWIZZLE_ZERO;
185 swizzles[3] = SWIZZLE_ONE;
186 break;
187 }
188 }
189
190 /* If the texture's format is alpha-only, force R, G, and B to
191 * 0.0. Similarly, if the texture's format has no alpha channel,
192 * force the alpha value read to 1.0. This allows for the
193 * implementation to use an RGBA texture for any of these formats
194 * without leaking any unexpected values.
195 */
196 switch (img->_BaseFormat) {
197 case GL_ALPHA:
198 swizzles[0] = SWIZZLE_ZERO;
199 swizzles[1] = SWIZZLE_ZERO;
200 swizzles[2] = SWIZZLE_ZERO;
201 break;
202 case GL_RED:
203 case GL_RG:
204 case GL_RGB:
205 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
206 swizzles[3] = SWIZZLE_ONE;
207 break;
208 }
209
210 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
211 swizzles[GET_SWZ(t->_Swizzle, 1)],
212 swizzles[GET_SWZ(t->_Swizzle, 2)],
213 swizzles[GET_SWZ(t->_Swizzle, 3)]);
214 }
215
216 static void
217 gen4_emit_buffer_surface_state(struct brw_context *brw,
218 uint32_t *out_offset,
219 drm_intel_bo *bo,
220 unsigned buffer_offset,
221 unsigned surface_format,
222 unsigned buffer_size,
223 unsigned pitch,
224 unsigned mocs,
225 bool rw)
226 {
227 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
228 6 * 4, 32, out_offset);
229 memset(surf, 0, 6 * 4);
230
231 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
232 surface_format << BRW_SURFACE_FORMAT_SHIFT |
233 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
234 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
235 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
236 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
237 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
238 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
239
240 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
241 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
242 * physical cache. It is mapped in hardware to the sampler cache."
243 */
244 if (bo) {
245 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
246 bo, buffer_offset,
247 I915_GEM_DOMAIN_SAMPLER,
248 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
249 }
250 }
251
252 void
253 brw_update_buffer_texture_surface(struct gl_context *ctx,
254 unsigned unit,
255 uint32_t *surf_offset)
256 {
257 struct brw_context *brw = brw_context(ctx);
258 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
259 struct intel_buffer_object *intel_obj =
260 intel_buffer_object(tObj->BufferObject);
261 uint32_t size = tObj->BufferSize;
262 drm_intel_bo *bo = NULL;
263 mesa_format format = tObj->_BufferObjectFormat;
264 uint32_t brw_format = brw_format_for_mesa_format(format);
265 int texel_size = _mesa_get_format_bytes(format);
266
267 if (intel_obj) {
268 size = MIN2(size, intel_obj->Base.Size);
269 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
270 }
271
272 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
273 _mesa_problem(NULL, "bad format %s for texture buffer\n",
274 _mesa_get_format_name(format));
275 }
276
277 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
278 tObj->BufferOffset,
279 brw_format,
280 size / texel_size,
281 texel_size,
282 0, /* mocs */
283 false /* rw */);
284 }
285
286 static void
287 brw_update_texture_surface(struct gl_context *ctx,
288 unsigned unit,
289 uint32_t *surf_offset,
290 bool for_gather)
291 {
292 struct brw_context *brw = brw_context(ctx);
293 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
294 struct intel_texture_object *intelObj = intel_texture_object(tObj);
295 struct intel_mipmap_tree *mt = intelObj->mt;
296 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
297 uint32_t *surf;
298
299 /* BRW_NEW_UNIFORM_BUFFER */
300 if (tObj->Target == GL_TEXTURE_BUFFER) {
301 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
302 return;
303 }
304
305 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
306 6 * 4, 32, surf_offset);
307
308 uint32_t tex_format = translate_tex_format(brw, mt->format,
309 sampler->sRGBDecode);
310
311 if (for_gather) {
312 /* Sandybridge's gather4 message is broken for integer formats.
313 * To work around this, we pretend the surface is UNORM for
314 * 8 or 16-bit formats, and emit shader instructions to recover
315 * the real INT/UINT value. For 32-bit formats, we pretend
316 * the surface is FLOAT, and simply reinterpret the resulting
317 * bits.
318 */
319 switch (tex_format) {
320 case BRW_SURFACEFORMAT_R8_SINT:
321 case BRW_SURFACEFORMAT_R8_UINT:
322 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
323 break;
324
325 case BRW_SURFACEFORMAT_R16_SINT:
326 case BRW_SURFACEFORMAT_R16_UINT:
327 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
328 break;
329
330 case BRW_SURFACEFORMAT_R32_SINT:
331 case BRW_SURFACEFORMAT_R32_UINT:
332 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
333 break;
334
335 default:
336 break;
337 }
338 }
339
340 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
341 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
342 BRW_SURFACE_CUBEFACE_ENABLES |
343 tex_format << BRW_SURFACE_FORMAT_SHIFT);
344
345 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
346
347 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
348 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
349 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
350
351 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
352 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
353 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
354
355 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
356 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
357
358 surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
359
360 /* Emit relocation to surface contents */
361 drm_intel_bo_emit_reloc(brw->batch.bo,
362 *surf_offset + 4,
363 mt->bo,
364 surf[1] - mt->bo->offset64,
365 I915_GEM_DOMAIN_SAMPLER, 0);
366 }
367
368 /**
369 * Create the constant buffer surface. Vertex/fragment shader constants will be
370 * read from this buffer with Data Port Read instructions/messages.
371 */
372 void
373 brw_create_constant_surface(struct brw_context *brw,
374 drm_intel_bo *bo,
375 uint32_t offset,
376 uint32_t size,
377 uint32_t *out_offset,
378 bool dword_pitch)
379 {
380 uint32_t stride = dword_pitch ? 4 : 16;
381 uint32_t elements = ALIGN(size, stride) / stride;
382
383 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
384 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
385 elements, stride, 0, false);
386 }
387
388 /**
389 * Set up a binding table entry for use by stream output logic (transform
390 * feedback).
391 *
392 * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
393 */
394 void
395 brw_update_sol_surface(struct brw_context *brw,
396 struct gl_buffer_object *buffer_obj,
397 uint32_t *out_offset, unsigned num_vector_components,
398 unsigned stride_dwords, unsigned offset_dwords)
399 {
400 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
401 uint32_t offset_bytes = 4 * offset_dwords;
402 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
403 offset_bytes,
404 buffer_obj->Size - offset_bytes);
405 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
406 out_offset);
407 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
408 size_t size_dwords = buffer_obj->Size / 4;
409 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
410
411 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
412 * too big to map using a single binding table entry?
413 */
414 assert((size_dwords - offset_dwords) / stride_dwords
415 <= BRW_MAX_NUM_BUFFER_ENTRIES);
416
417 if (size_dwords > offset_dwords + num_vector_components) {
418 /* There is room for at least 1 transform feedback output in the buffer.
419 * Compute the number of additional transform feedback outputs the
420 * buffer has room for.
421 */
422 buffer_size_minus_1 =
423 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
424 } else {
425 /* There isn't even room for a single transform feedback output in the
426 * buffer. We can't configure the binding table entry to prevent output
427 * entirely; we'll have to rely on the geometry shader to detect
428 * overflow. But to minimize the damage in case of a bug, set up the
429 * binding table entry to just allow a single output.
430 */
431 buffer_size_minus_1 = 0;
432 }
433 width = buffer_size_minus_1 & 0x7f;
434 height = (buffer_size_minus_1 & 0xfff80) >> 7;
435 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
436
437 switch (num_vector_components) {
438 case 1:
439 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
440 break;
441 case 2:
442 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
443 break;
444 case 3:
445 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
446 break;
447 case 4:
448 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
449 break;
450 default:
451 unreachable("Invalid vector size for transform feedback output");
452 }
453
454 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
455 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
456 surface_format << BRW_SURFACE_FORMAT_SHIFT |
457 BRW_SURFACE_RC_READ_WRITE;
458 surf[1] = bo->offset64 + offset_bytes; /* reloc */
459 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
460 height << BRW_SURFACE_HEIGHT_SHIFT);
461 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
462 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
463 surf[4] = 0;
464 surf[5] = 0;
465
466 /* Emit relocation to surface contents. */
467 drm_intel_bo_emit_reloc(brw->batch.bo,
468 *out_offset + 4,
469 bo, offset_bytes,
470 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
471 }
472
473 /* Creates a new WM constant buffer reflecting the current fragment program's
474 * constants, if needed by the fragment program.
475 *
476 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
477 * state atom.
478 */
479 static void
480 brw_upload_wm_pull_constants(struct brw_context *brw)
481 {
482 struct brw_stage_state *stage_state = &brw->wm.base;
483 /* BRW_NEW_FRAGMENT_PROGRAM */
484 struct brw_fragment_program *fp =
485 (struct brw_fragment_program *) brw->fragment_program;
486 /* CACHE_NEW_WM_PROG */
487 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
488
489 /* _NEW_PROGRAM_CONSTANTS */
490 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
491 stage_state, prog_data, true);
492 }
493
494 const struct brw_tracked_state brw_wm_pull_constants = {
495 .dirty = {
496 .mesa = (_NEW_PROGRAM_CONSTANTS),
497 .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
498 .cache = CACHE_NEW_WM_PROG,
499 },
500 .emit = brw_upload_wm_pull_constants,
501 };
502
503 /**
504 * Creates a null renderbuffer surface.
505 *
506 * This is used when the shader doesn't write to any color output. An FB
507 * write to target 0 will still be emitted, because that's how the thread is
508 * terminated (and computed depth is returned), so we need to have the
509 * hardware discard the target 0 color output..
510 */
511 static void
512 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
513 {
514 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
515 * Notes):
516 *
517 * A null surface will be used in instances where an actual surface is
518 * not bound. When a write message is generated to a null surface, no
519 * actual surface is written to. When a read message (including any
520 * sampling engine message) is generated to a null surface, the result
521 * is all zeros. Note that a null surface type is allowed to be used
522 * with all messages, even if it is not specificially indicated as
523 * supported. All of the remaining fields in surface state are ignored
524 * for null surfaces, with the following exceptions:
525 *
526 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
527 * depth buffer’s corresponding state for all render target surfaces,
528 * including null.
529 *
530 * - Surface Format must be R8G8B8A8_UNORM.
531 */
532 struct gl_context *ctx = &brw->ctx;
533 uint32_t *surf;
534 unsigned surface_type = BRW_SURFACE_NULL;
535 drm_intel_bo *bo = NULL;
536 unsigned pitch_minus_1 = 0;
537 uint32_t multisampling_state = 0;
538 uint32_t surf_index =
539 brw->wm.prog_data->binding_table.render_target_start + unit;
540
541 /* _NEW_BUFFERS */
542 const struct gl_framebuffer *fb = ctx->DrawBuffer;
543
544 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
545 &brw->wm.base.surf_offset[surf_index]);
546
547 if (fb->Visual.samples > 1) {
548 /* On Gen6, null render targets seem to cause GPU hangs when
549 * multisampling. So work around this problem by rendering into dummy
550 * color buffer.
551 *
552 * To decrease the amount of memory needed by the workaround buffer, we
553 * set its pitch to 128 bytes (the width of a Y tile). This means that
554 * the amount of memory needed for the workaround buffer is
555 * (width_in_tiles + height_in_tiles - 1) tiles.
556 *
557 * Note that since the workaround buffer will be interpreted by the
558 * hardware as an interleaved multisampled buffer, we need to compute
559 * width_in_tiles and height_in_tiles by dividing the width and height
560 * by 16 rather than the normal Y-tile size of 32.
561 */
562 unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
563 unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
564 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
565 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
566 size_needed);
567 bo = brw->wm.multisampled_null_render_target_bo;
568 surface_type = BRW_SURFACE_2D;
569 pitch_minus_1 = 127;
570 multisampling_state =
571 brw_get_surface_num_multisamples(fb->Visual.samples);
572 }
573
574 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
575 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
576 if (brw->gen < 6) {
577 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
578 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
579 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
580 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
581 }
582 surf[1] = bo ? bo->offset64 : 0;
583 surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
584 (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
585
586 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
587 * Notes):
588 *
589 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
590 */
591 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
592 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
593 surf[4] = multisampling_state;
594 surf[5] = 0;
595
596 if (bo) {
597 drm_intel_bo_emit_reloc(brw->batch.bo,
598 brw->wm.base.surf_offset[surf_index] + 4,
599 bo, 0,
600 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
601 }
602 }
603
604 /**
605 * Sets up a surface state structure to point at the given region.
606 * While it is only used for the front/back buffer currently, it should be
607 * usable for further buffers when doing ARB_draw_buffer support.
608 */
609 static void
610 brw_update_renderbuffer_surface(struct brw_context *brw,
611 struct gl_renderbuffer *rb,
612 bool layered,
613 unsigned int unit)
614 {
615 struct gl_context *ctx = &brw->ctx;
616 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
617 struct intel_mipmap_tree *mt = irb->mt;
618 uint32_t *surf;
619 uint32_t tile_x, tile_y;
620 uint32_t format = 0;
621 /* _NEW_BUFFERS */
622 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
623 uint32_t surf_index =
624 brw->wm.prog_data->binding_table.render_target_start + unit;
625
626 assert(!layered);
627
628 if (rb->TexImage && !brw->has_surface_tile_offset) {
629 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
630
631 if (tile_x != 0 || tile_y != 0) {
632 /* Original gen4 hardware couldn't draw to a non-tile-aligned
633 * destination in a miptree unless you actually setup your renderbuffer
634 * as a miptree and used the fragile lod/array_index/etc. controls to
635 * select the image. So, instead, we just make a new single-level
636 * miptree and render into that.
637 */
638 intel_renderbuffer_move_to_temp(brw, irb, false);
639 mt = irb->mt;
640 }
641 }
642
643 intel_miptree_used_for_rendering(irb->mt);
644
645 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
646 &brw->wm.base.surf_offset[surf_index]);
647
648 format = brw->render_target_format[rb_format];
649 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
650 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
651 __FUNCTION__, _mesa_get_format_name(rb_format));
652 }
653
654 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
655 format << BRW_SURFACE_FORMAT_SHIFT);
656
657 /* reloc */
658 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
659 mt->bo->offset64);
660
661 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
662 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
663
664 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
665 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
666
667 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
668
669 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
670 /* Note that the low bits of these fields are missing, so
671 * there's the possibility of getting in trouble.
672 */
673 assert(tile_x % 4 == 0);
674 assert(tile_y % 2 == 0);
675 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
676 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
677 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
678
679 if (brw->gen < 6) {
680 /* _NEW_COLOR */
681 if (!ctx->Color.ColorLogicOpEnabled &&
682 (ctx->Color.BlendEnabled & (1 << unit)))
683 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
684
685 if (!ctx->Color.ColorMask[unit][0])
686 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
687 if (!ctx->Color.ColorMask[unit][1])
688 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
689 if (!ctx->Color.ColorMask[unit][2])
690 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
691
692 /* As mentioned above, disable writes to the alpha component when the
693 * renderbuffer is XRGB.
694 */
695 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
696 !ctx->Color.ColorMask[unit][3]) {
697 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
698 }
699 }
700
701 drm_intel_bo_emit_reloc(brw->batch.bo,
702 brw->wm.base.surf_offset[surf_index] + 4,
703 mt->bo,
704 surf[1] - mt->bo->offset64,
705 I915_GEM_DOMAIN_RENDER,
706 I915_GEM_DOMAIN_RENDER);
707 }
708
709 /**
710 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
711 */
712 static void
713 brw_update_renderbuffer_surfaces(struct brw_context *brw)
714 {
715 struct gl_context *ctx = &brw->ctx;
716 GLuint i;
717
718 /* _NEW_BUFFERS | _NEW_COLOR */
719 /* Update surfaces for drawing buffers */
720 if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
721 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
722 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
723 brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
724 ctx->DrawBuffer->MaxNumLayers > 0, i);
725 } else {
726 brw->vtbl.update_null_renderbuffer_surface(brw, i);
727 }
728 }
729 } else {
730 brw->vtbl.update_null_renderbuffer_surface(brw, 0);
731 }
732 brw->state.dirty.brw |= BRW_NEW_SURFACES;
733 }
734
735 const struct brw_tracked_state brw_renderbuffer_surfaces = {
736 .dirty = {
737 .mesa = (_NEW_COLOR |
738 _NEW_BUFFERS),
739 .brw = BRW_NEW_BATCH,
740 .cache = 0
741 },
742 .emit = brw_update_renderbuffer_surfaces,
743 };
744
745 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
746 .dirty = {
747 .mesa = _NEW_BUFFERS,
748 .brw = BRW_NEW_BATCH,
749 .cache = 0
750 },
751 .emit = brw_update_renderbuffer_surfaces,
752 };
753
754
755 static void
756 update_stage_texture_surfaces(struct brw_context *brw,
757 const struct gl_program *prog,
758 struct brw_stage_state *stage_state,
759 bool for_gather)
760 {
761 if (!prog)
762 return;
763
764 struct gl_context *ctx = &brw->ctx;
765
766 uint32_t *surf_offset = stage_state->surf_offset;
767 if (for_gather)
768 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
769 else
770 surf_offset += stage_state->prog_data->binding_table.texture_start;
771
772 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
773 for (unsigned s = 0; s < num_samplers; s++) {
774 surf_offset[s] = 0;
775
776 if (prog->SamplersUsed & (1 << s)) {
777 const unsigned unit = prog->SamplerUnits[s];
778
779 /* _NEW_TEXTURE */
780 if (ctx->Texture.Unit[unit]._Current) {
781 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
782 }
783 }
784 }
785 }
786
787
788 /**
789 * Construct SURFACE_STATE objects for enabled textures.
790 */
791 static void
792 brw_update_texture_surfaces(struct brw_context *brw)
793 {
794 /* BRW_NEW_VERTEX_PROGRAM */
795 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
796
797 /* BRW_NEW_GEOMETRY_PROGRAM */
798 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
799
800 /* BRW_NEW_FRAGMENT_PROGRAM */
801 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
802
803 /* _NEW_TEXTURE */
804 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
805 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
806 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
807
808 /* emit alternate set of surface state for gather. this
809 * allows the surface format to be overriden for only the
810 * gather4 messages. */
811 if (brw->gen < 8) {
812 if (vs && vs->UsesGather)
813 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
814 if (gs && gs->UsesGather)
815 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
816 if (fs && fs->UsesGather)
817 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
818 }
819
820 brw->state.dirty.brw |= BRW_NEW_SURFACES;
821 }
822
823 const struct brw_tracked_state brw_texture_surfaces = {
824 .dirty = {
825 .mesa = _NEW_TEXTURE,
826 .brw = BRW_NEW_BATCH |
827 BRW_NEW_UNIFORM_BUFFER |
828 BRW_NEW_VERTEX_PROGRAM |
829 BRW_NEW_GEOMETRY_PROGRAM |
830 BRW_NEW_FRAGMENT_PROGRAM,
831 .cache = 0
832 },
833 .emit = brw_update_texture_surfaces,
834 };
835
836 void
837 brw_upload_ubo_surfaces(struct brw_context *brw,
838 struct gl_shader *shader,
839 struct brw_stage_state *stage_state,
840 struct brw_stage_prog_data *prog_data)
841 {
842 struct gl_context *ctx = &brw->ctx;
843
844 if (!shader)
845 return;
846
847 uint32_t *surf_offsets =
848 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
849
850 for (int i = 0; i < shader->NumUniformBlocks; i++) {
851 struct gl_uniform_buffer_binding *binding;
852 struct intel_buffer_object *intel_bo;
853
854 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
855 intel_bo = intel_buffer_object(binding->BufferObject);
856 drm_intel_bo *bo =
857 intel_bufferobj_buffer(brw, intel_bo,
858 binding->Offset,
859 binding->BufferObject->Size - binding->Offset);
860
861 /* Because behavior for referencing outside of the binding's size in the
862 * glBindBufferRange case is undefined, we can just bind the whole buffer
863 * glBindBufferBase wants and be a correct implementation.
864 */
865 brw_create_constant_surface(brw, bo, binding->Offset,
866 bo->size - binding->Offset,
867 &surf_offsets[i],
868 shader->Stage == MESA_SHADER_FRAGMENT);
869 }
870
871 if (shader->NumUniformBlocks)
872 brw->state.dirty.brw |= BRW_NEW_SURFACES;
873 }
874
875 static void
876 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
877 {
878 struct gl_context *ctx = &brw->ctx;
879 /* _NEW_PROGRAM */
880 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
881
882 if (!prog)
883 return;
884
885 /* CACHE_NEW_WM_PROG */
886 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
887 &brw->wm.base, &brw->wm.prog_data->base);
888 }
889
890 const struct brw_tracked_state brw_wm_ubo_surfaces = {
891 .dirty = {
892 .mesa = _NEW_PROGRAM,
893 .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
894 .cache = CACHE_NEW_WM_PROG,
895 },
896 .emit = brw_upload_wm_ubo_surfaces,
897 };
898
899 void
900 brw_upload_abo_surfaces(struct brw_context *brw,
901 struct gl_shader_program *prog,
902 struct brw_stage_state *stage_state,
903 struct brw_stage_prog_data *prog_data)
904 {
905 struct gl_context *ctx = &brw->ctx;
906 uint32_t *surf_offsets =
907 &stage_state->surf_offset[prog_data->binding_table.abo_start];
908
909 for (int i = 0; i < prog->NumAtomicBuffers; i++) {
910 struct gl_atomic_buffer_binding *binding =
911 &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
912 struct intel_buffer_object *intel_bo =
913 intel_buffer_object(binding->BufferObject);
914 drm_intel_bo *bo = intel_bufferobj_buffer(
915 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
916
917 brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
918 bo->size - binding->Offset,
919 &surf_offsets[i], true);
920 }
921
922 if (prog->NumUniformBlocks)
923 brw->state.dirty.brw |= BRW_NEW_SURFACES;
924 }
925
926 static void
927 brw_upload_wm_abo_surfaces(struct brw_context *brw)
928 {
929 struct gl_context *ctx = &brw->ctx;
930 /* _NEW_PROGRAM */
931 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
932
933 if (prog) {
934 /* CACHE_NEW_WM_PROG */
935 brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
936 &brw->wm.prog_data->base);
937 }
938 }
939
940 const struct brw_tracked_state brw_wm_abo_surfaces = {
941 .dirty = {
942 .mesa = _NEW_PROGRAM,
943 .brw = BRW_NEW_BATCH | BRW_NEW_ATOMIC_BUFFER,
944 .cache = CACHE_NEW_WM_PROG,
945 },
946 .emit = brw_upload_wm_abo_surfaces,
947 };
948
949 void
950 gen4_init_vtable_surface_functions(struct brw_context *brw)
951 {
952 brw->vtbl.update_texture_surface = brw_update_texture_surface;
953 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
954 brw->vtbl.update_null_renderbuffer_surface =
955 brw_update_null_renderbuffer_surface;
956 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
957 }