160dd2f6c624a7054523b2ad19f08ec5f893a373
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keithw@vmware.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 unreachable("not reached");
77 }
78 }
79
80 uint32_t
81 brw_get_surface_tiling_bits(uint32_t tiling)
82 {
83 switch (tiling) {
84 case I915_TILING_X:
85 return BRW_SURFACE_TILED;
86 case I915_TILING_Y:
87 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
88 default:
89 return 0;
90 }
91 }
92
93
94 uint32_t
95 brw_get_surface_num_multisamples(unsigned num_samples)
96 {
97 if (num_samples > 1)
98 return BRW_SURFACE_MULTISAMPLECOUNT_4;
99 else
100 return BRW_SURFACE_MULTISAMPLECOUNT_1;
101 }
102
103 void
104 brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
105 bool is_render_target,
106 unsigned *width, unsigned *height,
107 unsigned *pitch, uint32_t *tiling, unsigned *format)
108 {
109 static const unsigned halign_stencil = 8;
110
111 /* In Y-tiling row is twice as wide as in W-tiling, and subsequently
112 * there are half as many rows.
113 * In addition, mip-levels are accessed manually by the program and
114 * therefore the surface is setup to cover all the mip-levels for one slice.
115 * (Hardware is still used to access individual slices).
116 */
117 *tiling = I915_TILING_Y;
118 *pitch = mt->pitch * 2;
119 *width = ALIGN(mt->total_width, halign_stencil) * 2;
120 *height = (mt->total_height / mt->physical_depth0) / 2;
121
122 if (is_render_target) {
123 *format = BRW_SURFACEFORMAT_R8_UINT;
124 }
125 }
126
127
128 /**
129 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
130 * swizzling.
131 */
132 int
133 brw_get_texture_swizzle(const struct gl_context *ctx,
134 const struct gl_texture_object *t)
135 {
136 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
137
138 int swizzles[SWIZZLE_NIL + 1] = {
139 SWIZZLE_X,
140 SWIZZLE_Y,
141 SWIZZLE_Z,
142 SWIZZLE_W,
143 SWIZZLE_ZERO,
144 SWIZZLE_ONE,
145 SWIZZLE_NIL
146 };
147
148 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
149 img->_BaseFormat == GL_DEPTH_STENCIL) {
150 GLenum depth_mode = t->DepthMode;
151
152 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
153 * with depth component data specified with a sized internal format.
154 * Otherwise, it's left at the old default, GL_LUMINANCE.
155 */
156 if (_mesa_is_gles3(ctx) &&
157 img->InternalFormat != GL_DEPTH_COMPONENT &&
158 img->InternalFormat != GL_DEPTH_STENCIL) {
159 depth_mode = GL_RED;
160 }
161
162 switch (depth_mode) {
163 case GL_ALPHA:
164 swizzles[0] = SWIZZLE_ZERO;
165 swizzles[1] = SWIZZLE_ZERO;
166 swizzles[2] = SWIZZLE_ZERO;
167 swizzles[3] = SWIZZLE_X;
168 break;
169 case GL_LUMINANCE:
170 swizzles[0] = SWIZZLE_X;
171 swizzles[1] = SWIZZLE_X;
172 swizzles[2] = SWIZZLE_X;
173 swizzles[3] = SWIZZLE_ONE;
174 break;
175 case GL_INTENSITY:
176 swizzles[0] = SWIZZLE_X;
177 swizzles[1] = SWIZZLE_X;
178 swizzles[2] = SWIZZLE_X;
179 swizzles[3] = SWIZZLE_X;
180 break;
181 case GL_RED:
182 swizzles[0] = SWIZZLE_X;
183 swizzles[1] = SWIZZLE_ZERO;
184 swizzles[2] = SWIZZLE_ZERO;
185 swizzles[3] = SWIZZLE_ONE;
186 break;
187 }
188 }
189
190 GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
191
192 /* If the texture's format is alpha-only, force R, G, and B to
193 * 0.0. Similarly, if the texture's format has no alpha channel,
194 * force the alpha value read to 1.0. This allows for the
195 * implementation to use an RGBA texture for any of these formats
196 * without leaking any unexpected values.
197 */
198 switch (img->_BaseFormat) {
199 case GL_ALPHA:
200 swizzles[0] = SWIZZLE_ZERO;
201 swizzles[1] = SWIZZLE_ZERO;
202 swizzles[2] = SWIZZLE_ZERO;
203 break;
204 case GL_LUMINANCE:
205 if (t->_IsIntegerFormat || datatype == GL_SIGNED_NORMALIZED) {
206 swizzles[0] = SWIZZLE_X;
207 swizzles[1] = SWIZZLE_X;
208 swizzles[2] = SWIZZLE_X;
209 swizzles[3] = SWIZZLE_ONE;
210 }
211 break;
212 case GL_LUMINANCE_ALPHA:
213 if (datatype == GL_SIGNED_NORMALIZED) {
214 swizzles[0] = SWIZZLE_X;
215 swizzles[1] = SWIZZLE_X;
216 swizzles[2] = SWIZZLE_X;
217 swizzles[3] = SWIZZLE_W;
218 }
219 break;
220 case GL_INTENSITY:
221 if (datatype == GL_SIGNED_NORMALIZED) {
222 swizzles[0] = SWIZZLE_X;
223 swizzles[1] = SWIZZLE_X;
224 swizzles[2] = SWIZZLE_X;
225 swizzles[3] = SWIZZLE_X;
226 }
227 break;
228 case GL_RED:
229 case GL_RG:
230 case GL_RGB:
231 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
232 swizzles[3] = SWIZZLE_ONE;
233 break;
234 }
235
236 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
237 swizzles[GET_SWZ(t->_Swizzle, 1)],
238 swizzles[GET_SWZ(t->_Swizzle, 2)],
239 swizzles[GET_SWZ(t->_Swizzle, 3)]);
240 }
241
242 static void
243 gen4_emit_buffer_surface_state(struct brw_context *brw,
244 uint32_t *out_offset,
245 drm_intel_bo *bo,
246 unsigned buffer_offset,
247 unsigned surface_format,
248 unsigned buffer_size,
249 unsigned pitch,
250 bool rw)
251 {
252 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
253 6 * 4, 32, out_offset);
254 memset(surf, 0, 6 * 4);
255
256 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
257 surface_format << BRW_SURFACE_FORMAT_SHIFT |
258 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
259 surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
260 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
261 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
262 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
263 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
264
265 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
266 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
267 * physical cache. It is mapped in hardware to the sampler cache."
268 */
269 if (bo) {
270 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
271 bo, buffer_offset,
272 I915_GEM_DOMAIN_SAMPLER,
273 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
274 }
275 }
276
277 void
278 brw_update_buffer_texture_surface(struct gl_context *ctx,
279 unsigned unit,
280 uint32_t *surf_offset)
281 {
282 struct brw_context *brw = brw_context(ctx);
283 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
284 struct intel_buffer_object *intel_obj =
285 intel_buffer_object(tObj->BufferObject);
286 uint32_t size = tObj->BufferSize;
287 drm_intel_bo *bo = NULL;
288 mesa_format format = tObj->_BufferObjectFormat;
289 uint32_t brw_format = brw_format_for_mesa_format(format);
290 int texel_size = _mesa_get_format_bytes(format);
291
292 if (intel_obj) {
293 size = MIN2(size, intel_obj->Base.Size);
294 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
295 }
296
297 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
298 _mesa_problem(NULL, "bad format %s for texture buffer\n",
299 _mesa_get_format_name(format));
300 }
301
302 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
303 tObj->BufferOffset,
304 brw_format,
305 size / texel_size,
306 texel_size,
307 false /* rw */);
308 }
309
310 static void
311 brw_update_texture_surface(struct gl_context *ctx,
312 unsigned unit,
313 uint32_t *surf_offset,
314 bool for_gather)
315 {
316 struct brw_context *brw = brw_context(ctx);
317 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
318 struct intel_texture_object *intelObj = intel_texture_object(tObj);
319 struct intel_mipmap_tree *mt = intelObj->mt;
320 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
321 uint32_t *surf;
322
323 /* BRW_NEW_TEXTURE_BUFFER */
324 if (tObj->Target == GL_TEXTURE_BUFFER) {
325 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
326 return;
327 }
328
329 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
330 6 * 4, 32, surf_offset);
331
332 uint32_t tex_format = translate_tex_format(brw, mt->format,
333 sampler->sRGBDecode);
334
335 if (for_gather) {
336 /* Sandybridge's gather4 message is broken for integer formats.
337 * To work around this, we pretend the surface is UNORM for
338 * 8 or 16-bit formats, and emit shader instructions to recover
339 * the real INT/UINT value. For 32-bit formats, we pretend
340 * the surface is FLOAT, and simply reinterpret the resulting
341 * bits.
342 */
343 switch (tex_format) {
344 case BRW_SURFACEFORMAT_R8_SINT:
345 case BRW_SURFACEFORMAT_R8_UINT:
346 tex_format = BRW_SURFACEFORMAT_R8_UNORM;
347 break;
348
349 case BRW_SURFACEFORMAT_R16_SINT:
350 case BRW_SURFACEFORMAT_R16_UINT:
351 tex_format = BRW_SURFACEFORMAT_R16_UNORM;
352 break;
353
354 case BRW_SURFACEFORMAT_R32_SINT:
355 case BRW_SURFACEFORMAT_R32_UINT:
356 tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
357 break;
358
359 default:
360 break;
361 }
362 }
363
364 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
365 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
366 BRW_SURFACE_CUBEFACE_ENABLES |
367 tex_format << BRW_SURFACE_FORMAT_SHIFT);
368
369 surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
370
371 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
372 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
373 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
374
375 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
376 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
377 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
378
379 surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
380 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
381
382 surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
383
384 /* Emit relocation to surface contents */
385 drm_intel_bo_emit_reloc(brw->batch.bo,
386 *surf_offset + 4,
387 mt->bo,
388 surf[1] - mt->bo->offset64,
389 I915_GEM_DOMAIN_SAMPLER, 0);
390 }
391
392 /**
393 * Create the constant buffer surface. Vertex/fragment shader constants will be
394 * read from this buffer with Data Port Read instructions/messages.
395 */
396 void
397 brw_create_constant_surface(struct brw_context *brw,
398 drm_intel_bo *bo,
399 uint32_t offset,
400 uint32_t size,
401 uint32_t *out_offset,
402 bool dword_pitch)
403 {
404 uint32_t stride = dword_pitch ? 4 : 16;
405 uint32_t elements = ALIGN(size, stride) / stride;
406
407 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
408 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
409 elements, stride, false);
410 }
411
412 /**
413 * Set up a binding table entry for use by stream output logic (transform
414 * feedback).
415 *
416 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
417 */
418 void
419 brw_update_sol_surface(struct brw_context *brw,
420 struct gl_buffer_object *buffer_obj,
421 uint32_t *out_offset, unsigned num_vector_components,
422 unsigned stride_dwords, unsigned offset_dwords)
423 {
424 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
425 uint32_t offset_bytes = 4 * offset_dwords;
426 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
427 offset_bytes,
428 buffer_obj->Size - offset_bytes);
429 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
430 out_offset);
431 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
432 size_t size_dwords = buffer_obj->Size / 4;
433 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
434
435 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
436 * too big to map using a single binding table entry?
437 */
438 assert((size_dwords - offset_dwords) / stride_dwords
439 <= BRW_MAX_NUM_BUFFER_ENTRIES);
440
441 if (size_dwords > offset_dwords + num_vector_components) {
442 /* There is room for at least 1 transform feedback output in the buffer.
443 * Compute the number of additional transform feedback outputs the
444 * buffer has room for.
445 */
446 buffer_size_minus_1 =
447 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
448 } else {
449 /* There isn't even room for a single transform feedback output in the
450 * buffer. We can't configure the binding table entry to prevent output
451 * entirely; we'll have to rely on the geometry shader to detect
452 * overflow. But to minimize the damage in case of a bug, set up the
453 * binding table entry to just allow a single output.
454 */
455 buffer_size_minus_1 = 0;
456 }
457 width = buffer_size_minus_1 & 0x7f;
458 height = (buffer_size_minus_1 & 0xfff80) >> 7;
459 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
460
461 switch (num_vector_components) {
462 case 1:
463 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
464 break;
465 case 2:
466 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
467 break;
468 case 3:
469 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
470 break;
471 case 4:
472 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
473 break;
474 default:
475 unreachable("Invalid vector size for transform feedback output");
476 }
477
478 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
479 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
480 surface_format << BRW_SURFACE_FORMAT_SHIFT |
481 BRW_SURFACE_RC_READ_WRITE;
482 surf[1] = bo->offset64 + offset_bytes; /* reloc */
483 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
484 height << BRW_SURFACE_HEIGHT_SHIFT);
485 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
486 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
487 surf[4] = 0;
488 surf[5] = 0;
489
490 /* Emit relocation to surface contents. */
491 drm_intel_bo_emit_reloc(brw->batch.bo,
492 *out_offset + 4,
493 bo, offset_bytes,
494 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
495 }
496
497 /* Creates a new WM constant buffer reflecting the current fragment program's
498 * constants, if needed by the fragment program.
499 *
500 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
501 * state atom.
502 */
503 static void
504 brw_upload_wm_pull_constants(struct brw_context *brw)
505 {
506 struct brw_stage_state *stage_state = &brw->wm.base;
507 /* BRW_NEW_FRAGMENT_PROGRAM */
508 struct brw_fragment_program *fp =
509 (struct brw_fragment_program *) brw->fragment_program;
510 /* BRW_NEW_FS_PROG_DATA */
511 struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
512
513 /* _NEW_PROGRAM_CONSTANTS */
514 brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
515 stage_state, prog_data, true);
516 }
517
518 const struct brw_tracked_state brw_wm_pull_constants = {
519 .dirty = {
520 .mesa = _NEW_PROGRAM_CONSTANTS,
521 .brw = BRW_NEW_BATCH |
522 BRW_NEW_FRAGMENT_PROGRAM |
523 BRW_NEW_FS_PROG_DATA,
524 },
525 .emit = brw_upload_wm_pull_constants,
526 };
527
528 /**
529 * Creates a null renderbuffer surface.
530 *
531 * This is used when the shader doesn't write to any color output. An FB
532 * write to target 0 will still be emitted, because that's how the thread is
533 * terminated (and computed depth is returned), so we need to have the
534 * hardware discard the target 0 color output..
535 */
536 static void
537 brw_emit_null_surface_state(struct brw_context *brw,
538 unsigned width,
539 unsigned height,
540 unsigned samples,
541 uint32_t *out_offset)
542 {
543 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
544 * Notes):
545 *
546 * A null surface will be used in instances where an actual surface is
547 * not bound. When a write message is generated to a null surface, no
548 * actual surface is written to. When a read message (including any
549 * sampling engine message) is generated to a null surface, the result
550 * is all zeros. Note that a null surface type is allowed to be used
551 * with all messages, even if it is not specificially indicated as
552 * supported. All of the remaining fields in surface state are ignored
553 * for null surfaces, with the following exceptions:
554 *
555 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
556 * depth buffer’s corresponding state for all render target surfaces,
557 * including null.
558 *
559 * - Surface Format must be R8G8B8A8_UNORM.
560 */
561 unsigned surface_type = BRW_SURFACE_NULL;
562 drm_intel_bo *bo = NULL;
563 unsigned pitch_minus_1 = 0;
564 uint32_t multisampling_state = 0;
565 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
566 out_offset);
567
568 if (samples > 1) {
569 /* On Gen6, null render targets seem to cause GPU hangs when
570 * multisampling. So work around this problem by rendering into dummy
571 * color buffer.
572 *
573 * To decrease the amount of memory needed by the workaround buffer, we
574 * set its pitch to 128 bytes (the width of a Y tile). This means that
575 * the amount of memory needed for the workaround buffer is
576 * (width_in_tiles + height_in_tiles - 1) tiles.
577 *
578 * Note that since the workaround buffer will be interpreted by the
579 * hardware as an interleaved multisampled buffer, we need to compute
580 * width_in_tiles and height_in_tiles by dividing the width and height
581 * by 16 rather than the normal Y-tile size of 32.
582 */
583 unsigned width_in_tiles = ALIGN(width, 16) / 16;
584 unsigned height_in_tiles = ALIGN(height, 16) / 16;
585 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
586 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
587 size_needed);
588 bo = brw->wm.multisampled_null_render_target_bo;
589 surface_type = BRW_SURFACE_2D;
590 pitch_minus_1 = 127;
591 multisampling_state = brw_get_surface_num_multisamples(samples);
592 }
593
594 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
595 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
596 if (brw->gen < 6) {
597 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
598 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
599 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
600 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
601 }
602 surf[1] = bo ? bo->offset64 : 0;
603 surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT |
604 (height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
605
606 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
607 * Notes):
608 *
609 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
610 */
611 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
612 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
613 surf[4] = multisampling_state;
614 surf[5] = 0;
615
616 if (bo) {
617 drm_intel_bo_emit_reloc(brw->batch.bo,
618 *out_offset + 4,
619 bo, 0,
620 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
621 }
622 }
623
624 /**
625 * Sets up a surface state structure to point at the given region.
626 * While it is only used for the front/back buffer currently, it should be
627 * usable for further buffers when doing ARB_draw_buffer support.
628 */
629 static uint32_t
630 brw_update_renderbuffer_surface(struct brw_context *brw,
631 struct gl_renderbuffer *rb,
632 bool layered, unsigned unit,
633 uint32_t surf_index)
634 {
635 struct gl_context *ctx = &brw->ctx;
636 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
637 struct intel_mipmap_tree *mt = irb->mt;
638 uint32_t *surf;
639 uint32_t tile_x, tile_y;
640 uint32_t format = 0;
641 uint32_t offset;
642 /* _NEW_BUFFERS */
643 mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
644 /* BRW_NEW_FS_PROG_DATA */
645
646 assert(!layered);
647
648 if (rb->TexImage && !brw->has_surface_tile_offset) {
649 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
650
651 if (tile_x != 0 || tile_y != 0) {
652 /* Original gen4 hardware couldn't draw to a non-tile-aligned
653 * destination in a miptree unless you actually setup your renderbuffer
654 * as a miptree and used the fragile lod/array_index/etc. controls to
655 * select the image. So, instead, we just make a new single-level
656 * miptree and render into that.
657 */
658 intel_renderbuffer_move_to_temp(brw, irb, false);
659 mt = irb->mt;
660 }
661 }
662
663 intel_miptree_used_for_rendering(irb->mt);
664
665 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
666
667 format = brw->render_target_format[rb_format];
668 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
669 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
670 __func__, _mesa_get_format_name(rb_format));
671 }
672
673 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
674 format << BRW_SURFACE_FORMAT_SHIFT);
675
676 /* reloc */
677 assert(mt->offset % mt->cpp == 0);
678 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
679 mt->bo->offset64 + mt->offset);
680
681 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
682 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
683
684 surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
685 (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
686
687 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
688
689 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
690 /* Note that the low bits of these fields are missing, so
691 * there's the possibility of getting in trouble.
692 */
693 assert(tile_x % 4 == 0);
694 assert(tile_y % 2 == 0);
695 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
696 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
697 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
698
699 if (brw->gen < 6) {
700 /* _NEW_COLOR */
701 if (!ctx->Color.ColorLogicOpEnabled &&
702 (ctx->Color.BlendEnabled & (1 << unit)))
703 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
704
705 if (!ctx->Color.ColorMask[unit][0])
706 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
707 if (!ctx->Color.ColorMask[unit][1])
708 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
709 if (!ctx->Color.ColorMask[unit][2])
710 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
711
712 /* As mentioned above, disable writes to the alpha component when the
713 * renderbuffer is XRGB.
714 */
715 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
716 !ctx->Color.ColorMask[unit][3]) {
717 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
718 }
719 }
720
721 drm_intel_bo_emit_reloc(brw->batch.bo,
722 offset + 4,
723 mt->bo,
724 surf[1] - mt->bo->offset64,
725 I915_GEM_DOMAIN_RENDER,
726 I915_GEM_DOMAIN_RENDER);
727
728 return offset;
729 }
730
731 /**
732 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
733 */
734 void
735 brw_update_renderbuffer_surfaces(struct brw_context *brw,
736 const struct gl_framebuffer *fb,
737 uint32_t render_target_start,
738 uint32_t *surf_offset)
739 {
740 GLuint i;
741
742 /* Update surfaces for drawing buffers */
743 if (fb->_NumColorDrawBuffers >= 1) {
744 for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
745 const uint32_t surf_index = render_target_start + i;
746
747 if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
748 surf_offset[surf_index] =
749 brw->vtbl.update_renderbuffer_surface(
750 brw, fb->_ColorDrawBuffers[i],
751 fb->MaxNumLayers > 0, i, surf_index);
752 } else {
753 brw->vtbl.emit_null_surface_state(
754 brw, fb->Width, fb->Height, fb->Visual.samples,
755 &surf_offset[surf_index]);
756 }
757 }
758 } else {
759 const uint32_t surf_index = render_target_start;
760 brw->vtbl.emit_null_surface_state(
761 brw, fb->Width, fb->Height, fb->Visual.samples,
762 &surf_offset[surf_index]);
763 }
764 }
765
766 static void
767 update_renderbuffer_surfaces(struct brw_context *brw)
768 {
769 const struct gl_context *ctx = &brw->ctx;
770
771 /* _NEW_BUFFERS | _NEW_COLOR */
772 const struct gl_framebuffer *fb = ctx->DrawBuffer;
773 brw_update_renderbuffer_surfaces(
774 brw, fb,
775 brw->wm.prog_data->binding_table.render_target_start,
776 brw->wm.base.surf_offset);
777 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
778 }
779
780 const struct brw_tracked_state brw_renderbuffer_surfaces = {
781 .dirty = {
782 .mesa = _NEW_BUFFERS |
783 _NEW_COLOR,
784 .brw = BRW_NEW_BATCH |
785 BRW_NEW_FS_PROG_DATA,
786 },
787 .emit = update_renderbuffer_surfaces,
788 };
789
790 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
791 .dirty = {
792 .mesa = _NEW_BUFFERS,
793 .brw = BRW_NEW_BATCH,
794 },
795 .emit = update_renderbuffer_surfaces,
796 };
797
798
799 static void
800 update_stage_texture_surfaces(struct brw_context *brw,
801 const struct gl_program *prog,
802 struct brw_stage_state *stage_state,
803 bool for_gather)
804 {
805 if (!prog)
806 return;
807
808 struct gl_context *ctx = &brw->ctx;
809
810 uint32_t *surf_offset = stage_state->surf_offset;
811
812 /* BRW_NEW_*_PROG_DATA */
813 if (for_gather)
814 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
815 else
816 surf_offset += stage_state->prog_data->binding_table.texture_start;
817
818 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
819 for (unsigned s = 0; s < num_samplers; s++) {
820 surf_offset[s] = 0;
821
822 if (prog->SamplersUsed & (1 << s)) {
823 const unsigned unit = prog->SamplerUnits[s];
824
825 /* _NEW_TEXTURE */
826 if (ctx->Texture.Unit[unit]._Current) {
827 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
828 }
829 }
830 }
831 }
832
833
834 /**
835 * Construct SURFACE_STATE objects for enabled textures.
836 */
837 static void
838 brw_update_texture_surfaces(struct brw_context *brw)
839 {
840 /* BRW_NEW_VERTEX_PROGRAM */
841 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
842
843 /* BRW_NEW_GEOMETRY_PROGRAM */
844 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
845
846 /* BRW_NEW_FRAGMENT_PROGRAM */
847 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
848
849 /* _NEW_TEXTURE */
850 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
851 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
852 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
853
854 /* emit alternate set of surface state for gather. this
855 * allows the surface format to be overriden for only the
856 * gather4 messages. */
857 if (brw->gen < 8) {
858 if (vs && vs->UsesGather)
859 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
860 if (gs && gs->UsesGather)
861 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
862 if (fs && fs->UsesGather)
863 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
864 }
865
866 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
867 }
868
869 const struct brw_tracked_state brw_texture_surfaces = {
870 .dirty = {
871 .mesa = _NEW_TEXTURE,
872 .brw = BRW_NEW_BATCH |
873 BRW_NEW_FRAGMENT_PROGRAM |
874 BRW_NEW_FS_PROG_DATA |
875 BRW_NEW_GEOMETRY_PROGRAM |
876 BRW_NEW_GS_PROG_DATA |
877 BRW_NEW_TEXTURE_BUFFER |
878 BRW_NEW_VERTEX_PROGRAM |
879 BRW_NEW_VS_PROG_DATA,
880 },
881 .emit = brw_update_texture_surfaces,
882 };
883
884 void
885 brw_upload_ubo_surfaces(struct brw_context *brw,
886 struct gl_shader *shader,
887 struct brw_stage_state *stage_state,
888 struct brw_stage_prog_data *prog_data,
889 bool dword_pitch)
890 {
891 struct gl_context *ctx = &brw->ctx;
892
893 if (!shader)
894 return;
895
896 uint32_t *surf_offsets =
897 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
898
899 for (int i = 0; i < shader->NumUniformBlocks; i++) {
900 struct gl_uniform_buffer_binding *binding;
901 struct intel_buffer_object *intel_bo;
902
903 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
904 intel_bo = intel_buffer_object(binding->BufferObject);
905 drm_intel_bo *bo =
906 intel_bufferobj_buffer(brw, intel_bo,
907 binding->Offset,
908 binding->BufferObject->Size - binding->Offset);
909
910 /* Because behavior for referencing outside of the binding's size in the
911 * glBindBufferRange case is undefined, we can just bind the whole buffer
912 * glBindBufferBase wants and be a correct implementation.
913 */
914 brw_create_constant_surface(brw, bo, binding->Offset,
915 bo->size - binding->Offset,
916 &surf_offsets[i],
917 dword_pitch);
918 }
919
920 if (shader->NumUniformBlocks)
921 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
922 }
923
924 static void
925 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
926 {
927 struct gl_context *ctx = &brw->ctx;
928 /* _NEW_PROGRAM */
929 struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
930
931 if (!prog)
932 return;
933
934 /* BRW_NEW_FS_PROG_DATA */
935 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
936 &brw->wm.base, &brw->wm.prog_data->base, true);
937 }
938
939 const struct brw_tracked_state brw_wm_ubo_surfaces = {
940 .dirty = {
941 .mesa = _NEW_PROGRAM,
942 .brw = BRW_NEW_BATCH |
943 BRW_NEW_FS_PROG_DATA |
944 BRW_NEW_UNIFORM_BUFFER,
945 },
946 .emit = brw_upload_wm_ubo_surfaces,
947 };
948
949 void
950 brw_upload_abo_surfaces(struct brw_context *brw,
951 struct gl_shader_program *prog,
952 struct brw_stage_state *stage_state,
953 struct brw_stage_prog_data *prog_data)
954 {
955 struct gl_context *ctx = &brw->ctx;
956 uint32_t *surf_offsets =
957 &stage_state->surf_offset[prog_data->binding_table.abo_start];
958
959 for (int i = 0; i < prog->NumAtomicBuffers; i++) {
960 struct gl_atomic_buffer_binding *binding =
961 &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
962 struct intel_buffer_object *intel_bo =
963 intel_buffer_object(binding->BufferObject);
964 drm_intel_bo *bo = intel_bufferobj_buffer(
965 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
966
967 brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
968 binding->Offset, BRW_SURFACEFORMAT_RAW,
969 bo->size - binding->Offset, 1, true);
970 }
971
972 if (prog->NumAtomicBuffers)
973 brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
974 }
975
976 static void
977 brw_upload_wm_abo_surfaces(struct brw_context *brw)
978 {
979 struct gl_context *ctx = &brw->ctx;
980 /* _NEW_PROGRAM */
981 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
982
983 if (prog) {
984 /* BRW_NEW_FS_PROG_DATA */
985 brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
986 &brw->wm.prog_data->base);
987 }
988 }
989
990 const struct brw_tracked_state brw_wm_abo_surfaces = {
991 .dirty = {
992 .mesa = _NEW_PROGRAM,
993 .brw = BRW_NEW_ATOMIC_BUFFER |
994 BRW_NEW_BATCH |
995 BRW_NEW_FS_PROG_DATA,
996 },
997 .emit = brw_upload_wm_abo_surfaces,
998 };
999
1000 static void
1001 brw_upload_cs_abo_surfaces(struct brw_context *brw)
1002 {
1003 struct gl_context *ctx = &brw->ctx;
1004 /* _NEW_PROGRAM */
1005 struct gl_shader_program *prog =
1006 ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
1007
1008 if (prog) {
1009 /* BRW_NEW_CS_PROG_DATA */
1010 brw_upload_abo_surfaces(brw, prog, &brw->cs.base,
1011 &brw->cs.prog_data->base);
1012 }
1013 }
1014
1015 const struct brw_tracked_state brw_cs_abo_surfaces = {
1016 .dirty = {
1017 .mesa = _NEW_PROGRAM,
1018 .brw = BRW_NEW_ATOMIC_BUFFER |
1019 BRW_NEW_BATCH |
1020 BRW_NEW_CS_PROG_DATA,
1021 },
1022 .emit = brw_upload_cs_abo_surfaces,
1023 };
1024
1025 void
1026 gen4_init_vtable_surface_functions(struct brw_context *brw)
1027 {
1028 brw->vtbl.update_texture_surface = brw_update_texture_surface;
1029 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
1030 brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
1031 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
1032 }