i965: Split surface format code into a new file (brw_surface_formats.c).
[mesa.git] src/mesa/drivers/dri/i965/brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
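   /* There is no dedicated rectangle surface type on this hardware, so
    * rectangle textures are set up as plain 2D surfaces; the unnormalized
    * coordinate handling happens outside of SURFACE_STATE.
    */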
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 assert(0);
77 return 0;
78 }
79 }
80
81 uint32_t
82 brw_get_surface_tiling_bits(uint32_t tiling)
83 {
84 switch (tiling) {
85 case I915_TILING_X:
86 return BRW_SURFACE_TILED;
87 case I915_TILING_Y:
88 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
89 default:
90 return 0;
91 }
92 }
93
94
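/* Of the parts driven by this file, only gen6 supports multisampling, and it
 * only supports 4x, so any sample count above 1 is reported as 4x here.
 */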
95 uint32_t
96 brw_get_surface_num_multisamples(unsigned num_samples)
97 {
98 if (num_samples > 1)
99 return BRW_SURFACE_MULTISAMPLECOUNT_4;
100 else
101 return BRW_SURFACE_MULTISAMPLECOUNT_1;
102 }
103
104
105 /**
106 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
107 * swizzling.
108 */
109 int
110 brw_get_texture_swizzle(const struct gl_context *ctx,
111 const struct gl_texture_object *t)
112 {
113 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
114
115 int swizzles[SWIZZLE_NIL + 1] = {
116 SWIZZLE_X,
117 SWIZZLE_Y,
118 SWIZZLE_Z,
119 SWIZZLE_W,
120 SWIZZLE_ZERO,
121 SWIZZLE_ONE,
122 SWIZZLE_NIL
123 };
124
125 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
126 img->_BaseFormat == GL_DEPTH_STENCIL) {
127 GLenum depth_mode = t->DepthMode;
128
129 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
130 * with depth component data specified with a sized internal format.
131 * Otherwise, it's left at the old default, GL_LUMINANCE.
132 */
133 if (_mesa_is_gles3(ctx) &&
134 img->InternalFormat != GL_DEPTH_COMPONENT &&
135 img->InternalFormat != GL_DEPTH_STENCIL) {
136 depth_mode = GL_RED;
137 }
138
139 switch (depth_mode) {
140 case GL_ALPHA:
141 swizzles[0] = SWIZZLE_ZERO;
142 swizzles[1] = SWIZZLE_ZERO;
143 swizzles[2] = SWIZZLE_ZERO;
144 swizzles[3] = SWIZZLE_X;
145 break;
146 case GL_LUMINANCE:
147 swizzles[0] = SWIZZLE_X;
148 swizzles[1] = SWIZZLE_X;
149 swizzles[2] = SWIZZLE_X;
150 swizzles[3] = SWIZZLE_ONE;
151 break;
152 case GL_INTENSITY:
153 swizzles[0] = SWIZZLE_X;
154 swizzles[1] = SWIZZLE_X;
155 swizzles[2] = SWIZZLE_X;
156 swizzles[3] = SWIZZLE_X;
157 break;
158 case GL_RED:
159 swizzles[0] = SWIZZLE_X;
160 swizzles[1] = SWIZZLE_ZERO;
161 swizzles[2] = SWIZZLE_ZERO;
162 swizzles[3] = SWIZZLE_ONE;
163 break;
164 }
165 }
166
167 /* If the texture's format is alpha-only, force R, G, and B to
168 * 0.0. Similarly, if the texture's format has no alpha channel,
169  * force the alpha value read to 1.0. This allows the
170 * implementation to use an RGBA texture for any of these formats
171 * without leaking any unexpected values.
172 */
173 switch (img->_BaseFormat) {
174 case GL_ALPHA:
175 swizzles[0] = SWIZZLE_ZERO;
176 swizzles[1] = SWIZZLE_ZERO;
177 swizzles[2] = SWIZZLE_ZERO;
178 break;
179 case GL_RED:
180 case GL_RG:
181 case GL_RGB:
182 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
183 swizzles[3] = SWIZZLE_ONE;
184 break;
185 }
186
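   /* Compose the depth-mode/base-format swizzle built above with the
    * application's EXT_texture_swizzle state.  For example, with
    * DEPTH_TEXTURE_MODE = GL_RED the table is (X, 0, 0, 1), so an
    * application swizzle of (A, A, A, R) yields (1, 1, 1, X).
    */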
187 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
188 swizzles[GET_SWZ(t->_Swizzle, 1)],
189 swizzles[GET_SWZ(t->_Swizzle, 2)],
190 swizzles[GET_SWZ(t->_Swizzle, 3)]);
191 }
192
193
194 static void
195 brw_update_buffer_texture_surface(struct gl_context *ctx,
196 unsigned unit,
197 uint32_t *binding_table,
198 unsigned surf_index)
199 {
200 struct brw_context *brw = brw_context(ctx);
201 struct intel_context *intel = &brw->intel;
202 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
203 uint32_t *surf;
204 struct intel_buffer_object *intel_obj =
205 intel_buffer_object(tObj->BufferObject);
206 drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
207 gl_format format = tObj->_BufferObjectFormat;
208 uint32_t brw_format = brw_format_for_mesa_format(format);
209 int texel_size = _mesa_get_format_bytes(format);
210
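   /* brw_format_for_mesa_format() returns 0 for unsupported formats, but 0 is
    * also the valid encoding of BRW_SURFACEFORMAT_R32G32B32A32_FLOAT (the
    * format MESA_FORMAT_RGBA_FLOAT32 maps to), hence the extra check below.
    */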
211 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
212 _mesa_problem(NULL, "bad format %s for texture buffer\n",
213 _mesa_get_format_name(format));
214 }
215
216 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
217 6 * 4, 32, &binding_table[surf_index]);
218
219 surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
220 	      (brw_format << BRW_SURFACE_FORMAT_SHIFT));
221
222 if (intel->gen >= 6)
223 surf[0] |= BRW_SURFACE_RC_READ_WRITE;
224
225 if (bo) {
226 surf[1] = bo->offset; /* reloc */
227
228 /* Emit relocation to surface contents. */
229 drm_intel_bo_emit_reloc(brw->intel.batch.bo,
230 binding_table[surf_index] + 4,
231 bo, 0, I915_GEM_DOMAIN_SAMPLER, 0);
232
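      /* Buffer surfaces describe their size in elements: the element count is
       * split across the 7-bit Width, 13-bit Height and 7-bit Depth fields
       * (low bits to high bits), and the element size goes in Pitch.
       */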
233 int w = intel_obj->Base.Size / texel_size;
234 surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
235 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
236 surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
237 (texel_size - 1) << BRW_SURFACE_PITCH_SHIFT);
238 } else {
239 surf[1] = 0;
240 surf[2] = 0;
241 surf[3] = 0;
242 }
243
244 surf[4] = 0;
245 surf[5] = 0;
246 }
247
248 static void
249 brw_update_texture_surface(struct gl_context *ctx,
250 unsigned unit,
251 uint32_t *binding_table,
252 unsigned surf_index)
253 {
254 struct intel_context *intel = intel_context(ctx);
255 struct brw_context *brw = brw_context(ctx);
256 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
257 struct intel_texture_object *intelObj = intel_texture_object(tObj);
258 struct intel_mipmap_tree *mt = intelObj->mt;
259 struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
260 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
261 uint32_t *surf;
262 uint32_t tile_x, tile_y;
263
264 if (tObj->Target == GL_TEXTURE_BUFFER) {
265 brw_update_buffer_texture_surface(ctx, unit, binding_table, surf_index);
266 return;
267 }
268
269 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
270 6 * 4, 32, &binding_table[surf_index]);
271
272 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
273 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
274 BRW_SURFACE_CUBEFACE_ENABLES |
275 (translate_tex_format(intel,
276 mt->format,
277 tObj->DepthMode,
278 sampler->sRGBDecode) <<
279 BRW_SURFACE_FORMAT_SHIFT));
280
281 surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
282 surf[1] += intel_miptree_get_tile_offsets(intelObj->mt, firstImage->Level, 0,
283 &tile_x, &tile_y);
284
285 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
286 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
287 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
288
289 surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
290 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
291 (intelObj->mt->region->pitch - 1) <<
292 BRW_SURFACE_PITCH_SHIFT);
293
294 surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);
295
296 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
297 /* Note that these offset fields only hold multiples of 4 (X) and 2 (Y),
298 * so tile offsets with the low bits set can't be represented.
299 */
300 assert(tile_x % 4 == 0);
301 assert(tile_y % 2 == 0);
302 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
303 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
304 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
305
306 /* Emit relocation to surface contents */
307 drm_intel_bo_emit_reloc(brw->intel.batch.bo,
308 binding_table[surf_index] + 4,
309 intelObj->mt->region->bo,
310 surf[1] - intelObj->mt->region->bo->offset,
311 I915_GEM_DOMAIN_SAMPLER, 0);
312 }
313
314 /**
315 * Create the constant buffer surface. Vertex/fragment shader constants will be
316 * read from this buffer with Data Port Read instructions/messages.
317 */
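/* dword_pitch selects between a tightly packed dword layout (stride 4) and a
 * vec4 layout (stride 16); it only affects the element count and the Pitch
 * field programmed below.
 */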
318 static void
319 brw_create_constant_surface(struct brw_context *brw,
320 drm_intel_bo *bo,
321 uint32_t offset,
322 uint32_t size,
323 uint32_t *out_offset,
324 bool dword_pitch)
325 {
326 struct intel_context *intel = &brw->intel;
327 uint32_t stride = dword_pitch ? 4 : 16;
328 uint32_t elements = ALIGN(size, stride) / stride;
329 const GLint w = elements - 1;
330 uint32_t *surf;
331
332 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
333 6 * 4, 32, out_offset);
334
335 surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
336 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
337 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);
338
339 if (intel->gen >= 6)
340 surf[0] |= BRW_SURFACE_RC_READ_WRITE;
341
342 surf[1] = bo->offset + offset; /* reloc */
343
344 surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
345 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
346
347 surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
348 (stride - 1) << BRW_SURFACE_PITCH_SHIFT);
349
350 surf[4] = 0;
351 surf[5] = 0;
352
353 /* Emit relocation to surface contents. Section 5.1.1 of the gen4
354 * bspec ("Data Cache") says that the data cache does not exist as
355 * a separate cache and is just the sampler cache.
356 */
357 drm_intel_bo_emit_reloc(brw->intel.batch.bo,
358 *out_offset + 4,
359 bo, offset,
360 I915_GEM_DOMAIN_SAMPLER, 0);
361 }
362
363 /**
364 * Set up a binding table entry for use by stream output logic (transform
365 * feedback).
366 *
367 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
368 */
369 void
370 brw_update_sol_surface(struct brw_context *brw,
371 struct gl_buffer_object *buffer_obj,
372 uint32_t *out_offset, unsigned num_vector_components,
373 unsigned stride_dwords, unsigned offset_dwords)
374 {
375 struct intel_context *intel = &brw->intel;
376 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
377 drm_intel_bo *bo =
378 intel_bufferobj_buffer(intel, intel_bo, INTEL_WRITE_PART);
379 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
380 out_offset);
381 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
382 uint32_t offset_bytes = 4 * offset_dwords;
383 size_t size_dwords = buffer_obj->Size / 4;
384 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
385
386 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
387 * too big to map using a single binding table entry?
388 */
389 assert((size_dwords - offset_dwords) / stride_dwords
390 <= BRW_MAX_NUM_BUFFER_ENTRIES);
391
392 if (size_dwords > offset_dwords + num_vector_components) {
393 /* There is room for at least 1 transform feedback output in the buffer.
394 * Compute the number of additional transform feedback outputs the
395 * buffer has room for.
396 */
397 buffer_size_minus_1 =
398 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
399 } else {
400 /* There isn't even room for a single transform feedback output in the
401 * buffer. We can't configure the binding table entry to prevent output
402 * entirely; we'll have to rely on the geometry shader to detect
403 * overflow. But to minimize the damage in case of a bug, set up the
404 * binding table entry to just allow a single output.
405 */
406 buffer_size_minus_1 = 0;
407 }
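   /* As with other buffer surfaces, the entry count is split across the 7-bit
    * Width, 13-bit Height and 7-bit Depth fields (27 bits total, which is
    * presumably where the BRW_MAX_NUM_BUFFER_ENTRIES limit comes from).
    */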
408 width = buffer_size_minus_1 & 0x7f;
409 height = (buffer_size_minus_1 & 0xfff80) >> 7;
410 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
411
412 switch (num_vector_components) {
413 case 1:
414 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
415 break;
416 case 2:
417 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
418 break;
419 case 3:
420 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
421 break;
422 case 4:
423 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
424 break;
425 default:
426 assert(!"Invalid vector size for transform feedback output");
427 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
428 break;
429 }
430
431 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
432 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
433 surface_format << BRW_SURFACE_FORMAT_SHIFT |
434 BRW_SURFACE_RC_READ_WRITE;
435 surf[1] = bo->offset + offset_bytes; /* reloc */
436 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
437 height << BRW_SURFACE_HEIGHT_SHIFT);
438 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
439 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
440 surf[4] = 0;
441 surf[5] = 0;
442
443 /* Emit relocation to surface contents. */
444 drm_intel_bo_emit_reloc(brw->intel.batch.bo,
445 *out_offset + 4,
446 bo, offset_bytes,
447 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
448 }
449
450 /* Creates a new WM constant buffer reflecting the current fragment program's
451 * constants, if needed by the fragment program.
452 *
453 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
454 * state atom.
455 */
456 static void
457 brw_upload_wm_pull_constants(struct brw_context *brw)
458 {
459 struct gl_context *ctx = &brw->intel.ctx;
460 struct intel_context *intel = &brw->intel;
461 /* BRW_NEW_FRAGMENT_PROGRAM */
462 struct brw_fragment_program *fp =
463 (struct brw_fragment_program *) brw->fragment_program;
464 struct gl_program_parameter_list *params = fp->program.Base.Parameters;
465 const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
466 const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
467 float *constants;
468 unsigned int i;
469
470 _mesa_load_state_parameters(ctx, params);
471
472 /* CACHE_NEW_WM_PROG */
473 if (brw->wm.prog_data->nr_pull_params == 0) {
474 if (brw->wm.const_bo) {
475 drm_intel_bo_unreference(brw->wm.const_bo);
476 brw->wm.const_bo = NULL;
477 brw->wm.surf_offset[surf_index] = 0;
478 brw->state.dirty.brw |= BRW_NEW_SURFACES;
479 }
480 return;
481 }
482
483 drm_intel_bo_unreference(brw->wm.const_bo);
484 brw->wm.const_bo = drm_intel_bo_alloc(intel->bufmgr, "WM const bo",
485 size, 64);
486
487 /* _NEW_PROGRAM_CONSTANTS */
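   /* pull_param[] holds pointers into the parameter storage that
    * _mesa_load_state_parameters() refreshed above, so dereferencing each
    * entry copies the current uniform value into the constant buffer.
    */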
488 drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
489 constants = brw->wm.const_bo->virtual;
490 for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
491 constants[i] = *brw->wm.prog_data->pull_param[i];
492 }
493 drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);
494
495 intel->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
496 &brw->wm.surf_offset[surf_index],
497 true);
498
499 brw->state.dirty.brw |= BRW_NEW_SURFACES;
500 }
501
502 const struct brw_tracked_state brw_wm_pull_constants = {
503 .dirty = {
504 .mesa = (_NEW_PROGRAM_CONSTANTS),
505 .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
506 .cache = CACHE_NEW_WM_PROG,
507 },
508 .emit = brw_upload_wm_pull_constants,
509 };
510
511 static void
512 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
513 {
514 /* From the Sandy Bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
515 * Notes):
516 *
517 * A null surface will be used in instances where an actual surface is
518 * not bound. When a write message is generated to a null surface, no
519 * actual surface is written to. When a read message (including any
520 * sampling engine message) is generated to a null surface, the result
521 * is all zeros. Note that a null surface type is allowed to be used
522 * with all messages, even if it is not specifically indicated as
523 * supported. All of the remaining fields in surface state are ignored
524 * for null surfaces, with the following exceptions:
525 *
526 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
527 * depth buffer’s corresponding state for all render target surfaces,
528 * including null.
529 *
530 * - Surface Format must be R8G8B8A8_UNORM.
531 */
532 struct intel_context *intel = &brw->intel;
533 struct gl_context *ctx = &intel->ctx;
534 uint32_t *surf;
535 unsigned surface_type = BRW_SURFACE_NULL;
536 drm_intel_bo *bo = NULL;
537 unsigned pitch_minus_1 = 0;
538 uint32_t multisampling_state = 0;
539
540 /* _NEW_BUFFERS */
541 const struct gl_framebuffer *fb = ctx->DrawBuffer;
542
543 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
544 6 * 4, 32, &brw->wm.surf_offset[unit]);
545
546 if (fb->Visual.samples > 1) {
547 /* On Gen6, null render targets seem to cause GPU hangs when
548 * multisampling. So work around this problem by rendering into a dummy
549 * color buffer.
550 *
551 * To decrease the amount of memory needed by the workaround buffer, we
552 * set its pitch to 128 bytes (the width of a Y tile). This means that
553 * the amount of memory needed for the workaround buffer is
554 * (width_in_tiles + height_in_tiles - 1) tiles.
555 *
556 * Note that since the workaround buffer will be interpreted by the
557 * hardware as an interleaved multisampled buffer, we need to compute
558 * width_in_tiles and height_in_tiles by dividing the width and height
559 * by 16 rather than the normal Y-tile size of 32.
560 */
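      /* For example, a 1920x1080 multisampled framebuffer gives
       * width_in_tiles = 120 and height_in_tiles = 68, so the workaround
       * buffer is (120 + 68 - 1) * 4096 bytes, roughly 748 kB.
       */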
561 unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
562 unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
563 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
564 brw_get_scratch_bo(intel, &brw->wm.multisampled_null_render_target_bo,
565 size_needed);
566 bo = brw->wm.multisampled_null_render_target_bo;
567 surface_type = BRW_SURFACE_2D;
568 pitch_minus_1 = 127;
569 multisampling_state =
570 brw_get_surface_num_multisamples(fb->Visual.samples);
571 }
572
573 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
574 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
575 if (intel->gen < 6) {
576 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
577 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
578 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
579 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
580 }
581 surf[1] = bo ? bo->offset : 0;
582 surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
583 (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
584
585 /* From the Sandy Bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
586 * Notes):
587 *
588 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
589 */
590 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
591 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
592 surf[4] = multisampling_state;
593 surf[5] = 0;
594
595 if (bo) {
596 drm_intel_bo_emit_reloc(brw->intel.batch.bo,
597 brw->wm.surf_offset[unit] + 4,
598 bo, 0,
599 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
600 }
601 }
602
603 /**
604 * Sets up a surface state structure to point at the given region.
605 * While it is only used for the front/back buffer currently, it should be
606 * usable for further buffers when doing ARB_draw_buffers support.
607 */
608 static void
609 brw_update_renderbuffer_surface(struct brw_context *brw,
610 struct gl_renderbuffer *rb,
611 bool layered,
612 unsigned int unit)
613 {
614 struct intel_context *intel = &brw->intel;
615 struct gl_context *ctx = &intel->ctx;
616 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
617 struct intel_mipmap_tree *mt = irb->mt;
618 struct intel_region *region;
619 uint32_t *surf;
620 uint32_t tile_x, tile_y;
621 uint32_t format = 0;
622 /* _NEW_BUFFERS */
623 gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
624
625 assert(!layered);
626
627 if (rb->TexImage && !brw->has_surface_tile_offset) {
628 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
629
630 if (tile_x != 0 || tile_y != 0) {
631 /* Original gen4 hardware couldn't draw to a non-tile-aligned
632 * destination in a miptree unless you actually setup your renderbuffer
633 * as a miptree and used the fragile lod/array_index/etc. controls to
634 * select the image. So, instead, we just make a new single-level
635 * miptree and render into that.
636 */
637 intel_renderbuffer_move_to_temp(intel, irb, false);
638 mt = irb->mt;
639 }
640 }
641
642 intel_miptree_used_for_rendering(irb->mt);
643
644 region = irb->mt->region;
645
646 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
647 6 * 4, 32, &brw->wm.surf_offset[unit]);
648
649 format = brw->render_target_format[rb_format];
650 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
651 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
652 __FUNCTION__, _mesa_get_format_name(rb_format));
653 }
654
655 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
656 format << BRW_SURFACE_FORMAT_SHIFT);
657
658 /* reloc */
659 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
660 region->bo->offset);
661
662 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
663 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
664
665 surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
666 (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
667
668 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
669
670 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
671 /* Note that these offset fields only hold multiples of 4 (X) and 2 (Y),
672 * so tile offsets with the low bits set can't be represented.
673 */
674 assert(tile_x % 4 == 0);
675 assert(tile_y % 2 == 0);
676 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
677 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
678 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
679
680 if (intel->gen < 6) {
681 /* _NEW_COLOR */
682 if (!ctx->Color.ColorLogicOpEnabled &&
683 (ctx->Color.BlendEnabled & (1 << unit)))
684 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
685
686 if (!ctx->Color.ColorMask[unit][0])
687 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
688 if (!ctx->Color.ColorMask[unit][1])
689 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
690 if (!ctx->Color.ColorMask[unit][2])
691 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
692
693 /* Disable writes to the alpha component when the renderbuffer is XRGB,
694 * since the destination has no meaningful alpha channel.
695 */
696 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
697 !ctx->Color.ColorMask[unit][3]) {
698 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
699 }
700 }
701
702 drm_intel_bo_emit_reloc(brw->intel.batch.bo,
703 brw->wm.surf_offset[unit] + 4,
704 region->bo,
705 surf[1] - region->bo->offset,
706 I915_GEM_DOMAIN_RENDER,
707 I915_GEM_DOMAIN_RENDER);
708 }
709
710 /**
711 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
712 */
713 static void
714 brw_update_renderbuffer_surfaces(struct brw_context *brw)
715 {
716 struct intel_context *intel = &brw->intel;
717 struct gl_context *ctx = &brw->intel.ctx;
718 GLuint i;
719
720 /* _NEW_BUFFERS | _NEW_COLOR */
721 /* Update surfaces for drawing buffers */
722 if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
723 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
724 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
725 intel->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
726 ctx->DrawBuffer->Layered, i);
727 } else {
728 intel->vtbl.update_null_renderbuffer_surface(brw, i);
729 }
730 }
731 } else {
732 intel->vtbl.update_null_renderbuffer_surface(brw, 0);
733 }
734 brw->state.dirty.brw |= BRW_NEW_SURFACES;
735 }
736
737 const struct brw_tracked_state brw_renderbuffer_surfaces = {
738 .dirty = {
739 .mesa = (_NEW_COLOR |
740 _NEW_BUFFERS),
741 .brw = BRW_NEW_BATCH,
742 .cache = 0
743 },
744 .emit = brw_update_renderbuffer_surfaces,
745 };
746
747 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
748 .dirty = {
749 .mesa = _NEW_BUFFERS,
750 .brw = BRW_NEW_BATCH,
751 .cache = 0
752 },
753 .emit = brw_update_renderbuffer_surfaces,
754 };
755
756 /**
757 * Construct SURFACE_STATE objects for enabled textures.
758 */
759 static void
760 brw_update_texture_surfaces(struct brw_context *brw)
761 {
762 struct intel_context *intel = &brw->intel;
763 struct gl_context *ctx = &intel->ctx;
764
765 /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM:
766 * Unfortunately, we're stuck using the gl_program structs until the
767 * ARB_fragment_program front-end gets converted to GLSL IR. These
768 * have the downside that SamplerUnits is split and only contains the
769 * mappings for samplers active in that stage.
770 */
771 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
772 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
773
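   /* _mesa_fls() returns one plus the index of the highest set bit (or 0 if
    * no bits are set), i.e. the number of sampler slots we have to walk.
    */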
774 unsigned num_samplers = _mesa_fls(vs->SamplersUsed | fs->SamplersUsed);
775
776 for (unsigned s = 0; s < num_samplers; s++) {
777 brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(s)] = 0;
778 brw->wm.surf_offset[SURF_INDEX_TEXTURE(s)] = 0;
779
780 if (vs->SamplersUsed & (1 << s)) {
781 const unsigned unit = vs->SamplerUnits[s];
782
783 /* _NEW_TEXTURE */
784 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
785 intel->vtbl.update_texture_surface(ctx, unit,
786 brw->vs.surf_offset,
787 SURF_INDEX_VS_TEXTURE(s));
788 }
789 }
790
791 if (fs->SamplersUsed & (1 << s)) {
792 const unsigned unit = fs->SamplerUnits[s];
793
794 /* _NEW_TEXTURE */
795 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
796 intel->vtbl.update_texture_surface(ctx, unit,
797 brw->wm.surf_offset,
798 SURF_INDEX_TEXTURE(s));
799 }
800 }
801 }
802
803 brw->state.dirty.brw |= BRW_NEW_SURFACES;
804 }
805
806 const struct brw_tracked_state brw_texture_surfaces = {
807 .dirty = {
808 .mesa = _NEW_TEXTURE,
809 .brw = BRW_NEW_BATCH |
810 BRW_NEW_VERTEX_PROGRAM |
811 BRW_NEW_FRAGMENT_PROGRAM,
812 .cache = 0
813 },
814 .emit = brw_update_texture_surfaces,
815 };
816
817 void
818 brw_upload_ubo_surfaces(struct brw_context *brw,
819 struct gl_shader *shader,
820 uint32_t *surf_offsets)
821 {
822 struct gl_context *ctx = &brw->intel.ctx;
823 struct intel_context *intel = &brw->intel;
824
825 if (!shader)
826 return;
827
828 for (int i = 0; i < shader->NumUniformBlocks; i++) {
829 struct gl_uniform_buffer_binding *binding;
830 struct intel_buffer_object *intel_bo;
831
832 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
833 intel_bo = intel_buffer_object(binding->BufferObject);
834 drm_intel_bo *bo = intel_bufferobj_buffer(intel, intel_bo, INTEL_READ);
835
836 /* Because behavior when referencing outside of the binding's size in the
837 * glBindBufferRange case is undefined, we can just bind the whole buffer
838 * (which is what glBindBufferBase wants anyway) and be a correct implementation.
839 */
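      /* The fragment shader backend reads pull constants with a dword pitch
       * (see brw_upload_wm_pull_constants above), while the other stages use
       * the vec4 layout, hence the GL_FRAGMENT_SHADER check here.
       */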
840 intel->vtbl.create_constant_surface(brw, bo, binding->Offset,
841 bo->size - binding->Offset,
842 &surf_offsets[i],
843 shader->Type == GL_FRAGMENT_SHADER);
844 }
845
846 if (shader->NumUniformBlocks)
847 brw->state.dirty.brw |= BRW_NEW_SURFACES;
848 }
849
850 static void
851 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
852 {
853 struct gl_context *ctx = &brw->intel.ctx;
854 /* _NEW_PROGRAM */
855 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
856
857 if (!prog)
858 return;
859
860 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
861 &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)]);
862 }
863
864 const struct brw_tracked_state brw_wm_ubo_surfaces = {
865 .dirty = {
866 .mesa = _NEW_PROGRAM,
867 .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
868 .cache = 0,
869 },
870 .emit = brw_upload_wm_ubo_surfaces,
871 };
872
873 /**
874 * Constructs the binding table for the WM surface state, which maps unit
875 * numbers to surface state objects.
876 */
877 static void
878 brw_upload_wm_binding_table(struct brw_context *brw)
879 {
880 uint32_t *bind;
881 int i;
882
883 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
884 gen7_create_shader_time_surface(brw, &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
885 }
886
887 /* Might want to calculate nr_surfaces first, to avoid taking up so much
888 * space for the binding table.
889 */
890 bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
891 sizeof(uint32_t) * BRW_MAX_WM_SURFACES,
892 32, &brw->wm.bind_bo_offset);
893
894 /* BRW_NEW_SURFACES */
895 for (i = 0; i < BRW_MAX_WM_SURFACES; i++) {
896 bind[i] = brw->wm.surf_offset[i];
897 }
898
899 brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
900 }
901
902 const struct brw_tracked_state brw_wm_binding_table = {
903 .dirty = {
904 .mesa = 0,
905 .brw = (BRW_NEW_BATCH |
906 BRW_NEW_SURFACES),
907 .cache = 0
908 },
909 .emit = brw_upload_wm_binding_table,
910 };
911
912 void
913 gen4_init_vtable_surface_functions(struct brw_context *brw)
914 {
915 struct intel_context *intel = &brw->intel;
916
917 intel->vtbl.update_texture_surface = brw_update_texture_surface;
918 intel->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
919 intel->vtbl.update_null_renderbuffer_surface =
920 brw_update_null_renderbuffer_surface;
921 intel->vtbl.create_constant_surface = brw_create_constant_surface;
922 }