i965: Fix undefined value usage in ABO setup.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_wm_surface_state.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/context.h"
34 #include "main/blend.h"
35 #include "main/mtypes.h"
36 #include "main/samplerobj.h"
37 #include "program/prog_parameter.h"
38
39 #include "intel_mipmap_tree.h"
40 #include "intel_batchbuffer.h"
41 #include "intel_tex.h"
42 #include "intel_fbo.h"
43 #include "intel_buffer_objects.h"
44
45 #include "brw_context.h"
46 #include "brw_state.h"
47 #include "brw_defines.h"
48 #include "brw_wm.h"
49
50 GLuint
51 translate_tex_target(GLenum target)
52 {
53 switch (target) {
54 case GL_TEXTURE_1D:
55 case GL_TEXTURE_1D_ARRAY_EXT:
56 return BRW_SURFACE_1D;
57
58 case GL_TEXTURE_RECTANGLE_NV:
59 return BRW_SURFACE_2D;
60
61 case GL_TEXTURE_2D:
62 case GL_TEXTURE_2D_ARRAY_EXT:
63 case GL_TEXTURE_EXTERNAL_OES:
64 case GL_TEXTURE_2D_MULTISAMPLE:
65 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
66 return BRW_SURFACE_2D;
67
68 case GL_TEXTURE_3D:
69 return BRW_SURFACE_3D;
70
71 case GL_TEXTURE_CUBE_MAP:
72 case GL_TEXTURE_CUBE_MAP_ARRAY:
73 return BRW_SURFACE_CUBE;
74
75 default:
76 assert(0);
77 return 0;
78 }
79 }
80
81 uint32_t
82 brw_get_surface_tiling_bits(uint32_t tiling)
83 {
84 switch (tiling) {
85 case I915_TILING_X:
86 return BRW_SURFACE_TILED;
87 case I915_TILING_Y:
88 return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
89 default:
90 return 0;
91 }
92 }
93
94
95 uint32_t
96 brw_get_surface_num_multisamples(unsigned num_samples)
97 {
98 if (num_samples > 1)
99 return BRW_SURFACE_MULTISAMPLECOUNT_4;
100 else
101 return BRW_SURFACE_MULTISAMPLECOUNT_1;
102 }
103
104
105 /**
106 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
107 * swizzling.
108 */
109 int
110 brw_get_texture_swizzle(const struct gl_context *ctx,
111 const struct gl_texture_object *t)
112 {
113 const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
114
115 int swizzles[SWIZZLE_NIL + 1] = {
116 SWIZZLE_X,
117 SWIZZLE_Y,
118 SWIZZLE_Z,
119 SWIZZLE_W,
120 SWIZZLE_ZERO,
121 SWIZZLE_ONE,
122 SWIZZLE_NIL
123 };
124
125 if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
126 img->_BaseFormat == GL_DEPTH_STENCIL) {
127 GLenum depth_mode = t->DepthMode;
128
129 /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
130 * with depth component data specified with a sized internal format.
131 * Otherwise, it's left at the old default, GL_LUMINANCE.
132 */
133 if (_mesa_is_gles3(ctx) &&
134 img->InternalFormat != GL_DEPTH_COMPONENT &&
135 img->InternalFormat != GL_DEPTH_STENCIL) {
136 depth_mode = GL_RED;
137 }
138
139 switch (depth_mode) {
140 case GL_ALPHA:
141 swizzles[0] = SWIZZLE_ZERO;
142 swizzles[1] = SWIZZLE_ZERO;
143 swizzles[2] = SWIZZLE_ZERO;
144 swizzles[3] = SWIZZLE_X;
145 break;
146 case GL_LUMINANCE:
147 swizzles[0] = SWIZZLE_X;
148 swizzles[1] = SWIZZLE_X;
149 swizzles[2] = SWIZZLE_X;
150 swizzles[3] = SWIZZLE_ONE;
151 break;
152 case GL_INTENSITY:
153 swizzles[0] = SWIZZLE_X;
154 swizzles[1] = SWIZZLE_X;
155 swizzles[2] = SWIZZLE_X;
156 swizzles[3] = SWIZZLE_X;
157 break;
158 case GL_RED:
159 swizzles[0] = SWIZZLE_X;
160 swizzles[1] = SWIZZLE_ZERO;
161 swizzles[2] = SWIZZLE_ZERO;
162 swizzles[3] = SWIZZLE_ONE;
163 break;
164 }
165 }
166
167 /* If the texture's format is alpha-only, force R, G, and B to
168 * 0.0. Similarly, if the texture's format has no alpha channel,
169 * force the alpha value read to 1.0. This allows for the
170 * implementation to use an RGBA texture for any of these formats
171 * without leaking any unexpected values.
172 */
173 switch (img->_BaseFormat) {
174 case GL_ALPHA:
175 swizzles[0] = SWIZZLE_ZERO;
176 swizzles[1] = SWIZZLE_ZERO;
177 swizzles[2] = SWIZZLE_ZERO;
178 break;
179 case GL_RED:
180 case GL_RG:
181 case GL_RGB:
182 if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
183 swizzles[3] = SWIZZLE_ONE;
184 break;
185 }
186
187 return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
188 swizzles[GET_SWZ(t->_Swizzle, 1)],
189 swizzles[GET_SWZ(t->_Swizzle, 2)],
190 swizzles[GET_SWZ(t->_Swizzle, 3)]);
191 }
192
193 static void
194 gen4_emit_buffer_surface_state(struct brw_context *brw,
195 uint32_t *out_offset,
196 drm_intel_bo *bo,
197 unsigned buffer_offset,
198 unsigned surface_format,
199 unsigned buffer_size,
200 unsigned pitch,
201 unsigned mocs,
202 bool rw)
203 {
204 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
205 6 * 4, 32, out_offset);
206 memset(surf, 0, 6 * 4);
207
208 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
209 surface_format << BRW_SURFACE_FORMAT_SHIFT |
210 (brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
211 surf[1] = (bo ? bo->offset : 0) + buffer_offset; /* reloc */
212 surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
213 ((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
214 surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
215 (pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
216
217 /* Emit relocation to surface contents. The 965 PRM, Volume 4, section
218 * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
219 * physical cache. It is mapped in hardware to the sampler cache."
220 */
221 if (bo) {
222 drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
223 bo, buffer_offset,
224 I915_GEM_DOMAIN_SAMPLER,
225 (rw ? I915_GEM_DOMAIN_SAMPLER : 0));
226 }
227 }
228
229 void
230 brw_update_buffer_texture_surface(struct gl_context *ctx,
231 unsigned unit,
232 uint32_t *surf_offset)
233 {
234 struct brw_context *brw = brw_context(ctx);
235 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
236 struct intel_buffer_object *intel_obj =
237 intel_buffer_object(tObj->BufferObject);
238 uint32_t size = tObj->BufferSize;
239 drm_intel_bo *bo = NULL;
240 gl_format format = tObj->_BufferObjectFormat;
241 uint32_t brw_format = brw_format_for_mesa_format(format);
242 int texel_size = _mesa_get_format_bytes(format);
243
244 if (intel_obj) {
245 size = MIN2(size, intel_obj->Base.Size);
246 bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
247 }
248
249 if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
250 _mesa_problem(NULL, "bad format %s for texture buffer\n",
251 _mesa_get_format_name(format));
252 }
253
254 brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
255 tObj->BufferOffset,
256 brw_format,
257 size / texel_size,
258 texel_size,
259 0, /* mocs */
260 false /* rw */);
261 }
262
263 static void
264 brw_update_texture_surface(struct gl_context *ctx,
265 unsigned unit,
266 uint32_t *surf_offset,
267 bool for_gather)
268 {
269 struct brw_context *brw = brw_context(ctx);
270 struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
271 struct intel_texture_object *intelObj = intel_texture_object(tObj);
272 struct intel_mipmap_tree *mt = intelObj->mt;
273 struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
274 uint32_t *surf;
275
276 /* BRW_NEW_UNIFORM_BUFFER */
277 if (tObj->Target == GL_TEXTURE_BUFFER) {
278 brw_update_buffer_texture_surface(ctx, unit, surf_offset);
279 return;
280 }
281
282 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
283 6 * 4, 32, surf_offset);
284
285 (void) for_gather; /* no w/a to apply for this gen */
286
287 surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
288 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
289 BRW_SURFACE_CUBEFACE_ENABLES |
290 (translate_tex_format(brw,
291 mt->format,
292 tObj->DepthMode,
293 sampler->sRGBDecode) <<
294 BRW_SURFACE_FORMAT_SHIFT));
295
296 surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
297
298 surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
299 (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
300 (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
301
302 surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
303 (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
304 (intelObj->mt->region->pitch - 1) <<
305 BRW_SURFACE_PITCH_SHIFT);
306
307 surf[4] = (brw_get_surface_num_multisamples(intelObj->mt->num_samples) |
308 SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
309
310 surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
311
312 /* Emit relocation to surface contents */
313 drm_intel_bo_emit_reloc(brw->batch.bo,
314 *surf_offset + 4,
315 intelObj->mt->region->bo,
316 surf[1] - intelObj->mt->region->bo->offset,
317 I915_GEM_DOMAIN_SAMPLER, 0);
318 }
319
320 /**
321 * Create the constant buffer surface. Vertex/fragment shader constants will be
322 * read from this buffer with Data Port Read instructions/messages.
323 */
324 void
325 brw_create_constant_surface(struct brw_context *brw,
326 drm_intel_bo *bo,
327 uint32_t offset,
328 uint32_t size,
329 uint32_t *out_offset,
330 bool dword_pitch)
331 {
332 uint32_t stride = dword_pitch ? 4 : 16;
333 uint32_t elements = ALIGN(size, stride) / stride;
334
335 brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
336 BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
337 elements, stride, 0, false);
338 }
339
340 /**
341 * Set up a binding table entry for use by stream output logic (transform
342 * feedback).
343 *
344 * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
345 */
346 void
347 brw_update_sol_surface(struct brw_context *brw,
348 struct gl_buffer_object *buffer_obj,
349 uint32_t *out_offset, unsigned num_vector_components,
350 unsigned stride_dwords, unsigned offset_dwords)
351 {
352 struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
353 uint32_t offset_bytes = 4 * offset_dwords;
354 drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
355 offset_bytes,
356 buffer_obj->Size - offset_bytes);
357 uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
358 out_offset);
359 uint32_t pitch_minus_1 = 4*stride_dwords - 1;
360 size_t size_dwords = buffer_obj->Size / 4;
361 uint32_t buffer_size_minus_1, width, height, depth, surface_format;
362
363 /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
364 * too big to map using a single binding table entry?
365 */
366 assert((size_dwords - offset_dwords) / stride_dwords
367 <= BRW_MAX_NUM_BUFFER_ENTRIES);
368
369 if (size_dwords > offset_dwords + num_vector_components) {
370 /* There is room for at least 1 transform feedback output in the buffer.
371 * Compute the number of additional transform feedback outputs the
372 * buffer has room for.
373 */
374 buffer_size_minus_1 =
375 (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
376 } else {
377 /* There isn't even room for a single transform feedback output in the
378 * buffer. We can't configure the binding table entry to prevent output
379 * entirely; we'll have to rely on the geometry shader to detect
380 * overflow. But to minimize the damage in case of a bug, set up the
381 * binding table entry to just allow a single output.
382 */
383 buffer_size_minus_1 = 0;
384 }
385 width = buffer_size_minus_1 & 0x7f;
386 height = (buffer_size_minus_1 & 0xfff80) >> 7;
387 depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
388
389 switch (num_vector_components) {
390 case 1:
391 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
392 break;
393 case 2:
394 surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
395 break;
396 case 3:
397 surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
398 break;
399 case 4:
400 surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
401 break;
402 default:
403 assert(!"Invalid vector size for transform feedback output");
404 surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
405 break;
406 }
407
408 surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
409 BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
410 surface_format << BRW_SURFACE_FORMAT_SHIFT |
411 BRW_SURFACE_RC_READ_WRITE;
412 surf[1] = bo->offset + offset_bytes; /* reloc */
413 surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
414 height << BRW_SURFACE_HEIGHT_SHIFT);
415 surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
416 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
417 surf[4] = 0;
418 surf[5] = 0;
419
420 /* Emit relocation to surface contents. */
421 drm_intel_bo_emit_reloc(brw->batch.bo,
422 *out_offset + 4,
423 bo, offset_bytes,
424 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
425 }
426
427 /* Creates a new WM constant buffer reflecting the current fragment program's
428 * constants, if needed by the fragment program.
429 *
430 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
431 * state atom.
432 */
433 static void
434 brw_upload_wm_pull_constants(struct brw_context *brw)
435 {
436 struct gl_context *ctx = &brw->ctx;
437 /* BRW_NEW_FRAGMENT_PROGRAM */
438 struct brw_fragment_program *fp =
439 (struct brw_fragment_program *) brw->fragment_program;
440 struct gl_program_parameter_list *params = fp->program.Base.Parameters;
441 const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
442 const int surf_index =
443 brw->wm.prog_data->base.binding_table.pull_constants_start;
444 float *constants;
445 unsigned int i;
446
447 _mesa_load_state_parameters(ctx, params);
448
449 /* CACHE_NEW_WM_PROG */
450 if (brw->wm.prog_data->nr_pull_params == 0) {
451 if (brw->wm.base.const_bo) {
452 drm_intel_bo_unreference(brw->wm.base.const_bo);
453 brw->wm.base.const_bo = NULL;
454 brw->wm.base.surf_offset[surf_index] = 0;
455 brw->state.dirty.brw |= BRW_NEW_SURFACES;
456 }
457 return;
458 }
459
460 drm_intel_bo_unreference(brw->wm.base.const_bo);
461 brw->wm.base.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
462 size, 64);
463
464 /* _NEW_PROGRAM_CONSTANTS */
465 drm_intel_gem_bo_map_gtt(brw->wm.base.const_bo);
466 constants = brw->wm.base.const_bo->virtual;
467 for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
468 constants[i] = *brw->wm.prog_data->pull_param[i];
469 }
470 drm_intel_gem_bo_unmap_gtt(brw->wm.base.const_bo);
471
472 brw_create_constant_surface(brw, brw->wm.base.const_bo, 0, size,
473 &brw->wm.base.surf_offset[surf_index],
474 true);
475
476 brw->state.dirty.brw |= BRW_NEW_SURFACES;
477 }
478
479 const struct brw_tracked_state brw_wm_pull_constants = {
480 .dirty = {
481 .mesa = (_NEW_PROGRAM_CONSTANTS),
482 .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
483 .cache = CACHE_NEW_WM_PROG,
484 },
485 .emit = brw_upload_wm_pull_constants,
486 };
487
488 static void
489 brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
490 {
491 /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
492 * Notes):
493 *
494 * A null surface will be used in instances where an actual surface is
495 * not bound. When a write message is generated to a null surface, no
496 * actual surface is written to. When a read message (including any
497 * sampling engine message) is generated to a null surface, the result
498 * is all zeros. Note that a null surface type is allowed to be used
499 * with all messages, even if it is not specificially indicated as
500 * supported. All of the remaining fields in surface state are ignored
501 * for null surfaces, with the following exceptions:
502 *
503 * - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
504 * depth buffer’s corresponding state for all render target surfaces,
505 * including null.
506 *
507 * - Surface Format must be R8G8B8A8_UNORM.
508 */
509 struct gl_context *ctx = &brw->ctx;
510 uint32_t *surf;
511 unsigned surface_type = BRW_SURFACE_NULL;
512 drm_intel_bo *bo = NULL;
513 unsigned pitch_minus_1 = 0;
514 uint32_t multisampling_state = 0;
515 uint32_t surf_index =
516 brw->wm.prog_data->binding_table.render_target_start + unit;
517
518 /* _NEW_BUFFERS */
519 const struct gl_framebuffer *fb = ctx->DrawBuffer;
520
521 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
522 &brw->wm.base.surf_offset[surf_index]);
523
524 if (fb->Visual.samples > 1) {
525 /* On Gen6, null render targets seem to cause GPU hangs when
526 * multisampling. So work around this problem by rendering into dummy
527 * color buffer.
528 *
529 * To decrease the amount of memory needed by the workaround buffer, we
530 * set its pitch to 128 bytes (the width of a Y tile). This means that
531 * the amount of memory needed for the workaround buffer is
532 * (width_in_tiles + height_in_tiles - 1) tiles.
533 *
534 * Note that since the workaround buffer will be interpreted by the
535 * hardware as an interleaved multisampled buffer, we need to compute
536 * width_in_tiles and height_in_tiles by dividing the width and height
537 * by 16 rather than the normal Y-tile size of 32.
538 */
539 unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
540 unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
541 unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
542 brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
543 size_needed);
544 bo = brw->wm.multisampled_null_render_target_bo;
545 surface_type = BRW_SURFACE_2D;
546 pitch_minus_1 = 127;
547 multisampling_state =
548 brw_get_surface_num_multisamples(fb->Visual.samples);
549 }
550
551 surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
552 BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
553 if (brw->gen < 6) {
554 surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
555 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
556 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
557 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
558 }
559 surf[1] = bo ? bo->offset : 0;
560 surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
561 (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
562
563 /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
564 * Notes):
565 *
566 * If Surface Type is SURFTYPE_NULL, this field must be TRUE
567 */
568 surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
569 pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
570 surf[4] = multisampling_state;
571 surf[5] = 0;
572
573 if (bo) {
574 drm_intel_bo_emit_reloc(brw->batch.bo,
575 brw->wm.base.surf_offset[surf_index] + 4,
576 bo, 0,
577 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
578 }
579 }
580
581 /**
582 * Sets up a surface state structure to point at the given region.
583 * While it is only used for the front/back buffer currently, it should be
584 * usable for further buffers when doing ARB_draw_buffer support.
585 */
586 static void
587 brw_update_renderbuffer_surface(struct brw_context *brw,
588 struct gl_renderbuffer *rb,
589 bool layered,
590 unsigned int unit)
591 {
592 struct gl_context *ctx = &brw->ctx;
593 struct intel_renderbuffer *irb = intel_renderbuffer(rb);
594 struct intel_mipmap_tree *mt = irb->mt;
595 struct intel_region *region;
596 uint32_t *surf;
597 uint32_t tile_x, tile_y;
598 uint32_t format = 0;
599 /* _NEW_BUFFERS */
600 gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
601 uint32_t surf_index =
602 brw->wm.prog_data->binding_table.render_target_start + unit;
603
604 assert(!layered);
605
606 if (rb->TexImage && !brw->has_surface_tile_offset) {
607 intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
608
609 if (tile_x != 0 || tile_y != 0) {
610 /* Original gen4 hardware couldn't draw to a non-tile-aligned
611 * destination in a miptree unless you actually setup your renderbuffer
612 * as a miptree and used the fragile lod/array_index/etc. controls to
613 * select the image. So, instead, we just make a new single-level
614 * miptree and render into that.
615 */
616 intel_renderbuffer_move_to_temp(brw, irb, false);
617 mt = irb->mt;
618 }
619 }
620
621 intel_miptree_used_for_rendering(irb->mt);
622
623 region = irb->mt->region;
624
625 surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
626 &brw->wm.base.surf_offset[surf_index]);
627
628 format = brw->render_target_format[rb_format];
629 if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
630 _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
631 __FUNCTION__, _mesa_get_format_name(rb_format));
632 }
633
634 surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
635 format << BRW_SURFACE_FORMAT_SHIFT);
636
637 /* reloc */
638 surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
639 region->bo->offset);
640
641 surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
642 (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
643
644 surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
645 (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
646
647 surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
648
649 assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
650 /* Note that the low bits of these fields are missing, so
651 * there's the possibility of getting in trouble.
652 */
653 assert(tile_x % 4 == 0);
654 assert(tile_y % 2 == 0);
655 surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
656 (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
657 (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
658
659 if (brw->gen < 6) {
660 /* _NEW_COLOR */
661 if (!ctx->Color.ColorLogicOpEnabled &&
662 (ctx->Color.BlendEnabled & (1 << unit)))
663 surf[0] |= BRW_SURFACE_BLEND_ENABLED;
664
665 if (!ctx->Color.ColorMask[unit][0])
666 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
667 if (!ctx->Color.ColorMask[unit][1])
668 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
669 if (!ctx->Color.ColorMask[unit][2])
670 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
671
672 /* As mentioned above, disable writes to the alpha component when the
673 * renderbuffer is XRGB.
674 */
675 if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
676 !ctx->Color.ColorMask[unit][3]) {
677 surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
678 }
679 }
680
681 drm_intel_bo_emit_reloc(brw->batch.bo,
682 brw->wm.base.surf_offset[surf_index] + 4,
683 region->bo,
684 surf[1] - region->bo->offset,
685 I915_GEM_DOMAIN_RENDER,
686 I915_GEM_DOMAIN_RENDER);
687 }
688
689 /**
690 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
691 */
692 static void
693 brw_update_renderbuffer_surfaces(struct brw_context *brw)
694 {
695 struct gl_context *ctx = &brw->ctx;
696 GLuint i;
697
698 /* _NEW_BUFFERS | _NEW_COLOR */
699 /* Update surfaces for drawing buffers */
700 if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
701 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
702 if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
703 brw->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i],
704 ctx->DrawBuffer->Layered, i);
705 } else {
706 brw->vtbl.update_null_renderbuffer_surface(brw, i);
707 }
708 }
709 } else {
710 brw->vtbl.update_null_renderbuffer_surface(brw, 0);
711 }
712 brw->state.dirty.brw |= BRW_NEW_SURFACES;
713 }
714
715 const struct brw_tracked_state brw_renderbuffer_surfaces = {
716 .dirty = {
717 .mesa = (_NEW_COLOR |
718 _NEW_BUFFERS),
719 .brw = BRW_NEW_BATCH,
720 .cache = 0
721 },
722 .emit = brw_update_renderbuffer_surfaces,
723 };
724
725 const struct brw_tracked_state gen6_renderbuffer_surfaces = {
726 .dirty = {
727 .mesa = _NEW_BUFFERS,
728 .brw = BRW_NEW_BATCH,
729 .cache = 0
730 },
731 .emit = brw_update_renderbuffer_surfaces,
732 };
733
734
735 static void
736 update_stage_texture_surfaces(struct brw_context *brw,
737 const struct gl_program *prog,
738 struct brw_stage_state *stage_state,
739 bool for_gather)
740 {
741 if (!prog)
742 return;
743
744 struct gl_context *ctx = &brw->ctx;
745
746 uint32_t *surf_offset = stage_state->surf_offset;
747 if (for_gather)
748 surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
749 else
750 surf_offset += stage_state->prog_data->binding_table.texture_start;
751
752 unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
753 for (unsigned s = 0; s < num_samplers; s++) {
754 surf_offset[s] = 0;
755
756 if (prog->SamplersUsed & (1 << s)) {
757 const unsigned unit = prog->SamplerUnits[s];
758
759 /* _NEW_TEXTURE */
760 if (ctx->Texture.Unit[unit]._ReallyEnabled) {
761 brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
762 }
763 }
764 }
765 }
766
767
768 /**
769 * Construct SURFACE_STATE objects for enabled textures.
770 */
771 static void
772 brw_update_texture_surfaces(struct brw_context *brw)
773 {
774 /* BRW_NEW_VERTEX_PROGRAM */
775 struct gl_program *vs = (struct gl_program *) brw->vertex_program;
776
777 /* BRW_NEW_GEOMETRY_PROGRAM */
778 struct gl_program *gs = (struct gl_program *) brw->geometry_program;
779
780 /* BRW_NEW_FRAGMENT_PROGRAM */
781 struct gl_program *fs = (struct gl_program *) brw->fragment_program;
782
783 /* _NEW_TEXTURE */
784 update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
785 update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
786 update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
787
788 /* emit alternate set of surface state for gather. this
789 * allows the surface format to be overriden for only the
790 * gather4 messages. */
791 if (vs && vs->UsesGather)
792 update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
793 if (gs && gs->UsesGather)
794 update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
795 if (fs && fs->UsesGather)
796 update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
797
798 brw->state.dirty.brw |= BRW_NEW_SURFACES;
799 }
800
801 const struct brw_tracked_state brw_texture_surfaces = {
802 .dirty = {
803 .mesa = _NEW_TEXTURE,
804 .brw = BRW_NEW_BATCH |
805 BRW_NEW_UNIFORM_BUFFER |
806 BRW_NEW_VERTEX_PROGRAM |
807 BRW_NEW_GEOMETRY_PROGRAM |
808 BRW_NEW_FRAGMENT_PROGRAM,
809 .cache = 0
810 },
811 .emit = brw_update_texture_surfaces,
812 };
813
814 void
815 brw_upload_ubo_surfaces(struct brw_context *brw,
816 struct gl_shader *shader,
817 struct brw_stage_state *stage_state,
818 struct brw_stage_prog_data *prog_data)
819 {
820 struct gl_context *ctx = &brw->ctx;
821
822 if (!shader)
823 return;
824
825 uint32_t *surf_offsets =
826 &stage_state->surf_offset[prog_data->binding_table.ubo_start];
827
828 for (int i = 0; i < shader->NumUniformBlocks; i++) {
829 struct gl_uniform_buffer_binding *binding;
830 struct intel_buffer_object *intel_bo;
831
832 binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
833 intel_bo = intel_buffer_object(binding->BufferObject);
834 drm_intel_bo *bo =
835 intel_bufferobj_buffer(brw, intel_bo,
836 binding->Offset,
837 binding->BufferObject->Size - binding->Offset);
838
839 /* Because behavior for referencing outside of the binding's size in the
840 * glBindBufferRange case is undefined, we can just bind the whole buffer
841 * glBindBufferBase wants and be a correct implementation.
842 */
843 brw_create_constant_surface(brw, bo, binding->Offset,
844 bo->size - binding->Offset,
845 &surf_offsets[i],
846 shader->Type == GL_FRAGMENT_SHADER);
847 }
848
849 if (shader->NumUniformBlocks)
850 brw->state.dirty.brw |= BRW_NEW_SURFACES;
851 }
852
853 static void
854 brw_upload_wm_ubo_surfaces(struct brw_context *brw)
855 {
856 struct gl_context *ctx = &brw->ctx;
857 /* _NEW_PROGRAM */
858 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
859
860 if (!prog)
861 return;
862
863 /* CACHE_NEW_WM_PROG */
864 brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
865 &brw->wm.base, &brw->wm.prog_data->base);
866 }
867
868 const struct brw_tracked_state brw_wm_ubo_surfaces = {
869 .dirty = {
870 .mesa = _NEW_PROGRAM,
871 .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
872 .cache = CACHE_NEW_WM_PROG,
873 },
874 .emit = brw_upload_wm_ubo_surfaces,
875 };
876
877 void
878 brw_upload_abo_surfaces(struct brw_context *brw,
879 struct gl_shader_program *prog,
880 struct brw_stage_state *stage_state,
881 struct brw_stage_prog_data *prog_data)
882 {
883 struct gl_context *ctx = &brw->ctx;
884 uint32_t *surf_offsets =
885 &stage_state->surf_offset[prog_data->binding_table.abo_start];
886
887 for (int i = 0; i < prog->NumAtomicBuffers; i++) {
888 struct gl_atomic_buffer_binding *binding =
889 &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
890 struct intel_buffer_object *intel_bo =
891 intel_buffer_object(binding->BufferObject);
892 drm_intel_bo *bo = intel_bufferobj_buffer(
893 brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
894
895 brw->vtbl.create_raw_surface(brw, bo, binding->Offset,
896 bo->size - binding->Offset,
897 &surf_offsets[i], true);
898 }
899
900 if (prog->NumUniformBlocks)
901 brw->state.dirty.brw |= BRW_NEW_SURFACES;
902 }
903
904 static void
905 brw_upload_wm_abo_surfaces(struct brw_context *brw)
906 {
907 struct gl_context *ctx = &brw->ctx;
908 /* _NEW_PROGRAM */
909 struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
910
911 if (prog) {
912 /* CACHE_NEW_WM_PROG */
913 brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
914 &brw->wm.prog_data->base);
915 }
916 }
917
918 const struct brw_tracked_state brw_wm_abo_surfaces = {
919 .dirty = {
920 .mesa = _NEW_PROGRAM,
921 .brw = BRW_NEW_BATCH | BRW_NEW_ATOMIC_BUFFER,
922 .cache = CACHE_NEW_WM_PROG,
923 },
924 .emit = brw_upload_wm_abo_surfaces,
925 };
926
927 void
928 gen4_init_vtable_surface_functions(struct brw_context *brw)
929 {
930 brw->vtbl.update_texture_surface = brw_update_texture_surface;
931 brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
932 brw->vtbl.update_null_renderbuffer_surface =
933 brw_update_null_renderbuffer_surface;
934 brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
935 }