/* src/mesa/drivers/dri/i965/brw_context.c */
/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/extensions.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
#include "main/framebuffer.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_compiler.h"
#include "brw_draw.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_image.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"
#include "util/debug.h"

/***************************************
 * Mesa's Driver Functions
 ***************************************/

static size_t
brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
                             GLenum internalFormat, int samples[16])
{
   struct brw_context *brw = brw_context(ctx);

   (void) target;

   switch (brw->gen) {
   case 9:
      samples[0] = 16;
      samples[1] = 8;
      samples[2] = 4;
      samples[3] = 2;
      return 4;

   case 8:
      samples[0] = 8;
      samples[1] = 4;
      samples[2] = 2;
      return 3;

   case 7:
      samples[0] = 8;
      samples[1] = 4;
      return 2;

   case 6:
      samples[0] = 4;
      return 1;

   default:
      assert(brw->gen < 6);
      samples[0] = 1;
      return 1;
   }
}
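
/* Illustrative sketch (not driver code): from the application side, the
 * per-format sample counts returned above surface through
 * glGetInternalformativ, e.g.:
 *
 *    GLint num = 0, counts[16];
 *    glGetInternalformativ(GL_RENDERBUFFER, GL_RGBA8,
 *                          GL_NUM_SAMPLE_COUNTS, 1, &num);
 *    glGetInternalformativ(GL_RENDERBUFFER, GL_RGBA8,
 *                          GL_SAMPLES, num, counts);
 *
 * On a Gen8 part this would report {8, 4, 2}, per the switch above.
 */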

const char *const brw_vendor_string = "Intel Open Source Technology Center";

const char *
brw_get_renderer_string(unsigned deviceID)
{
   const char *chipset;
   static char buffer[128];

   switch (deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   (void) driGetRendererString(buffer, chipset, 0);
   return buffer;
}
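
/* A hypothetical resulting GL_RENDERER string would look like
 * "Mesa DRI Intel(R) Haswell Mobile": driGetRendererString() prefixes
 * "Mesa DRI " to the chipset name selected above.
 */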

static const GLubyte *
intel_get_string(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) brw_vendor_string;

   case GL_RENDERER:
      return
         (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);

   default:
      return NULL;
   }
}

static void
intel_viewport(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;

   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (driContext->driDrawablePriv)
         dri2InvalidateDrawable(driContext->driDrawablePriv);
      if (driContext->driReadablePriv)
         dri2InvalidateDrawable(driContext->driReadablePriv);
   }
}

static void
intel_update_framebuffer(struct gl_context *ctx,
                         struct gl_framebuffer *fb)
{
   struct brw_context *brw = brw_context(ctx);

   /* Quantize the derived default number of samples
    */
   fb->DefaultGeometry._NumSamples =
      intel_quantize_num_samples(brw->intelScreen,
                                 fb->DefaultGeometry.NumSamples);
}

static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *tex_obj;
   struct intel_renderbuffer *depth_irb;

   if (ctx->swrast_context)
      _swrast_InvalidateState(ctx, new_state);
   _vbo_InvalidateState(ctx, new_state);

   brw->NewGLState |= new_state;

   _mesa_unlock_context_textures(ctx);

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb)
      intel_renderbuffer_resolve_hiz(brw, depth_irb);

   /* Resolve depth buffer and render cache of each enabled texture. */
   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
      if (!ctx->Texture.Unit[i]._Current)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;
      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
      /* The sampling engine understands lossless compression, so resolving
       * those surfaces should be skipped for performance reasons.
       */
      intel_miptree_resolve_color(brw, tex_obj->mt,
                                  INTEL_MIPTREE_IGNORE_CCS_E);
      brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
   }

   /* Resolve color for each active shader image. */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      const struct gl_shader *shader = ctx->_Shader->CurrentProgram[i] ?
         ctx->_Shader->CurrentProgram[i]->_LinkedShaders[i] : NULL;

      if (unlikely(shader && shader->NumImages)) {
         for (unsigned j = 0; j < shader->NumImages; j++) {
            struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[j]];
            tex_obj = intel_texture_object(u->TexObj);

            if (tex_obj && tex_obj->mt) {
               /* Access to images is implemented using indirect messages
                * against the data port.  Normal render target writes
                * understand lossless compression, but unfortunately the
                * typed/untyped read/write interface doesn't.  Therefore
                * compressed surfaces need to be resolved prior to accessing
                * them.
                */
               intel_miptree_resolve_color(brw, tex_obj->mt, 0);
               brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
            }
         }
      }
   }

   /* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of
    * the single-sampled color renderbuffers because the CCS buffer isn't
    * supported for SRGB formats.  This only matters if FRAMEBUFFER_SRGB is
    * enabled because otherwise the surface state will be programmed with
    * the linear equivalent format anyway.
    */
   if (brw->gen >= 9 && ctx->Color.sRGBEnabled) {
      struct gl_framebuffer *fb = ctx->DrawBuffer;
      for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
         struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];

         if (rb == NULL)
            continue;

         struct intel_renderbuffer *irb = intel_renderbuffer(rb);
         struct intel_mipmap_tree *mt = irb->mt;

         if (mt == NULL ||
             mt->num_samples > 1 ||
             _mesa_get_srgb_format_linear(mt->format) == mt->format)
            continue;

         /* Lossless compression is not supported for SRGB formats, so it
          * should be impossible to get here with such surfaces.
          */
         assert(!intel_miptree_is_lossless_compressed(brw, mt));
         intel_miptree_resolve_color(brw, mt, 0);
         brw_render_cache_set_check_flush(brw, mt->bo);
      }
   }

   _mesa_lock_context_textures(ctx);

   if (new_state & _NEW_BUFFERS) {
      intel_update_framebuffer(ctx, ctx->DrawBuffer);
      if (ctx->DrawBuffer != ctx->ReadBuffer)
         intel_update_framebuffer(ctx, ctx->ReadBuffer);
   }
}

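/* flushFront: select the front-buffer flush hook from whichever loader
 * interface the screen is using, the image loader when available,
 * otherwise the classic DRI2 loader.
 */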
#define flushFront(screen) \
   ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer \
                           : (screen)->dri2.loader->flushFrontBuffer)

static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both the front and back buffers.
          * It is unnecessary to resolve the back, but it harms nothing
          * except performance.  And no one cares about front-buffer
          * render performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}

static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);

   brw->need_flush_throttle = true;
}

static void
intel_finish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      drm_intel_bo_wait_rendering(brw->batch.last_bo);
}

static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   intel_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);
   brw_init_compute_functions(functions);
   if (brw->gen >= 7)
      brw_init_conditional_render_functions(functions);

   functions->QuerySamplesForFormat = brw_query_samples_for_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   functions->GetTransformFeedbackVertexCount =
      brw_get_transform_feedback_vertex_count;
   if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}

static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->intelScreen->compiler;

   const bool stage_exists[MESA_SHADER_STAGES] = {
      [MESA_SHADER_VERTEX] = true,
      [MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
      [MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
      [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
      [MESA_SHADER_FRAGMENT] = true,
      [MESA_SHADER_COMPUTE] =
         (ctx->API == API_OPENGL_CORE &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
         (ctx->API == API_OPENGLES2 &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
         _mesa_extension_override_enables.ARB_compute_shader,
   };
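   /* Note on the MESA_SHADER_COMPUTE entry above: the work-group limits it
    * checks were filled in earlier by brw_initialize_cs_context_constants()
    * (see below), so compute support is keyed off whether the hardware can
    * meet the minimum work-group size each API requires.  The last clause
    * lets a MESA_EXTENSION_OVERRIDE of ARB_compute_shader force it on.
    */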

   unsigned num_stages = 0;
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (stage_exists[i])
         num_stages++;
   }

   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.MaxCombinedShaderOutputResources =
      MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
   ctx->Const.MaxRenderbufferSize = 8192;
   ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
   ctx->Const.MaxTextureMbytes = 1536;
   ctx->Const.MaxTextureRectSize = 1 << 12;
   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
   ctx->Const.StripTextureBorder = true;
   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MaxUniformBlockSize = 65536;

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program_constants *prog = &ctx->Const.Program[i];

      if (!stage_exists[i])
         continue;

      prog->MaxTextureImageUnits = max_samplers;

      prog->MaxUniformBlocks = BRW_MAX_UBO;
      prog->MaxCombinedUniformComponents =
         prog->MaxUniformComponents +
         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;

      prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      prog->MaxAtomicBuffers = BRW_MAX_ABO;
      prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
      prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
   }

   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);

   ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
   ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
   ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;

   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on
    * software limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a
    * value for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
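
   /* Worked example, assuming the values these macros had at the time
    * (BRW_MAX_SOL_BINDINGS == 64, BRW_MAX_SOL_BUFFERS == 4): interleaved
    * mode advertises 64 components, and separate mode advertises
    * 64 / 4 == 16 components per buffer, so even with all four buffers in
    * use at most 64 binding table entries are consumed.
    */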

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;

   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }
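
   /* A sketch of how a user would drive the option above: clamp_max_samples
    * comes from the drirc option cache, so an entry like the following in
    * ~/.drirc (values here are only an example) would cap MSAA at 4x:
    *
    *    <driconf>
    *       <device screen="0" driver="i965">
    *          <application name="Default">
    *             <option name="clamp_max_samples" value="4" />
    *          </application>
    *       </device>
    *    </driconf>
    */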

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;
   ctx->Const.MaxImageSamples = 0;

   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
    * to map indices of a rectangular grid to sample numbers within a pixel.
    * These variables are used by the GL_EXT_framebuffer_multisample_blit_scaled
    * extension implementation.  For more details see the comment above the
    * gen6_set_sample_maps() definition.
    */
   gen6_set_sample_maps(ctx);

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   /* For non-antialiased lines, we have to round the line width to the
    * nearest whole number.  Make sure that we don't advertise a line
    * width that, when rounded, will be beyond the actual hardware
    * maximum.
    */
   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.LowerTessLevel = true;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit two's-complement integers for all
    * integer types.  (The Range values below are log2 of the representable
    * magnitudes, as glGetShaderPrecisionFormat expects: 31 for -2^31 and
    * 30 for 2^31 - 1.)
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt =
      ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt =
      ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt =
      ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt =
      ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
    * but we're not sure how the conversion orders the vertices, which
    * affects the provoking vertex decision.  Always use the last-vertex
    * convention for quad primitives, which works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.VertexID_is_zero_based = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate
    *    flag bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF,
    *    DWord 0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the
    *    register is used later as a vector of Booleans, as only LSB at
    *    each channel contains meaning [sic] data, software should make
    *    sure all higher bits are masked out (e.g. by 'and-ing' an [sic]
    *    0x01 constant)."
    *
    * We select the representation of a true boolean uniform to be ~0, and
    * fix the results of Gen <= 5 CMP instructions with -(result & 1).
    */
   ctx->Const.UniformBooleanTrue = ~0;
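
   /* Worked example of the fixup above: with 32-bit two's-complement
    * arithmetic, -(result & 1) maps the 0/1 CMP results of Gen4/5 onto
    * the 0 / ~0 representation chosen here:
    *
    *    uint32_t b1 = -(1u & 1);   // 0xFFFFFFFF (true)
    *    uint32_t b0 = -(0u & 1);   // 0x00000000 (false)
    */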

   /* From the gen4 PRM, volume 4 page 127:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
    *     specifies the base address of the first element of the surface,
    *     computed in software by adding the surface base address to the
    *     byte offset of the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;

   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
    * that we can safely have the CPU and GPU writing the same SSBO on
    * non-cache-coherent systems (our Atom CPUs).  With UBOs, the GPU never
    * writes, so there's no problem.  For an SSBO, the GPU and the CPU can
    * be updating disjoint regions of the buffer simultaneously, and that
    * will break if the regions overlap the same cacheline.
    */
   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
   ctx->Const.TextureBufferOffsetAlignment = 16;
   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
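
   /* Sketch of the application-side contract for the alignments above
    * (hypothetical caller, not driver code): offsets passed to
    * glBindBufferRange must be rounded up to the queried alignment, e.g.
    *
    *    GLint align = 0;
    *    glGetIntegerv(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT, &align);
    *    GLintptr offset = (user_offset + align - 1) & ~(GLintptr)(align - 1);
    *    glBindBufferRange(GL_SHADER_STORAGE_BUFFER, 0, ssbo, offset, size);
    */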

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i] =
         brw->intelScreen->compiler->glsl_compiler_options[i];
   }

   /* ARB_viewport_array */
   if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

   /* ARB_framebuffer_no_attachments */
   ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth;
   ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight;
   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
   ctx->Const.MaxFramebufferSamples = max_samples;
}

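/* Derive the compute-shader limits from the device's thread count.  For
 * example (numbers illustrative only): with max_threads == 64 and the
 * SIMD16 assumption used for desktop GL below, a work group may contain
 * up to 64 * 16 == 1024 invocations.
 */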
static void
brw_initialize_cs_context_constants(struct brw_context *brw,
                                    unsigned max_threads)
{
   struct gl_context *ctx = &brw->ctx;

   /* For ES, we set these constants based on SIMD8.
    *
    * TODO: Once we can always generate SIMD16, we should update this.
    *
    * For GL, we assume we can generate a SIMD16 program, but this currently
    * is not always true.  This allows us to run more test cases, and will
    * be required based on desktop GL compute shader requirements.
    */
   const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;

   const uint32_t max_invocations = simd_size * max_threads;
   ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
   ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
   ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
}

/**
 * Process driconf (drirc) options, setting appropriate context flags.
 *
 * intelInitExtensions still pokes at optionCache directly, in order to
 * avoid advertising various extensions.  No flags are set, so it makes
 * sense to continue doing that there.
 */
static void
brw_process_driconf_options(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   driOptionCache *options = &brw->optionCache;
   driParseConfigFiles(options, &brw->intelScreen->optionCache,
                       brw->driContext->driScreenPriv->myNum, "i965");

   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
      break;
   }

   if (!driQueryOptionb(options, "hiz")) {
      brw->has_hiz = false;
      /* On gen6, you can only do separate stencil with HiZ. */
      if (brw->gen == 6)
         brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(options, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = true;
   }

   if (driQueryOptionb(options, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = true;
   }

   if (driQueryOptionb(options, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = true;
   }

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   ctx->Const.ForceGLSLExtensionsWarn =
      driQueryOptionb(options, "force_glsl_extensions_warn");

   ctx->Const.DisableGLSLLineContinuations =
      driQueryOptionb(options, "disable_glsl_line_continuations");

   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");

   brw->dual_color_blend_by_location =
      driQueryOptionb(options, "dual_color_blend_by_location");
}

GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
      | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __func__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->is_cherryview = devinfo->is_cherryview;
   brw->is_broxton = devinfo->is_broxton;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
   brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else if (brw->gen >= 6) {
      gen6_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);

   if (INTEL_DEBUG & DEBUG_PERF)
      brw->perf_debug = true;

   brw_initialize_cs_context_constants(brw, devinfo->max_cs_threads);
   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state.  It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context.  Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query
       * objects, and also allows us to reduce how much state we have to
       * emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   if (brw_init_pipe_control(brw, devinfo)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_hs_threads = devinfo->max_hs_threads;
   brw->max_ds_threads = devinfo->max_ds_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   brw->max_cs_threads = devinfo->max_cs_threads;
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
   brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   /* Estimate the size of the mappable aperture into the GTT.  There's an
    * ioctl to get the whole GTT size, but not one to get the mappable
    * subset.  It turns out it's basically always 256MB, though some ancient
    * hardware was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever.  So
    * we would need to divide the GTT size by 2.  Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;
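   /* With the 256 MiB estimate above, this caps single mappable objects at
    * 64 MiB.
    */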

   if (brw->gen == 6)
      brw->urb.gs_present = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;
   brw->sf.viewport_transform_enable = true;

   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;

   brw->use_resource_streamer = screen->has_resource_streamer &&
      (env_var_as_boolean("INTEL_USE_HW_BT", false) ||
       env_var_as_boolean("INTEL_USE_GATHER", false));
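   /* i.e. hardware binding tables stay off unless opted into from the
    * environment; a hypothetical invocation would be
    *
    *    INTEL_USE_HW_BT=1 ./my_gl_app
    *
    * (env_var_as_boolean() accepts the usual true/false spellings).
    */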

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}

void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   /* Dump a final BMP in case the application doesn't call SwapBuffers */
   if (INTEL_DEBUG & DEBUG_AUB) {
      intel_batchbuffer_flush(brw);
      aub_dump_bmp(&brw->ctx);
   }

   _mesa_meta_free(&brw->ctx);
   brw_meta_fast_clear_free(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   drm_intel_bo_unreference(brw->curbe.curbe_bo);
   if (brw->vs.base.scratch_bo)
      drm_intel_bo_unreference(brw->vs.base.scratch_bo);
   if (brw->gs.base.scratch_bo)
      drm_intel_bo_unreference(brw->gs.base.scratch_bo);
   if (brw->wm.base.scratch_bo)
      drm_intel_bo_unreference(brw->wm.base.scratch_bo);

   gen7_reset_hw_bt_pool_offsets(brw);
   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
   brw->hw_bt_pool.bo = NULL;

   drm_intel_gem_context_destroy(brw->hw_ctx);

   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   brw_fini_pipe_control(brw);
   intel_batchbuffer_free(brw);

   drm_intel_bo_unreference(brw->throttle_batch[1]);
   drm_intel_bo_unreference(brw->throttle_batch[0]);
   brw->throttle_batch[1] = NULL;
   brw->throttle_batch[0] = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}

GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}

/**
 * Fixes up the context for GLES2/3 with our default-to-sRGB-capable
 * behavior on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it.  You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used.  Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable.  Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode".  Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created.  So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      if (fb->Attachment[i].Renderbuffer &&
          fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
         fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
      }
   }
}

GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      if (driDrawPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
      }

      if (driReadPriv == NULL) {
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         readFb = driReadPriv->driverPrivate;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format.  We must
       * change the format before the renderbuffer's miptree gets allocated,
       * otherwise the formats of the renderbuffer and its miptree will
       * differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport. */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}

void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled.  However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
      BUFFER_BACK_LEFT,
      BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
      if (rb->mt->num_samples <= 1)
         intel_miptree_resolve_color(brw, rb->mt, 0);
      else
         intel_renderbuffer_downsample(brw, rb);
   }
}

static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}

static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);

static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int i, count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (i = 0; i < count; i++) {
      switch (buffers[i].attachment) {
      case __DRI_BUFFER_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 front buffer";
         break;

      case __DRI_BUFFER_FAKE_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 fake front buffer";
         break;

      case __DRI_BUFFER_BACK_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
         region_name = "dri2 back buffer";
         break;

      case __DRI_BUFFER_DEPTH:
      case __DRI_BUFFER_HIZ:
      case __DRI_BUFFER_DEPTH_STENCIL:
      case __DRI_BUFFER_STENCIL:
      case __DRI_BUFFER_ACCUM:
      default:
         fprintf(stderr,
                 "unhandled buffer attach event, attachment type %d\n",
                 buffers[i].attachment);
         return;
      }

      intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }

}

void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}

/**
 * intel_prepare_render should be called anywhere that current
 * read/drawbuffer state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer.  So
    * mark it as dirty here.
    */
   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;
}

/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer.  Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
   if ((_mesa_is_front_buffer_drawing(fb) ||
        _mesa_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image
       * from the real front buffer to the fake front buffer.  So before
       * doing the query, we need to make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for
       * a front buffer.  If the front buffer we have is a fake front
       * buffer, the X server is going to throw it away when it processes
       * the query.  So before doing the query, make sure all the pending
       * drawing has landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));

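   /* attachments[] holds (attachment, bits-per-pixel) pairs, so the number
    * of logical attachments passed to the loader is i / 2.
    */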
   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
                                                        &drawable->w,
                                                        &drawable->h,
                                                        attachments, i / 2,
                                                        buffer_count,
                                                        drawable->loaderPrivate);
}

/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human readable name, such as "dri2 front buffer",
 *        that is passed to drm_intel_bo_gem_create_from_name().
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   drm_intel_bo *bo;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the
    * first use of a mapping of the buffer involves a bunch of page faulting
    * which is moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
      /* The bo already has a name because the miptree was created by a
       * previous call to intel_process_dri2_buffer().  If a bo already has
       * a name, then drm_intel_bo_flink() is a low-cost getter.  It does
       * not create a new name.
       */
      drm_intel_bo_flink(last_mt->bo, &old_name);
   }

   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                          buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
                                            drawable->w, drawable->h,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   drm_intel_bo_unreference(bo);
}

/**
 * \brief Query the DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer.  Then request the buffers from
 * the image loader.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by the image loader.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 */

static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo)
      return;

   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
                                            buffer->width, buffer->height,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }
}

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   unsigned int format;
   uint32_t buffer_mask = 0;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
   }

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;

   (*screen->image.loader->getBuffers) (drawable,
                                        driGLFormatToImageFormat(format),
                                        &drawable->dri2.stamp,
                                        drawable->loaderPrivate,
                                        buffer_mask,
                                        &images);

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }
   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }
}