src/mesa/drivers/dri/i965/brw_context.c (mesa.git)
1 /*
2 Copyright 2003 VMware, Inc.
3 Copyright (C) Intel Corp. 2006. All Rights Reserved.
4 Intel funded Tungsten Graphics to
5 develop this 3D driver.
6
7 Permission is hereby granted, free of charge, to any person obtaining
8 a copy of this software and associated documentation files (the
9 "Software"), to deal in the Software without restriction, including
10 without limitation the rights to use, copy, modify, merge, publish,
11 distribute, sublicense, and/or sell copies of the Software, and to
12 permit persons to whom the Software is furnished to do so, subject to
13 the following conditions:
14
15 The above copyright notice and this permission notice (including the
16 next paragraph) shall be included in all copies or substantial
17 portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 **********************************************************************/
28 /*
29 * Authors:
30 * Keith Whitwell <keithw@vmware.com>
31 */
32
33
34 #include "compiler/nir/nir.h"
35 #include "main/api_exec.h"
36 #include "main/context.h"
37 #include "main/fbobject.h"
38 #include "main/extensions.h"
39 #include "main/glthread.h"
40 #include "main/imports.h"
41 #include "main/macros.h"
42 #include "main/points.h"
43 #include "main/version.h"
44 #include "main/vtxfmt.h"
45 #include "main/texobj.h"
46 #include "main/framebuffer.h"
47 #include "main/stencil.h"
48 #include "main/state.h"
49
50 #include "vbo/vbo.h"
51
52 #include "drivers/common/driverfuncs.h"
53 #include "drivers/common/meta.h"
54 #include "utils.h"
55
56 #include "brw_context.h"
57 #include "brw_defines.h"
58 #include "brw_blorp.h"
59 #include "brw_draw.h"
60 #include "brw_state.h"
61
62 #include "intel_batchbuffer.h"
63 #include "intel_buffer_objects.h"
64 #include "intel_buffers.h"
65 #include "intel_fbo.h"
66 #include "intel_mipmap_tree.h"
67 #include "intel_pixel.h"
68 #include "intel_image.h"
69 #include "intel_tex.h"
70 #include "intel_tex_obj.h"
71
72 #include "swrast_setup/swrast_setup.h"
73 #include "tnl/tnl.h"
74 #include "tnl/t_pipeline.h"
75 #include "util/ralloc.h"
76 #include "util/debug.h"
77 #include "util/disk_cache.h"
78 #include "isl/isl.h"
79
80 #include "common/gen_defines.h"
81
82 #include "compiler/spirv/nir_spirv.h"
83 /***************************************
84 * Mesa's Driver Functions
85 ***************************************/
86
87 const char *const brw_vendor_string = "Intel Open Source Technology Center";
88
89 static const char *
90 get_bsw_model(const struct intel_screen *screen)
91 {
92 switch (screen->eu_total) {
93 case 16:
94 return "405";
95 case 12:
96 return "400";
97 default:
98       return "   "; /* three spaces, matching the "XXX" placeholder length */
99 }
100 }
101
102 const char *
103 brw_get_renderer_string(const struct intel_screen *screen)
104 {
105 const char *chipset;
106 static char buffer[128];
107 char *bsw = NULL;
108
109 switch (screen->deviceID) {
110 #undef CHIPSET
111 #define CHIPSET(id, symbol, str) case id: chipset = str; break;
112 #include "pci_ids/i965_pci_ids.h"
113 default:
114 chipset = "Unknown Intel Chipset";
115 break;
116 }
117
118 /* Braswell branding is funny, so we have to fix it up here */
119 if (screen->deviceID == 0x22B1) {
120 bsw = strdup(chipset);
121 char *needle = strstr(bsw, "XXX");
122 if (needle) {
123 memcpy(needle, get_bsw_model(screen), 3);
124 chipset = bsw;
125 }
126 }
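   /* The memcpy above overwrites the three-character "XXX" placeholder in the
    * chipset string, in place, with the Braswell model number ("400" or "405")
    * returned by get_bsw_model().
    */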
127
128 (void) driGetRendererString(buffer, chipset, 0);
129 free(bsw);
130 return buffer;
131 }
132
133 static const GLubyte *
134 intel_get_string(struct gl_context * ctx, GLenum name)
135 {
136 const struct brw_context *const brw = brw_context(ctx);
137
138 switch (name) {
139 case GL_VENDOR:
140 return (GLubyte *) brw_vendor_string;
141
142 case GL_RENDERER:
143 return
144 (GLubyte *) brw_get_renderer_string(brw->screen);
145
146 default:
147 return NULL;
148 }
149 }
150
151 static void
152 brw_set_background_context(struct gl_context *ctx,
153 struct util_queue_monitoring *queue_info)
154 {
155 struct brw_context *brw = brw_context(ctx);
156 __DRIcontext *driContext = brw->driContext;
157 __DRIscreen *driScreen = driContext->driScreenPriv;
158 const __DRIbackgroundCallableExtension *backgroundCallable =
159 driScreen->dri2.backgroundCallable;
160
161 /* Note: Mesa will only call this function if we've called
162 * _mesa_enable_multithreading(). We only do that if the loader exposed
163 * the __DRI_BACKGROUND_CALLABLE extension. So we know that
164 * backgroundCallable is not NULL.
165 */
166 backgroundCallable->setBackgroundContext(driContext->loaderPrivate);
167 }
168
169 static void
170 intel_viewport(struct gl_context *ctx)
171 {
172 struct brw_context *brw = brw_context(ctx);
173 __DRIcontext *driContext = brw->driContext;
174
175 if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
176 if (driContext->driDrawablePriv)
177 dri2InvalidateDrawable(driContext->driDrawablePriv);
178 if (driContext->driReadablePriv)
179 dri2InvalidateDrawable(driContext->driReadablePriv);
180 }
181 }
182
183 static void
184 intel_update_framebuffer(struct gl_context *ctx,
185 struct gl_framebuffer *fb)
186 {
187 struct brw_context *brw = brw_context(ctx);
188
189 /* Quantize the derived default number of samples
190 */
191 fb->DefaultGeometry._NumSamples =
192 intel_quantize_num_samples(brw->screen,
193 fb->DefaultGeometry.NumSamples);
194 }
195
196 static void
197 intel_update_state(struct gl_context * ctx)
198 {
199 GLuint new_state = ctx->NewState;
200 struct brw_context *brw = brw_context(ctx);
201
202 if (ctx->swrast_context)
203 _swrast_InvalidateState(ctx, new_state);
204
205 brw->NewGLState |= new_state;
206
207 if (new_state & (_NEW_SCISSOR | _NEW_BUFFERS | _NEW_VIEWPORT))
208 _mesa_update_draw_buffer_bounds(ctx, ctx->DrawBuffer);
209
210 if (new_state & (_NEW_STENCIL | _NEW_BUFFERS)) {
211 brw->stencil_enabled = _mesa_stencil_is_enabled(ctx);
212 brw->stencil_two_sided = _mesa_stencil_is_two_sided(ctx);
213 brw->stencil_write_enabled =
214 _mesa_stencil_is_write_enabled(ctx, brw->stencil_two_sided);
215 }
216
217 if (new_state & _NEW_POLYGON)
218 brw->polygon_front_bit = _mesa_polygon_get_front_bit(ctx);
219
220 if (new_state & _NEW_BUFFERS) {
221 intel_update_framebuffer(ctx, ctx->DrawBuffer);
222 if (ctx->DrawBuffer != ctx->ReadBuffer)
223 intel_update_framebuffer(ctx, ctx->ReadBuffer);
224 }
225 }
226
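/* flushFront(screen) resolves to whichever loader the screen was set up with:
 * the image loader's flushFrontBuffer when present, otherwise the DRI2
 * loader's. Both are called the same way, as (drawable, loaderPrivate), which
 * is what makes this macro usable below.
 */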
227 #define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
228
229 static void
230 intel_flush_front(struct gl_context *ctx)
231 {
232 struct brw_context *brw = brw_context(ctx);
233 __DRIcontext *driContext = brw->driContext;
234 __DRIdrawable *driDrawable = driContext->driDrawablePriv;
235 __DRIscreen *const dri_screen = brw->screen->driScrnPriv;
236
237 if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
238 if (flushFront(dri_screen) && driDrawable &&
239 driDrawable->loaderPrivate) {
240
241 /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
242 *
243 * This potentially resolves both front and back buffer. It
244 * is unnecessary to resolve the back, but harms nothing except
245 * performance. And no one cares about front-buffer render
246 * performance.
247 */
248 intel_resolve_for_dri2_flush(brw, driDrawable);
249 intel_batchbuffer_flush(brw);
250
251 flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);
252
253 /* We set the dirty bit in intel_prepare_render() if we're
254 * front buffer rendering once we get there.
255 */
256 brw->front_buffer_dirty = false;
257 }
258 }
259 }
260
261 static void
262 brw_display_shared_buffer(struct brw_context *brw)
263 {
264 __DRIcontext *dri_context = brw->driContext;
265 __DRIdrawable *dri_drawable = dri_context->driDrawablePriv;
266 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
267 int fence_fd = -1;
268
269 if (!brw->is_shared_buffer_bound)
270 return;
271
272 if (!brw->is_shared_buffer_dirty)
273 return;
274
275 if (brw->screen->has_exec_fence) {
276 /* This function is always called during a flush operation, so there is
277 * no need to flush again here. But we want to provide a fence_fd to the
278 * loader, and a redundant flush is the easiest way to acquire one.
279 */
280 if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd))
281 return;
282 }
283
284 dri_screen->mutableRenderBuffer.loader
285 ->displaySharedBuffer(dri_drawable, fence_fd,
286 dri_drawable->loaderPrivate);
287 brw->is_shared_buffer_dirty = false;
288 }
289
290 static void
291 intel_glFlush(struct gl_context *ctx)
292 {
293 struct brw_context *brw = brw_context(ctx);
294
295 intel_batchbuffer_flush(brw);
296 intel_flush_front(ctx);
297 brw_display_shared_buffer(brw);
298 brw->need_flush_throttle = true;
299 }
300
301 static void
302 intel_finish(struct gl_context * ctx)
303 {
304 struct brw_context *brw = brw_context(ctx);
305
306 intel_glFlush(ctx);
307
308 if (brw->batch.last_bo)
309 brw_bo_wait_rendering(brw->batch.last_bo);
310 }
311
312 static void
313 brw_init_driver_functions(struct brw_context *brw,
314 struct dd_function_table *functions)
315 {
316 const struct gen_device_info *devinfo = &brw->screen->devinfo;
317
318 _mesa_init_driver_functions(functions);
319
320 /* GLX uses DRI2 invalidate events to handle window resizing.
321 * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
322 * which doesn't provide a mechanism for snooping the event queues.
323 *
324 * So EGL still relies on viewport hacks to handle window resizing.
325 * This should go away with DRI3000.
326 */
327 if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
328 functions->Viewport = intel_viewport;
329
330 functions->Flush = intel_glFlush;
331 functions->Finish = intel_finish;
332 functions->GetString = intel_get_string;
333 functions->UpdateState = intel_update_state;
334
335 brw_init_draw_functions(functions);
336 intelInitTextureFuncs(functions);
337 intelInitTextureImageFuncs(functions);
338 intelInitTextureCopyImageFuncs(functions);
339 intelInitCopyImageFuncs(functions);
340 intelInitClearFuncs(functions);
341 intelInitBufferFuncs(functions);
342 intelInitPixelFuncs(functions);
343 intelInitBufferObjectFuncs(functions);
344 brw_init_syncobj_functions(functions);
345 brw_init_object_purgeable_functions(functions);
346
347 brwInitFragProgFuncs( functions );
348 brw_init_common_queryobj_functions(functions);
349 if (devinfo->gen >= 8 || devinfo->is_haswell)
350 hsw_init_queryobj_functions(functions);
351 else if (devinfo->gen >= 6)
352 gen6_init_queryobj_functions(functions);
353 else
354 gen4_init_queryobj_functions(functions);
355 brw_init_compute_functions(functions);
356 brw_init_conditional_render_functions(functions);
357
358 functions->GenerateMipmap = brw_generate_mipmap;
359
360 functions->QueryInternalFormat = brw_query_internal_format;
361
362 functions->NewTransformFeedback = brw_new_transform_feedback;
363 functions->DeleteTransformFeedback = brw_delete_transform_feedback;
364 if (can_do_mi_math_and_lrr(brw->screen)) {
365 functions->BeginTransformFeedback = hsw_begin_transform_feedback;
366 functions->EndTransformFeedback = hsw_end_transform_feedback;
367 functions->PauseTransformFeedback = hsw_pause_transform_feedback;
368 functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
369 } else if (devinfo->gen >= 7) {
370 functions->BeginTransformFeedback = gen7_begin_transform_feedback;
371 functions->EndTransformFeedback = gen7_end_transform_feedback;
372 functions->PauseTransformFeedback = gen7_pause_transform_feedback;
373 functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
374 functions->GetTransformFeedbackVertexCount =
375 brw_get_transform_feedback_vertex_count;
376 } else {
377 functions->BeginTransformFeedback = brw_begin_transform_feedback;
378 functions->EndTransformFeedback = brw_end_transform_feedback;
379 functions->PauseTransformFeedback = brw_pause_transform_feedback;
380 functions->ResumeTransformFeedback = brw_resume_transform_feedback;
381 functions->GetTransformFeedbackVertexCount =
382 brw_get_transform_feedback_vertex_count;
383 }
384
385 if (devinfo->gen >= 6)
386 functions->GetSamplePosition = gen6_get_sample_position;
387
388 /* GL_ARB_get_program_binary */
389 brw_program_binary_init(brw->screen->deviceID);
390 functions->GetProgramBinaryDriverSHA1 = brw_get_program_binary_driver_sha1;
391 functions->ProgramBinarySerializeDriverBlob = brw_serialize_program_binary;
392 functions->ProgramBinaryDeserializeDriverBlob =
393 brw_deserialize_program_binary;
394
395 if (brw->screen->disk_cache) {
396 functions->ShaderCacheSerializeDriverBlob = brw_program_serialize_nir;
397 }
398
399 functions->SetBackgroundContext = brw_set_background_context;
400 }
401
402 static void
403 brw_initialize_spirv_supported_capabilities(struct brw_context *brw)
404 {
405 const struct gen_device_info *devinfo = &brw->screen->devinfo;
406 struct gl_context *ctx = &brw->ctx;
407
408    /* The following SPIR-V capabilities are only supported on gen7+. The
409     * extension itself should only be enabled on gen7+ in the first place,
410     * but assert it just in case.
411 */
412 assert(devinfo->gen >= 7);
413
414 ctx->Const.SpirVCapabilities.atomic_storage = devinfo->gen >= 7;
415 ctx->Const.SpirVCapabilities.draw_parameters = true;
416 ctx->Const.SpirVCapabilities.float64 = devinfo->gen >= 8;
417 ctx->Const.SpirVCapabilities.geometry_streams = devinfo->gen >= 7;
418 ctx->Const.SpirVCapabilities.image_write_without_format = true;
419 ctx->Const.SpirVCapabilities.int64 = devinfo->gen >= 8;
420 ctx->Const.SpirVCapabilities.tessellation = true;
421 ctx->Const.SpirVCapabilities.transform_feedback = devinfo->gen >= 7;
422 ctx->Const.SpirVCapabilities.variable_pointers = true;
423 }
424
425 static void
426 brw_initialize_context_constants(struct brw_context *brw)
427 {
428 const struct gen_device_info *devinfo = &brw->screen->devinfo;
429 struct gl_context *ctx = &brw->ctx;
430 const struct brw_compiler *compiler = brw->screen->compiler;
431
432 const bool stage_exists[MESA_SHADER_STAGES] = {
433 [MESA_SHADER_VERTEX] = true,
434 [MESA_SHADER_TESS_CTRL] = devinfo->gen >= 7,
435 [MESA_SHADER_TESS_EVAL] = devinfo->gen >= 7,
436 [MESA_SHADER_GEOMETRY] = devinfo->gen >= 6,
437 [MESA_SHADER_FRAGMENT] = true,
438 [MESA_SHADER_COMPUTE] =
439 (_mesa_is_desktop_gl(ctx) &&
440 ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
441 (ctx->API == API_OPENGLES2 &&
442 ctx->Const.MaxComputeWorkGroupSize[0] >= 128),
443 };
444
445 unsigned num_stages = 0;
446 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
447 if (stage_exists[i])
448 num_stages++;
449 }
450
451 unsigned max_samplers =
452 devinfo->gen >= 8 || devinfo->is_haswell ? BRW_MAX_TEX_UNIT : 16;
453
454 ctx->Const.MaxDualSourceDrawBuffers = 1;
455 ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
456 ctx->Const.MaxCombinedShaderOutputResources =
457 MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
458
459 /* The timestamp register we can read for glGetTimestamp() is
460 * sometimes only 32 bits, before scaling to nanoseconds (depending
461 * on kernel).
462 *
463 * Once scaled to nanoseconds the timestamp would roll over at a
464 * non-power-of-two, so an application couldn't use
465 * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we
466 * report 36 bits and truncate at that (rolling over 5 times as
467 * often as the HW counter), and when the 32-bit counter rolls
468 * over, it happens to also be at a rollover in the reported value
469 * from near (1<<36) to 0.
470 *
471     * The low 32 bits roll over in ~343 seconds. Our 36-bit result
472 * rolls over every ~69 seconds.
473 */
474 ctx->Const.QueryCounterBits.Timestamp = 36;
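   /* Worked example of the numbers above (a sketch assuming the common 80 ns
    * tick of the raw HW counter; the exact period is generation-dependent):
    *   raw 32-bit counter:        2^32 ticks * 80 ns ~= 343.6 s per wrap
    *   reported 36-bit ns value:  2^36 ns            ~= 68.7 s per wrap
    * i.e. the reported value wraps roughly 5x as often as the HW counter.
    */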
475
476 ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
477 ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
478 if (devinfo->gen >= 7) {
479 ctx->Const.MaxRenderbufferSize = 16384;
480 ctx->Const.MaxTextureSize = 16384;
481 ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
482 } else {
483 ctx->Const.MaxRenderbufferSize = 8192;
484 ctx->Const.MaxTextureSize = 8192;
485 ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
486 }
487 ctx->Const.Max3DTextureLevels = 12; /* 2048 */
488 ctx->Const.MaxArrayTextureLayers = devinfo->gen >= 7 ? 2048 : 512;
489 ctx->Const.MaxTextureMbytes = 1536;
490 ctx->Const.MaxTextureRectSize = devinfo->gen >= 7 ? 16384 : 8192;
491 ctx->Const.MaxTextureMaxAnisotropy = 16.0;
492 ctx->Const.MaxTextureLodBias = 15.0;
493 ctx->Const.StripTextureBorder = true;
494 if (devinfo->gen >= 7) {
495 ctx->Const.MaxProgramTextureGatherComponents = 4;
496 ctx->Const.MinProgramTextureGatherOffset = -32;
497 ctx->Const.MaxProgramTextureGatherOffset = 31;
498 } else if (devinfo->gen == 6) {
499 ctx->Const.MaxProgramTextureGatherComponents = 1;
500 ctx->Const.MinProgramTextureGatherOffset = -8;
501 ctx->Const.MaxProgramTextureGatherOffset = 7;
502 }
503
504 ctx->Const.MaxUniformBlockSize = 65536;
505
506 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
507 struct gl_program_constants *prog = &ctx->Const.Program[i];
508
509 if (!stage_exists[i])
510 continue;
511
512 prog->MaxTextureImageUnits = max_samplers;
513
514 prog->MaxUniformBlocks = BRW_MAX_UBO;
515 prog->MaxCombinedUniformComponents =
516 prog->MaxUniformComponents +
517 ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
518
519 prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
520 prog->MaxAtomicBuffers = BRW_MAX_ABO;
521 prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
522 prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
523 }
524
525 ctx->Const.MaxTextureUnits =
526 MIN2(ctx->Const.MaxTextureCoordUnits,
527 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
528
529 ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
530 ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
531 ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
532 ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
533 ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
534 ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
535 ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;
536
537
538 /* Hardware only supports a limited number of transform feedback buffers.
539 * So we need to override the Mesa default (which is based only on software
540 * limits).
541 */
542 ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
543
544 /* On Gen6, in the worst case, we use up one binding table entry per
545 * transform feedback component (see comments above the definition of
546 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
547 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
548 * BRW_MAX_SOL_BINDINGS.
549 *
550 * In "separate components" mode, we need to divide this value by
551 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
552 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
553 */
554 ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
555 ctx->Const.MaxTransformFeedbackSeparateComponents =
556 BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
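   /* Example, assuming the usual brw_context.h values of
    * BRW_MAX_SOL_BINDINGS = 64 and BRW_MAX_SOL_BUFFERS = 4: interleaved mode
    * advertises 64 components, while separate mode advertises 64 / 4 = 16
    * components per buffer, so all four buffers together still fit within the
    * 64 available binding table entries.
    */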
557
558 ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
559 !can_do_mi_math_and_lrr(brw->screen);
560
561 int max_samples;
562 const int *msaa_modes = intel_supported_msaa_modes(brw->screen);
563 const int clamp_max_samples =
564 driQueryOptioni(&brw->optionCache, "clamp_max_samples");
565
566 if (clamp_max_samples < 0) {
567 max_samples = msaa_modes[0];
568 } else {
569 /* Select the largest supported MSAA mode that does not exceed
570 * clamp_max_samples.
571 */
572 max_samples = 0;
573 for (int i = 0; msaa_modes[i] != 0; ++i) {
574 if (msaa_modes[i] <= clamp_max_samples) {
575 max_samples = msaa_modes[i];
576 break;
577 }
578 }
579 }
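   /* Illustrative sketch with hypothetical values (not queried from the
    * hardware): if msaa_modes = {8, 4, 0}, listed from largest to smallest (as
    * the use of msaa_modes[0] as the unclamped maximum above implies), and
    * clamp_max_samples = 6, the loop picks max_samples = 4, the largest
    * supported mode that does not exceed the clamp.
    */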
580
581 ctx->Const.MaxSamples = max_samples;
582 ctx->Const.MaxColorTextureSamples = max_samples;
583 ctx->Const.MaxDepthTextureSamples = max_samples;
584 ctx->Const.MaxIntegerSamples = max_samples;
585 ctx->Const.MaxImageSamples = 0;
586
587 /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
588 * to map indices of rectangular grid to sample numbers within a pixel.
589 * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
590 * extension implementation. For more details see the comment above
591 * gen6_set_sample_maps() definition.
592 */
593 gen6_set_sample_maps(ctx);
594
595 ctx->Const.MinLineWidth = 1.0;
596 ctx->Const.MinLineWidthAA = 1.0;
597 if (devinfo->gen >= 6) {
598 ctx->Const.MaxLineWidth = 7.375;
599 ctx->Const.MaxLineWidthAA = 7.375;
600 ctx->Const.LineWidthGranularity = 0.125;
601 } else {
602 ctx->Const.MaxLineWidth = 7.0;
603 ctx->Const.MaxLineWidthAA = 7.0;
604 ctx->Const.LineWidthGranularity = 0.5;
605 }
606
607 /* For non-antialiased lines, we have to round the line width to the
608 * nearest whole number. Make sure that we don't advertise a line
609 * width that, when rounded, will be beyond the actual hardware
610 * maximum.
611 */
612 assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);
613
614 ctx->Const.MinPointSize = 1.0;
615 ctx->Const.MinPointSizeAA = 1.0;
616 ctx->Const.MaxPointSize = 255.0;
617 ctx->Const.MaxPointSizeAA = 255.0;
618 ctx->Const.PointSizeGranularity = 1.0;
619
620 if (devinfo->gen >= 5 || devinfo->is_g4x)
621 ctx->Const.MaxClipPlanes = 8;
622
623 ctx->Const.GLSLTessLevelsAsInputs = true;
624 ctx->Const.PrimitiveRestartForPatches = true;
625
626 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
627 ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
628 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
629 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
630 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
631 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
632 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
633 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
634 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
635 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
636 ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
637 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
638 MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
639 ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);
640
641 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
642 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
643 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
644 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
645 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
646 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
647 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
648 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
649 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
650 MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
651 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);
652
653 /* Fragment shaders use real, 32-bit twos-complement integers for all
654 * integer types.
655 */
656 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
657 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
658 ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
659 ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
660 ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
661
662 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
663 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
664 ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
665 ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
666 ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
667
668    /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
669     * but we're not sure how that's done with respect to vertex ordering,
670     * which affects the provoking-vertex decision. Always use the last-vertex
671     * convention for quad primitives, which works as expected for now.
672 */
673 if (devinfo->gen >= 6)
674 ctx->Const.QuadsFollowProvokingVertexConvention = false;
675
676 ctx->Const.NativeIntegers = true;
677
678 /* Regarding the CMP instruction, the Ivybridge PRM says:
679 *
680 * "For each enabled channel 0b or 1b is assigned to the appropriate flag
681 * bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
682 * 0xFFFFFFFF) is assigned to dst."
683 *
684 * but PRMs for earlier generations say
685 *
686 * "In dword format, one GRF may store up to 8 results. When the register
687 * is used later as a vector of Booleans, as only LSB at each channel
688 * contains meaning [sic] data, software should make sure all higher bits
689 * are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
690 *
691 * We select the representation of a true boolean uniform to be ~0, and fix
692 * the results of Gen <= 5 CMP instruction's with -(result & 1).
693 */
694 ctx->Const.UniformBooleanTrue = ~0;
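   /* Illustration (not driver code) of the -(result & 1) fix-up mentioned
    * above: only the LSB of an old-gen CMP result is meaningful, so
    *   result = 0x00000001  ->  -(result & 1) = 0xFFFFFFFF  (true, i.e. ~0)
    *   result = 0xFFFFFFFE  ->  -(result & 1) = 0x00000000  (false, LSB clear)
    */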
695
696 /* From the gen4 PRM, volume 4 page 127:
697 *
698 * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
699 * the base address of the first element of the surface, computed in
700 * software by adding the surface base address to the byte offset of
701 * the element in the buffer."
702 *
703 * However, unaligned accesses are slower, so enforce buffer alignment.
704 *
705 * In order to push UBO data, 3DSTATE_CONSTANT_XS imposes an additional
706 * restriction: the start of the buffer needs to be 32B aligned.
707 */
708 ctx->Const.UniformBufferOffsetAlignment = 32;
709
710 /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
711 * that we can safely have the CPU and GPU writing the same SSBO on
712 * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
713 * writes, so there's no problem. For an SSBO, the GPU and the CPU can
714 * be updating disjoint regions of the buffer simultaneously and that will
715 * break if the regions overlap the same cacheline.
716 */
717 ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
718 ctx->Const.TextureBufferOffsetAlignment = 16;
719 ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;
720
721 if (devinfo->gen >= 6) {
722 ctx->Const.MaxVarying = 32;
723 ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
724 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents =
725 compiler->scalar_stage[MESA_SHADER_GEOMETRY] ? 128 : 64;
726 ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
727 ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
728 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
729 ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
730 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
731 ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
732 }
733
734 /* We want the GLSL compiler to emit code that uses condition codes */
735 for (int i = 0; i < MESA_SHADER_STAGES; i++) {
736 ctx->Const.ShaderCompilerOptions[i] =
737 brw->screen->compiler->glsl_compiler_options[i];
738 }
739
740 if (devinfo->gen >= 7) {
741 ctx->Const.MaxViewportWidth = 32768;
742 ctx->Const.MaxViewportHeight = 32768;
743 }
744
745 /* ARB_viewport_array, OES_viewport_array */
746 if (devinfo->gen >= 6) {
747 ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
748 ctx->Const.ViewportSubpixelBits = 8;
749
750 /* Cast to float before negating because MaxViewportWidth is unsigned.
751 */
752 ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
753 ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
754 }
755
756 /* ARB_gpu_shader5 */
757 if (devinfo->gen >= 7)
758 ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);
759
760 /* ARB_framebuffer_no_attachments */
761 ctx->Const.MaxFramebufferWidth = 16384;
762 ctx->Const.MaxFramebufferHeight = 16384;
763 ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
764 ctx->Const.MaxFramebufferSamples = max_samples;
765
766 /* OES_primitive_bounding_box */
767 ctx->Const.NoPrimitiveBoundingBoxOutput = true;
768
769 /* TODO: We should be able to use STD430 packing by default on all hardware
770 * but some piglit tests [1] currently fail on SNB when this is enabled.
771    * The problem is that the message we use for uniform pulls in the
772    * vec4 back-end on SNB is the OWORD block load instruction, which
773 * takes its offset in units of OWORDS (16 bytes). On IVB+, we use the
774 * sampler which doesn't have these restrictions.
775 *
776 * In the scalar back-end, we use the sampler for dynamic uniform loads and
777    * pull an entire cache line at a time for constant offset loads, both of
778 * which support almost any alignment.
779 *
780 * [1] glsl-1.40/uniform_buffer/vs-float-array-variable-index.shader_test
781 */
782 if (devinfo->gen >= 7)
783 ctx->Const.UseSTD430AsDefaultPacking = true;
784
785 if (!(ctx->Const.ContextFlags & GL_CONTEXT_FLAG_DEBUG_BIT))
786 ctx->Const.AllowMappedBuffersDuringExecution = true;
787
788 /* GL_ARB_get_program_binary */
789 ctx->Const.NumProgramBinaryFormats = 1;
790 }
791
792 static void
793 brw_initialize_cs_context_constants(struct brw_context *brw)
794 {
795 struct gl_context *ctx = &brw->ctx;
796 const struct intel_screen *screen = brw->screen;
797 struct gen_device_info *devinfo = &brw->screen->devinfo;
798
799 /* FINISHME: Do this for all platforms that the kernel supports */
800 if (devinfo->is_cherryview &&
801 screen->subslice_total > 0 && screen->eu_total > 0) {
802 /* Logical CS threads = EUs per subslice * 7 threads per EU */
803 uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;
804
805 /* Fuse configurations may give more threads than expected, never less. */
806 if (max_cs_threads > devinfo->max_cs_threads)
807 devinfo->max_cs_threads = max_cs_threads;
808 }
809
810 /* Maximum number of scalar compute shader invocations that can be run in
811 * parallel in the same subslice assuming SIMD32 dispatch.
812 *
813 * We don't advertise more than 64 threads, because we are limited to 64 by
814 * our usage of thread_width_max in the gpgpu walker command. This only
815 * currently impacts Haswell, which otherwise might be able to advertise 70
816    * threads. With SIMD32 and 64 threads, Haswell still provides twice the
817    * number of invocations required by ARB_compute_shader.
818 */
819 const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
820 const uint32_t max_invocations = 32 * max_threads;
821 ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
822 ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
823 ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
824 ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
825 ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
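   /* Worked example: with the 64-thread cap above and SIMD32 dispatch,
    * max_invocations = 32 * 64 = 2048, comfortably above the 1024 invocations
    * ARB_compute_shader requires (cf. the >= 1024 check in
    * brw_initialize_context_constants()).
    */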
826 }
827
828 /**
829 * Process driconf (drirc) options, setting appropriate context flags.
830 *
831 * intelInitExtensions still pokes at optionCache directly, in order to
832 * avoid advertising various extensions. No flags are set, so it makes
833 * sense to continue doing that there.
834 */
835 static void
836 brw_process_driconf_options(struct brw_context *brw)
837 {
838 const struct gen_device_info *devinfo = &brw->screen->devinfo;
839 struct gl_context *ctx = &brw->ctx;
840
841 driOptionCache *options = &brw->optionCache;
842 driParseConfigFiles(options, &brw->screen->optionCache,
843 brw->driContext->driScreenPriv->myNum,
844 "i965", NULL);
845
846 int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
847 switch (bo_reuse_mode) {
848 case DRI_CONF_BO_REUSE_DISABLED:
849 break;
850 case DRI_CONF_BO_REUSE_ALL:
851 brw_bufmgr_enable_reuse(brw->bufmgr);
852 break;
853 }
854
855 if (INTEL_DEBUG & DEBUG_NO_HIZ) {
856 brw->has_hiz = false;
857 /* On gen6, you can only do separate stencil with HIZ. */
858 if (devinfo->gen == 6)
859 brw->has_separate_stencil = false;
860 }
861
862 if (driQueryOptionb(options, "mesa_no_error"))
863 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_NO_ERROR_BIT_KHR;
864
865 if (driQueryOptionb(options, "always_flush_batch")) {
866 fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
867 brw->always_flush_batch = true;
868 }
869
870 if (driQueryOptionb(options, "always_flush_cache")) {
871 fprintf(stderr, "flushing GPU caches before/after each draw call\n");
872 brw->always_flush_cache = true;
873 }
874
875 if (driQueryOptionb(options, "disable_throttling")) {
876 fprintf(stderr, "disabling flush throttling\n");
877 brw->disable_throttling = true;
878 }
879
880 brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");
881
882 if (driQueryOptionb(&brw->optionCache, "precise_trig"))
883 brw->screen->compiler->precise_trig = true;
884
885 ctx->Const.ForceGLSLExtensionsWarn =
886 driQueryOptionb(options, "force_glsl_extensions_warn");
887
888 ctx->Const.ForceGLSLVersion =
889 driQueryOptioni(options, "force_glsl_version");
890
891 ctx->Const.DisableGLSLLineContinuations =
892 driQueryOptionb(options, "disable_glsl_line_continuations");
893
894 ctx->Const.AllowGLSLExtensionDirectiveMidShader =
895 driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
896
897 ctx->Const.AllowGLSLBuiltinVariableRedeclaration =
898 driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration");
899
900 ctx->Const.AllowHigherCompatVersion =
901 driQueryOptionb(options, "allow_higher_compat_version");
902
903 ctx->Const.ForceGLSLAbsSqrt =
904 driQueryOptionb(options, "force_glsl_abs_sqrt");
905
906 ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init");
907
908 brw->dual_color_blend_by_location =
909 driQueryOptionb(options, "dual_color_blend_by_location");
910
911 ctx->Const.AllowGLSLCrossStageInterpolationMismatch =
912 driQueryOptionb(options, "allow_glsl_cross_stage_interpolation_mismatch");
913
914 ctx->Const.dri_config_options_sha1 = ralloc_array(brw, unsigned char, 20);
915 driComputeOptionsSha1(&brw->screen->optionCache,
916 ctx->Const.dri_config_options_sha1);
917 }
918
919 GLboolean
920 brwCreateContext(gl_api api,
921 const struct gl_config *mesaVis,
922 __DRIcontext *driContextPriv,
923 const struct __DriverContextConfig *ctx_config,
924 unsigned *dri_ctx_error,
925 void *sharedContextPrivate)
926 {
927 struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
928 struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
929 const struct gen_device_info *devinfo = &screen->devinfo;
930 struct dd_function_table functions;
931
932 /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
933 * provides us with context reset notifications.
934 */
935 uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG |
936 __DRI_CTX_FLAG_FORWARD_COMPATIBLE |
937 __DRI_CTX_FLAG_NO_ERROR;
938
939 if (screen->has_context_reset_notification)
940 allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;
941
942 if (ctx_config->flags & ~allowed_flags) {
943 *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
944 return false;
945 }
946
947 if (ctx_config->attribute_mask &
948 ~(__DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY |
949 __DRIVER_CONTEXT_ATTRIB_PRIORITY)) {
950 *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_ATTRIBUTE;
951 return false;
952 }
953
954 bool notify_reset =
955 ((ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_RESET_STRATEGY) &&
956 ctx_config->reset_strategy != __DRI_CTX_RESET_NO_NOTIFICATION);
957
958 struct brw_context *brw = rzalloc(NULL, struct brw_context);
959 if (!brw) {
960 fprintf(stderr, "%s: failed to alloc context\n", __func__);
961 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
962 return false;
963 }
964
965 driContextPriv->driverPrivate = brw;
966 brw->driContext = driContextPriv;
967 brw->screen = screen;
968 brw->bufmgr = screen->bufmgr;
969
970 brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
971 brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
972
973 brw->has_swizzling = screen->hw_has_swizzling;
974
975 brw->isl_dev = screen->isl_dev;
976
977 brw->vs.base.stage = MESA_SHADER_VERTEX;
978 brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
979 brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
980 brw->gs.base.stage = MESA_SHADER_GEOMETRY;
981 brw->wm.base.stage = MESA_SHADER_FRAGMENT;
982 brw->cs.base.stage = MESA_SHADER_COMPUTE;
983
984 brw_init_driver_functions(brw, &functions);
985
986 if (notify_reset)
987 functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;
988
989 struct gl_context *ctx = &brw->ctx;
990
991 if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
992 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
993 fprintf(stderr, "%s: failed to init mesa context\n", __func__);
994 intelDestroyContext(driContextPriv);
995 return false;
996 }
997
998 driContextSetFlags(ctx, ctx_config->flags);
999
1000 /* Initialize the software rasterizer and helper modules.
1001 *
1002 * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
1003 * software fallbacks (which we have to support on legacy GL to do weird
1004 * glDrawPixels(), glBitmap(), and other functions).
1005 */
1006 if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
1007 _swrast_CreateContext(ctx);
1008 }
1009
1010 _vbo_CreateContext(ctx);
1011 if (ctx->swrast_context) {
1012 _tnl_CreateContext(ctx);
1013 TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
1014 _swsetup_CreateContext(ctx);
1015
1016 /* Configure swrast to match hardware characteristics: */
1017 _swrast_allow_pixel_fog(ctx, false);
1018 _swrast_allow_vertex_fog(ctx, true);
1019 }
1020
1021 _mesa_meta_init(ctx);
1022
1023 brw_process_driconf_options(brw);
1024
1025 if (INTEL_DEBUG & DEBUG_PERF)
1026 brw->perf_debug = true;
1027
1028 brw_initialize_cs_context_constants(brw);
1029 brw_initialize_context_constants(brw);
1030
1031 ctx->Const.ResetStrategy = notify_reset
1032 ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;
1033
1034 /* Reinitialize the context point state. It depends on ctx->Const values. */
1035 _mesa_init_point(ctx);
1036
1037 intel_fbo_init(brw);
1038
1039 intel_batchbuffer_init(brw);
1040
1041 /* Create a new hardware context. Using a hardware context means that
1042 * our GPU state will be saved/restored on context switch, allowing us
1043 * to assume that the GPU is in the same state we left it in.
1044 *
1045 * This is required for transform feedback buffer offsets, query objects,
1046 * and also allows us to reduce how much state we have to emit.
1047 */
1048 brw->hw_ctx = brw_create_hw_context(brw->bufmgr);
1049 if (!brw->hw_ctx && devinfo->gen >= 6) {
1050 fprintf(stderr, "Failed to create hardware context.\n");
1051 intelDestroyContext(driContextPriv);
1052 return false;
1053 }
1054
1055 if (brw->hw_ctx) {
1056 int hw_priority = GEN_CONTEXT_MEDIUM_PRIORITY;
1057 if (ctx_config->attribute_mask & __DRIVER_CONTEXT_ATTRIB_PRIORITY) {
1058 switch (ctx_config->priority) {
1059 case __DRI_CTX_PRIORITY_LOW:
1060 hw_priority = GEN_CONTEXT_LOW_PRIORITY;
1061 break;
1062 case __DRI_CTX_PRIORITY_HIGH:
1063 hw_priority = GEN_CONTEXT_HIGH_PRIORITY;
1064 break;
1065 }
1066 }
1067 if (hw_priority != I915_CONTEXT_DEFAULT_PRIORITY &&
1068 brw_hw_context_set_priority(brw->bufmgr, brw->hw_ctx, hw_priority)) {
1069 fprintf(stderr,
1070 "Failed to set priority [%d:%d] for hardware context.\n",
1071 ctx_config->priority, hw_priority);
1072 intelDestroyContext(driContextPriv);
1073 return false;
1074 }
1075 }
1076
1077 if (brw_init_pipe_control(brw, devinfo)) {
1078 *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
1079 intelDestroyContext(driContextPriv);
1080 return false;
1081 }
1082
1083 brw_upload_init(&brw->upload, brw->bufmgr, 65536);
1084
1085 brw_init_state(brw);
1086
1087 intelInitExtensions(ctx);
1088
1089 brw_init_surface_formats(brw);
1090
1091 brw_blorp_init(brw);
1092
1093 brw->urb.size = devinfo->urb.size;
1094
1095 if (devinfo->gen == 6)
1096 brw->urb.gs_present = false;
1097
1098 brw->prim_restart.in_progress = false;
1099 brw->prim_restart.enable_cut_index = false;
1100 brw->gs.enabled = false;
1101 brw->clip.viewport_count = 1;
1102
1103 brw->predicate.state = BRW_PREDICATE_STATE_RENDER;
1104
1105 brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;
1106
1107 ctx->VertexProgram._MaintainTnlProgram = true;
1108 ctx->FragmentProgram._MaintainTexEnvProgram = true;
1109
1110 brw_draw_init( brw );
1111
1112 if ((ctx_config->flags & __DRI_CTX_FLAG_DEBUG) != 0) {
1113 /* Turn on some extra GL_ARB_debug_output generation. */
1114 brw->perf_debug = true;
1115 }
1116
1117 if ((ctx_config->flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
1118 ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
1119 ctx->Const.RobustAccess = GL_TRUE;
1120 }
1121
1122 if (INTEL_DEBUG & DEBUG_SHADER_TIME)
1123 brw_init_shader_time(brw);
1124
1125 _mesa_override_extensions(ctx);
1126 _mesa_compute_version(ctx);
1127
1128 /* GL_ARB_gl_spirv */
1129 if (ctx->Extensions.ARB_gl_spirv)
1130 brw_initialize_spirv_supported_capabilities(brw);
1131
1132 _mesa_initialize_dispatch_tables(ctx);
1133 _mesa_initialize_vbo_vtxfmt(ctx);
1134
1135 if (ctx->Extensions.INTEL_performance_query)
1136 brw_init_performance_queries(brw);
1137
1138 vbo_use_buffer_objects(ctx);
1139 vbo_always_unmap_buffers(ctx);
1140
1141 brw->ctx.Cache = brw->screen->disk_cache;
1142
1143 if (driContextPriv->driScreenPriv->dri2.backgroundCallable &&
1144 driQueryOptionb(&screen->optionCache, "mesa_glthread")) {
1145 /* Loader supports multithreading, and so do we. */
1146 _mesa_glthread_init(ctx);
1147 }
1148
1149 return true;
1150 }
1151
1152 void
1153 intelDestroyContext(__DRIcontext * driContextPriv)
1154 {
1155 struct brw_context *brw =
1156 (struct brw_context *) driContextPriv->driverPrivate;
1157 struct gl_context *ctx = &brw->ctx;
1158
1159 GET_CURRENT_CONTEXT(curctx);
1160
1161 if (curctx == NULL) {
1162       /* No current context, but we need one to release the
1163        * renderbuffer surfaces when we release the framebuffer,
1164        * so temporarily bind this context.
1165 */
1166 _mesa_make_current(ctx, NULL, NULL);
1167 }
1168
1169 _mesa_glthread_destroy(&brw->ctx);
1170
1171 _mesa_meta_free(&brw->ctx);
1172
1173 if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
1174 /* Force a report. */
1175 brw->shader_time.report_time = 0;
1176
1177 brw_collect_and_report_shader_time(brw);
1178 brw_destroy_shader_time(brw);
1179 }
1180
1181 blorp_finish(&brw->blorp);
1182
1183 brw_destroy_state(brw);
1184 brw_draw_destroy(brw);
1185
1186 brw_bo_unreference(brw->curbe.curbe_bo);
1187
1188 brw_bo_unreference(brw->vs.base.scratch_bo);
1189 brw_bo_unreference(brw->tcs.base.scratch_bo);
1190 brw_bo_unreference(brw->tes.base.scratch_bo);
1191 brw_bo_unreference(brw->gs.base.scratch_bo);
1192 brw_bo_unreference(brw->wm.base.scratch_bo);
1193
1194 brw_bo_unreference(brw->vs.base.push_const_bo);
1195 brw_bo_unreference(brw->tcs.base.push_const_bo);
1196 brw_bo_unreference(brw->tes.base.push_const_bo);
1197 brw_bo_unreference(brw->gs.base.push_const_bo);
1198 brw_bo_unreference(brw->wm.base.push_const_bo);
1199
1200 brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);
1201
1202 if (ctx->swrast_context) {
1203 _swsetup_DestroyContext(&brw->ctx);
1204 _tnl_DestroyContext(&brw->ctx);
1205 }
1206 _vbo_DestroyContext(&brw->ctx);
1207
1208 if (ctx->swrast_context)
1209 _swrast_DestroyContext(&brw->ctx);
1210
1211 brw_fini_pipe_control(brw);
1212 intel_batchbuffer_free(&brw->batch);
1213
1214 brw_bo_unreference(brw->throttle_batch[1]);
1215 brw_bo_unreference(brw->throttle_batch[0]);
1216 brw->throttle_batch[1] = NULL;
1217 brw->throttle_batch[0] = NULL;
1218
1219 driDestroyOptionCache(&brw->optionCache);
1220
1221 /* free the Mesa context */
1222 _mesa_free_context_data(&brw->ctx, true);
1223
1224 ralloc_free(brw);
1225 driContextPriv->driverPrivate = NULL;
1226 }
1227
1228 GLboolean
1229 intelUnbindContext(__DRIcontext * driContextPriv)
1230 {
1231 GET_CURRENT_CONTEXT(ctx);
1232 _mesa_glthread_finish(ctx);
1233
1234    /* Unset the current context and dispatch table */
1235 _mesa_make_current(NULL, NULL, NULL);
1236
1237 return true;
1238 }
1239
1240 /**
1241  * Fixes up the context for GLES2/3 with our default-to-sRGB-capable behavior
1242 * on window system framebuffers.
1243 *
1244 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
1245 * your renderbuffer can do sRGB encode, and you can flip a switch that does
1246 * sRGB encode if the renderbuffer can handle it. You can ask specifically
1247 * for a visual where you're guaranteed to be capable, but it turns out that
1248 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
1249 * incapable ones, because there's no difference between the two in resources
1250 * used. Applications thus get built that accidentally rely on the default
1251 * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
1252 * great...
1253 *
1254 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
1255 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
1256 * So they removed the enable knob and made it "if the renderbuffer is sRGB
1257 * capable, do sRGB encode". Then, for your window system renderbuffers, you
1258 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
1259 * and get no sRGB encode (assuming that both kinds of visual are available).
1260 * Thus our choice to support sRGB by default on our visuals for desktop would
1261 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
1262 *
1263 * Unfortunately, renderbuffer setup happens before a context is created. So
1264 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
1265 * context (without an sRGB visual), we go turn that back off before anyone
1266 * finds out.
1267 */
1268 static void
1269 intel_gles3_srgb_workaround(struct brw_context *brw,
1270 struct gl_framebuffer *fb)
1271 {
1272 struct gl_context *ctx = &brw->ctx;
1273
1274 if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
1275 return;
1276
1277 for (int i = 0; i < BUFFER_COUNT; i++) {
1278 struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
1279
1280 /* Check if sRGB was specifically asked for. */
1281 struct intel_renderbuffer *irb = intel_get_renderbuffer(fb, i);
1282 if (irb && irb->need_srgb)
1283 return;
1284
1285 if (rb)
1286 rb->Format = _mesa_get_srgb_format_linear(rb->Format);
1287 }
1288 /* Disable sRGB from framebuffers that are not compatible. */
1289 fb->Visual.sRGBCapable = false;
1290 }
1291
1292 GLboolean
1293 intelMakeCurrent(__DRIcontext * driContextPriv,
1294 __DRIdrawable * driDrawPriv,
1295 __DRIdrawable * driReadPriv)
1296 {
1297 struct brw_context *brw;
1298
1299 if (driContextPriv)
1300 brw = (struct brw_context *) driContextPriv->driverPrivate;
1301 else
1302 brw = NULL;
1303
1304 if (driContextPriv) {
1305 struct gl_context *ctx = &brw->ctx;
1306 struct gl_framebuffer *fb, *readFb;
1307
1308 if (driDrawPriv == NULL) {
1309 fb = _mesa_get_incomplete_framebuffer();
1310 } else {
1311 fb = driDrawPriv->driverPrivate;
1312 driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
1313 }
1314
1315 if (driReadPriv == NULL) {
1316 readFb = _mesa_get_incomplete_framebuffer();
1317 } else {
1318 readFb = driReadPriv->driverPrivate;
1319 driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
1320 }
1321
1322 /* The sRGB workaround changes the renderbuffer's format. We must change
1323       * the format before the renderbuffer's miptree gets allocated, otherwise
1324 * the formats of the renderbuffer and its miptree will differ.
1325 */
1326 intel_gles3_srgb_workaround(brw, fb);
1327 intel_gles3_srgb_workaround(brw, readFb);
1328
1329 /* If the context viewport hasn't been initialized, force a call out to
1330 * the loader to get buffers so we have a drawable size for the initial
1331 * viewport. */
1332 if (!brw->ctx.ViewportInitialized)
1333 intel_prepare_render(brw);
1334
1335 _mesa_make_current(ctx, fb, readFb);
1336 } else {
1337 GET_CURRENT_CONTEXT(ctx);
1338 _mesa_glthread_finish(ctx);
1339 _mesa_make_current(NULL, NULL, NULL);
1340 }
1341
1342 return true;
1343 }
1344
1345 void
1346 intel_resolve_for_dri2_flush(struct brw_context *brw,
1347 __DRIdrawable *drawable)
1348 {
1349 const struct gen_device_info *devinfo = &brw->screen->devinfo;
1350
1351 if (devinfo->gen < 6) {
1352 /* MSAA and fast color clear are not supported, so don't waste time
1353 * checking whether a resolve is needed.
1354 */
1355 return;
1356 }
1357
1358 struct gl_framebuffer *fb = drawable->driverPrivate;
1359 struct intel_renderbuffer *rb;
1360
1361 /* Usually, only the back buffer will need to be downsampled. However,
1362 * the front buffer will also need it if the user has rendered into it.
1363 */
1364 static const gl_buffer_index buffers[2] = {
1365 BUFFER_BACK_LEFT,
1366 BUFFER_FRONT_LEFT,
1367 };
1368
1369 for (int i = 0; i < 2; ++i) {
1370 rb = intel_get_renderbuffer(fb, buffers[i]);
1371 if (rb == NULL || rb->mt == NULL)
1372 continue;
1373 if (rb->mt->surf.samples == 1) {
1374 assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
1375 rb->layer_count == 1);
1376 intel_miptree_prepare_external(brw, rb->mt);
1377 } else {
1378 intel_renderbuffer_downsample(brw, rb);
1379
1380 /* Call prepare_external on the single-sample miptree to do any
1381 * needed resolves prior to handing it off to the window system.
1382 * This is needed in the case that rb->singlesample_mt is Y-tiled
1383 * with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E. In
1384 * this case, the MSAA resolve above will write compressed data into
1385 * rb->singlesample_mt.
1386 *
1387 * TODO: Some day, if we decide to care about the tiny performance
1388 * hit we're taking by doing the MSAA resolve and then a CCS resolve,
1389 * we could detect this case and just allocate the single-sampled
1390 * miptree without aux. However, that would be a lot of plumbing and
1391 * this is a rather exotic case so it's not really worth it.
1392 */
1393 intel_miptree_prepare_external(brw, rb->singlesample_mt);
1394 }
1395 }
1396 }
1397
1398 static unsigned
1399 intel_bits_per_pixel(const struct intel_renderbuffer *rb)
1400 {
1401 return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
1402 }
1403
1404 static void
1405 intel_query_dri2_buffers(struct brw_context *brw,
1406 __DRIdrawable *drawable,
1407 __DRIbuffer **buffers,
1408 int *count);
1409
1410 static void
1411 intel_process_dri2_buffer(struct brw_context *brw,
1412 __DRIdrawable *drawable,
1413 __DRIbuffer *buffer,
1414 struct intel_renderbuffer *rb,
1415 const char *buffer_name);
1416
1417 static void
1418 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
1419
1420 static void
1421 intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1422 {
1423 struct gl_framebuffer *fb = drawable->driverPrivate;
1424 struct intel_renderbuffer *rb;
1425 __DRIbuffer *buffers = NULL;
1426 int count;
1427 const char *region_name;
1428
1429 /* Set this up front, so that in case our buffers get invalidated
1430 * while we're getting new buffers, we don't clobber the stamp and
1431 * thus ignore the invalidate. */
1432 drawable->lastStamp = drawable->dri2.stamp;
1433
1434 if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1435 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1436
1437 intel_query_dri2_buffers(brw, drawable, &buffers, &count);
1438
1439 if (buffers == NULL)
1440 return;
1441
1442 for (int i = 0; i < count; i++) {
1443 switch (buffers[i].attachment) {
1444 case __DRI_BUFFER_FRONT_LEFT:
1445 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1446 region_name = "dri2 front buffer";
1447 break;
1448
1449 case __DRI_BUFFER_FAKE_FRONT_LEFT:
1450 rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1451 region_name = "dri2 fake front buffer";
1452 break;
1453
1454 case __DRI_BUFFER_BACK_LEFT:
1455 rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1456 region_name = "dri2 back buffer";
1457 break;
1458
1459 case __DRI_BUFFER_DEPTH:
1460 case __DRI_BUFFER_HIZ:
1461 case __DRI_BUFFER_DEPTH_STENCIL:
1462 case __DRI_BUFFER_STENCIL:
1463 case __DRI_BUFFER_ACCUM:
1464 default:
1465 fprintf(stderr,
1466 "unhandled buffer attach event, attachment type %d\n",
1467 buffers[i].attachment);
1468 return;
1469 }
1470
1471 intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
1472 }
1473
1474 }
1475
1476 void
1477 intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
1478 {
1479 struct brw_context *brw = context->driverPrivate;
1480 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1481
1482 /* Set this up front, so that in case our buffers get invalidated
1483 * while we're getting new buffers, we don't clobber the stamp and
1484 * thus ignore the invalidate. */
1485 drawable->lastStamp = drawable->dri2.stamp;
1486
1487 if (unlikely(INTEL_DEBUG & DEBUG_DRI))
1488 fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
1489
1490 if (dri_screen->image.loader)
1491 intel_update_image_buffers(brw, drawable);
1492 else
1493 intel_update_dri2_buffers(brw, drawable);
1494
1495 driUpdateFramebufferSize(&brw->ctx, drawable);
1496 }
1497
1498 /**
1499  * intel_prepare_render should be called anywhere that current read/drawbuffer
1500 * state is required.
1501 */
1502 void
1503 intel_prepare_render(struct brw_context *brw)
1504 {
1505 struct gl_context *ctx = &brw->ctx;
1506 __DRIcontext *driContext = brw->driContext;
1507 __DRIdrawable *drawable;
1508
1509 drawable = driContext->driDrawablePriv;
1510 if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
1511 if (drawable->lastStamp != drawable->dri2.stamp)
1512 intel_update_renderbuffers(driContext, drawable);
1513 driContext->dri2.draw_stamp = drawable->dri2.stamp;
1514 }
1515
1516 drawable = driContext->driReadablePriv;
1517 if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
1518 if (drawable->lastStamp != drawable->dri2.stamp)
1519 intel_update_renderbuffers(driContext, drawable);
1520 driContext->dri2.read_stamp = drawable->dri2.stamp;
1521 }
1522
1523 /* If we're currently rendering to the front buffer, the rendering
1524 * that will happen next will probably dirty the front buffer. So
1525 * mark it as dirty here.
1526 */
1527 if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
1528 brw->front_buffer_dirty = true;
1529
1530 if (brw->is_shared_buffer_bound) {
1531 /* Subsequent rendering will probably dirty the shared buffer. */
1532 brw->is_shared_buffer_dirty = true;
1533 }
1534 }
1535
1536 /**
1537 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
1538 *
1539 * To determine which DRI buffers to request, examine the renderbuffers
1540 * attached to the drawable's framebuffer. Then request the buffers with
1541 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
1542 *
1543 * This is called from intel_update_renderbuffers().
1544 *
1545 * \param drawable Drawable whose buffers are queried.
1546 * \param buffers [out] List of buffers returned by DRI2 query.
1547 * \param buffer_count [out] Number of buffers returned.
1548 *
1549 * \see intel_update_renderbuffers()
1550 * \see DRI2GetBuffers()
1551 * \see DRI2GetBuffersWithFormat()
1552 */
1553 static void
1554 intel_query_dri2_buffers(struct brw_context *brw,
1555 __DRIdrawable *drawable,
1556 __DRIbuffer **buffers,
1557 int *buffer_count)
1558 {
1559 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1560 struct gl_framebuffer *fb = drawable->driverPrivate;
1561 int i = 0;
1562 unsigned attachments[8];
1563
1564 struct intel_renderbuffer *front_rb;
1565 struct intel_renderbuffer *back_rb;
1566
1567 front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1568 back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1569
1570 memset(attachments, 0, sizeof(attachments));
1571 if ((_mesa_is_front_buffer_drawing(fb) ||
1572 _mesa_is_front_buffer_reading(fb) ||
1573 !back_rb) && front_rb) {
1574 /* If a fake front buffer is in use, then querying for
1575 * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
1576 * the real front buffer to the fake front buffer. So before doing the
1577 * query, we need to make sure all the pending drawing has landed in the
1578 * real front buffer.
1579 */
1580 intel_batchbuffer_flush(brw);
1581 intel_flush_front(&brw->ctx);
1582
1583 attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
1584 attachments[i++] = intel_bits_per_pixel(front_rb);
1585 } else if (front_rb && brw->front_buffer_dirty) {
1586 /* We have pending front buffer rendering, but we aren't querying for a
1587 * front buffer. If the front buffer we have is a fake front buffer,
1588 * the X server is going to throw it away when it processes the query.
1589 * So before doing the query, make sure all the pending drawing has
1590 * landed in the real front buffer.
1591 */
1592 intel_batchbuffer_flush(brw);
1593 intel_flush_front(&brw->ctx);
1594 }
1595
1596 if (back_rb) {
1597 attachments[i++] = __DRI_BUFFER_BACK_LEFT;
1598 attachments[i++] = intel_bits_per_pixel(back_rb);
1599 }
1600
1601 assert(i <= ARRAY_SIZE(attachments));
1602
1603 *buffers =
1604 dri_screen->dri2.loader->getBuffersWithFormat(drawable,
1605 &drawable->w,
1606 &drawable->h,
1607 attachments, i / 2,
1608 buffer_count,
1609 drawable->loaderPrivate);
1610 }
1611
1612 /**
1613 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
1614 *
1615 * This is called from intel_update_renderbuffers().
1616 *
1617 * \par Note:
1618 * DRI buffers whose attachment point is DRI2BufferStencil or
1619 * DRI2BufferDepthStencil are handled as special cases.
1620 *
1621 * \param buffer_name is a human readable name, such as "dri2 front buffer",
1622 * that is passed to brw_bo_gem_create_from_name().
1623 *
1624 * \see intel_update_renderbuffers()
1625 */
1626 static void
1627 intel_process_dri2_buffer(struct brw_context *brw,
1628 __DRIdrawable *drawable,
1629 __DRIbuffer *buffer,
1630 struct intel_renderbuffer *rb,
1631 const char *buffer_name)
1632 {
1633 struct gl_framebuffer *fb = drawable->driverPrivate;
1634 struct brw_bo *bo;
1635
1636 if (!rb)
1637 return;
1638
1639 unsigned num_samples = rb->Base.Base.NumSamples;
1640
1641 /* We try to avoid closing and reopening the same BO name, because the first
1642 * use of a mapping of the buffer involves a bunch of page faulting which is
1643 * moderately expensive.
1644 */
1645 struct intel_mipmap_tree *last_mt;
1646 if (num_samples == 0)
1647 last_mt = rb->mt;
1648 else
1649 last_mt = rb->singlesample_mt;
1650
1651 uint32_t old_name = 0;
1652 if (last_mt) {
1653 /* The bo already has a name because the miptree was created by a
1654 * previous call to intel_process_dri2_buffer(). If a bo already has a
1655 * name, then brw_bo_flink() is a low-cost getter. It does not
1656 * create a new name.
1657 */
1658 brw_bo_flink(last_mt->bo, &old_name);
1659 }
1660
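/* If the loader handed back the same GEM name we already imported, the
 * existing miptree still describes this buffer and nothing needs to be
 * recreated.
 */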
1661 if (old_name == buffer->name)
1662 return;
1663
1664 if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
1665 fprintf(stderr,
1666 "attaching buffer %d, at %d, cpp %d, pitch %d\n",
1667 buffer->name, buffer->attachment,
1668 buffer->cpp, buffer->pitch);
1669 }
1670
1671 bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
1672 buffer->name);
1673 if (!bo) {
1674 fprintf(stderr,
1675 "Failed to open BO for returned DRI2 buffer "
1676 "(%dx%d, %s, named %d).\n"
1677 "This is likely a bug in the X Server that will lead to a "
1678 "crash soon.\n",
1679 drawable->w, drawable->h, buffer_name, buffer->name);
1680 return;
1681 }
1682
1683 uint32_t tiling, swizzle;
1684 brw_bo_get_tiling(bo, &tiling, &swizzle);
1685
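/* The kernel reports the BO's i915 tiling mode; translate it to an ISL
 * tiling so the new miptree matches the buffer's actual layout.
 */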
1686 struct intel_mipmap_tree *mt =
1687 intel_miptree_create_for_bo(brw,
1688 bo,
1689 intel_rb_format(rb),
1690 0,
1691 drawable->w,
1692 drawable->h,
1693 1,
1694 buffer->pitch,
1695 isl_tiling_from_i915_tiling(tiling),
1696 MIPTREE_CREATE_DEFAULT);
1697 if (!mt) {
1698 brw_bo_unreference(bo);
1699 return;
1700 }
1701
1702 /* We got this BO from X11. We can't assume that we have coherent texture
1703 * access because X may suddenly decide to use it for scan-out which would
1704 * destroy coherency.
1705 */
1706 bo->cache_coherent = false;
1707
1708 if (!intel_update_winsys_renderbuffer_miptree(brw, rb, mt,
1709 drawable->w, drawable->h,
1710 buffer->pitch)) {
1711 brw_bo_unreference(bo);
1712 intel_miptree_release(&mt);
1713 return;
1714 }
1715
1716 if (_mesa_is_front_buffer_drawing(fb) &&
1717 (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
1718 buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
1719 rb->Base.Base.NumSamples > 1) {
1720 intel_renderbuffer_upsample(brw, rb);
1721 }
1722
1723 assert(rb->mt);
1724
1725 brw_bo_unreference(bo);
1726 }
1727
1728 /**
1729 * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
1730 *
1731 * To determine which DRI buffers to request, examine the renderbuffers
1732 * attached to the drawable's framebuffer. Then request the buffers from
1733 * the image loader.
1734 *
1735 * This is called from intel_update_renderbuffers().
1736 *
1737 * \param drawable Drawable whose buffers are queried.
1738 * \param buffers [out] List of buffers returned by DRI2 query.
1739 * \param buffer_count [out] Number of buffers returned.
1740 *
1741 * \see intel_update_renderbuffers()
1742 */
1743
1744 static void
1745 intel_update_image_buffer(struct brw_context *intel,
1746 __DRIdrawable *drawable,
1747 struct intel_renderbuffer *rb,
1748 __DRIimage *buffer,
1749 enum __DRIimageBufferMask buffer_type)
1750 {
1751 struct gl_framebuffer *fb = drawable->driverPrivate;
1752
1753 if (!rb || !buffer->bo)
1754 return;
1755
1756 unsigned num_samples = rb->Base.Base.NumSamples;
1757
1758 /* Check and see if we're already bound to the right
1759 * buffer object
1760 */
1761 struct intel_mipmap_tree *last_mt;
1762 if (num_samples == 0)
1763 last_mt = rb->mt;
1764 else
1765 last_mt = rb->singlesample_mt;
1766
1767 if (last_mt && last_mt->bo == buffer->bo) {
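/* Already bound to this BO. For a shared buffer, still make the miptree
 * shareable (resolving and dropping any aux state) before returning; see
 * the longer __DRI_IMAGE_BUFFER_SHARED comment at the end of this function.
 */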
1768 if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
1769 intel_miptree_make_shareable(intel, last_mt);
1770 }
1771 return;
1772 }
1773
1774 /* Only allow internal compression if samples == 0. For multisampled
1775 * window system buffers, the only thing the single-sampled buffer is used
1776 * for is as a resolve target. If we do any compression beyond what is
1777 * supported by the window system, we will just have to resolve so it's
1778 * probably better to just not bother.
1779 */
1780 const bool allow_internal_aux = (num_samples == 0);
1781
1782 struct intel_mipmap_tree *mt =
1783 intel_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
1784 intel_rb_format(rb),
1785 allow_internal_aux);
1786 if (!mt)
1787 return;
1788
1789 if (!intel_update_winsys_renderbuffer_miptree(intel, rb, mt,
1790 buffer->width, buffer->height,
1791 buffer->pitch)) {
1792 intel_miptree_release(&mt);
1793 return;
1794 }
1795
1796 if (_mesa_is_front_buffer_drawing(fb) &&
1797 buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
1798 rb->Base.Base.NumSamples > 1) {
1799 intel_renderbuffer_upsample(intel, rb);
1800 }
1801
1802 if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
1803 /* The compositor and the application may access this image
1804 * concurrently. The display hardware may even scanout the image while
1805 * the GPU is rendering to it. Aux surfaces cause difficulty with
1806 * concurrent access, so permanently disable aux for this miptree.
1807 *
1808 * Perhaps we could improve overall application performance by
1809 * re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
1810 * EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
1811 * returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
1812 * approach to be highly dependent on the application's GL usage.
1813 *
1814 * I [chadv] expect clever disabling/reenabling to be counterproductive
1815 * in the use cases I care about: applications that render nearly
1816 * realtime handwriting to the surface while possibly undergoing
1817 * simultaneous scanout as a display plane. The app requires low
1818 * render latency. Even though the app spends most of its time in
1819 * shared-buffer mode, it also frequently transitions between
1820 * shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
1821 * mode. Visual stutter during the transitions should be avoided.
1822 *
1823 * In this case, I [chadv] believe reducing the GPU workload at
1824 * shared-buffer/double-buffer transitions would offer a smoother app
1825 * experience than any savings due to aux compression. But I've
1826 * collected no data to prove my theory.
1827 */
1828 intel_miptree_make_shareable(intel, mt);
1829 }
1830 }
1831
1832 static void
1833 intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
1834 {
1835 struct gl_framebuffer *fb = drawable->driverPrivate;
1836 __DRIscreen *dri_screen = brw->screen->driScrnPriv;
1837 struct intel_renderbuffer *front_rb;
1838 struct intel_renderbuffer *back_rb;
1839 struct __DRIimageList images;
1840 mesa_format format;
1841 uint32_t buffer_mask = 0;
1842 int ret;
1843
1844 front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
1845 back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
1846
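/* Pick the image format from whichever window-system renderbuffer is
 * present, preferring the back buffer; with neither attached there is
 * nothing to update.
 */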
1847 if (back_rb)
1848 format = intel_rb_format(back_rb);
1849 else if (front_rb)
1850 format = intel_rb_format(front_rb);
1851 else
1852 return;
1853
1854 if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
1855 _mesa_is_front_buffer_reading(fb) || !back_rb)) {
1856 buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
1857 }
1858
1859 if (back_rb)
1860 buffer_mask |= __DRI_IMAGE_BUFFER_BACK;
1861
1862 ret = dri_screen->image.loader->getBuffers(drawable,
1863 driGLFormatToImageFormat(format),
1864 &drawable->dri2.stamp,
1865 drawable->loaderPrivate,
1866 buffer_mask,
1867 &images);
1868 if (!ret)
1869 return;
1870
1871 if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
1872 drawable->w = images.front->width;
1873 drawable->h = images.front->height;
1874 intel_update_image_buffer(brw,
1875 drawable,
1876 front_rb,
1877 images.front,
1878 __DRI_IMAGE_BUFFER_FRONT);
1879 }
1880
1881 if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
1882 drawable->w = images.back->width;
1883 drawable->h = images.back->height;
1884 intel_update_image_buffer(brw,
1885 drawable,
1886 back_rb,
1887 images.back,
1888 __DRI_IMAGE_BUFFER_BACK);
1889 }
1890
1891 if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) {
1892 assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED);
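/* The image loader delivers a shared buffer through images.back, so it
 * is attached to the back renderbuffer here.
 */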
1893 drawable->w = images.back->width;
1894 drawable->h = images.back->height;
1895 intel_update_image_buffer(brw,
1896 drawable,
1897 back_rb,
1898 images.back,
1899 __DRI_IMAGE_BUFFER_SHARED);
1900 brw->is_shared_buffer_bound = true;
1901 } else {
1902 brw->is_shared_buffer_bound = false;
1903 brw->is_shared_buffer_dirty = false;
1904 }
1905 }