/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */


#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/extensions.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
#include "main/framebuffer.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"
#include "utils.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_compiler.h"
#include "brw_draw.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_image.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/tnl.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"

/***************************************
 * Mesa's Driver Functions
 ***************************************/

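/**
 * Driver hook behind glGetInternalformativ()'s GL_SAMPLES-style queries:
 * writes the MSAA sample counts supported by this generation into
 * samples[], highest first, and returns the number of entries written.
 */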
static size_t
brw_query_samples_for_format(struct gl_context *ctx, GLenum target,
                             GLenum internalFormat, int samples[16])
{
   struct brw_context *brw = brw_context(ctx);

   (void) target;

   switch (brw->gen) {
   case 9:
      samples[0] = 16;
      samples[1] = 8;
      samples[2] = 4;
      samples[3] = 2;
      return 4;

   case 8:
      samples[0] = 8;
      samples[1] = 4;
      samples[2] = 2;
      return 3;

   case 7:
      samples[0] = 8;
      samples[1] = 4;
      return 2;

   case 6:
      samples[0] = 4;
      return 1;

   default:
      assert(brw->gen < 6);
      samples[0] = 1;
      return 1;
   }
}

const char *const brw_vendor_string = "Intel Open Source Technology Center";

const char *
brw_get_renderer_string(unsigned deviceID)
{
   const char *chipset;
   static char buffer[128];

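   /* X-macro trick: i965_pci_ids.h expands CHIPSET(id, symbol, str) once
    * per known PCI ID, generating all the case labels of this switch.
    */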
   switch (deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   (void) driGetRendererString(buffer, chipset, 0);
   return buffer;
}

static const GLubyte *
intel_get_string(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) brw_vendor_string;

   case GL_RENDERER:
      return
         (GLubyte *) brw_get_renderer_string(brw->intelScreen->deviceID);

   default:
      return NULL;
   }
}

static void
intel_viewport(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;

   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      dri2InvalidateDrawable(driContext->driDrawablePriv);
      dri2InvalidateDrawable(driContext->driReadablePriv);
   }
}

static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *tex_obj;
   struct intel_renderbuffer *depth_irb;

   if (ctx->swrast_context)
      _swrast_InvalidateState(ctx, new_state);
   _vbo_InvalidateState(ctx, new_state);

   brw->NewGLState |= new_state;

   _mesa_unlock_context_textures(ctx);

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb)
      intel_renderbuffer_resolve_hiz(brw, depth_irb);

   /* Resolve depth buffer and render cache of each enabled texture. */
   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
      if (!ctx->Texture.Unit[i]._Current)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;
      intel_miptree_all_slices_resolve_depth(brw, tex_obj->mt);
      intel_miptree_resolve_color(brw, tex_obj->mt);
      brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
   }

   _mesa_lock_context_textures(ctx);
}

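/* Both loader interfaces provide an equivalent flushFrontBuffer hook; pick
 * whichever one the screen was initialized with.
 */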
#define flushFront(screen) ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)

static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const screen = brw->intelScreen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both front and back buffer. It
          * is unnecessary to resolve the back, but harms nothing except
          * performance. And no one cares about front-buffer render
          * performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}

static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);

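   /* Remember that the client issued an explicit flush, so batch submission
    * can apply flush-based throttling (unless the disable_throttling driconf
    * option, handled below, is set).
    */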
   brw->need_flush_throttle = true;
}

static void
intel_finish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      drm_intel_bo_wait_rendering(brw->batch.last_bo);
}

static void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   intel_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);
   brw_init_compute_functions(functions);
   if (brw->gen >= 7)
      brw_init_conditional_render_functions(functions);

   functions->QuerySamplesForFormat = brw_query_samples_for_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   functions->GetTransformFeedbackVertexCount =
      brw_get_transform_feedback_vertex_count;
   if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}

static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.QueryCounterBits.Timestamp = 36;

   ctx->Const.StripTextureBorder = true;

   ctx->Const.MaxUniformBlockSize = 65536;
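   /* Each UBO binding can therefore contribute 65536 / 4 = 16384 float
    * components to the combined limit computed below.
    */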
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program_constants *prog = &ctx->Const.Program[i];
      prog->MaxUniformBlocks = BRW_MAX_UBO;
      prog->MaxCombinedUniformComponents =
         prog->MaxUniformComponents +
         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;
   }

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = max_samplers;
   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = max_samplers;
   if (brw->gen >= 6)
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = max_samplers;
   else
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 0;
   if (_mesa_extension_override_enables.ARB_compute_shader) {
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
      ctx->Const.MaxUniformBufferBindings += BRW_MAX_UBO;
   } else {
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 0;
   }
   ctx->Const.MaxCombinedTextureImageUnits =
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits +
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;

   ctx->Const.MaxTextureLevels = 14; /* 8192 */
   if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
      ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   ctx->Const.MaxTextureMbytes = 1536;

   if (brw->gen >= 7)
      ctx->Const.MaxArrayTextureLayers = 2048;
   else
      ctx->Const.MaxArrayTextureLayers = 512;

   ctx->Const.MaxTextureRectSize = 1 << 12;

   ctx->Const.MaxTextureMaxAnisotropy = 16.0;

   ctx->Const.MaxRenderbufferSize = 8192;

   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
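   /* Assuming the usual definitions in brw_context.h (64 SOL bindings across
    * 4 SOL buffers; see that header for the authoritative values), the
    * division above works out to 16 separate components per buffer.
    */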

   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount = true;

   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->intelScreen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

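   /* msaa_modes is a zero-terminated list in decreasing sample-count order,
    * which is why msaa_modes[0] is the unclamped maximum and why the early
    * break below picks the largest mode that satisfies the clamp.
    */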
   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;

   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
    * to map indices of rectangular grid to sample numbers within a pixel.
    * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
    * extension implementation. For more details see the comment above
    * gen6_set_sample_maps() definition.
    */
   gen6_set_sample_maps(ctx);

   if (brw->gen >= 7)
      ctx->Const.MaxProgramTextureGatherComponents = 4;
   else if (brw->gen == 6)
      ctx->Const.MaxProgramTextureGatherComponents = 1;

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   /* For non-antialiased lines, we have to round the line width to the
    * nearest whole number. Make sure that we don't advertise a line
    * width that, when rounded, will be beyond the actual hardware
    * maximum.
    */
   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
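   /* The RangeMin/RangeMax values below use glGetShaderPrecisionFormat()'s
    * log2 encoding: 31 and 30 describe the range [-2^31, 2^31 - 1], and a
    * Precision of 0 means integer results are exact.
    */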
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   if (brw->gen >= 7) {
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxAtomicBuffers = BRW_MAX_ABO;
      ctx->Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;

      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms =
         BRW_MAX_IMAGES;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms =
         (brw->intelScreen->compiler->scalar_vs ? BRW_MAX_IMAGES : 0);
      ctx->Const.Program[MESA_SHADER_COMPUTE].MaxImageUniforms =
         BRW_MAX_IMAGES;
      ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;
      ctx->Const.MaxCombinedShaderOutputResources =
         MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;
      ctx->Const.MaxImageSamples = 0;
      ctx->Const.MaxCombinedImageUniforms = 3 * BRW_MAX_IMAGES;
   }

   /* Gen6 converts quads to polygons at the beginning of the 3D pipeline,
    * but we're not sure how the conversion orders the vertices, which
    * affects the provoking vertex decision. Always use the last-vertex
    * convention for quad primitives; it works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.VertexID_is_zero_based = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
    *    0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the register
    *    is used later as a vector of Booleans, as only LSB at each channel
    *    contains meaning [sic] data, software should make sure all higher bits
    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
    *
    * We select the representation of a true boolean uniform to be ~0, and fix
    * the results of Gen <= 5 CMP instructions with -(result & 1).
    */
   ctx->Const.UniformBooleanTrue = ~0;

   /* From the gen4 PRM, volume 4 page 127:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *     the base address of the first element of the surface, computed in
    *     software by adding the surface base address to the byte offset of
    *     the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;

   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
    * that we can safely have the CPU and GPU writing the same SSBO on
    * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
    * writes, so there's no problem. For an SSBO, the GPU and the CPU can
    * be updating disjoint regions of the buffer simultaneously and that will
    * break if the regions overlap the same cacheline.
    */
   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
   ctx->Const.TextureBufferOffsetAlignment = 16;
   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;

   /* FIXME: Tessellation stages are not yet supported in i965, so
    * MaxCombinedShaderStorageBlocks doesn't take them into account.
    */
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxShaderStorageBlocks = BRW_MAX_SSBO;
   ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxShaderStorageBlocks = BRW_MAX_SSBO;
   ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxShaderStorageBlocks = 0;
   ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxShaderStorageBlocks = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxShaderStorageBlocks = BRW_MAX_SSBO;
   ctx->Const.Program[MESA_SHADER_COMPUTE].MaxShaderStorageBlocks = BRW_MAX_SSBO;
   ctx->Const.MaxCombinedShaderStorageBlocks = BRW_MAX_SSBO * 3;
   ctx->Const.MaxShaderStorageBufferBindings = BRW_MAX_SSBO * 3;

   if (_mesa_extension_override_enables.ARB_compute_shader)
      ctx->Const.MaxShaderStorageBufferBindings += BRW_MAX_SSBO;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i] =
         brw->intelScreen->compiler->glsl_compiler_options[i];
   }

   /* ARB_viewport_array */
   if (brw->gen >= 6 && ctx->API == API_OPENGL_CORE) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

   /* ARB_framebuffer_no_attachments */
   ctx->Const.MaxFramebufferWidth = ctx->Const.MaxViewportWidth;
   ctx->Const.MaxFramebufferHeight = ctx->Const.MaxViewportHeight;
   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
   ctx->Const.MaxFramebufferSamples = max_samples;
}

static void
brw_adjust_cs_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* For ES, we set these constants based on SIMD8.
    *
    * TODO: Once we can always generate SIMD16, we should update this.
    *
    * For GL, we assume we can generate a SIMD16 program, but this currently
    * is not always true. This allows us to run more test cases, and will be
    * required based on desktop GL compute shader requirements.
    */
   const int simd_size = ctx->API == API_OPENGL_CORE ? 16 : 8;

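   /* The advertised work-group limits scale with the number of invocations
    * a single dispatched program can cover: hardware threads times SIMD
    * width.
    */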
   const uint32_t max_invocations = simd_size * brw->max_cs_threads;
   ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
   ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
}

/**
 * Process driconf (drirc) options, setting appropriate context flags.
 *
 * intelInitExtensions still pokes at optionCache directly, in order to
 * avoid advertising various extensions. No flags are set, so it makes
 * sense to continue doing that there.
 */
static void
brw_process_driconf_options(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   driOptionCache *options = &brw->optionCache;
   driParseConfigFiles(options, &brw->intelScreen->optionCache,
                       brw->driContext->driScreenPriv->myNum, "i965");

   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      intel_bufmgr_gem_enable_reuse(brw->bufmgr);
      break;
   }

   if (!driQueryOptionb(options, "hiz")) {
      brw->has_hiz = false;
      /* On gen6, you can only do separate stencil with HIZ. */
      if (brw->gen == 6)
         brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(options, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = true;
   }

   if (driQueryOptionb(options, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = true;
   }

   if (driQueryOptionb(options, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = true;
   }

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   ctx->Const.ForceGLSLExtensionsWarn =
      driQueryOptionb(options, "force_glsl_extensions_warn");

   ctx->Const.DisableGLSLLineContinuations =
      driQueryOptionb(options, "disable_glsl_line_continuations");

   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");
}

GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   __DRIscreen *sPriv = driContextPriv->driScreenPriv;
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = sPriv->driverPrivate;
   const struct brw_device_info *devinfo = screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
                          | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __func__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->intelScreen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->is_cherryview = devinfo->is_cherryview;
   brw->is_broxton = devinfo->is_broxton;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else if (brw->gen >= 6) {
      gen6_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);

   if (INTEL_DEBUG & DEBUG_PERF)
      brw->perf_debug = true;

   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state. It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(brw);

   if (brw->gen >= 6) {
      /* Create a new hardware context. Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = drm_intel_gem_context_create(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   if (brw_init_pipe_control(brw, devinfo)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   brw->max_vs_threads = devinfo->max_vs_threads;
   brw->max_hs_threads = devinfo->max_hs_threads;
   brw->max_ds_threads = devinfo->max_ds_threads;
   brw->max_gs_threads = devinfo->max_gs_threads;
   brw->max_wm_threads = devinfo->max_wm_threads;
   brw->max_cs_threads = devinfo->max_cs_threads;
   brw->urb.size = devinfo->urb.size;
   brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
   brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
   brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
   brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
   brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;

   brw_adjust_cs_context_constants(brw);

   /* Estimate the size of the mappable aperture into the GTT. There's an
    * ioctl to get the whole GTT size, but not one to get the mappable subset.
    * It turns out it's basically always 256MB, though some ancient hardware
    * was smaller.
    */
   uint32_t gtt_size = 256 * 1024 * 1024;

   /* We don't want to map two objects such that a memcpy between them would
    * just fault one mapping in and then the other over and over forever. So
    * we would need to divide the GTT size by 2. Additionally, some GTT is
    * taken up by things like the framebuffer and the ringbuffer and such, so
    * be more conservative.
    */
   brw->max_gtt_map_object_size = gtt_size / 4;

   if (brw->gen == 6)
      brw->urb.gs_present = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;
   brw->sf.viewport_transform_enable = true;

   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;

   brw->use_resource_streamer = screen->has_resource_streamer &&
      (brw_env_var_as_boolean("INTEL_USE_HW_BT", false) ||
       brw_env_var_as_boolean("INTEL_USE_GATHER", false));

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0)
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.AMD_performance_monitor) {
      brw_init_performance_monitors(brw);
   }

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}

void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   /* Dump a final BMP in case the application doesn't call SwapBuffers */
   if (INTEL_DEBUG & DEBUG_AUB) {
      intel_batchbuffer_flush(brw);
      aub_dump_bmp(&brw->ctx);
   }

   _mesa_meta_free(&brw->ctx);
   brw_meta_fast_clear_free(brw);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   drm_intel_bo_unreference(brw->curbe.curbe_bo);
   if (brw->vs.base.scratch_bo)
      drm_intel_bo_unreference(brw->vs.base.scratch_bo);
   if (brw->gs.base.scratch_bo)
      drm_intel_bo_unreference(brw->gs.base.scratch_bo);
   if (brw->wm.base.scratch_bo)
      drm_intel_bo_unreference(brw->wm.base.scratch_bo);

   gen7_reset_hw_bt_pool_offsets(brw);
   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
   brw->hw_bt_pool.bo = NULL;

   drm_intel_gem_context_destroy(brw->hw_ctx);

   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   brw_fini_pipe_control(brw);
   intel_batchbuffer_free(brw);

   drm_intel_bo_unreference(brw->throttle_batch[1]);
   drm_intel_bo_unreference(brw->throttle_batch[0]);
   brw->throttle_batch[1] = NULL;
   brw->throttle_batch[0] = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}

GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}

/**
 * Fixes up the context for GLES 2/3 with our default-to-sRGB-capable behavior
 * on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it. You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used. Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
 * great...
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode". Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created. So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      if (fb->Attachment[i].Renderbuffer &&
          fb->Attachment[i].Renderbuffer->Format == MESA_FORMAT_B8G8R8A8_SRGB) {
         fb->Attachment[i].Renderbuffer->Format = MESA_FORMAT_B8G8R8A8_UNORM;
      }
   }
}

GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      if (driDrawPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
      }

      if (driReadPriv == NULL) {
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         readFb = driReadPriv->driverPrivate;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format. We must change
       * the format before the renderbuffer's miptree gets allocated, otherwise
       * the formats of the renderbuffer and its miptree will differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport. */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}

void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled. However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
      BUFFER_BACK_LEFT,
      BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
      if (rb->mt->num_samples <= 1)
         intel_miptree_resolve_color(brw, rb->mt);
      else
         intel_renderbuffer_downsample(brw, rb);
   }
}

static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}

static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);

static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int i, count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (i = 0; i < count; i++) {
      switch (buffers[i].attachment) {
      case __DRI_BUFFER_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 front buffer";
         break;

      case __DRI_BUFFER_FAKE_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 fake front buffer";
         break;

      case __DRI_BUFFER_BACK_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
         region_name = "dri2 back buffer";
         break;

      case __DRI_BUFFER_DEPTH:
      case __DRI_BUFFER_HIZ:
      case __DRI_BUFFER_DEPTH_STENCIL:
      case __DRI_BUFFER_STENCIL:
      case __DRI_BUFFER_ACCUM:
      default:
         fprintf(stderr,
                 "unhandled buffer attach event, attachment type %d\n",
                 buffers[i].attachment);
         return;
      }

      intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }

}

void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}

/**
 * intel_prepare_render should be called anywhere that current read/draw
 * buffer state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer. So
    * mark it as dirty here.
    */
   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;
}

/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];
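   /* attachments[] is filled with (attachment token, bits-per-pixel) pairs,
    * which is why the request at the end of this function passes i / 2 as
    * the attachment count.
    */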

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
   if ((_mesa_is_front_buffer_drawing(fb) ||
        _mesa_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
       * the real front buffer to the fake front buffer. So before doing the
       * query, we need to make sure all the pending drawing has landed in the
       * real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for a
       * front buffer. If the front buffer we have is a fake front buffer,
       * the X server is going to throw it away when it processes the query.
       * So before doing the query, make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));

   *buffers = screen->dri2.loader->getBuffersWithFormat(drawable,
                                                        &drawable->w,
                                                        &drawable->h,
                                                        attachments, i / 2,
                                                        buffer_count,
                                                        drawable->loaderPrivate);
}

/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human readable name, such as "dri2 front buffer",
 *        that is passed to drm_intel_bo_gem_create_from_name().
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   drm_intel_bo *bo;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the first
    * use of a mapping of the buffer involves a bunch of page faulting which is
    * moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
      /* The bo already has a name because the miptree was created by a
       * previous call to intel_process_dri2_buffer(). If a bo already has a
       * name, then drm_intel_bo_flink() is a low-cost getter. It does not
       * create a new name.
       */
      drm_intel_bo_flink(last_mt->bo, &old_name);
   }

   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                          buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
                                            drawable->w, drawable->h,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   drm_intel_bo_unreference(bo);
}

/**
 * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers from
 * the image loader.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 */

static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo)
      return;

   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
                                            buffer->width, buffer->height,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }
}

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *screen = brw->intelScreen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   unsigned int format;
   uint32_t buffer_mask = 0;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
   }

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;

   (*screen->image.loader->getBuffers) (drawable,
                                        driGLFormatToImageFormat(format),
                                        &drawable->dri2.stamp,
                                        drawable->loaderPrivate,
                                        buffer_mask,
                                        &images);

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }
   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }
}