/*
 Copyright 2003 VMware, Inc.
 Copyright (C) Intel Corp. 2006. All Rights Reserved.
 Intel funded Tungsten Graphics to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell <keithw@vmware.com>
 */
#include "compiler/nir/nir.h"
#include "main/api_exec.h"
#include "main/context.h"
#include "main/fbobject.h"
#include "main/extensions.h"
#include "main/imports.h"
#include "main/macros.h"
#include "main/points.h"
#include "main/version.h"
#include "main/vtxfmt.h"
#include "main/texobj.h"
#include "main/framebuffer.h"

#include "vbo/vbo_context.h"

#include "drivers/common/driverfuncs.h"
#include "drivers/common/meta.h"

#include "brw_context.h"
#include "brw_defines.h"
#include "brw_blorp.h"
#include "brw_state.h"

#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_buffers.h"
#include "intel_fbo.h"
#include "intel_mipmap_tree.h"
#include "intel_pixel.h"
#include "intel_image.h"
#include "intel_tex.h"
#include "intel_tex_obj.h"

#include "swrast_setup/swrast_setup.h"
#include "tnl/t_pipeline.h"
#include "util/ralloc.h"
#include "util/debug.h"
/***************************************
 * Mesa's Driver Functions
 ***************************************/

const char *const brw_vendor_string = "Intel Open Source Technology Center";
static const char *
get_bsw_model(const struct intel_screen *screen)
{
   switch (screen->eu_total) {
   case 16:
      return "405";
   case 12:
      return "400";
   default:
      return "   ";
   }
}

const char *
brw_get_renderer_string(const struct intel_screen *screen)
{
   const char *chipset;
   static char buffer[128];
   char *bsw = NULL;

   switch (screen->deviceID) {
#undef CHIPSET
#define CHIPSET(id, symbol, str) case id: chipset = str; break;
#include "pci_ids/i965_pci_ids.h"
   default:
      chipset = "Unknown Intel Chipset";
      break;
   }

   /* Braswell branding is funny, so we have to fix it up here */
   if (screen->deviceID == 0x22B1) {
      bsw = strdup(chipset);
      char *needle = strstr(bsw, "XXX");
      if (needle) {
         memcpy(needle, get_bsw_model(screen), 3);
         chipset = bsw;
      }
   }

   (void) driGetRendererString(buffer, chipset, 0);
   free(bsw);
   return buffer;
}
static const GLubyte *
intel_get_string(struct gl_context * ctx, GLenum name)
{
   const struct brw_context *const brw = brw_context(ctx);

   switch (name) {
   case GL_VENDOR:
      return (GLubyte *) brw_vendor_string;

   case GL_RENDERER:
      return
         (GLubyte *) brw_get_renderer_string(brw->screen);

   default:
      return NULL;
   }
}
static void
intel_viewport(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;

   if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (driContext->driDrawablePriv)
         dri2InvalidateDrawable(driContext->driDrawablePriv);
      if (driContext->driReadablePriv)
         dri2InvalidateDrawable(driContext->driReadablePriv);
   }
}
static void
intel_update_framebuffer(struct gl_context *ctx,
                         struct gl_framebuffer *fb)
{
   struct brw_context *brw = brw_context(ctx);

   /* Quantize the derived default number of samples */
   fb->DefaultGeometry._NumSamples =
      intel_quantize_num_samples(brw->screen,
                                 fb->DefaultGeometry.NumSamples);
}
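
/* A minimal sketch of the quantization requested above, assuming
 * intel_quantize_num_samples() rounds the count up to the nearest mode in
 * the screen's descending, zero-terminated MSAA mode list (the real helper
 * lives in intel_screen.c; this is illustrative only):
 */
#if 0
static unsigned
quantize_num_samples_sketch(const int *msaa_modes, unsigned requested)
{
   unsigned quantized = 0;
   for (int i = 0; msaa_modes[i] != 0; i++) {
      if ((unsigned) msaa_modes[i] >= requested)
         quantized = msaa_modes[i];   /* tightest mode still >= request */
      else
         break;
   }
   return quantized;
}
#endif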
static bool
intel_disable_rb_aux_buffer(struct brw_context *brw, const struct brw_bo *bo)
{
   const struct gl_framebuffer *fb = brw->ctx.DrawBuffer;
   bool found = false;

   for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
      const struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (irb && irb->mt->bo == bo) {
         found = brw->draw_aux_buffer_disabled[i] = true;
      }
   }

   return found;
}
static void
intel_update_state(struct gl_context * ctx, GLuint new_state)
{
   struct brw_context *brw = brw_context(ctx);
   struct intel_texture_object *tex_obj;
   struct intel_renderbuffer *depth_irb;

   if (ctx->swrast_context)
      _swrast_InvalidateState(ctx, new_state);
   _vbo_InvalidateState(ctx, new_state);

   brw->NewGLState |= new_state;

   _mesa_unlock_context_textures(ctx);

   /* Resolve the depth buffer's HiZ buffer. */
   depth_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
   if (depth_irb && depth_irb->mt) {
      intel_miptree_slice_resolve_hiz(brw, depth_irb->mt,
                                      depth_irb->mt_level,
                                      depth_irb->mt_layer);
   }

   memset(brw->draw_aux_buffer_disabled, 0,
          sizeof(brw->draw_aux_buffer_disabled));

   /* Resolve depth buffer and render cache of each enabled texture. */
   int maxEnabledUnit = ctx->Texture._MaxEnabledTexImageUnit;
   for (int i = 0; i <= maxEnabledUnit; i++) {
      if (!ctx->Texture.Unit[i]._Current)
         continue;
      tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
      if (!tex_obj || !tex_obj->mt)
         continue;

      /* We need intel_texture_object::_Format to be valid */
      intel_finalize_mipmap_tree(brw, i);

      bool aux_supported;
      intel_miptree_prepare_texture(brw, tex_obj->mt, tex_obj->_Format,
                                    &aux_supported);

      if (!aux_supported && brw->gen >= 9 &&
          intel_disable_rb_aux_buffer(brw, tex_obj->mt->bo)) {
         perf_debug("Sampling renderbuffer with non-compressible format - "
                    "turning off compression");
      }

      brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);

      if (tex_obj->base.StencilSampling ||
          tex_obj->mt->format == MESA_FORMAT_S_UINT8) {
         intel_update_r8stencil(brw, tex_obj->mt);
      }
   }

   /* Resolve color for each active shader image. */
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      const struct gl_program *prog = ctx->_Shader->CurrentProgram[i];

      if (unlikely(prog && prog->info.num_images)) {
         for (unsigned j = 0; j < prog->info.num_images; j++) {
            struct gl_image_unit *u =
               &ctx->ImageUnits[prog->sh.ImageUnits[j]];
            tex_obj = intel_texture_object(u->TexObj);

            if (tex_obj && tex_obj->mt) {
               intel_miptree_prepare_image(brw, tex_obj->mt);

               if (intel_miptree_is_lossless_compressed(brw, tex_obj->mt) &&
                   intel_disable_rb_aux_buffer(brw, tex_obj->mt->bo)) {
                  perf_debug("Using renderbuffer as shader image - turning "
                             "off lossless compression");
               }

               brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
            }
         }
      }
   }

   /* Resolve color buffers for non-coherent framebuffer fetch. */
   if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
       ctx->FragmentProgram._Current &&
       ctx->FragmentProgram._Current->info.outputs_read) {
      const struct gl_framebuffer *fb = ctx->DrawBuffer;

      for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
         const struct intel_renderbuffer *irb =
            intel_renderbuffer(fb->_ColorDrawBuffers[i]);

         if (irb)
            intel_miptree_prepare_fb_fetch(brw, irb->mt, irb->mt_level,
                                           irb->mt_layer, irb->layer_count);
      }
   }

   struct gl_framebuffer *fb = ctx->DrawBuffer;
   for (int i = 0; i < fb->_NumColorDrawBuffers; i++) {
      struct intel_renderbuffer *irb =
         intel_renderbuffer(fb->_ColorDrawBuffers[i]);

      if (irb == NULL || irb->mt == NULL)
         continue;

      intel_miptree_prepare_render(brw, irb->mt, irb->mt_level,
                                   irb->mt_layer, irb->layer_count,
                                   ctx->Color.sRGBEnabled);
   }

   _mesa_lock_context_textures(ctx);

   if (new_state & _NEW_BUFFERS) {
      intel_update_framebuffer(ctx, ctx->DrawBuffer);
      if (ctx->DrawBuffer != ctx->ReadBuffer)
         intel_update_framebuffer(ctx, ctx->ReadBuffer);
   }
}
#define flushFront(screen)      ((screen)->image.loader ? (screen)->image.loader->flushFrontBuffer : (screen)->dri2.loader->flushFrontBuffer)
static void
intel_flush_front(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *driDrawable = driContext->driDrawablePriv;
   __DRIscreen *const dri_screen = brw->screen->driScrnPriv;

   if (brw->front_buffer_dirty && _mesa_is_winsys_fbo(ctx->DrawBuffer)) {
      if (flushFront(dri_screen) && driDrawable &&
          driDrawable->loaderPrivate) {

         /* Resolve before flushing FAKE_FRONT_LEFT to FRONT_LEFT.
          *
          * This potentially resolves both front and back buffer. It
          * is unnecessary to resolve the back, but harms nothing except
          * performance. And no one cares about front-buffer render
          * performance.
          */
         intel_resolve_for_dri2_flush(brw, driDrawable);
         intel_batchbuffer_flush(brw);

         flushFront(dri_screen)(driDrawable, driDrawable->loaderPrivate);

         /* We set the dirty bit in intel_prepare_render() if we're
          * front buffer rendering once we get there.
          */
         brw->front_buffer_dirty = false;
      }
   }
}
static void
intel_glFlush(struct gl_context *ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_batchbuffer_flush(brw);
   intel_flush_front(ctx);

   brw->need_flush_throttle = true;
}
static void
intel_finish(struct gl_context * ctx)
{
   struct brw_context *brw = brw_context(ctx);

   intel_glFlush(ctx);

   if (brw->batch.last_bo)
      brw_bo_wait_rendering(brw, brw->batch.last_bo);
}
void
brw_init_driver_functions(struct brw_context *brw,
                          struct dd_function_table *functions)
{
   _mesa_init_driver_functions(functions);

   /* GLX uses DRI2 invalidate events to handle window resizing.
    * Unfortunately, EGL does not - libEGL is written in XCB (not Xlib),
    * which doesn't provide a mechanism for snooping the event queues.
    *
    * So EGL still relies on viewport hacks to handle window resizing.
    * This should go away with DRI3000.
    */
   if (!brw->driContext->driScreenPriv->dri2.useInvalidate)
      functions->Viewport = intel_viewport;

   functions->Flush = intel_glFlush;
   functions->Finish = intel_finish;
   functions->GetString = intel_get_string;
   functions->UpdateState = intel_update_state;

   intelInitTextureFuncs(functions);
   intelInitTextureImageFuncs(functions);
   intelInitTextureSubImageFuncs(functions);
   intelInitTextureCopyImageFuncs(functions);
   intelInitCopyImageFuncs(functions);
   intelInitClearFuncs(functions);
   intelInitBufferFuncs(functions);
   intelInitPixelFuncs(functions);
   intelInitBufferObjectFuncs(functions);
   brw_init_syncobj_functions(functions);
   brw_init_object_purgeable_functions(functions);

   brwInitFragProgFuncs( functions );
   brw_init_common_queryobj_functions(functions);
   if (brw->gen >= 8 || brw->is_haswell)
      hsw_init_queryobj_functions(functions);
   else if (brw->gen >= 6)
      gen6_init_queryobj_functions(functions);
   else
      gen4_init_queryobj_functions(functions);
   brw_init_compute_functions(functions);

   brw_init_conditional_render_functions(functions);

   functions->QueryInternalFormat = brw_query_internal_format;

   functions->NewTransformFeedback = brw_new_transform_feedback;
   functions->DeleteTransformFeedback = brw_delete_transform_feedback;
   if (can_do_mi_math_and_lrr(brw->screen)) {
      functions->BeginTransformFeedback = hsw_begin_transform_feedback;
      functions->EndTransformFeedback = hsw_end_transform_feedback;
      functions->PauseTransformFeedback = hsw_pause_transform_feedback;
      functions->ResumeTransformFeedback = hsw_resume_transform_feedback;
   } else if (brw->gen >= 7) {
      functions->BeginTransformFeedback = gen7_begin_transform_feedback;
      functions->EndTransformFeedback = gen7_end_transform_feedback;
      functions->PauseTransformFeedback = gen7_pause_transform_feedback;
      functions->ResumeTransformFeedback = gen7_resume_transform_feedback;
      functions->GetTransformFeedbackVertexCount =
         brw_get_transform_feedback_vertex_count;
   } else {
      functions->BeginTransformFeedback = brw_begin_transform_feedback;
      functions->EndTransformFeedback = brw_end_transform_feedback;
      functions->PauseTransformFeedback = brw_pause_transform_feedback;
      functions->ResumeTransformFeedback = brw_resume_transform_feedback;
      functions->GetTransformFeedbackVertexCount =
         brw_get_transform_feedback_vertex_count;
   }

   if (brw->gen >= 6)
      functions->GetSamplePosition = gen6_get_sample_position;
}
static void
brw_initialize_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct brw_compiler *compiler = brw->screen->compiler;

   const bool stage_exists[MESA_SHADER_STAGES] = {
      [MESA_SHADER_VERTEX] = true,
      [MESA_SHADER_TESS_CTRL] = brw->gen >= 7,
      [MESA_SHADER_TESS_EVAL] = brw->gen >= 7,
      [MESA_SHADER_GEOMETRY] = brw->gen >= 6,
      [MESA_SHADER_FRAGMENT] = true,
      [MESA_SHADER_COMPUTE] =
         ((ctx->API == API_OPENGL_COMPAT || ctx->API == API_OPENGL_CORE) &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 1024) ||
         (ctx->API == API_OPENGLES2 &&
          ctx->Const.MaxComputeWorkGroupSize[0] >= 128) ||
         _mesa_extension_override_enables.ARB_compute_shader,
   };

   unsigned num_stages = 0;
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      if (stage_exists[i])
         num_stages++;
   }

   unsigned max_samplers =
      brw->gen >= 8 || brw->is_haswell ? BRW_MAX_TEX_UNIT : 16;

   ctx->Const.MaxDualSourceDrawBuffers = 1;
   ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
   ctx->Const.MaxCombinedShaderOutputResources =
      MAX_IMAGE_UNITS + BRW_MAX_DRAW_BUFFERS;

   /* The timestamp register we can read for glGetTimestamp() is
    * sometimes only 32 bits, before scaling to nanoseconds (depending
    * on the kernel).
    *
    * Once scaled to nanoseconds the timestamp would roll over at a
    * non-power-of-two, so an application couldn't use
    * GL_QUERY_COUNTER_BITS to handle rollover correctly. Instead, we
    * report 36 bits and truncate at that (rolling over 5 times as
    * often as the HW counter), and when the 32-bit counter rolls
    * over, it happens to also be at a rollover in the reported value
    * from near (1<<36) to 0.
    *
    * The low 32 bits rolls over in ~343 seconds. Our 36-bit result
    * rolls over every ~69 seconds.
    */
   ctx->Const.QueryCounterBits.Timestamp = 36;
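
   /* A quick sanity check of the two periods quoted above, as a sketch.
    * The 80 ns timestamp tick is an assumption for illustration (it is
    * the documented granularity on these parts, but nothing here reads
    * it): 2^32 ticks * 80 ns is ~343 s, 2^36 ns is ~69 s, and the two
    * rollovers line up because 80 * 2^32 == 5 * 2^36 exactly.
    */
#if 0
   assert(80ull * (1ull << 32) == 5ull * (1ull << 36));
#endif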
   ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
   ctx->Const.MaxImageUnits = MAX_IMAGE_UNITS;

   if (brw->gen >= 7) {
      ctx->Const.MaxRenderbufferSize = 16384;
      ctx->Const.MaxTextureLevels = MIN2(15 /* 16384 */, MAX_TEXTURE_LEVELS);
      ctx->Const.MaxCubeTextureLevels = 15; /* 16384 */
   } else {
      ctx->Const.MaxRenderbufferSize = 8192;
      ctx->Const.MaxTextureLevels = MIN2(14 /* 8192 */, MAX_TEXTURE_LEVELS);
      ctx->Const.MaxCubeTextureLevels = 14; /* 8192 */
   }
   ctx->Const.Max3DTextureLevels = 12; /* 2048 */
   ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
   ctx->Const.MaxTextureMbytes = 1536;
   ctx->Const.MaxTextureRectSize = 1 << 12;
   ctx->Const.MaxTextureMaxAnisotropy = 16.0;
   ctx->Const.MaxTextureLodBias = 15.0;
   ctx->Const.StripTextureBorder = true;
   if (brw->gen >= 7) {
      ctx->Const.MaxProgramTextureGatherComponents = 4;
      ctx->Const.MinProgramTextureGatherOffset = -32;
      ctx->Const.MaxProgramTextureGatherOffset = 31;
   } else if (brw->gen == 6) {
      ctx->Const.MaxProgramTextureGatherComponents = 1;
      ctx->Const.MinProgramTextureGatherOffset = -8;
      ctx->Const.MaxProgramTextureGatherOffset = 7;
   }

   ctx->Const.MaxUniformBlockSize = 65536;

   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program_constants *prog = &ctx->Const.Program[i];

      if (!stage_exists[i])
         continue;

      prog->MaxTextureImageUnits = max_samplers;

      prog->MaxUniformBlocks = BRW_MAX_UBO;
      prog->MaxCombinedUniformComponents =
         prog->MaxUniformComponents +
         ctx->Const.MaxUniformBlockSize / 4 * prog->MaxUniformBlocks;

      prog->MaxAtomicCounters = MAX_ATOMIC_COUNTERS;
      prog->MaxAtomicBuffers = BRW_MAX_ABO;
      prog->MaxImageUniforms = compiler->scalar_stage[i] ? BRW_MAX_IMAGES : 0;
      prog->MaxShaderStorageBlocks = BRW_MAX_SSBO;
   }

   ctx->Const.MaxTextureUnits =
      MIN2(ctx->Const.MaxTextureCoordUnits,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits);

   ctx->Const.MaxUniformBufferBindings = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedUniformBlocks = num_stages * BRW_MAX_UBO;
   ctx->Const.MaxCombinedAtomicBuffers = num_stages * BRW_MAX_ABO;
   ctx->Const.MaxCombinedShaderStorageBlocks = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxShaderStorageBufferBindings = num_stages * BRW_MAX_SSBO;
   ctx->Const.MaxCombinedTextureImageUnits = num_stages * max_samplers;
   ctx->Const.MaxCombinedImageUniforms = num_stages * BRW_MAX_IMAGES;

   /* Hardware only supports a limited number of transform feedback buffers.
    * So we need to override the Mesa default (which is based only on software
    * limits).
    */
   ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;

   /* On Gen6, in the worst case, we use up one binding table entry per
    * transform feedback component (see comments above the definition of
    * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
    * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
    * BRW_MAX_SOL_BINDINGS.
    *
    * In "separate components" mode, we need to divide this value by
    * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
    * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
    */
   ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
   ctx->Const.MaxTransformFeedbackSeparateComponents =
      BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
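
   /* Worked numbers for the division above, assuming the brw_context.h
    * values of this era (BRW_MAX_SOL_BINDINGS == 64, BRW_MAX_SOL_BUFFERS
    * == 4; check the header rather than trusting this sketch): interleaved
    * mode may spend all 64 binding table entries on a single buffer, while
    * separate mode caps each of the 4 buffers at 64 / 4 == 16 components
    * so the total still fits.
    */
#if 0
   STATIC_ASSERT(BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS == 16);
#endif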
   ctx->Const.AlwaysUseGetTransformFeedbackVertexCount =
      !can_do_mi_math_and_lrr(brw->screen);

   int max_samples;
   const int *msaa_modes = intel_supported_msaa_modes(brw->screen);
   const int clamp_max_samples =
      driQueryOptioni(&brw->optionCache, "clamp_max_samples");

   if (clamp_max_samples < 0) {
      max_samples = msaa_modes[0];
   } else {
      /* Select the largest supported MSAA mode that does not exceed
       * clamp_max_samples.
       */
      max_samples = 0;
      for (int i = 0; msaa_modes[i] != 0; ++i) {
         if (msaa_modes[i] <= clamp_max_samples) {
            max_samples = msaa_modes[i];
            break;
         }
      }
   }

   ctx->Const.MaxSamples = max_samples;
   ctx->Const.MaxColorTextureSamples = max_samples;
   ctx->Const.MaxDepthTextureSamples = max_samples;
   ctx->Const.MaxIntegerSamples = max_samples;
   ctx->Const.MaxImageSamples = 0;
   /* gen6_set_sample_maps() sets SampleMap{2,4,8}x variables which are used
    * to map indices of rectangular grid to sample numbers within a pixel.
    * These variables are used by GL_EXT_framebuffer_multisample_blit_scaled
    * extension implementation. For more details see the comment above
    * gen6_set_sample_maps() definition.
    */
   gen6_set_sample_maps(ctx);

   ctx->Const.MinLineWidth = 1.0;
   ctx->Const.MinLineWidthAA = 1.0;
   if (brw->gen >= 6) {
      ctx->Const.MaxLineWidth = 7.375;
      ctx->Const.MaxLineWidthAA = 7.375;
      ctx->Const.LineWidthGranularity = 0.125;
   } else {
      ctx->Const.MaxLineWidth = 7.0;
      ctx->Const.MaxLineWidthAA = 7.0;
      ctx->Const.LineWidthGranularity = 0.5;
   }

   /* For non-antialiased lines, we have to round the line width to the
    * nearest whole number. Make sure that we don't advertise a line
    * width that, when rounded, will be beyond the actual hardware
    * maximum.
    */
   assert(roundf(ctx->Const.MaxLineWidth) <= ctx->Const.MaxLineWidth);

   ctx->Const.MinPointSize = 1.0;
   ctx->Const.MinPointSizeAA = 1.0;
   ctx->Const.MaxPointSize = 255.0;
   ctx->Const.MaxPointSizeAA = 255.0;
   ctx->Const.PointSizeGranularity = 1.0;

   if (brw->gen >= 5 || brw->is_g4x)
      ctx->Const.MaxClipPlanes = 8;

   ctx->Const.GLSLTessLevelsAsInputs = true;
   ctx->Const.LowerTCSPatchVerticesIn = brw->gen >= 8;
   ctx->Const.LowerTESPatchVerticesIn = true;
   ctx->Const.PrimitiveRestartForPatches = true;

   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeInstructions = 16 * 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAluInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexInstructions = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTexIndirections = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAttribs = 16;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeAddressRegs = 1;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_VERTEX].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_VERTEX].MaxEnvParams);

   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAluInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexInstructions = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTexIndirections = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAttribs = 12;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeTemps = 256;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeAddressRegs = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters = 1024;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams =
      MIN2(ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxNativeParameters,
           ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxEnvParams);

   /* Fragment shaders use real, 32-bit twos-complement integers for all
    * integer types.
    */
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].HighInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;
   ctx->Const.Program[MESA_SHADER_FRAGMENT].MediumInt = ctx->Const.Program[MESA_SHADER_FRAGMENT].LowInt;

   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMin = 31;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.RangeMax = 30;
   ctx->Const.Program[MESA_SHADER_VERTEX].LowInt.Precision = 0;
   ctx->Const.Program[MESA_SHADER_VERTEX].HighInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;
   ctx->Const.Program[MESA_SHADER_VERTEX].MediumInt = ctx->Const.Program[MESA_SHADER_VERTEX].LowInt;

   /* Gen6 converts quads to polygon in beginning of 3D pipeline,
    * but we're not sure how it's actually done for vertex order,
    * that affect provoking vertex decision. Always use last vertex
    * convention for quad primitive which works as expected for now.
    */
   if (brw->gen >= 6)
      ctx->Const.QuadsFollowProvokingVertexConvention = false;

   ctx->Const.NativeIntegers = true;
   ctx->Const.VertexID_is_zero_based = true;

   /* Regarding the CMP instruction, the Ivybridge PRM says:
    *
    *   "For each enabled channel 0b or 1b is assigned to the appropriate flag
    *    bit and 0/all zeros or all ones (e.g, byte 0xFF, word 0xFFFF, DWord
    *    0xFFFFFFFF) is assigned to dst."
    *
    * but PRMs for earlier generations say
    *
    *   "In dword format, one GRF may store up to 8 results. When the register
    *    is used later as a vector of Booleans, as only LSB at each channel
    *    contains meaning [sic] data, software should make sure all higher bits
    *    are masked out (e.g. by 'and-ing' an [sic] 0x01 constant)."
    *
    * We select the representation of a true boolean uniform to be ~0, and fix
    * the results of Gen <= 5 CMP instructions with -(result & 1).
    */
   ctx->Const.UniformBooleanTrue = ~0;
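
   /* A sketch of the -(result & 1) fixup named above: keep only the LSB
    * (the one bit older hardware guarantees) and negate it, so 0 stays 0
    * and 1 becomes all ones, matching UniformBooleanTrue == ~0.
    */
#if 0
   uint32_t cmp_result = 0x7fffff01;        /* garbage above the LSB */
   uint32_t fixed = -(cmp_result & 1);      /* 0xffffffff */
   assert(fixed == ~0u);
#endif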
   /* From the gen4 PRM, volume 4 page 127:
    *
    *   "For SURFTYPE_BUFFER non-rendertarget surfaces, this field specifies
    *    the base address of the first element of the surface, computed in
    *    software by adding the surface base address to the byte offset of
    *    the element in the buffer."
    *
    * However, unaligned accesses are slower, so enforce buffer alignment.
    */
   ctx->Const.UniformBufferOffsetAlignment = 16;

   /* ShaderStorageBufferOffsetAlignment should be a cacheline (64 bytes) so
    * that we can safely have the CPU and GPU writing the same SSBO on
    * non-cachecoherent systems (our Atom CPUs). With UBOs, the GPU never
    * writes, so there's no problem. For an SSBO, the GPU and the CPU can
    * be updating disjoint regions of the buffer simultaneously and that will
    * break if the regions overlap the same cacheline.
    */
   ctx->Const.ShaderStorageBufferOffsetAlignment = 64;
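
   /* Illustrative client-side consequence of the value above -- a sketch
    * of hypothetical application code, not driver code: binding offsets
    * must be rounded up to the advertised alignment.
    */
#if 0
   GLint ssbo_align = 0;
   glGetIntegerv(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT, &ssbo_align);
   GLintptr raw_offset = 100;   /* arbitrary unaligned offset */
   GLintptr offset = (raw_offset + ssbo_align - 1) & ~(GLintptr)(ssbo_align - 1);
   /* offset == 128 when ssbo_align == 64, as advertised here */
#endif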
   ctx->Const.TextureBufferOffsetAlignment = 16;
   ctx->Const.MaxTextureBufferSize = 128 * 1024 * 1024;

   if (brw->gen >= 6) {
      ctx->Const.MaxVarying = 32;
      ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = 64;
      ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents = 128;
      ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents = 128;
   }

   /* We want the GLSL compiler to emit code that uses condition codes */
   for (int i = 0; i < MESA_SHADER_STAGES; i++) {
      ctx->Const.ShaderCompilerOptions[i] =
         brw->screen->compiler->glsl_compiler_options[i];
   }

   if (brw->gen >= 7) {
      ctx->Const.MaxViewportWidth = 32768;
      ctx->Const.MaxViewportHeight = 32768;
   }

   /* ARB_viewport_array, OES_viewport_array */
   if (brw->gen >= 6) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
      ctx->Const.ViewportBounds.Min = -(float)ctx->Const.MaxViewportWidth;
      ctx->Const.ViewportBounds.Max = ctx->Const.MaxViewportWidth;
   }

   /* ARB_gpu_shader5 */
   if (brw->gen >= 7)
      ctx->Const.MaxVertexStreams = MIN2(4, MAX_VERTEX_STREAMS);

   /* ARB_framebuffer_no_attachments */
   ctx->Const.MaxFramebufferWidth = 16384;
   ctx->Const.MaxFramebufferHeight = 16384;
   ctx->Const.MaxFramebufferLayers = ctx->Const.MaxArrayTextureLayers;
   ctx->Const.MaxFramebufferSamples = max_samples;

   /* OES_primitive_bounding_box */
   ctx->Const.NoPrimitiveBoundingBoxOutput = true;
}
static void
brw_initialize_cs_context_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   const struct intel_screen *screen = brw->screen;
   struct gen_device_info *devinfo = &brw->screen->devinfo;

   /* FINISHME: Do this for all platforms that the kernel supports */
   if (brw->is_cherryview &&
       screen->subslice_total > 0 && screen->eu_total > 0) {
      /* Logical CS threads = EUs per subslice * 7 threads per EU */
      uint32_t max_cs_threads = screen->eu_total / screen->subslice_total * 7;

      /* Fuse configurations may give more threads than expected, never less. */
      if (max_cs_threads > devinfo->max_cs_threads)
         devinfo->max_cs_threads = max_cs_threads;
   }

   /* Maximum number of scalar compute shader invocations that can be run in
    * parallel in the same subslice assuming SIMD32 dispatch.
    *
    * We don't advertise more than 64 threads, because we are limited to 64 by
    * our usage of thread_width_max in the gpgpu walker command. This only
    * currently impacts Haswell, which otherwise might be able to advertise 70
    * threads. With SIMD32 and 64 threads, Haswell still provides twice the
    * number of invocations required for ARB_compute_shader.
    */
   const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
   const uint32_t max_invocations = 32 * max_threads;
   ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
   ctx->Const.MaxComputeWorkGroupSize[2] = max_invocations;
   ctx->Const.MaxComputeWorkGroupInvocations = max_invocations;
   ctx->Const.MaxComputeSharedMemorySize = 64 * 1024;
}
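
/* Worked numbers for the clamp above, as a sketch. The 70-thread figure
 * for Haswell is taken from the comment, not computed here:
 */
#if 0
static uint32_t
max_cs_invocations_sketch(uint32_t hw_max_cs_threads)
{
   const uint32_t max_threads = MIN2(64, hw_max_cs_threads); /* 64 for 70 */
   return 32 * max_threads;  /* SIMD32: 2048 invocations, twice the 1024
                              * that ARB_compute_shader requires */
}
#endif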
/**
 * Process driconf (drirc) options, setting appropriate context flags.
 *
 * intelInitExtensions still pokes at optionCache directly, in order to
 * avoid advertising various extensions. No flags are set, so it makes
 * sense to continue doing that there.
 */
static void
brw_process_driconf_options(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   driOptionCache *options = &brw->optionCache;
   driParseConfigFiles(options, &brw->screen->optionCache,
                       brw->driContext->driScreenPriv->myNum, "i965");

   int bo_reuse_mode = driQueryOptioni(options, "bo_reuse");
   switch (bo_reuse_mode) {
   case DRI_CONF_BO_REUSE_DISABLED:
      break;
   case DRI_CONF_BO_REUSE_ALL:
      brw_bufmgr_enable_reuse(brw->bufmgr);
      break;
   }

   if (INTEL_DEBUG & DEBUG_NO_HIZ) {
      brw->has_hiz = false;
      /* On gen6, you can only do separate stencil with HIZ. */
      if (brw->gen == 6)
         brw->has_separate_stencil = false;
   }

   if (driQueryOptionb(options, "always_flush_batch")) {
      fprintf(stderr, "flushing batchbuffer before/after each draw call\n");
      brw->always_flush_batch = true;
   }

   if (driQueryOptionb(options, "always_flush_cache")) {
      fprintf(stderr, "flushing GPU caches before/after each draw call\n");
      brw->always_flush_cache = true;
   }

   if (driQueryOptionb(options, "disable_throttling")) {
      fprintf(stderr, "disabling flush throttling\n");
      brw->disable_throttling = true;
   }

   brw->precompile = driQueryOptionb(&brw->optionCache, "shader_precompile");

   if (driQueryOptionb(&brw->optionCache, "precise_trig"))
      brw->screen->compiler->precise_trig = true;

   ctx->Const.ForceGLSLExtensionsWarn =
      driQueryOptionb(options, "force_glsl_extensions_warn");

   ctx->Const.ForceGLSLVersion =
      driQueryOptioni(options, "force_glsl_version");

   ctx->Const.DisableGLSLLineContinuations =
      driQueryOptionb(options, "disable_glsl_line_continuations");

   ctx->Const.AllowGLSLExtensionDirectiveMidShader =
      driQueryOptionb(options, "allow_glsl_extension_directive_midshader");

   ctx->Const.AllowGLSLBuiltinVariableRedeclaration =
      driQueryOptionb(options, "allow_glsl_builtin_variable_redeclaration");

   ctx->Const.AllowHigherCompatVersion =
      driQueryOptionb(options, "allow_higher_compat_version");

   ctx->Const.ForceGLSLAbsSqrt =
      driQueryOptionb(options, "force_glsl_abs_sqrt");

   ctx->Const.GLSLZeroInit = driQueryOptionb(options, "glsl_zero_init");

   brw->dual_color_blend_by_location =
      driQueryOptionb(options, "dual_color_blend_by_location");
}
GLboolean
brwCreateContext(gl_api api,
                 const struct gl_config *mesaVis,
                 __DRIcontext *driContextPriv,
                 unsigned major_version,
                 unsigned minor_version,
                 uint32_t flags,
                 bool notify_reset,
                 unsigned *dri_ctx_error,
                 void *sharedContextPrivate)
{
   struct gl_context *shareCtx = (struct gl_context *) sharedContextPrivate;
   struct intel_screen *screen = driContextPriv->driScreenPriv->driverPrivate;
   const struct gen_device_info *devinfo = &screen->devinfo;
   struct dd_function_table functions;

   /* Only allow the __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS flag if the kernel
    * provides us with context reset notifications.
    */
   uint32_t allowed_flags = __DRI_CTX_FLAG_DEBUG
      | __DRI_CTX_FLAG_FORWARD_COMPATIBLE;

   if (screen->has_context_reset_notification)
      allowed_flags |= __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS;

   if (flags & ~allowed_flags) {
      *dri_ctx_error = __DRI_CTX_ERROR_UNKNOWN_FLAG;
      return false;
   }

   struct brw_context *brw = rzalloc(NULL, struct brw_context);
   if (!brw) {
      fprintf(stderr, "%s: failed to alloc context\n", __func__);
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      return false;
   }

   driContextPriv->driverPrivate = brw;
   brw->driContext = driContextPriv;
   brw->screen = screen;
   brw->bufmgr = screen->bufmgr;

   brw->gen = devinfo->gen;
   brw->gt = devinfo->gt;
   brw->is_g4x = devinfo->is_g4x;
   brw->is_baytrail = devinfo->is_baytrail;
   brw->is_haswell = devinfo->is_haswell;
   brw->is_cherryview = devinfo->is_cherryview;
   brw->is_broxton = devinfo->is_broxton;
   brw->has_llc = devinfo->has_llc;
   brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
   brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
   brw->has_pln = devinfo->has_pln;
   brw->has_compr4 = devinfo->has_compr4;
   brw->has_surface_tile_offset = devinfo->has_surface_tile_offset;
   brw->has_negative_rhw_bug = devinfo->has_negative_rhw_bug;
   brw->needs_unlit_centroid_workaround =
      devinfo->needs_unlit_centroid_workaround;

   brw->must_use_separate_stencil = devinfo->must_use_separate_stencil;
   brw->has_swizzling = screen->hw_has_swizzling;

   isl_device_init(&brw->isl_dev, devinfo, screen->hw_has_swizzling);

   brw->vs.base.stage = MESA_SHADER_VERTEX;
   brw->tcs.base.stage = MESA_SHADER_TESS_CTRL;
   brw->tes.base.stage = MESA_SHADER_TESS_EVAL;
   brw->gs.base.stage = MESA_SHADER_GEOMETRY;
   brw->wm.base.stage = MESA_SHADER_FRAGMENT;
   if (brw->gen >= 8) {
      gen8_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen8_emit_depth_stencil_hiz;
   } else if (brw->gen >= 7) {
      gen7_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
   } else if (brw->gen >= 6) {
      gen6_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
   } else {
      gen4_init_vtable_surface_functions(brw);
      brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
   }

   brw_init_driver_functions(brw, &functions);

   if (notify_reset)
      functions.GetGraphicsResetStatus = brw_get_graphics_reset_status;

   struct gl_context *ctx = &brw->ctx;

   if (!_mesa_initialize_context(ctx, api, mesaVis, shareCtx, &functions)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      fprintf(stderr, "%s: failed to init mesa context\n", __func__);
      intelDestroyContext(driContextPriv);
      return false;
   }

   driContextSetFlags(ctx, flags);

   /* Initialize the software rasterizer and helper modules.
    *
    * As of GL 3.1 core, the gen4+ driver doesn't need the swrast context for
    * software fallbacks (which we have to support on legacy GL to do weird
    * glDrawPixels(), glBitmap(), and other functions).
    */
   if (api != API_OPENGL_CORE && api != API_OPENGLES2) {
      _swrast_CreateContext(ctx);
   }

   _vbo_CreateContext(ctx);
   if (ctx->swrast_context) {
      _tnl_CreateContext(ctx);
      TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
      _swsetup_CreateContext(ctx);

      /* Configure swrast to match hardware characteristics: */
      _swrast_allow_pixel_fog(ctx, false);
      _swrast_allow_vertex_fog(ctx, true);
   }

   _mesa_meta_init(ctx);

   brw_process_driconf_options(brw);

   if (INTEL_DEBUG & DEBUG_PERF)
      brw->perf_debug = true;

   brw_initialize_cs_context_constants(brw);
   brw_initialize_context_constants(brw);

   ctx->Const.ResetStrategy = notify_reset
      ? GL_LOSE_CONTEXT_ON_RESET_ARB : GL_NO_RESET_NOTIFICATION_ARB;

   /* Reinitialize the context point state. It depends on ctx->Const values. */
   _mesa_init_point(ctx);

   intel_fbo_init(brw);

   intel_batchbuffer_init(&brw->batch, brw->bufmgr, brw->has_llc);

   if (brw->gen >= 6) {
      /* Create a new hardware context. Using a hardware context means that
       * our GPU state will be saved/restored on context switch, allowing us
       * to assume that the GPU is in the same state we left it in.
       *
       * This is required for transform feedback buffer offsets, query objects,
       * and also allows us to reduce how much state we have to emit.
       */
      brw->hw_ctx = brw_create_hw_context(brw->bufmgr);

      if (!brw->hw_ctx) {
         fprintf(stderr, "Failed to create hardware context.\n");
         intelDestroyContext(driContextPriv);
         return false;
      }
   }

   if (brw_init_pipe_control(brw, devinfo)) {
      *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
      intelDestroyContext(driContextPriv);
      return false;
   }

   brw_init_state(brw);

   intelInitExtensions(ctx);

   brw_init_surface_formats(brw);

   brw_blorp_init(brw);

   brw->urb.size = devinfo->urb.size;

   if (brw->gen == 6)
      brw->urb.gs_present = false;

   brw->prim_restart.in_progress = false;
   brw->prim_restart.enable_cut_index = false;
   brw->gs.enabled = false;
   brw->clip.viewport_count = 1;

   brw->predicate.state = BRW_PREDICATE_STATE_RENDER;

   brw->max_gtt_map_object_size = screen->max_gtt_map_object_size;

   ctx->VertexProgram._MaintainTnlProgram = true;
   ctx->FragmentProgram._MaintainTexEnvProgram = true;

   brw_draw_init( brw );

   if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
      /* Turn on some extra GL_ARB_debug_output generation. */
      brw->perf_debug = true;
   }

   if ((flags & __DRI_CTX_FLAG_ROBUST_BUFFER_ACCESS) != 0) {
      ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT_ARB;
      ctx->Const.RobustAccess = GL_TRUE;
   }

   if (INTEL_DEBUG & DEBUG_SHADER_TIME)
      brw_init_shader_time(brw);

   _mesa_compute_version(ctx);

   _mesa_initialize_dispatch_tables(ctx);
   _mesa_initialize_vbo_vtxfmt(ctx);

   if (ctx->Extensions.INTEL_performance_query)
      brw_init_performance_queries(brw);

   vbo_use_buffer_objects(ctx);
   vbo_always_unmap_buffers(ctx);

   return true;
}
void
intelDestroyContext(__DRIcontext * driContextPriv)
{
   struct brw_context *brw =
      (struct brw_context *) driContextPriv->driverPrivate;
   struct gl_context *ctx = &brw->ctx;

   _mesa_meta_free(&brw->ctx);

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      /* Force a report. */
      brw->shader_time.report_time = 0;

      brw_collect_and_report_shader_time(brw);
      brw_destroy_shader_time(brw);
   }

   if (brw->gen >= 6)
      blorp_finish(&brw->blorp);

   brw_destroy_state(brw);
   brw_draw_destroy(brw);

   brw_bo_unreference(brw->curbe.curbe_bo);
   if (brw->vs.base.scratch_bo)
      brw_bo_unreference(brw->vs.base.scratch_bo);
   if (brw->tcs.base.scratch_bo)
      brw_bo_unreference(brw->tcs.base.scratch_bo);
   if (brw->tes.base.scratch_bo)
      brw_bo_unreference(brw->tes.base.scratch_bo);
   if (brw->gs.base.scratch_bo)
      brw_bo_unreference(brw->gs.base.scratch_bo);
   if (brw->wm.base.scratch_bo)
      brw_bo_unreference(brw->wm.base.scratch_bo);

   brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);

   if (ctx->swrast_context) {
      _swsetup_DestroyContext(&brw->ctx);
      _tnl_DestroyContext(&brw->ctx);
   }
   _vbo_DestroyContext(&brw->ctx);

   if (ctx->swrast_context)
      _swrast_DestroyContext(&brw->ctx);

   brw_fini_pipe_control(brw);
   intel_batchbuffer_free(&brw->batch);

   brw_bo_unreference(brw->throttle_batch[1]);
   brw_bo_unreference(brw->throttle_batch[0]);
   brw->throttle_batch[1] = NULL;
   brw->throttle_batch[0] = NULL;

   driDestroyOptionCache(&brw->optionCache);

   /* free the Mesa context */
   _mesa_free_context_data(&brw->ctx);

   ralloc_free(brw);
   driContextPriv->driverPrivate = NULL;
}
GLboolean
intelUnbindContext(__DRIcontext * driContextPriv)
{
   /* Unset current context and dispatch table */
   _mesa_make_current(NULL, NULL, NULL);

   return true;
}
/**
 * Fixes up the context for GLES 2/3 with our default-to-sRGB-capable behavior
 * on window system framebuffers.
 *
 * Desktop GL is fairly reasonable in its handling of sRGB: You can ask if
 * your renderbuffer can do sRGB encode, and you can flip a switch that does
 * sRGB encode if the renderbuffer can handle it. You can ask specifically
 * for a visual where you're guaranteed to be capable, but it turns out that
 * everyone just makes all their ARGB8888 visuals capable and doesn't offer
 * incapable ones, because there's no difference between the two in resources
 * used. Applications thus get built that accidentally rely on the default
 * visual choice being sRGB, so we make ours sRGB capable. Everything sounds
 * great.
 *
 * But for GLES2/3, they decided that it was silly to not turn on sRGB encode
 * for sRGB renderbuffers you made with the GL_EXT_texture_sRGB equivalent.
 * So they removed the enable knob and made it "if the renderbuffer is sRGB
 * capable, do sRGB encode". Then, for your window system renderbuffers, you
 * can ask for sRGB visuals and get sRGB encode, or not ask for sRGB visuals
 * and get no sRGB encode (assuming that both kinds of visual are available).
 * Thus our choice to support sRGB by default on our visuals for desktop would
 * result in broken rendering of GLES apps that aren't expecting sRGB encode.
 *
 * Unfortunately, renderbuffer setup happens before a context is created. So
 * in intel_screen.c we always set up sRGB, and here, if you're a GLES2/3
 * context (without an sRGB visual, though we don't have sRGB visuals exposed
 * yet), we go turn that back off before anyone finds out.
 */
static void
intel_gles3_srgb_workaround(struct brw_context *brw,
                            struct gl_framebuffer *fb)
{
   struct gl_context *ctx = &brw->ctx;

   if (_mesa_is_desktop_gl(ctx) || !fb->Visual.sRGBCapable)
      return;

   /* Some day when we support the sRGB capable bit on visuals available for
    * GLES, we'll need to respect that and not disable things here.
    */
   fb->Visual.sRGBCapable = false;
   for (int i = 0; i < BUFFER_COUNT; i++) {
      struct gl_renderbuffer *rb = fb->Attachment[i].Renderbuffer;
      if (rb)
         rb->Format = _mesa_get_srgb_format_linear(rb->Format);
   }
}
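
/* The API asymmetry described above, seen from a hypothetical client --
 * a sketch of application code, not driver code:
 */
#if 0
   /* Desktop GL: sRGB encode on a capable framebuffer is opt-in, so a
    * default sRGB-capable visual is harmless until the app enables it.
    */
   glEnable(GL_FRAMEBUFFER_SRGB);

   /* GLES 2/3 has no such enable: an sRGB-capable renderbuffer always
    * encodes, which is why the function above strips the capability from
    * winsys framebuffers instead of leaving it on.
    */
#endif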
GLboolean
intelMakeCurrent(__DRIcontext * driContextPriv,
                 __DRIdrawable * driDrawPriv,
                 __DRIdrawable * driReadPriv)
{
   struct brw_context *brw;
   GET_CURRENT_CONTEXT(curCtx);

   if (driContextPriv)
      brw = (struct brw_context *) driContextPriv->driverPrivate;
   else
      brw = NULL;

   /* According to the glXMakeCurrent() man page: "Pending commands to
    * the previous context, if any, are flushed before it is released."
    * But only flush if we're actually changing contexts.
    */
   if (brw_context(curCtx) && brw_context(curCtx) != brw) {
      _mesa_flush(curCtx);
   }

   if (driContextPriv) {
      struct gl_context *ctx = &brw->ctx;
      struct gl_framebuffer *fb, *readFb;

      if (driDrawPriv == NULL) {
         fb = _mesa_get_incomplete_framebuffer();
      } else {
         fb = driDrawPriv->driverPrivate;
         driContextPriv->dri2.draw_stamp = driDrawPriv->dri2.stamp - 1;
      }

      if (driReadPriv == NULL) {
         readFb = _mesa_get_incomplete_framebuffer();
      } else {
         readFb = driReadPriv->driverPrivate;
         driContextPriv->dri2.read_stamp = driReadPriv->dri2.stamp - 1;
      }

      /* The sRGB workaround changes the renderbuffer's format. We must change
       * the format before the renderbuffer's miptree gets allocated, otherwise
       * the formats of the renderbuffer and its miptree will differ.
       */
      intel_gles3_srgb_workaround(brw, fb);
      intel_gles3_srgb_workaround(brw, readFb);

      /* If the context viewport hasn't been initialized, force a call out to
       * the loader to get buffers so we have a drawable size for the initial
       * viewport. */
      if (!brw->ctx.ViewportInitialized)
         intel_prepare_render(brw);

      _mesa_make_current(ctx, fb, readFb);
   } else {
      _mesa_make_current(NULL, NULL, NULL);
   }

   return true;
}
void
intel_resolve_for_dri2_flush(struct brw_context *brw,
                             __DRIdrawable *drawable)
{
   if (brw->gen < 6) {
      /* MSAA and fast color clear are not supported, so don't waste time
       * checking whether a resolve is needed.
       */
      return;
   }

   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;

   /* Usually, only the back buffer will need to be downsampled. However,
    * the front buffer will also need it if the user has rendered into it.
    */
   static const gl_buffer_index buffers[2] = {
      BUFFER_BACK_LEFT,
      BUFFER_FRONT_LEFT,
   };

   for (int i = 0; i < 2; ++i) {
      rb = intel_get_renderbuffer(fb, buffers[i]);
      if (rb == NULL || rb->mt == NULL)
         continue;
      if (rb->mt->num_samples <= 1) {
         assert(rb->mt_layer == 0 && rb->mt_level == 0 &&
                rb->layer_count == 1);
         intel_miptree_prepare_access(brw, rb->mt, 0, 1, 0, 1, false, false);
      } else {
         intel_renderbuffer_downsample(brw, rb);
      }
   }
}
static unsigned
intel_bits_per_pixel(const struct intel_renderbuffer *rb)
{
   return _mesa_get_format_bytes(intel_rb_format(rb)) * 8;
}
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *count);

static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name);

static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable);
static void
intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct intel_renderbuffer *rb;
   __DRIbuffer *buffers = NULL;
   int count;
   const char *region_name;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   intel_query_dri2_buffers(brw, drawable, &buffers, &count);

   if (buffers == NULL)
      return;

   for (int i = 0; i < count; i++) {
      switch (buffers[i].attachment) {
      case __DRI_BUFFER_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 front buffer";
         break;

      case __DRI_BUFFER_FAKE_FRONT_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
         region_name = "dri2 fake front buffer";
         break;

      case __DRI_BUFFER_BACK_LEFT:
         rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);
         region_name = "dri2 back buffer";
         break;

      case __DRI_BUFFER_DEPTH:
      case __DRI_BUFFER_HIZ:
      case __DRI_BUFFER_DEPTH_STENCIL:
      case __DRI_BUFFER_STENCIL:
      case __DRI_BUFFER_ACCUM:
      default:
         fprintf(stderr,
                 "unhandled buffer attach event, attachment type %d\n",
                 buffers[i].attachment);
         return;
      }

      intel_process_dri2_buffer(brw, drawable, &buffers[i], rb, region_name);
   }
}
void
intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
{
   struct brw_context *brw = context->driverPrivate;
   __DRIscreen *dri_screen = brw->screen->driScrnPriv;

   /* Set this up front, so that in case our buffers get invalidated
    * while we're getting new buffers, we don't clobber the stamp and
    * thus ignore the invalidate. */
   drawable->lastStamp = drawable->dri2.stamp;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI))
      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);

   if (dri_screen->image.loader)
      intel_update_image_buffers(brw, drawable);
   else
      intel_update_dri2_buffers(brw, drawable);

   driUpdateFramebufferSize(&brw->ctx, drawable);
}
/**
 * intel_prepare_render should be called anywhere that current read/drawbuffer
 * state is required.
 */
void
intel_prepare_render(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   __DRIcontext *driContext = brw->driContext;
   __DRIdrawable *drawable;

   drawable = driContext->driDrawablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.draw_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.draw_stamp = drawable->dri2.stamp;
   }

   drawable = driContext->driReadablePriv;
   if (drawable && drawable->dri2.stamp != driContext->dri2.read_stamp) {
      if (drawable->lastStamp != drawable->dri2.stamp)
         intel_update_renderbuffers(driContext, drawable);
      driContext->dri2.read_stamp = drawable->dri2.stamp;
   }

   /* If we're currently rendering to the front buffer, the rendering
    * that will happen next will probably dirty the front buffer. So
    * mark it as dirty here.
    */
   if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
      brw->front_buffer_dirty = true;
}
/**
 * \brief Query DRI2 to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers with
 * DRI2GetBuffers() or DRI2GetBuffersWithFormat().
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 * \see DRI2GetBuffers()
 * \see DRI2GetBuffersWithFormat()
 */
static void
intel_query_dri2_buffers(struct brw_context *brw,
                         __DRIdrawable *drawable,
                         __DRIbuffer **buffers,
                         int *buffer_count)
{
   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
   struct gl_framebuffer *fb = drawable->driverPrivate;
   int i = 0;
   unsigned attachments[8];

   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   memset(attachments, 0, sizeof(attachments));
   if ((_mesa_is_front_buffer_drawing(fb) ||
        _mesa_is_front_buffer_reading(fb) ||
        !back_rb) && front_rb) {
      /* If a fake front buffer is in use, then querying for
       * __DRI_BUFFER_FRONT_LEFT will cause the server to copy the image from
       * the real front buffer to the fake front buffer. So before doing the
       * query, we need to make sure all the pending drawing has landed in the
       * real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);

      attachments[i++] = __DRI_BUFFER_FRONT_LEFT;
      attachments[i++] = intel_bits_per_pixel(front_rb);
   } else if (front_rb && brw->front_buffer_dirty) {
      /* We have pending front buffer rendering, but we aren't querying for a
       * front buffer. If the front buffer we have is a fake front buffer,
       * the X server is going to throw it away when it processes the query.
       * So before doing the query, make sure all the pending drawing has
       * landed in the real front buffer.
       */
      intel_batchbuffer_flush(brw);
      intel_flush_front(&brw->ctx);
   }

   if (back_rb) {
      attachments[i++] = __DRI_BUFFER_BACK_LEFT;
      attachments[i++] = intel_bits_per_pixel(back_rb);
   }

   assert(i <= ARRAY_SIZE(attachments));

   *buffers =
      dri_screen->dri2.loader->getBuffersWithFormat(drawable,
                                                    &drawable->w,
                                                    &drawable->h,
                                                    attachments, i / 2,
                                                    buffer_count,
                                                    drawable->loaderPrivate);
}
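
/* The attachments array above is filled as (attachment, bits-per-pixel)
 * pairs, which is why the loader is asked for i / 2 buffers. A sketch of
 * what a plain double-buffered 32 bpp drawable would send (illustrative
 * values, not taken from a real query):
 */
#if 0
static const unsigned example_attachments[] = {
   __DRI_BUFFER_BACK_LEFT, 32,   /* one (attachment, bpp) pair per buffer */
};
/* i == 2 here, so i / 2 == 1 buffer is requested from the loader. */
#endif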
/**
 * \brief Assign a DRI buffer's DRM region to a renderbuffer.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \par Note:
 *    DRI buffers whose attachment point is DRI2BufferStencil or
 *    DRI2BufferDepthStencil are handled as special cases.
 *
 * \param buffer_name is a human readable name, such as "dri2 front buffer",
 *        that is passed to brw_bo_gem_create_from_name().
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_process_dri2_buffer(struct brw_context *brw,
                          __DRIdrawable *drawable,
                          __DRIbuffer *buffer,
                          struct intel_renderbuffer *rb,
                          const char *buffer_name)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   struct brw_bo *bo;

   if (!rb)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* We try to avoid closing and reopening the same BO name, because the first
    * use of a mapping of the buffer involves a bunch of page faulting which is
    * moderately expensive.
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   uint32_t old_name = 0;
   if (last_mt) {
      /* The bo already has a name because the miptree was created by a
       * previous call to intel_process_dri2_buffer(). If a bo already has a
       * name, then brw_bo_flink() is a low-cost getter. It does not
       * create a new name.
       */
      brw_bo_flink(last_mt->bo, &old_name);
   }

   if (old_name == buffer->name)
      return;

   if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
      fprintf(stderr,
              "attaching buffer %d, at %d, cpp %d, pitch %d\n",
              buffer->name, buffer->attachment,
              buffer->cpp, buffer->pitch);
   }

   bo = brw_bo_gem_create_from_name(brw->bufmgr, buffer_name,
                                    buffer->name);
   if (!bo) {
      fprintf(stderr,
              "Failed to open BO for returned DRI2 buffer "
              "(%dx%d, %s, named %d).\n"
              "This is likely a bug in the X Server that will lead to a "
              "crash soon.\n",
              drawable->w, drawable->h, buffer_name, buffer->name);
      return;
   }

   intel_update_winsys_renderbuffer_miptree(brw, rb, bo,
                                            drawable->w, drawable->h,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       (buffer->attachment == __DRI_BUFFER_FRONT_LEFT ||
        buffer->attachment == __DRI_BUFFER_FAKE_FRONT_LEFT) &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(brw, rb);
   }

   assert(rb->mt);

   brw_bo_unreference(bo);
}
/**
 * \brief Query DRI image loader to obtain a DRIdrawable's buffers.
 *
 * To determine which DRI buffers to request, examine the renderbuffers
 * attached to the drawable's framebuffer. Then request the buffers from
 * the image loader.
 *
 * This is called from intel_update_renderbuffers().
 *
 * \param drawable      Drawable whose buffers are queried.
 * \param buffers       [out] List of buffers returned by DRI2 query.
 * \param buffer_count  [out] Number of buffers returned.
 *
 * \see intel_update_renderbuffers()
 */
static void
intel_update_image_buffer(struct brw_context *intel,
                          __DRIdrawable *drawable,
                          struct intel_renderbuffer *rb,
                          __DRIimage *buffer,
                          enum __DRIimageBufferMask buffer_type)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;

   if (!rb || !buffer->bo)
      return;

   unsigned num_samples = rb->Base.Base.NumSamples;

   /* Check and see if we're already bound to the right
    * buffer object
    */
   struct intel_mipmap_tree *last_mt;
   if (num_samples == 0)
      last_mt = rb->mt;
   else
      last_mt = rb->singlesample_mt;

   if (last_mt && last_mt->bo == buffer->bo)
      return;

   intel_update_winsys_renderbuffer_miptree(intel, rb, buffer->bo,
                                            buffer->width, buffer->height,
                                            buffer->pitch);

   if (_mesa_is_front_buffer_drawing(fb) &&
       buffer_type == __DRI_IMAGE_BUFFER_FRONT &&
       rb->Base.Base.NumSamples > 1) {
      intel_renderbuffer_upsample(intel, rb);
   }
}
static void
intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
{
   struct gl_framebuffer *fb = drawable->driverPrivate;
   __DRIscreen *dri_screen = brw->screen->driScrnPriv;
   struct intel_renderbuffer *front_rb;
   struct intel_renderbuffer *back_rb;
   struct __DRIimageList images;
   mesa_format format;
   uint32_t buffer_mask = 0;
   int ret;

   front_rb = intel_get_renderbuffer(fb, BUFFER_FRONT_LEFT);
   back_rb = intel_get_renderbuffer(fb, BUFFER_BACK_LEFT);

   if (back_rb)
      format = intel_rb_format(back_rb);
   else if (front_rb)
      format = intel_rb_format(front_rb);
   else
      return;

   if (front_rb && (_mesa_is_front_buffer_drawing(fb) ||
                    _mesa_is_front_buffer_reading(fb) || !back_rb)) {
      buffer_mask |= __DRI_IMAGE_BUFFER_FRONT;
   }

   if (back_rb)
      buffer_mask |= __DRI_IMAGE_BUFFER_BACK;

   ret = dri_screen->image.loader->getBuffers(drawable,
                                              driGLFormatToImageFormat(format),
                                              &drawable->dri2.stamp,
                                              drawable->loaderPrivate,
                                              buffer_mask,
                                              &images);
   if (!ret)
      return;

   if (images.image_mask & __DRI_IMAGE_BUFFER_FRONT) {
      drawable->w = images.front->width;
      drawable->h = images.front->height;
      intel_update_image_buffer(brw,
                                drawable,
                                front_rb,
                                images.front,
                                __DRI_IMAGE_BUFFER_FRONT);
   }

   if (images.image_mask & __DRI_IMAGE_BUFFER_BACK) {
      drawable->w = images.back->width;
      drawable->h = images.back->height;
      intel_update_image_buffer(brw,
                                drawable,
                                back_rb,
                                images.back,
                                __DRI_IMAGE_BUFFER_BACK);
   }
}