i965/gen7: Reduce GT1 WM thread count according to updated BSpec.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/imports.h"
34 #include "main/macros.h"
35 #include "main/simple_list.h"
36
37 #include "vbo/vbo_context.h"
38
39 #include "brw_context.h"
40 #include "brw_defines.h"
41 #include "brw_draw.h"
42 #include "brw_state.h"
43
44 #include "intel_fbo.h"
45 #include "intel_mipmap_tree.h"
46 #include "intel_regions.h"
47 #include "intel_span.h"
48 #include "intel_tex.h"
49 #include "intel_tex_obj.h"
50
51 #include "tnl/t_pipeline.h"
52 #include "glsl/ralloc.h"
53
54 /***************************************
55 * Mesa's Driver Functions
56 ***************************************/
57
58 static void brwInitDriverFunctions(struct intel_screen *screen,
59 struct dd_function_table *functions)
60 {
61 intelInitDriverFunctions( functions );
62
63 brwInitFragProgFuncs( functions );
64 brw_init_queryobj_functions(functions);
65
66 functions->BeginTransformFeedback = brw_begin_transform_feedback;
67
68 if (screen->gen >= 7)
69 functions->EndTransformFeedback = gen7_end_transform_feedback;
70 else
71 functions->EndTransformFeedback = brw_end_transform_feedback;
72 }
73
74 bool
75 brwCreateContext(int api,
76 const struct gl_config *mesaVis,
77 __DRIcontext *driContextPriv,
78 void *sharedContextPrivate)
79 {
80 __DRIscreen *sPriv = driContextPriv->driScreenPriv;
81 struct intel_screen *screen = sPriv->driverPrivate;
82 struct dd_function_table functions;
83 struct brw_context *brw = rzalloc(NULL, struct brw_context);
84 struct intel_context *intel = &brw->intel;
85 struct gl_context *ctx = &intel->ctx;
86 unsigned i;
87
88 if (!brw) {
89 printf("%s: failed to alloc context\n", __FUNCTION__);
90 return false;
91 }
92
93 brwInitDriverFunctions(screen, &functions);
94
95 if (!intelInitContext( intel, api, mesaVis, driContextPriv,
96 sharedContextPrivate, &functions )) {
97 printf("%s: failed to init intel context\n", __FUNCTION__);
98 FREE(brw);
99 return false;
100 }
101
102 brwInitVtbl( brw );
103
104 brw_init_surface_formats(brw);
105
106 /* Initialize swrast, tnl driver tables: */
107 intelInitSpanFuncs(ctx);
108
109 TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
110
111 ctx->Const.MaxDualSourceDrawBuffers = 1;
112 ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
113 ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
114 ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
115 ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits,
116 ctx->Const.MaxTextureImageUnits);
117 ctx->Const.MaxVertexTextureImageUnits = BRW_MAX_TEX_UNIT;
118 ctx->Const.MaxCombinedTextureImageUnits =
119 ctx->Const.MaxVertexTextureImageUnits +
120 ctx->Const.MaxTextureImageUnits;
121
122 ctx->Const.MaxTextureLevels = 14; /* 8192 */
123 if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
124 ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
125 ctx->Const.Max3DTextureLevels = 9;
126 ctx->Const.MaxCubeTextureLevels = 12;
127
128 if (intel->gen >= 7)
129 ctx->Const.MaxArrayTextureLayers = 2048;
130 else
131 ctx->Const.MaxArrayTextureLayers = 512;
132
133 ctx->Const.MaxTextureRectSize = (1<<12);
134
135 ctx->Const.MaxTextureMaxAnisotropy = 16.0;
136
137 /* Hardware only supports a limited number of transform feedback buffers.
138 * So we need to override the Mesa default (which is based only on software
139 * limits).
140 */
141 ctx->Const.MaxTransformFeedbackBuffers = BRW_MAX_SOL_BUFFERS;
142
143 /* On Gen6, in the worst case, we use up one binding table entry per
144 * transform feedback component (see comments above the definition of
145 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
146 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
147 * BRW_MAX_SOL_BINDINGS.
148 *
149 * In "separate components" mode, we need to divide this value by
150 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
151 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
152 */
153 ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
154 ctx->Const.MaxTransformFeedbackSeparateComponents =
155 BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
156
157 if (intel->gen == 6)
158 ctx->Const.MaxSamples = 4;
159 else if (intel->gen >= 7)
160 ctx->Const.MaxSamples = 8;
161
162 /* if conformance mode is set, swrast can handle any size AA point */
163 ctx->Const.MaxPointSizeAA = 255.0;
164
165 /* We want the GLSL compiler to emit code that uses condition codes */
166 for (i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
167 ctx->ShaderCompilerOptions[i].MaxIfDepth = intel->gen < 6 ? 16 : UINT_MAX;
168 ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
169 ctx->ShaderCompilerOptions[i].EmitNVTempInitialization = true;
170 ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
171 ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
172 ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
173 ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput = true;
174
175 ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform =
176 (i == MESA_SHADER_FRAGMENT);
177 ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
178 (i == MESA_SHADER_FRAGMENT);
179 ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
180 }
181
182 ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024);
183 ctx->Const.VertexProgram.MaxAluInstructions = 0;
184 ctx->Const.VertexProgram.MaxTexInstructions = 0;
185 ctx->Const.VertexProgram.MaxTexIndirections = 0;
186 ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
187 ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
188 ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
189 ctx->Const.VertexProgram.MaxNativeAttribs = 16;
190 ctx->Const.VertexProgram.MaxNativeTemps = 256;
191 ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
192 ctx->Const.VertexProgram.MaxNativeParameters = 1024;
193 ctx->Const.VertexProgram.MaxEnvParams =
194 MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
195 ctx->Const.VertexProgram.MaxEnvParams);
196
197 ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024);
198 ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024);
199 ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024);
200 ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024);
201 ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
202 ctx->Const.FragmentProgram.MaxNativeTemps = 256;
203 ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
204 ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
205 ctx->Const.FragmentProgram.MaxEnvParams =
206 MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
207 ctx->Const.FragmentProgram.MaxEnvParams);
208
209 /* Fragment shaders use real, 32-bit twos-complement integers for all
210 * integer types.
211 */
212 ctx->Const.FragmentProgram.LowInt.RangeMin = 31;
213 ctx->Const.FragmentProgram.LowInt.RangeMax = 30;
214 ctx->Const.FragmentProgram.LowInt.Precision = 0;
215 ctx->Const.FragmentProgram.HighInt = ctx->Const.FragmentProgram.MediumInt
216 = ctx->Const.FragmentProgram.LowInt;
217
218 /* Gen6 converts quads to polygon in beginning of 3D pipeline,
219 but we're not sure how it's actually done for vertex order,
220 that affect provoking vertex decision. Always use last vertex
221 convention for quad primitive which works as expected for now. */
222 if (intel->gen >= 6)
223 ctx->Const.QuadsFollowProvokingVertexConvention = false;
224
225 if (intel->is_g4x || intel->gen >= 5) {
226 brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
227 brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
228 brw->has_surface_tile_offset = true;
229 if (intel->gen < 6)
230 brw->has_compr4 = true;
231 brw->has_aa_line_parameters = true;
232 brw->has_pln = true;
233 } else {
234 brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
235 brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
236 }
237
238 /* WM maximum threads is number of EUs times number of threads per EU. */
239 if (intel->gen >= 7) {
240 if (intel->gt == 1) {
241 brw->max_wm_threads = 48;
242 brw->max_vs_threads = 36;
243 brw->max_gs_threads = 36;
244 brw->urb.size = 128;
245 brw->urb.max_vs_entries = 512;
246 brw->urb.max_gs_entries = 192;
247 } else if (intel->gt == 2) {
248 brw->max_wm_threads = 172;
249 brw->max_vs_threads = 128;
250 brw->max_gs_threads = 128;
251 brw->urb.size = 256;
252 brw->urb.max_vs_entries = 704;
253 brw->urb.max_gs_entries = 320;
254 } else {
255 assert(!"Unknown gen7 device.");
256 }
257 } else if (intel->gen == 6) {
258 if (intel->gt == 2) {
259 brw->max_wm_threads = 80;
260 brw->max_vs_threads = 60;
261 brw->max_gs_threads = 60;
262 brw->urb.size = 64; /* volume 5c.5 section 5.1 */
263 brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
264 brw->urb.max_gs_entries = 256;
265 } else {
266 brw->max_wm_threads = 40;
267 brw->max_vs_threads = 24;
268 brw->max_gs_threads = 21; /* conservative; 24 if rendering disabled */
269 brw->urb.size = 32; /* volume 5c.5 section 5.1 */
270 brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
271 brw->urb.max_gs_entries = 256;
272 }
273 brw->urb.gen6_gs_previously_active = false;
274 } else if (intel->gen == 5) {
275 brw->urb.size = 1024;
276 brw->max_vs_threads = 72;
277 brw->max_gs_threads = 32;
278 brw->max_wm_threads = 12 * 6;
279 } else if (intel->is_g4x) {
280 brw->urb.size = 384;
281 brw->max_vs_threads = 32;
282 brw->max_gs_threads = 2;
283 brw->max_wm_threads = 10 * 5;
284 } else if (intel->gen < 6) {
285 brw->urb.size = 256;
286 brw->max_vs_threads = 16;
287 brw->max_gs_threads = 2;
288 brw->max_wm_threads = 8 * 4;
289 brw->has_negative_rhw_bug = true;
290 }
291
292 if (intel->gen <= 7) {
293 brw->needs_unlit_centroid_workaround = true;
294 }
295
296 brw->prim_restart.in_progress = false;
297 brw->prim_restart.enable_cut_index = false;
298 intel->hw_ctx = drm_intel_gem_context_create(intel->bufmgr);
299
300 brw_init_state( brw );
301
302 brw->curbe.last_buf = calloc(1, 4096);
303 brw->curbe.next_buf = calloc(1, 4096);
304
305 brw->state.dirty.mesa = ~0;
306 brw->state.dirty.brw = ~0;
307
308 brw->emit_state_always = 0;
309
310 intel->batch.need_workaround_flush = true;
311
312 ctx->VertexProgram._MaintainTnlProgram = true;
313 ctx->FragmentProgram._MaintainTexEnvProgram = true;
314
315 brw_draw_init( brw );
316
317 brw->precompile = driQueryOptionb(&intel->optionCache, "shader_precompile");
318
319 ctx->Const.NativeIntegers = true;
320 ctx->Const.UniformBooleanTrue = 1;
321
322 ctx->Const.ForceGLSLExtensionsWarn = driQueryOptionb(&intel->optionCache, "force_glsl_extensions_warn");
323
324 return true;
325 }
326