mesa: add/update comments in _mesa_copy_buffer_subdata()
[mesa.git] / src / mesa / drivers / dri / i965 / brw_context.c
1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <keith@tungstengraphics.com>
30 */
31
32
33 #include "main/imports.h"
34 #include "main/macros.h"
35 #include "main/simple_list.h"
36
37 #include "vbo/vbo_context.h"
38
39 #include "brw_context.h"
40 #include "brw_defines.h"
41 #include "brw_draw.h"
42 #include "brw_state.h"
43
44 #include "gen6_hiz.h"
45
46 #include "intel_fbo.h"
47 #include "intel_mipmap_tree.h"
48 #include "intel_regions.h"
49 #include "intel_span.h"
50 #include "intel_tex.h"
51 #include "intel_tex_obj.h"
52
53 #include "tnl/t_pipeline.h"
54 #include "glsl/ralloc.h"
55
56 /***************************************
57 * Mesa's Driver Functions
58 ***************************************/
59
60 /**
61 * \brief Prepare for entry into glBegin/glEnd block.
62 *
63 * Resolve buffers before entering a glBegin/glEnd block. This is
64 * necessary to prevent recursive calls to FLUSH_VERTICES.
65 *
66 * This resolves the depth buffer of each enabled depth texture and the HiZ
67 * buffer of the attached depth renderbuffer.
68 *
69 * Details
70 * -------
71 * When vertices are queued during a glBegin/glEnd block, those vertices must
72 * be drawn before any rendering state changes. To ensure this, Mesa calls
73 * FLUSH_VERTICES as a prehook to such state changes. Therefore,
74 * FLUSH_VERTICES itself cannot change rendering state without falling into a
75 * recursive trap.
76 *
77 * This precludes meta-ops, namely buffer resolves, from occurring while any
78 * vertices are queued. To prevent that situation, we resolve some buffers on
79 * entering a glBegin/glEnd
80 *
81 * \see brwCleanupExecEnd()
82 */
83 static void brwPrepareExecBegin(struct gl_context *ctx)
84 {
85 struct brw_context *brw = brw_context(ctx);
86 struct intel_context *intel = &brw->intel;
87 struct intel_renderbuffer *draw_irb;
88 struct intel_texture_object *tex_obj;
89
90 if (!intel->has_hiz) {
91 /* The context uses no feature that requires buffer resolves. */
92 return;
93 }
94
95 /* Resolve each enabled texture. */
96 for (int i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
97 if (!ctx->Texture.Unit[i]._ReallyEnabled)
98 continue;
99 tex_obj = intel_texture_object(ctx->Texture.Unit[i]._Current);
100 if (!tex_obj || !tex_obj->mt)
101 continue;
102 intel_miptree_all_slices_resolve_depth(intel, tex_obj->mt);
103 }
104
105 /* Resolve the attached depth buffer. */
106 draw_irb = intel_get_renderbuffer(ctx->DrawBuffer, BUFFER_DEPTH);
107 if (draw_irb) {
108 intel_renderbuffer_resolve_hiz(intel, draw_irb);
109 }
110 }
111
112 static void brwInitDriverFunctions(struct intel_screen *screen,
113 struct dd_function_table *functions)
114 {
115 intelInitDriverFunctions( functions );
116
117 brwInitFragProgFuncs( functions );
118 brw_init_queryobj_functions(functions);
119
120 functions->PrepareExecBegin = brwPrepareExecBegin;
121 functions->BeginTransformFeedback = brw_begin_transform_feedback;
122
123 if (screen->gen >= 7)
124 functions->EndTransformFeedback = gen7_end_transform_feedback;
125 else
126 functions->EndTransformFeedback = brw_end_transform_feedback;
127 }
128
129 bool
130 brwCreateContext(int api,
131 const struct gl_config *mesaVis,
132 __DRIcontext *driContextPriv,
133 void *sharedContextPrivate)
134 {
135 __DRIscreen *sPriv = driContextPriv->driScreenPriv;
136 struct intel_screen *screen = sPriv->driverPrivate;
137 struct dd_function_table functions;
138 struct brw_context *brw = rzalloc(NULL, struct brw_context);
139 struct intel_context *intel = &brw->intel;
140 struct gl_context *ctx = &intel->ctx;
141 unsigned i;
142
143 if (!brw) {
144 printf("%s: failed to alloc context\n", __FUNCTION__);
145 return false;
146 }
147
148 brwInitDriverFunctions(screen, &functions);
149
150 if (!intelInitContext( intel, api, mesaVis, driContextPriv,
151 sharedContextPrivate, &functions )) {
152 printf("%s: failed to init intel context\n", __FUNCTION__);
153 FREE(brw);
154 return false;
155 }
156
157 brwInitVtbl( brw );
158
159 brw_init_surface_formats(brw);
160
161 /* Initialize swrast, tnl driver tables: */
162 intelInitSpanFuncs(ctx);
163
164 TNL_CONTEXT(ctx)->Driver.RunPipeline = _tnl_run_pipeline;
165
166 ctx->Const.MaxDrawBuffers = BRW_MAX_DRAW_BUFFERS;
167 ctx->Const.MaxTextureImageUnits = BRW_MAX_TEX_UNIT;
168 ctx->Const.MaxTextureCoordUnits = 8; /* Mesa limit */
169 ctx->Const.MaxTextureUnits = MIN2(ctx->Const.MaxTextureCoordUnits,
170 ctx->Const.MaxTextureImageUnits);
171 ctx->Const.MaxVertexTextureImageUnits = BRW_MAX_TEX_UNIT;
172 ctx->Const.MaxCombinedTextureImageUnits =
173 ctx->Const.MaxVertexTextureImageUnits +
174 ctx->Const.MaxTextureImageUnits;
175
176 ctx->Const.MaxTextureLevels = 14; /* 8192 */
177 if (ctx->Const.MaxTextureLevels > MAX_TEXTURE_LEVELS)
178 ctx->Const.MaxTextureLevels = MAX_TEXTURE_LEVELS;
179 ctx->Const.Max3DTextureLevels = 9;
180 ctx->Const.MaxCubeTextureLevels = 12;
181 /* minimum maximum. Users are likely to run into memory problems
182 * even at this size, since 64 * 2048 * 2048 * 4 = 1GB and we can't
183 * address that much.
184 */
185 ctx->Const.MaxArrayTextureLayers = 64;
186 ctx->Const.MaxTextureRectSize = (1<<12);
187
188 ctx->Const.MaxTextureMaxAnisotropy = 16.0;
189
190 /* Hardware only supports a limited number of transform feedback buffers.
191 * So we need to override the Mesa default (which is based only on software
192 * limits).
193 */
194 ctx->Const.MaxTransformFeedbackSeparateAttribs = BRW_MAX_SOL_BUFFERS;
195
196 /* On Gen6, in the worst case, we use up one binding table entry per
197 * transform feedback component (see comments above the definition of
198 * BRW_MAX_SOL_BINDINGS, in brw_context.h), so we need to advertise a value
199 * for MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS equal to
200 * BRW_MAX_SOL_BINDINGS.
201 *
202 * In "separate components" mode, we need to divide this value by
203 * BRW_MAX_SOL_BUFFERS, so that the total number of binding table entries
204 * used up by all buffers will not exceed BRW_MAX_SOL_BINDINGS.
205 */
206 ctx->Const.MaxTransformFeedbackInterleavedComponents = BRW_MAX_SOL_BINDINGS;
207 ctx->Const.MaxTransformFeedbackSeparateComponents =
208 BRW_MAX_SOL_BINDINGS / BRW_MAX_SOL_BUFFERS;
209
210 /* if conformance mode is set, swrast can handle any size AA point */
211 ctx->Const.MaxPointSizeAA = 255.0;
212
213 /* We want the GLSL compiler to emit code that uses condition codes */
214 for (i = 0; i <= MESA_SHADER_FRAGMENT; i++) {
215 ctx->ShaderCompilerOptions[i].MaxIfDepth = intel->gen < 6 ? 16 : UINT_MAX;
216 ctx->ShaderCompilerOptions[i].EmitCondCodes = true;
217 ctx->ShaderCompilerOptions[i].EmitNVTempInitialization = true;
218 ctx->ShaderCompilerOptions[i].EmitNoNoise = true;
219 ctx->ShaderCompilerOptions[i].EmitNoMainReturn = true;
220 ctx->ShaderCompilerOptions[i].EmitNoIndirectInput = true;
221 ctx->ShaderCompilerOptions[i].EmitNoIndirectOutput = true;
222
223 ctx->ShaderCompilerOptions[i].EmitNoIndirectUniform =
224 (i == MESA_SHADER_FRAGMENT);
225 ctx->ShaderCompilerOptions[i].EmitNoIndirectTemp =
226 (i == MESA_SHADER_FRAGMENT);
227 ctx->ShaderCompilerOptions[i].LowerClipDistance = true;
228 }
229
230 ctx->Const.VertexProgram.MaxNativeInstructions = (16 * 1024);
231 ctx->Const.VertexProgram.MaxAluInstructions = 0;
232 ctx->Const.VertexProgram.MaxTexInstructions = 0;
233 ctx->Const.VertexProgram.MaxTexIndirections = 0;
234 ctx->Const.VertexProgram.MaxNativeAluInstructions = 0;
235 ctx->Const.VertexProgram.MaxNativeTexInstructions = 0;
236 ctx->Const.VertexProgram.MaxNativeTexIndirections = 0;
237 ctx->Const.VertexProgram.MaxNativeAttribs = 16;
238 ctx->Const.VertexProgram.MaxNativeTemps = 256;
239 ctx->Const.VertexProgram.MaxNativeAddressRegs = 1;
240 ctx->Const.VertexProgram.MaxNativeParameters = 1024;
241 ctx->Const.VertexProgram.MaxEnvParams =
242 MIN2(ctx->Const.VertexProgram.MaxNativeParameters,
243 ctx->Const.VertexProgram.MaxEnvParams);
244
245 ctx->Const.FragmentProgram.MaxNativeInstructions = (16 * 1024);
246 ctx->Const.FragmentProgram.MaxNativeAluInstructions = (16 * 1024);
247 ctx->Const.FragmentProgram.MaxNativeTexInstructions = (16 * 1024);
248 ctx->Const.FragmentProgram.MaxNativeTexIndirections = (16 * 1024);
249 ctx->Const.FragmentProgram.MaxNativeAttribs = 12;
250 ctx->Const.FragmentProgram.MaxNativeTemps = 256;
251 ctx->Const.FragmentProgram.MaxNativeAddressRegs = 0;
252 ctx->Const.FragmentProgram.MaxNativeParameters = 1024;
253 ctx->Const.FragmentProgram.MaxEnvParams =
254 MIN2(ctx->Const.FragmentProgram.MaxNativeParameters,
255 ctx->Const.FragmentProgram.MaxEnvParams);
256
257 /* Fragment shaders use real, 32-bit twos-complement integers for all
258 * integer types.
259 */
260 ctx->Const.FragmentProgram.LowInt.RangeMin = 31;
261 ctx->Const.FragmentProgram.LowInt.RangeMax = 30;
262 ctx->Const.FragmentProgram.LowInt.Precision = 0;
263 ctx->Const.FragmentProgram.HighInt = ctx->Const.FragmentProgram.MediumInt
264 = ctx->Const.FragmentProgram.LowInt;
265
266 /* Gen6 converts quads to polygon in beginning of 3D pipeline,
267 but we're not sure how it's actually done for vertex order,
268 that affect provoking vertex decision. Always use last vertex
269 convention for quad primitive which works as expected for now. */
270 if (intel->gen >= 6)
271 ctx->Const.QuadsFollowProvokingVertexConvention = false;
272
273 if (intel->is_g4x || intel->gen >= 5) {
274 brw->CMD_VF_STATISTICS = GM45_3DSTATE_VF_STATISTICS;
275 brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_GM45;
276 brw->has_surface_tile_offset = true;
277 if (intel->gen < 6)
278 brw->has_compr4 = true;
279 brw->has_aa_line_parameters = true;
280 brw->has_pln = true;
281 } else {
282 brw->CMD_VF_STATISTICS = GEN4_3DSTATE_VF_STATISTICS;
283 brw->CMD_PIPELINE_SELECT = CMD_PIPELINE_SELECT_965;
284 }
285
286 /* WM maximum threads is number of EUs times number of threads per EU. */
287 if (intel->gen >= 7) {
288 if (intel->gt == 1) {
289 brw->max_wm_threads = 86;
290 brw->max_vs_threads = 36;
291 brw->max_gs_threads = 36;
292 brw->urb.size = 128;
293 brw->urb.max_vs_entries = 512;
294 brw->urb.max_gs_entries = 192;
295 } else if (intel->gt == 2) {
296 brw->max_wm_threads = 86;
297 brw->max_vs_threads = 128;
298 brw->max_gs_threads = 128;
299 brw->urb.size = 256;
300 brw->urb.max_vs_entries = 704;
301 brw->urb.max_gs_entries = 320;
302 } else {
303 assert(!"Unknown gen7 device.");
304 }
305 } else if (intel->gen == 6) {
306 if (intel->gt == 2) {
307 /* This could possibly be 80, but is supposed to require
308 * disabling of WIZ hashing (bit 6 of GT_MODE, 0x20d0) and a
309 * GPU reset to change.
310 */
311 brw->max_wm_threads = 40;
312 brw->max_vs_threads = 60;
313 brw->max_gs_threads = 60;
314 brw->urb.size = 64; /* volume 5c.5 section 5.1 */
315 brw->urb.max_vs_entries = 256; /* volume 2a (see 3DSTATE_URB) */
316 brw->urb.max_gs_entries = 256;
317 } else {
318 brw->max_wm_threads = 40;
319 brw->max_vs_threads = 24;
320 brw->max_gs_threads = 21; /* conservative; 24 if rendering disabled */
321 brw->urb.size = 32; /* volume 5c.5 section 5.1 */
322 brw->urb.max_vs_entries = 128; /* volume 2a (see 3DSTATE_URB) */
323 brw->urb.max_gs_entries = 256;
324 }
325 brw->urb.gen6_gs_previously_active = false;
326 } else if (intel->gen == 5) {
327 brw->urb.size = 1024;
328 brw->max_vs_threads = 72;
329 brw->max_gs_threads = 32;
330 brw->max_wm_threads = 12 * 6;
331 } else if (intel->is_g4x) {
332 brw->urb.size = 384;
333 brw->max_vs_threads = 32;
334 brw->max_gs_threads = 2;
335 brw->max_wm_threads = 10 * 5;
336 } else if (intel->gen < 6) {
337 brw->urb.size = 256;
338 brw->max_vs_threads = 16;
339 brw->max_gs_threads = 2;
340 brw->max_wm_threads = 8 * 4;
341 brw->has_negative_rhw_bug = true;
342 }
343
344 brw_init_state( brw );
345
346 brw->curbe.last_buf = calloc(1, 4096);
347 brw->curbe.next_buf = calloc(1, 4096);
348
349 brw->state.dirty.mesa = ~0;
350 brw->state.dirty.brw = ~0;
351
352 brw->emit_state_always = 0;
353
354 intel->batch.need_workaround_flush = true;
355
356 ctx->VertexProgram._MaintainTnlProgram = true;
357 ctx->FragmentProgram._MaintainTexEnvProgram = true;
358
359 brw_draw_init( brw );
360
361 brw->new_vs_backend = (getenv("INTEL_OLD_VS") == NULL);
362 brw->precompile = driQueryOptionb(&intel->optionCache, "shader_precompile");
363
364 /* If we're using the new shader backend, we require integer uniforms
365 * stored as actual integers.
366 */
367 if (brw->new_vs_backend) {
368 ctx->Const.NativeIntegers = true;
369 ctx->Const.UniformBooleanTrue = 1;
370 }
371
372 return true;
373 }
374