i965/fs: Make emit_spill/unspill static functions taking builder as argument.
[mesa.git] / src / mesa / drivers / dri / i965 / gen8_draw_upload.c
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "main/bufferobj.h"
25 #include "main/context.h"
26 #include "main/enums.h"
27 #include "main/macros.h"
28
29 #include "brw_draw.h"
30 #include "brw_defines.h"
31 #include "brw_context.h"
32 #include "brw_state.h"
33
34 #include "intel_batchbuffer.h"
35 #include "intel_buffer_objects.h"
36
37 static bool
38 is_passthru_format(uint32_t format)
39 {
40 switch (format) {
41 case BRW_SURFACEFORMAT_R64_PASSTHRU:
42 case BRW_SURFACEFORMAT_R64G64_PASSTHRU:
43 case BRW_SURFACEFORMAT_R64G64B64_PASSTHRU:
44 case BRW_SURFACEFORMAT_R64G64B64A64_PASSTHRU:
45 return true;
46 default:
47 return false;
48 }
49 }
50
/**
 * Emit the vertex-fetch state for the current draw: 3DSTATE_VF_SGVS,
 * 3DSTATE_VERTEX_BUFFERS, 3DSTATE_VERTEX_ELEMENTS and the
 * 3DSTATE_VF_INSTANCING packets for the emitted elements.
 *
 * Element order inside 3DSTATE_VERTEX_ELEMENTS is: every enabled input
 * except the edge flag, then the optional SGVS / draw-parameters element,
 * then the optional draw-id element, and finally the edge flag element if
 * one was found among the enabled inputs.
 *
 * \param brw  driver context. Vertex inputs are read from brw->vb, the
 *             draw-parameter and draw-id buffers from brw->draw, and the
 *             shader usage flags from brw->vs.prog_data.
 */
51 static void
52 gen8_emit_vertices(struct brw_context *brw)
53 {
54 struct gl_context *ctx = &brw->ctx;
/* SKL_MOCS_WB on gen9 and later, BDW_MOCS_WB otherwise. */
55 uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
56 bool uses_edge_flag;
57
58 brw_prepare_vertices(brw);
59 brw_prepare_shader_draw_parameters(brw);
60
/* Treat edge flags as in use whenever either polygon face is not GL_FILL. */
61 uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL ||
62 ctx->Polygon.BackMode != GL_FILL);
63
/* The vertex shader uses the vertex ID and/or instance ID: enable them in
 * 3DSTATE_VF_SGVS and point both at element index "vue".
 */
64 if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
65 unsigned vue = brw->vb.nr_enabled;
66
67 /* The element for the edge flags must always be last, so we have to
68 * insert the SGVS before it in that case.
69 */
70 if (uses_edge_flag) {
71 assert(vue > 0);
72 vue--;
73 }
74
75 WARN_ONCE(vue >= 33,
76 "Trying to insert VID/IID past 33rd vertex element, "
77 "need to reorder the vertex attrbutes.");
78
79 unsigned dw1 = 0;
80 if (brw->vs.prog_data->uses_vertexid) {
81 dw1 |= GEN8_SGVS_ENABLE_VERTEX_ID |
82 (2 << GEN8_SGVS_VERTEX_ID_COMPONENT_SHIFT) | /* .z channel */
83 (vue << GEN8_SGVS_VERTEX_ID_ELEMENT_OFFSET_SHIFT);
84 }
85
86 if (brw->vs.prog_data->uses_instanceid) {
87 dw1 |= GEN8_SGVS_ENABLE_INSTANCE_ID |
88 (3 << GEN8_SGVS_INSTANCE_ID_COMPONENT_SHIFT) | /* .w channel */
89 (vue << GEN8_SGVS_INSTANCE_ID_ELEMENT_OFFSET_SHIFT);
90 }
91
92 BEGIN_BATCH(2);
93 OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2));
94 OUT_BATCH(dw1);
95 ADVANCE_BATCH();
96
/* Instancing packet for element "vue": enable bit set, and a zero in the
 * third dword, which is where the per-input packets further down write
 * buffer->step_rate.
 */
97 BEGIN_BATCH(3);
98 OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
99 OUT_BATCH(vue | GEN8_VF_INSTANCING_ENABLE);
100 OUT_BATCH(0);
101 ADVANCE_BATCH();
102 } else {
/* Neither ID is used: emit 3DSTATE_VF_SGVS with no enable bits set. */
103 BEGIN_BATCH(2);
104 OUT_BATCH(_3DSTATE_VF_SGVS << 16 | (2 - 2));
105 OUT_BATCH(0);
106 ADVANCE_BATCH();
107 }
108
109 /* If the VS doesn't read any inputs (calculating vertex position from
110 * a state variable for some reason, for example), emit a single pad
111 * VERTEX_ELEMENT struct and bail.
112 *
113 * The stale VB state stays in place, but they don't do anything unless
114 * a VE loads from them.
115 */
116 if (brw->vb.nr_enabled == 0) {
117 BEGIN_BATCH(3);
118 OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (3 - 2));
119 OUT_BATCH((0 << GEN6_VE0_INDEX_SHIFT) |
120 GEN6_VE0_VALID |
121 (BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_VE0_FORMAT_SHIFT) |
122 (0 << BRW_VE0_SRC_OFFSET_SHIFT));
123 OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
124 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
125 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
126 (BRW_VE1_COMPONENT_STORE_1_FLT << BRW_VE1_COMPONENT_3_SHIFT));
127 ADVANCE_BATCH();
128 return;
129 }
130
131 /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
132 const bool uses_draw_params =
133 brw->vs.prog_data->uses_basevertex ||
134 brw->vs.prog_data->uses_baseinstance;
/* Each flag adds 0 or 1: one extra buffer for the draw parameters and one
 * for the draw id.
 */
135 const unsigned nr_buffers = brw->vb.nr_buffers +
136 uses_draw_params + brw->vs.prog_data->uses_drawid;
137
138 if (nr_buffers) {
139 assert(nr_buffers <= 33);
140
/* One header dword plus four dwords reserved per buffer. */
141 BEGIN_BATCH(1 + 4 * nr_buffers);
142 OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (4 * nr_buffers - 1));
143 for (unsigned i = 0; i < brw->vb.nr_buffers; i++) {
144 struct brw_vertex_buffer *buffer = &brw->vb.buffers[i];
145 uint32_t dw0 = 0;
146
147 dw0 |= i << GEN6_VB0_INDEX_SHIFT;
148 dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
149 dw0 |= buffer->stride << BRW_VB0_PITCH_SHIFT;
150 dw0 |= mocs_wb << 16;
151
152 OUT_BATCH(dw0);
153 OUT_RELOC64(buffer->bo, I915_GEM_DOMAIN_VERTEX, 0, buffer->offset);
154 OUT_BATCH(buffer->bo->size);
155 }
156
/* Draw-parameters buffer, bound at index brw->vb.nr_buffers; no pitch bits
 * are set for it.
 */
157 if (uses_draw_params) {
158 OUT_BATCH(brw->vb.nr_buffers << GEN6_VB0_INDEX_SHIFT |
159 GEN7_VB0_ADDRESS_MODIFYENABLE |
160 mocs_wb << 16);
161 OUT_RELOC64(brw->draw.draw_params_bo, I915_GEM_DOMAIN_VERTEX, 0,
162 brw->draw.draw_params_offset);
163 OUT_BATCH(brw->draw.draw_params_bo->size);
164 }
165
/* Draw-id buffer: always index brw->vb.nr_buffers + 1, even when the
 * draw-parameters buffer at index brw->vb.nr_buffers was not emitted.
 */
166 if (brw->vs.prog_data->uses_drawid) {
167 OUT_BATCH((brw->vb.nr_buffers + 1) << GEN6_VB0_INDEX_SHIFT |
168 GEN7_VB0_ADDRESS_MODIFYENABLE |
169 mocs_wb << 16);
170 OUT_RELOC64(brw->draw.draw_id_bo, I915_GEM_DOMAIN_VERTEX, 0,
171 brw->draw.draw_id_offset);
172 OUT_BATCH(brw->draw.draw_id_bo->size);
173 }
174 ADVANCE_BATCH();
175 }
176
177 /* Normally we don't need an element for the SGVS attribute because the
178 * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an
179 * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if
180 * we're using draw parameters then we need an element for those
181 * values. Additionally if there is an edge flag element then the SGVS
182 * can't be inserted past that so we need a dummy element to ensure that
183 * the edge flag is the last one.
184 */
185 const bool needs_sgvs_element = (brw->vs.prog_data->uses_basevertex ||
186 brw->vs.prog_data->uses_baseinstance ||
187 ((brw->vs.prog_data->uses_instanceid ||
188 brw->vs.prog_data->uses_vertexid) &&
189 uses_edge_flag));
190 const unsigned nr_elements =
191 brw->vb.nr_enabled + needs_sgvs_element + brw->vs.prog_data->uses_drawid;
192
193 /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
194 * presumably for VertexID/InstanceID.
195 */
196 assert(nr_elements <= 34);
197
/* Set inside the loop below if the edge flag input is enabled; its element
 * is then written after all the others.
 */
198 struct brw_vertex_element *gen6_edgeflag_input = NULL;
199
/* One header dword plus two dwords per element. */
200 BEGIN_BATCH(1 + nr_elements * 2);
201 OUT_BATCH((_3DSTATE_VERTEX_ELEMENTS << 16) | (2 * nr_elements - 1));
202 for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
203 struct brw_vertex_element *input = brw->vb.enabled[i];
204 uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
/* Default: every component is stored from the source data. */
205 uint32_t comp0 = BRW_VE1_COMPONENT_STORE_SRC;
206 uint32_t comp1 = BRW_VE1_COMPONENT_STORE_SRC;
207 uint32_t comp2 = BRW_VE1_COMPONENT_STORE_SRC;
208 uint32_t comp3 = BRW_VE1_COMPONENT_STORE_SRC;
209
210 /* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
211 * "Any SourceElementFormat of *64*_PASSTHRU cannot be used with an
212 * element which has edge flag enabled."
213 */
214 assert(!(is_passthru_format(format) && uses_edge_flag));
215
216 /* The gen4 driver expects edgeflag to come in as a float, and passes
217 * that float on to the tests in the clipper. Mesa's current vertex
218 * attribute value for EdgeFlag is stored as a float, which works out.
219 * glEdgeFlagPointer, on the other hand, gives us an unnormalized
220 * integer ubyte. Just rewrite that to convert to a float.
221 */
222 if (input == &brw->vb.inputs[VERT_ATTRIB_EDGEFLAG]) {
223 /* Gen6+ passes edgeflag as sideband along with the vertex, instead
224 * of in the VUE. We have to upload it sideband as the last vertex
225 * element according to the B-Spec.
226 */
227 gen6_edgeflag_input = input;
228 continue;
229 }
230
/* There are no breaks between the cases: a Size of N falls through and
 * overrides every component from N upward. Components 0-2 become STORE_0
 * and component 3 becomes an integer or float 1, depending on
 * glarray->Integer. A Size of 4 matches no case and keeps all four
 * components as STORE_SRC.
 */
231 switch (input->glarray->Size) {
232 case 0: comp0 = BRW_VE1_COMPONENT_STORE_0;
233 case 1: comp1 = BRW_VE1_COMPONENT_STORE_0;
234 case 2: comp2 = BRW_VE1_COMPONENT_STORE_0;
235 case 3: comp3 = input->glarray->Integer ? BRW_VE1_COMPONENT_STORE_1_INT
236 : BRW_VE1_COMPONENT_STORE_1_FLT;
237 break;
238 }
239
240 /* From the BDW PRM, Volume 2d, page 586 (VERTEX_ELEMENT_STATE):
241 *
242 * "When SourceElementFormat is set to one of the *64*_PASSTHRU
243 * formats, 64-bit components are stored in the URB without any
244 * conversion. In this case, vertex elements must be written as 128
245 * or 256 bits, with VFCOMP_STORE_0 being used to pad the output
246 * as required. E.g., if R64_PASSTHRU is used to copy a 64-bit Red
247 * component into the URB, Component 1 must be specified as
248 * VFCOMP_STORE_0 (with Components 2,3 set to VFCOMP_NOSTORE)
249 * in order to output a 128-bit vertex element, or Components 1-3 must
250 * be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex
251 * element. Likewise, use of R64G64B64_PASSTHRU requires Component 3
252 * to be specified as VFCOMP_STORE_0 in order to output a 256-bit vertex
253 * element."
254 */
255 if (input->glarray->Doubles) {
256 switch (input->glarray->Size) {
257 case 0:
258 case 1:
259 case 2:
260 /* Use 128-bits instead of 256-bits to write double and dvec2
261 * vertex elements.
262 */
263 comp2 = BRW_VE1_COMPONENT_NOSTORE;
264 comp3 = BRW_VE1_COMPONENT_NOSTORE;
265 break;
266 case 3:
267 /* Pad the output using VFCOMP_STORE_0 as suggested
268 * by the BDW PRM.
269 */
270 comp3 = BRW_VE1_COMPONENT_STORE_0;
271 break;
272 }
273 }
274
275 OUT_BATCH((input->buffer << GEN6_VE0_INDEX_SHIFT) |
276 GEN6_VE0_VALID |
277 (format << BRW_VE0_FORMAT_SHIFT) |
278 (input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
279
280 OUT_BATCH((comp0 << BRW_VE1_COMPONENT_0_SHIFT) |
281 (comp1 << BRW_VE1_COMPONENT_1_SHIFT) |
282 (comp2 << BRW_VE1_COMPONENT_2_SHIFT) |
283 (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
284 }
285
286 if (needs_sgvs_element) {
287 if (brw->vs.prog_data->uses_basevertex ||
288 brw->vs.prog_data->uses_baseinstance) {
/* Two uints from the draw-parameters buffer (index brw->vb.nr_buffers) go
 * into components 0 and 1; components 2 and 3 store 0.
 */
289 OUT_BATCH(GEN6_VE0_VALID |
290 brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
291 BRW_SURFACEFORMAT_R32G32_UINT << BRW_VE0_FORMAT_SHIFT);
292 OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
293 (BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_1_SHIFT) |
294 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
295 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
296 } else {
/* Placeholder element: only the valid bit is set in the first dword and
 * all four components store 0.
 */
297 OUT_BATCH(GEN6_VE0_VALID);
298 OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
299 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
300 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
301 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
302 }
303 }
304
/* Draw-id element: one uint from the buffer at index
 * brw->vb.nr_buffers + 1 into component 0; the other components store 0.
 */
305 if (brw->vs.prog_data->uses_drawid) {
306 OUT_BATCH(GEN6_VE0_VALID |
307 ((brw->vb.nr_buffers + 1) << GEN6_VE0_INDEX_SHIFT) |
308 (BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT));
309 OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
310 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
311 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
312 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
313 }
314
/* The edge flag element skipped in the loop above is written last, with
 * GEN6_VE0_EDGE_FLAG_ENABLE set.
 */
315 if (gen6_edgeflag_input) {
316 uint32_t format =
317 brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
318
319 OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
320 GEN6_VE0_VALID |
321 GEN6_VE0_EDGE_FLAG_ENABLE |
322 (format << BRW_VE0_FORMAT_SHIFT) |
323 (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
324 OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
325 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
326 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
327 (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
328 }
329 ADVANCE_BATCH();
330
/* One 3DSTATE_VF_INSTANCING packet per enabled input. j counts only the
 * non-edge-flag inputs, which matches the order in which their elements
 * were written above.
 */
331 for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) {
332 const struct brw_vertex_element *input = brw->vb.enabled[i];
333 const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer];
334 unsigned element_index;
335
336 /* The edge flag element is reordered to be the last one in the code
337 * above so we need to compensate for that in the element indices used
338 * below.
339 */
340 if (input == gen6_edgeflag_input)
341 element_index = nr_elements - 1;
342 else
343 element_index = j++;
344
/* Instancing is enabled only when the buffer has a non-zero step rate. */
345 BEGIN_BATCH(3);
346 OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
347 OUT_BATCH(element_index |
348 (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0));
349 OUT_BATCH(buffer->step_rate);
350 ADVANCE_BATCH();
351 }
352
/* Instancing packet for the draw-id element, with the enable bit clear.
 *
 * NOTE(review): when gen6_edgeflag_input is set, the edge flag is not among
 * the elements written before the draw-id element, so the draw-id element
 * appears to sit at nr_enabled + needs_sgvs_element - 1. The index computed
 * here would then equal the edge flag's index (nr_elements - 1) instead.
 * Confirm whether that combination can occur and whether the index needs
 * adjusting.
 */
353 if (brw->vs.prog_data->uses_drawid) {
354 const unsigned element = brw->vb.nr_enabled + needs_sgvs_element;
355 BEGIN_BATCH(3);
356 OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
357 OUT_BATCH(element);
358 OUT_BATCH(0);
359 ADVANCE_BATCH();
360 }
361 }
362
363 const struct brw_tracked_state gen8_vertices = {
364 .dirty = {
365 .mesa = _NEW_POLYGON,
366 .brw = BRW_NEW_BATCH |
367 BRW_NEW_BLORP |
368 BRW_NEW_VERTICES |
369 BRW_NEW_VS_PROG_DATA,
370 },
371 .emit = gen8_emit_vertices,
372 };
373
374 static void
375 gen8_emit_index_buffer(struct brw_context *brw)
376 {
377 const struct _mesa_index_buffer *index_buffer = brw->ib.ib;
378 uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
379
380 if (index_buffer == NULL)
381 return;
382
383 BEGIN_BATCH(5);
384 OUT_BATCH(CMD_INDEX_BUFFER << 16 | (5 - 2));
385 OUT_BATCH(brw_get_index_type(index_buffer->type) | mocs_wb);
386 OUT_RELOC64(brw->ib.bo, I915_GEM_DOMAIN_VERTEX, 0, 0);
387 OUT_BATCH(brw->ib.bo->size);
388 ADVANCE_BATCH();
389 }
390
391 const struct brw_tracked_state gen8_index_buffer = {
392 .dirty = {
393 .mesa = 0,
394 .brw = BRW_NEW_BATCH |
395 BRW_NEW_BLORP |
396 BRW_NEW_INDEX_BUFFER,
397 },
398 .emit = gen8_emit_index_buffer,
399 };
400
401 static void
402 gen8_emit_vf_topology(struct brw_context *brw)
403 {
404 BEGIN_BATCH(2);
405 OUT_BATCH(_3DSTATE_VF_TOPOLOGY << 16 | (2 - 2));
406 OUT_BATCH(brw->primitive);
407 ADVANCE_BATCH();
408 }
409
410 const struct brw_tracked_state gen8_vf_topology = {
411 .dirty = {
412 .mesa = 0,
413 .brw = BRW_NEW_BLORP |
414 BRW_NEW_PRIMITIVE,
415 },
416 .emit = gen8_emit_vf_topology,
417 };