i965: use pack/unpackDouble lowering
[mesa.git] / src / gallium / drivers / ilo / core / ilo_state_sol.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2015 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "ilo_debug.h"
29 #include "ilo_vma.h"
30 #include "ilo_state_sol.h"
31
32 static bool
33 sol_stream_validate_gen7(const struct ilo_dev *dev,
34 const struct ilo_state_sol_stream_info *stream)
35 {
36 uint8_t i;
37
38 ILO_DEV_ASSERT(dev, 7, 8);
39
40 assert(stream->vue_read_base + stream->vue_read_count <=
41 stream->cv_vue_attr_count);
42
43 /*
44 * From the Ivy Bridge PRM, volume 2 part 1, page 200:
45 *
46 * "(Stream 0 Vertex Read Offset)
47 * Format: U1 count of 256-bit units
48 *
49 * Specifies amount of data to skip over before reading back Stream 0
50 * vertex data. Must be zero if the GS is enabled and the Output
51 * Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B
52 * unit)."
53 *
54 * "(Stream 0 Vertex Read Length)
55 * Format: U5-1 count of 256-bit units
56 *
57 * Specifies amount of vertex data to read back for Stream 0 vertices,
58 * starting at the Stream 0 Vertex Read Offset location. Maximum
59 * readback is 17 256-bit units (34 128-bit vertex attributes). Read
60 * data past the end of the valid vertex data has undefined contents,
61 * and therefore shouldn't be used to source stream out data. Must be
62 * zero (i.e., read length = 256b) if the GS is enabled and the Output
63 * Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B
64 * unit)."
65 */
66 assert(stream->vue_read_base == 0 || stream->vue_read_base == 2);
67 assert(stream->vue_read_count <= 34);
68
69 assert(stream->decl_count <= ILO_STATE_SOL_MAX_DECL_COUNT);
70
71 for (i = 0; i < stream->decl_count; i++) {
72 const struct ilo_state_sol_decl_info *decl = &stream->decls[i];
73
74 assert(decl->is_hole || decl->attr < stream->vue_read_count);
75
76 /*
77 * From the Ivy Bridge PRM, volume 2 part 1, page 205:
78 *
79 * "There is only enough internal storage for the 128-bit vertex
80 * header and 32 128-bit vertex attributes."
81 */
82 assert(decl->attr < 33);
83
84 assert(decl->component_base < 4 &&
85 decl->component_base + decl->component_count <= 4);
86 assert(decl->buffer < ILO_STATE_SOL_MAX_BUFFER_COUNT);
87 }
88
89 return true;
90 }
91
92 static bool
93 sol_validate_gen7(const struct ilo_dev *dev,
94 const struct ilo_state_sol_info *info)
95 {
96 uint8_t i;
97
98 ILO_DEV_ASSERT(dev, 7, 8);
99
100 /*
101 * From the Ivy Bridge PRM, volume 2 part 1, page 198:
102 *
103 * "This bit (Render Stream Select) is used even if SO Function Enable
104 * is DISABLED."
105 *
106 * From the Haswell PRM, volume 2b, page 796:
107 *
108 * "SO Function Enable must also be ENABLED in order for thiis field
109 * (Render Stream Select) to select a stream for rendering. When SO
110 * Function Enable is DISABLED and Rendering Disable is cleared (i.e.,
111 * rendering is enabled), StreamID is ignored downstream of the SO
112 * stage, allowing any stream to be rendered."
113 *
114 * We want Gen7 behavior, but we have to require users to follow Gen7.5
115 * behavior: info->sol_enable must be set for info->render_stream to work.
116 */
117
118 for (i = 0; i < ARRAY_SIZE(info->streams); i++) {
119 if (!sol_stream_validate_gen7(dev, &info->streams[i]))
120 return false;
121 }
122
123 /*
124 * From the Ivy Bridge PRM, volume 2 part 1, page 208:
125 *
126 * "(Surface Pitch)
127 * [0,2048] Must be 0 or a multiple of 4 Bytes."
128 */
129 for (i = 0; i < ARRAY_SIZE(info->buffer_strides); i++) {
130 assert(info->buffer_strides[i] <= 2048 &&
131 info->buffer_strides[i] % 4 == 0);
132 }
133
134 return true;
135 }
136
137 static bool
138 sol_set_gen7_3DSTATE_STREAMOUT(struct ilo_state_sol *sol,
139 const struct ilo_dev *dev,
140 const struct ilo_state_sol_info *info)
141 {
142 struct {
143 uint8_t offset;
144 uint8_t len;
145 } vue_read[ILO_STATE_SOL_MAX_STREAM_COUNT];
146 uint8_t i;
147 uint32_t dw1, dw2;
148
149 ILO_DEV_ASSERT(dev, 7, 8);
150
151 if (!sol_validate_gen7(dev, info))
152 return false;
153
154 for (i = 0; i < ARRAY_SIZE(info->streams); i++) {
155 const struct ilo_state_sol_stream_info *stream = &info->streams[i];
156
157 vue_read[i].offset = stream->vue_read_base / 2;
158 /*
159 * In pairs minus 1. URB entries are aligned to 512-bits. There is no
160 * need to worry about reading past entries.
161 */
162 vue_read[i].len = (stream->vue_read_count + 1) / 2;
163 if (vue_read[i].len)
164 vue_read[i].len--;
165 }
166
167 dw1 = info->render_stream << GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT |
168 info->tristrip_reorder << GEN7_SO_DW1_REORDER_MODE__SHIFT;
169
170 if (info->sol_enable)
171 dw1 |= GEN7_SO_DW1_SO_ENABLE;
172
173 if (info->render_disable)
174 dw1 |= GEN7_SO_DW1_RENDER_DISABLE;
175
176 if (info->stats_enable)
177 dw1 |= GEN7_SO_DW1_STATISTICS;
178
179 if (ilo_dev_gen(dev) < ILO_GEN(8)) {
180 const uint8_t buffer_enables = ((bool) info->buffer_strides[3]) << 3 |
181 ((bool) info->buffer_strides[2]) << 2 |
182 ((bool) info->buffer_strides[1]) << 1 |
183 ((bool) info->buffer_strides[0]);
184 dw1 |= buffer_enables << GEN7_SO_DW1_BUFFER_ENABLES__SHIFT;
185 }
186
187 dw2 = vue_read[3].offset << GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT |
188 vue_read[3].len << GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT |
189 vue_read[2].offset << GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT |
190 vue_read[2].len << GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT |
191 vue_read[1].offset << GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT |
192 vue_read[1].len << GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT |
193 vue_read[0].offset << GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT |
194 vue_read[0].len << GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT;
195
196 STATIC_ASSERT(ARRAY_SIZE(sol->streamout) >= 2);
197 sol->streamout[0] = dw1;
198 sol->streamout[1] = dw2;
199
200 memcpy(sol->strides, info->buffer_strides, sizeof(sol->strides));
201
202 return true;
203 }
204
205 static bool
206 sol_set_gen7_3DSTATE_SO_DECL_LIST(struct ilo_state_sol *sol,
207 const struct ilo_dev *dev,
208 const struct ilo_state_sol_info *info,
209 uint8_t max_decl_count)
210 {
211 uint64_t decl_list[ILO_STATE_SOL_MAX_DECL_COUNT];
212 uint8_t decl_counts[ILO_STATE_SOL_MAX_STREAM_COUNT];
213 uint8_t buffer_selects[ILO_STATE_SOL_MAX_STREAM_COUNT];
214 uint32_t dw1, dw2;
215 uint8_t i, j;
216
217 ILO_DEV_ASSERT(dev, 7, 8);
218
219 memset(decl_list, 0, sizeof(decl_list[0]) * max_decl_count);
220
221 for (i = 0; i < ARRAY_SIZE(info->streams); i++) {
222 const struct ilo_state_sol_stream_info *stream = &info->streams[i];
223
224 assert(stream->decl_count <= max_decl_count);
225 decl_counts[i] = stream->decl_count;
226 buffer_selects[i] = 0;
227
228 for (j = 0; j < stream->decl_count; j++) {
229 const struct ilo_state_sol_decl_info *decl = &stream->decls[j];
230 const uint8_t mask = ((1 << decl->component_count) - 1) <<
231 decl->component_base;
232 uint16_t val;
233
234 val = decl->buffer << GEN7_SO_DECL_OUTPUT_SLOT__SHIFT |
235 mask << GEN7_SO_DECL_COMPONENT_MASK__SHIFT;
236
237 if (decl->is_hole)
238 val |= GEN7_SO_DECL_HOLE_FLAG;
239 else
240 val |= decl->attr << GEN7_SO_DECL_REG_INDEX__SHIFT;
241
242 decl_list[j] |= (uint64_t) val << (16 * i);
243 buffer_selects[i] |= 1 << decl->buffer;
244 }
245 }
246
247 dw1 = buffer_selects[3] << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT |
248 buffer_selects[2] << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT |
249 buffer_selects[1] << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT |
250 buffer_selects[0] << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT;
251 dw2 = decl_counts[3] << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT |
252 decl_counts[2] << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT |
253 decl_counts[1] << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT |
254 decl_counts[0] << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT;
255
256 STATIC_ASSERT(ARRAY_SIZE(sol->so_decl) >= 2);
257 sol->so_decl[0] = dw1;
258 sol->so_decl[1] = dw2;
259
260 STATIC_ASSERT(ARRAY_SIZE(sol->decl[0]) == 2);
261 memcpy(sol->decl, decl_list, sizeof(sol->decl[0]) * max_decl_count);
262 sol->decl_count = max_decl_count;
263
264 return true;
265 }
266
267 static bool
268 sol_buffer_validate_gen7(const struct ilo_dev *dev,
269 const struct ilo_state_sol_buffer_info *info)
270 {
271 ILO_DEV_ASSERT(dev, 7, 8);
272
273 /*
274 * From the Ivy Bridge PRM, volume 2 part 1, page 208:
275 *
276 * "(Surface Base Address) This field specifies the starting DWord
277 * address..."
278 */
279 assert(info->offset % 4 == 0);
280
281 if (info->vma) {
282 assert(info->vma->vm_alignment % 4 == 0);
283 assert(info->size && info->offset + info->size <= info->vma->vm_size);
284 }
285
286 /* Gen8+ only */
287 if (info->write_offset_load || info->write_offset_save) {
288 assert(ilo_dev_gen(dev) >= ILO_GEN(8) && info->write_offset_vma);
289 assert(info->write_offset_offset + sizeof(uint32_t) <=
290 info->write_offset_vma->vm_size);
291 }
292
293 /*
294 * From the Broadwell PRM, volume 2b, page 206:
295 *
296 * "This field (Stream Offset) specifies the Offset in stream output
297 * buffer to start at, or whether to append to the end of an existing
298 * buffer. The Offset must be DWORD aligned."
299 */
300 if (info->write_offset_imm_enable) {
301 assert(info->write_offset_load);
302 assert(info->write_offset_imm % 4 == 0);
303 }
304
305 return true;
306 }
307
308 static uint32_t
309 sol_buffer_get_gen6_size(const struct ilo_dev *dev,
310 const struct ilo_state_sol_buffer_info *info)
311 {
312 ILO_DEV_ASSERT(dev, 6, 8);
313
314 /*
315 * From the Ivy Bridge PRM, volume 2 part 1, page 208:
316 *
317 * "(Surface End Address) This field specifies the ending DWord
318 * address..."
319 */
320 return (info->vma) ? info->size & ~3 : 0;
321 }
322
323 static bool
324 sol_buffer_set_gen7_3dstate_so_buffer(struct ilo_state_sol_buffer *sb,
325 const struct ilo_dev *dev,
326 const struct ilo_state_sol_buffer_info *info)
327 {
328 const uint32_t size = sol_buffer_get_gen6_size(dev, info);
329
330 ILO_DEV_ASSERT(dev, 7, 7.5);
331
332 if (!sol_buffer_validate_gen7(dev, info))
333 return false;
334
335 STATIC_ASSERT(ARRAY_SIZE(sb->so_buf) >= 2);
336 sb->so_buf[0] = info->offset;
337 sb->so_buf[1] = (size) ? info->offset + size : 0;
338
339 return true;
340 }
341
342 static bool
343 sol_buffer_set_gen8_3dstate_so_buffer(struct ilo_state_sol_buffer *sb,
344 const struct ilo_dev *dev,
345 const struct ilo_state_sol_buffer_info *info)
346 {
347 const uint32_t size = sol_buffer_get_gen6_size(dev, info);
348 uint32_t dw1;
349
350 ILO_DEV_ASSERT(dev, 8, 8);
351
352 if (!sol_buffer_validate_gen7(dev, info))
353 return false;
354
355 dw1 = 0;
356
357 if (info->vma)
358 dw1 |= GEN8_SO_BUF_DW1_ENABLE;
359 if (info->write_offset_load)
360 dw1 |= GEN8_SO_BUF_DW1_OFFSET_WRITE_ENABLE;
361 if (info->write_offset_save)
362 dw1 |= GEN8_SO_BUF_DW1_OFFSET_ENABLE;
363
364 STATIC_ASSERT(ARRAY_SIZE(sb->so_buf) >= 4);
365 sb->so_buf[0] = dw1;
366 sb->so_buf[1] = info->offset;
367
368 /*
369 * From the Broadwell PRM, volume 2b, page 205:
370 *
371 * "This field (Surface Size) specifies the size of buffer in number
372 * DWords minus 1 of the buffer in Graphics Memory."
373 */
374 sb->so_buf[2] = (size) ? size / 4 - 1 : 0;
375
376 /* load from imm or sb->write_offset_bo */
377 sb->so_buf[3] = (info->write_offset_imm_enable) ?
378 info->write_offset_imm : ~0u;
379
380 return true;
381 }
382
383 bool
384 ilo_state_sol_init(struct ilo_state_sol *sol,
385 const struct ilo_dev *dev,
386 const struct ilo_state_sol_info *info)
387 {
388 bool ret = true;
389
390 assert(ilo_is_zeroed(sol, sizeof(*sol)));
391 assert(ilo_is_zeroed(info->data, info->data_size));
392
393 if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
394 uint8_t max_decl_count, i;
395
396 max_decl_count = info->streams[0].decl_count;
397 for (i = 1; i < ARRAY_SIZE(info->streams); i++) {
398 if (max_decl_count < info->streams[i].decl_count)
399 max_decl_count = info->streams[i].decl_count;
400 }
401
402 assert(ilo_state_sol_data_size(dev, max_decl_count) <= info->data_size);
403 sol->decl = (uint32_t (*)[2]) info->data;
404
405 ret &= sol_set_gen7_3DSTATE_STREAMOUT(sol, dev, info);
406 ret &= sol_set_gen7_3DSTATE_SO_DECL_LIST(sol, dev, info, max_decl_count);
407 }
408
409 assert(ret);
410
411 return ret;
412 }
413
414 bool
415 ilo_state_sol_init_disabled(struct ilo_state_sol *sol,
416 const struct ilo_dev *dev,
417 bool render_disable)
418 {
419 struct ilo_state_sol_info info;
420
421 memset(&info, 0, sizeof(info));
422 info.render_disable = render_disable;
423
424 return ilo_state_sol_init(sol, dev, &info);
425 }
426
/**
 * Report the size and alignment needed for a stream-output buffer.
 *
 * \param size       requested size
 * \param alignment  receives the required alignment, which is always 4
 * \return \p size unchanged; no padding is added
 */
uint32_t
ilo_state_sol_buffer_size(const struct ilo_dev *dev, uint32_t size,
                          uint32_t *alignment)
{
   /* DWord aligned without padding */
   const uint32_t dword_alignment = 4;

   *alignment = dword_alignment;

   return size;
}
435
436 bool
437 ilo_state_sol_buffer_init(struct ilo_state_sol_buffer *sb,
438 const struct ilo_dev *dev,
439 const struct ilo_state_sol_buffer_info *info)
440 {
441 bool ret = true;
442
443 assert(ilo_is_zeroed(sb, sizeof(*sb)));
444
445 if (ilo_dev_gen(dev) >= ILO_GEN(8))
446 ret &= sol_buffer_set_gen8_3dstate_so_buffer(sb, dev, info);
447 else
448 ret &= sol_buffer_set_gen7_3dstate_so_buffer(sb, dev, info);
449
450 sb->vma = info->vma;
451 sb->write_offset_vma = info->write_offset_vma;
452
453 assert(ret);
454
455 return ret;
456 }
457
458 bool
459 ilo_state_sol_buffer_init_disabled(struct ilo_state_sol_buffer *sb,
460 const struct ilo_dev *dev)
461 {
462 struct ilo_state_sol_buffer_info info;
463
464 memset(&info, 0, sizeof(info));
465
466 return ilo_state_sol_buffer_init(sb, dev, &info);
467 }