2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2015 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "ilo_debug.h"
30 #include "ilo_state_sol.h"
33 sol_stream_validate_gen7(const struct ilo_dev
*dev
,
34 const struct ilo_state_sol_stream_info
*stream
)
38 ILO_DEV_ASSERT(dev
, 7, 8);
40 assert(stream
->vue_read_base
+ stream
->vue_read_count
<=
41 stream
->cv_vue_attr_count
);
44 * From the Ivy Bridge PRM, volume 2 part 1, page 200:
46 * "(Stream 0 Vertex Read Offset)
47 * Format: U1 count of 256-bit units
49 * Specifies amount of data to skip over before reading back Stream 0
50 * vertex data. Must be zero if the GS is enabled and the Output
51 * Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B
54 * "(Stream 0 Vertex Read Length)
55 * Format: U5-1 count of 256-bit units
57 * Specifies amount of vertex data to read back for Stream 0 vertices,
58 * starting at the Stream 0 Vertex Read Offset location. Maximum
59 * readback is 17 256-bit units (34 128-bit vertex attributes). Read
60 * data past the end of the valid vertex data has undefined contents,
61 * and therefore shouldn't be used to source stream out data. Must be
62 * zero (i.e., read length = 256b) if the GS is enabled and the Output
63 * Vertex Size field in 3DSTATE_GS is programmed to 0 (i.e., one 16B
66 assert(stream
->vue_read_base
== 0 || stream
->vue_read_base
== 2);
67 assert(stream
->vue_read_count
<= 34);
69 assert(stream
->decl_count
<= ILO_STATE_SOL_MAX_DECL_COUNT
);
71 for (i
= 0; i
< stream
->decl_count
; i
++) {
72 const struct ilo_state_sol_decl_info
*decl
= &stream
->decls
[i
];
74 assert(decl
->is_hole
|| decl
->attr
< stream
->vue_read_count
);
77 * From the Ivy Bridge PRM, volume 2 part 1, page 205:
79 * "There is only enough internal storage for the 128-bit vertex
80 * header and 32 128-bit vertex attributes."
82 assert(decl
->attr
< 33);
84 assert(decl
->component_base
< 4 &&
85 decl
->component_base
+ decl
->component_count
<= 4);
86 assert(decl
->buffer
< ILO_STATE_SOL_MAX_BUFFER_COUNT
);
93 sol_validate_gen7(const struct ilo_dev
*dev
,
94 const struct ilo_state_sol_info
*info
)
98 ILO_DEV_ASSERT(dev
, 7, 8);
101 * From the Ivy Bridge PRM, volume 2 part 1, page 198:
103 * "This bit (Render Stream Select) is used even if SO Function Enable
106 * From the Haswell PRM, volume 2b, page 796:
108 * "SO Function Enable must also be ENABLED in order for this field
109 * (Render Stream Select) to select a stream for rendering. When SO
110 * Function Enable is DISABLED and Rendering Disable is cleared (i.e.,
111 * rendering is enabled), StreamID is ignored downstream of the SO
112 * stage, allowing any stream to be rendered."
114 * We want Gen7 behavior, but we have to require users to follow Gen7.5
115 * behavior: info->sol_enable must be set for info->render_stream to work.
118 for (i
= 0; i
< ARRAY_SIZE(info
->streams
); i
++) {
119 if (!sol_stream_validate_gen7(dev
, &info
->streams
[i
]))
124 * From the Ivy Bridge PRM, volume 2 part 1, page 208:
127 * [0,2048] Must be 0 or a multiple of 4 Bytes."
129 for (i
= 0; i
< ARRAY_SIZE(info
->buffer_strides
); i
++) {
130 assert(info
->buffer_strides
[i
] <= 2048 &&
131 info
->buffer_strides
[i
] % 4 == 0);
138 sol_set_gen7_3DSTATE_STREAMOUT(struct ilo_state_sol
*sol
,
139 const struct ilo_dev
*dev
,
140 const struct ilo_state_sol_info
*info
)
145 } vue_read
[ILO_STATE_SOL_MAX_STREAM_COUNT
];
149 ILO_DEV_ASSERT(dev
, 7, 8);
151 if (!sol_validate_gen7(dev
, info
))
154 for (i
= 0; i
< ARRAY_SIZE(info
->streams
); i
++) {
155 const struct ilo_state_sol_stream_info
*stream
= &info
->streams
[i
];
157 vue_read
[i
].offset
= stream
->vue_read_base
/ 2;
159 * In pairs minus 1. URB entries are aligned to 512-bits. There is no
160 * need to worry about reading past entries.
162 vue_read
[i
].len
= (stream
->vue_read_count
+ 1) / 2;
167 dw1
= info
->render_stream
<< GEN7_SO_DW1_RENDER_STREAM_SELECT__SHIFT
|
168 info
->tristrip_reorder
<< GEN7_SO_DW1_REORDER_MODE__SHIFT
;
170 if (info
->sol_enable
)
171 dw1
|= GEN7_SO_DW1_SO_ENABLE
;
173 if (info
->render_disable
)
174 dw1
|= GEN7_SO_DW1_RENDER_DISABLE
;
176 if (info
->stats_enable
)
177 dw1
|= GEN7_SO_DW1_STATISTICS
;
179 if (ilo_dev_gen(dev
) < ILO_GEN(8)) {
180 const uint8_t buffer_enables
= ((bool) info
->buffer_strides
[3]) << 3 |
181 ((bool) info
->buffer_strides
[2]) << 2 |
182 ((bool) info
->buffer_strides
[1]) << 1 |
183 ((bool) info
->buffer_strides
[0]);
184 dw1
|= buffer_enables
<< GEN7_SO_DW1_BUFFER_ENABLES__SHIFT
;
187 dw2
= vue_read
[3].offset
<< GEN7_SO_DW2_STREAM3_READ_OFFSET__SHIFT
|
188 vue_read
[3].len
<< GEN7_SO_DW2_STREAM3_READ_LEN__SHIFT
|
189 vue_read
[2].offset
<< GEN7_SO_DW2_STREAM2_READ_OFFSET__SHIFT
|
190 vue_read
[2].len
<< GEN7_SO_DW2_STREAM2_READ_LEN__SHIFT
|
191 vue_read
[1].offset
<< GEN7_SO_DW2_STREAM1_READ_OFFSET__SHIFT
|
192 vue_read
[1].len
<< GEN7_SO_DW2_STREAM1_READ_LEN__SHIFT
|
193 vue_read
[0].offset
<< GEN7_SO_DW2_STREAM0_READ_OFFSET__SHIFT
|
194 vue_read
[0].len
<< GEN7_SO_DW2_STREAM0_READ_LEN__SHIFT
;
196 STATIC_ASSERT(ARRAY_SIZE(sol
->streamout
) >= 2);
197 sol
->streamout
[0] = dw1
;
198 sol
->streamout
[1] = dw2
;
200 memcpy(sol
->strides
, info
->buffer_strides
, sizeof(sol
->strides
));
206 sol_set_gen7_3DSTATE_SO_DECL_LIST(struct ilo_state_sol
*sol
,
207 const struct ilo_dev
*dev
,
208 const struct ilo_state_sol_info
*info
,
209 uint8_t max_decl_count
)
211 uint64_t decl_list
[ILO_STATE_SOL_MAX_DECL_COUNT
];
212 uint8_t decl_counts
[ILO_STATE_SOL_MAX_STREAM_COUNT
];
213 uint8_t buffer_selects
[ILO_STATE_SOL_MAX_STREAM_COUNT
];
217 ILO_DEV_ASSERT(dev
, 7, 8);
219 memset(decl_list
, 0, sizeof(decl_list
[0]) * max_decl_count
);
221 for (i
= 0; i
< ARRAY_SIZE(info
->streams
); i
++) {
222 const struct ilo_state_sol_stream_info
*stream
= &info
->streams
[i
];
224 assert(stream
->decl_count
<= max_decl_count
);
225 decl_counts
[i
] = stream
->decl_count
;
226 buffer_selects
[i
] = 0;
228 for (j
= 0; j
< stream
->decl_count
; j
++) {
229 const struct ilo_state_sol_decl_info
*decl
= &stream
->decls
[j
];
230 const uint8_t mask
= ((1 << decl
->component_count
) - 1) <<
231 decl
->component_base
;
234 val
= decl
->buffer
<< GEN7_SO_DECL_OUTPUT_SLOT__SHIFT
|
235 mask
<< GEN7_SO_DECL_COMPONENT_MASK__SHIFT
;
238 val
|= GEN7_SO_DECL_HOLE_FLAG
;
240 val
|= decl
->attr
<< GEN7_SO_DECL_REG_INDEX__SHIFT
;
242 decl_list
[j
] |= (uint64_t) val
<< (16 * i
);
243 buffer_selects
[i
] |= 1 << decl
->buffer
;
247 dw1
= buffer_selects
[3] << GEN7_SO_DECL_DW1_STREAM3_BUFFER_SELECTS__SHIFT
|
248 buffer_selects
[2] << GEN7_SO_DECL_DW1_STREAM2_BUFFER_SELECTS__SHIFT
|
249 buffer_selects
[1] << GEN7_SO_DECL_DW1_STREAM1_BUFFER_SELECTS__SHIFT
|
250 buffer_selects
[0] << GEN7_SO_DECL_DW1_STREAM0_BUFFER_SELECTS__SHIFT
;
251 dw2
= decl_counts
[3] << GEN7_SO_DECL_DW2_STREAM3_ENTRY_COUNT__SHIFT
|
252 decl_counts
[2] << GEN7_SO_DECL_DW2_STREAM2_ENTRY_COUNT__SHIFT
|
253 decl_counts
[1] << GEN7_SO_DECL_DW2_STREAM1_ENTRY_COUNT__SHIFT
|
254 decl_counts
[0] << GEN7_SO_DECL_DW2_STREAM0_ENTRY_COUNT__SHIFT
;
256 STATIC_ASSERT(ARRAY_SIZE(sol
->so_decl
) >= 2);
257 sol
->so_decl
[0] = dw1
;
258 sol
->so_decl
[1] = dw2
;
260 STATIC_ASSERT(ARRAY_SIZE(sol
->decl
[0]) == 2);
261 memcpy(sol
->decl
, decl_list
, sizeof(sol
->decl
[0]) * max_decl_count
);
262 sol
->decl_count
= max_decl_count
;
268 sol_buffer_validate_gen7(const struct ilo_dev
*dev
,
269 const struct ilo_state_sol_buffer_info
*info
)
271 ILO_DEV_ASSERT(dev
, 7, 8);
274 * From the Ivy Bridge PRM, volume 2 part 1, page 208:
276 * "(Surface Base Address) This field specifies the starting DWord
279 assert(info
->offset
% 4 == 0);
282 assert(info
->vma
->vm_alignment
% 4 == 0);
283 assert(info
->size
&& info
->offset
+ info
->size
<= info
->vma
->vm_size
);
287 if (info
->write_offset_load
|| info
->write_offset_save
) {
288 assert(ilo_dev_gen(dev
) >= ILO_GEN(8) && info
->write_offset_vma
);
289 assert(info
->write_offset_offset
+ sizeof(uint32_t) <=
290 info
->write_offset_vma
->vm_size
);
294 * From the Broadwell PRM, volume 2b, page 206:
296 * "This field (Stream Offset) specifies the Offset in stream output
297 * buffer to start at, or whether to append to the end of an existing
298 * buffer. The Offset must be DWORD aligned."
300 if (info
->write_offset_imm_enable
) {
301 assert(info
->write_offset_load
);
302 assert(info
->write_offset_imm
% 4 == 0);
309 sol_buffer_get_gen6_size(const struct ilo_dev
*dev
,
310 const struct ilo_state_sol_buffer_info
*info
)
312 ILO_DEV_ASSERT(dev
, 6, 8);
315 * From the Ivy Bridge PRM, volume 2 part 1, page 208:
317 * "(Surface End Address) This field specifies the ending DWord
320 return (info
->vma
) ? info
->size
& ~3 : 0;
324 sol_buffer_set_gen7_3dstate_so_buffer(struct ilo_state_sol_buffer
*sb
,
325 const struct ilo_dev
*dev
,
326 const struct ilo_state_sol_buffer_info
*info
)
328 const uint32_t size
= sol_buffer_get_gen6_size(dev
, info
);
330 ILO_DEV_ASSERT(dev
, 7, 7.5);
332 if (!sol_buffer_validate_gen7(dev
, info
))
335 STATIC_ASSERT(ARRAY_SIZE(sb
->so_buf
) >= 2);
336 sb
->so_buf
[0] = info
->offset
;
337 sb
->so_buf
[1] = (size
) ? info
->offset
+ size
: 0;
343 sol_buffer_set_gen8_3dstate_so_buffer(struct ilo_state_sol_buffer
*sb
,
344 const struct ilo_dev
*dev
,
345 const struct ilo_state_sol_buffer_info
*info
)
347 const uint32_t size
= sol_buffer_get_gen6_size(dev
, info
);
350 ILO_DEV_ASSERT(dev
, 8, 8);
352 if (!sol_buffer_validate_gen7(dev
, info
))
358 dw1
|= GEN8_SO_BUF_DW1_ENABLE
;
359 if (info
->write_offset_load
)
360 dw1
|= GEN8_SO_BUF_DW1_OFFSET_WRITE_ENABLE
;
361 if (info
->write_offset_save
)
362 dw1
|= GEN8_SO_BUF_DW1_OFFSET_ENABLE
;
364 STATIC_ASSERT(ARRAY_SIZE(sb
->so_buf
) >= 4);
366 sb
->so_buf
[1] = info
->offset
;
369 * From the Broadwell PRM, volume 2b, page 205:
371 * "This field (Surface Size) specifies the size of buffer in number
372 * DWords minus 1 of the buffer in Graphics Memory."
374 sb
->so_buf
[2] = (size
) ? size
/ 4 - 1 : 0;
376 /* load from imm or sb->write_offset_vma */
377 sb
->so_buf
[3] = (info
->write_offset_imm_enable
) ?
378 info
->write_offset_imm
: ~0u;
384 ilo_state_sol_init(struct ilo_state_sol
*sol
,
385 const struct ilo_dev
*dev
,
386 const struct ilo_state_sol_info
*info
)
390 assert(ilo_is_zeroed(sol
, sizeof(*sol
)));
391 assert(ilo_is_zeroed(info
->data
, info
->data_size
));
393 if (ilo_dev_gen(dev
) >= ILO_GEN(7)) {
394 uint8_t max_decl_count
, i
;
396 max_decl_count
= info
->streams
[0].decl_count
;
397 for (i
= 1; i
< ARRAY_SIZE(info
->streams
); i
++) {
398 if (max_decl_count
< info
->streams
[i
].decl_count
)
399 max_decl_count
= info
->streams
[i
].decl_count
;
402 assert(ilo_state_sol_data_size(dev
, max_decl_count
) <= info
->data_size
);
403 sol
->decl
= (uint32_t (*)[2]) info
->data
;
405 ret
&= sol_set_gen7_3DSTATE_STREAMOUT(sol
, dev
, info
);
406 ret
&= sol_set_gen7_3DSTATE_SO_DECL_LIST(sol
, dev
, info
, max_decl_count
);
415 ilo_state_sol_init_disabled(struct ilo_state_sol
*sol
,
416 const struct ilo_dev
*dev
,
419 struct ilo_state_sol_info info
;
421 memset(&info
, 0, sizeof(info
));
422 info
.render_disable
= render_disable
;
424 return ilo_state_sol_init(sol
, dev
, &info
);
428 ilo_state_sol_buffer_size(const struct ilo_dev
*dev
, uint32_t size
,
431 /* DWord aligned without padding */
437 ilo_state_sol_buffer_init(struct ilo_state_sol_buffer
*sb
,
438 const struct ilo_dev
*dev
,
439 const struct ilo_state_sol_buffer_info
*info
)
443 assert(ilo_is_zeroed(sb
, sizeof(*sb
)));
445 if (ilo_dev_gen(dev
) >= ILO_GEN(8))
446 ret
&= sol_buffer_set_gen8_3dstate_so_buffer(sb
, dev
, info
);
448 ret
&= sol_buffer_set_gen7_3dstate_so_buffer(sb
, dev
, info
);
451 sb
->write_offset_vma
= info
->write_offset_vma
;
459 ilo_state_sol_buffer_init_disabled(struct ilo_state_sol_buffer
*sb
,
460 const struct ilo_dev
*dev
)
462 struct ilo_state_sol_buffer_info info
;
464 memset(&info
, 0, sizeof(info
));
466 return ilo_state_sol_buffer_init(sb
, dev
, &info
);