/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
34 #include "r600_priv.h"
35 #include "radeon_drm.h"
37 #include "pipe/p_compiler.h"
38 #include "util/u_inlines.h"
39 #include <pipebuffer/pb_bufmgr.h>
41 #define GROUP_FORCE_NEW_BLOCK 0
43 struct pipe_reference reference
;
48 struct pipe_reference reference
;
/* Buffer-object helpers provided by the radeon winsys layer. */
struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf);
int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo);
void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo);
void radeon_bo_reference(struct radeon *radeon,
			 struct radeon_bo **dst,
			 struct radeon_bo *src);
unsigned radeon_ws_bo_get_handle(struct radeon_ws_bo *pb_bo);

/* Query suspend/resume helpers defined later in this file. */
static void r600_context_queries_suspend(struct r600_context *ctx);
static void r600_context_queries_resume(struct r600_context *ctx);
68 static int r600_group_id_register_offset(struct r600_context
*ctx
, unsigned offset
)
70 for (int i
= 0; i
< ctx
->ngroups
; i
++) {
71 if (offset
>= ctx
->groups
[i
].start_offset
&& offset
<= ctx
->groups
[i
].end_offset
) {
78 int r600_context_add_block(struct r600_context
*ctx
, const struct r600_reg
*reg
, unsigned nreg
)
80 struct r600_group_block
*block
, *tmp
;
81 struct r600_group
*group
;
84 for (unsigned i
= 0, n
= 0; i
< nreg
; i
+= n
) {
87 /* register that need relocation are in their own group */
89 if (!reg
[i
].need_bo
) {
90 /* find number of consecutive registers */
91 for (j
= i
+ 1, r
= reg
[i
].offset
+ 4, n
= 1; j
< (nreg
- i
); j
++, n
++, r
+=4) {
92 if (reg
[i
].need_bo
|| r
!= reg
[j
].offset
) {
98 /* ignore new block balise */
99 if (reg
[i
].offset
== GROUP_FORCE_NEW_BLOCK
)
102 /* find into which group this block is */
103 group_id
= r600_group_id_register_offset(ctx
, reg
[i
].offset
);
104 assert(group_id
>= 0);
105 group
= &ctx
->groups
[group_id
];
107 /* allocate new block */
108 tmp
= realloc(group
->blocks
, (group
->nblocks
+ 1) * sizeof(struct r600_group_block
));
113 block
= &group
->blocks
[group
->nblocks
++];
114 for (int j
= 0; j
< n
; j
++) {
115 group
->offset_block_id
[((reg
[i
].offset
- group
->start_offset
) >> 2) + j
] = group
->nblocks
- 1;
118 /* initialize block */
119 memset(block
, 0, sizeof(struct r600_group_block
));
120 block
->start_offset
= reg
[i
].offset
;
121 block
->pm4_ndwords
= n
;
123 for (j
= 0; j
< n
; j
++) {
124 if (reg
[i
+j
].need_bo
) {
126 assert(block
->nbo
< R600_BLOCK_MAX_BO
);
127 block
->pm4_bo_index
[j
] = block
->nbo
;
128 block
->pm4
[block
->pm4_ndwords
++] = PKT3(PKT3_NOP
, 0);
129 block
->pm4
[block
->pm4_ndwords
++] = 0x00000000;
130 block
->reloc
[block
->nbo
].bo_pm4_index
[block
->reloc
[block
->nbo
].nreloc
++] = block
->pm4_ndwords
- 1;
133 for (j
= 0; j
< n
; j
++) {
134 if (reg
[i
+j
].flush_flags
) {
135 block
->pm4
[block
->pm4_ndwords
++] = PKT3(PKT3_SURFACE_SYNC
, 3);
136 block
->pm4
[block
->pm4_ndwords
++] = reg
[i
+j
].flush_flags
;
137 block
->pm4
[block
->pm4_ndwords
++] = 0xFFFFFFFF;
138 block
->pm4
[block
->pm4_ndwords
++] = 0x00000000;
139 block
->pm4
[block
->pm4_ndwords
++] = 0x0000000A;
140 block
->pm4
[block
->pm4_ndwords
++] = PKT3(PKT3_NOP
, 0);
141 block
->pm4
[block
->pm4_ndwords
++] = 0x00000000;
142 id
= block
->pm4_bo_index
[j
];
143 block
->reloc
[id
].bo_pm4_index
[block
->reloc
[id
].nreloc
++] = block
->pm4_ndwords
- 1;
146 /* check that we stay in limit */
147 assert(block
->pm4_ndwords
< R600_BLOCK_MAX_REG
);
152 int r600_group_init(struct r600_group
*group
, unsigned start_offset
, unsigned end_offset
)
154 group
->start_offset
= start_offset
;
155 group
->end_offset
= end_offset
;
157 group
->blocks
= NULL
;
158 group
->offset_block_id
= calloc((end_offset
- start_offset
) >> 2, sizeof(unsigned));
159 if (group
->offset_block_id
== NULL
)
164 static void r600_group_fini(struct r600_group
*group
)
166 free(group
->offset_block_id
);
170 /* R600/R700 configuration */
171 static const struct r600_reg r600_reg_list
[] = {
172 {0, 0, R_008C00_SQ_CONFIG
},
173 {0, 0, R_008C04_SQ_GPR_RESOURCE_MGMT_1
},
174 {0, 0, R_008C08_SQ_GPR_RESOURCE_MGMT_2
},
175 {0, 0, R_008C0C_SQ_THREAD_RESOURCE_MGMT
},
176 {0, 0, R_008C10_SQ_STACK_RESOURCE_MGMT_1
},
177 {0, 0, R_008C14_SQ_STACK_RESOURCE_MGMT_2
},
178 {0, 0, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ
},
179 {0, 0, R_009508_TA_CNTL_AUX
},
180 {0, 0, R_009714_VC_ENHANCE
},
181 {0, 0, R_009830_DB_DEBUG
},
182 {0, 0, R_009838_DB_WATERMARKS
},
183 {0, 0, R_028350_SX_MISC
},
184 {0, 0, R_0286C8_SPI_THREAD_GROUPING
},
185 {0, 0, R_0288A8_SQ_ESGS_RING_ITEMSIZE
},
186 {0, 0, R_0288AC_SQ_GSVS_RING_ITEMSIZE
},
187 {0, 0, R_0288B0_SQ_ESTMP_RING_ITEMSIZE
},
188 {0, 0, R_0288B4_SQ_GSTMP_RING_ITEMSIZE
},
189 {0, 0, R_0288B8_SQ_VSTMP_RING_ITEMSIZE
},
190 {0, 0, R_0288BC_SQ_PSTMP_RING_ITEMSIZE
},
191 {0, 0, R_0288C0_SQ_FBUF_RING_ITEMSIZE
},
192 {0, 0, R_0288C4_SQ_REDUC_RING_ITEMSIZE
},
193 {0, 0, R_0288C8_SQ_GS_VERT_ITEMSIZE
},
194 {0, 0, R_028A10_VGT_OUTPUT_PATH_CNTL
},
195 {0, 0, R_028A14_VGT_HOS_CNTL
},
196 {0, 0, R_028A18_VGT_HOS_MAX_TESS_LEVEL
},
197 {0, 0, R_028A1C_VGT_HOS_MIN_TESS_LEVEL
},
198 {0, 0, R_028A20_VGT_HOS_REUSE_DEPTH
},
199 {0, 0, R_028A24_VGT_GROUP_PRIM_TYPE
},
200 {0, 0, R_028A28_VGT_GROUP_FIRST_DECR
},
201 {0, 0, R_028A2C_VGT_GROUP_DECR
},
202 {0, 0, R_028A30_VGT_GROUP_VECT_0_CNTL
},
203 {0, 0, R_028A34_VGT_GROUP_VECT_1_CNTL
},
204 {0, 0, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL
},
205 {0, 0, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL
},
206 {0, 0, R_028A40_VGT_GS_MODE
},
207 {0, 0, R_028A4C_PA_SC_MODE_CNTL
},
208 {0, 0, R_028AB0_VGT_STRMOUT_EN
},
209 {0, 0, R_028AB4_VGT_REUSE_OFF
},
210 {0, 0, R_028AB8_VGT_VTX_CNT_EN
},
211 {0, 0, R_028B20_VGT_STRMOUT_BUFFER_EN
},
212 {0, 0, R_028028_DB_STENCIL_CLEAR
},
213 {0, 0, R_02802C_DB_DEPTH_CLEAR
},
214 {1, 0, R_028040_CB_COLOR0_BASE
},
215 {0, 0, R_0280A0_CB_COLOR0_INFO
},
216 {0, 0, R_028060_CB_COLOR0_SIZE
},
217 {0, 0, R_028080_CB_COLOR0_VIEW
},
218 {1, 0, R_0280E0_CB_COLOR0_FRAG
},
219 {1, 0, R_0280C0_CB_COLOR0_TILE
},
220 {0, 0, R_028100_CB_COLOR0_MASK
},
221 {1, 0, R_028044_CB_COLOR1_BASE
},
222 {0, 0, R_0280A4_CB_COLOR1_INFO
},
223 {0, 0, R_028064_CB_COLOR1_SIZE
},
224 {0, 0, R_028084_CB_COLOR1_VIEW
},
225 {1, 0, R_0280E4_CB_COLOR1_FRAG
},
226 {1, 0, R_0280C4_CB_COLOR1_TILE
},
227 {0, 0, R_028104_CB_COLOR1_MASK
},
228 {1, 0, R_028048_CB_COLOR2_BASE
},
229 {0, 0, R_0280A8_CB_COLOR2_INFO
},
230 {0, 0, R_028068_CB_COLOR2_SIZE
},
231 {0, 0, R_028088_CB_COLOR2_VIEW
},
232 {1, 0, R_0280E8_CB_COLOR2_FRAG
},
233 {1, 0, R_0280C8_CB_COLOR2_TILE
},
234 {0, 0, R_028108_CB_COLOR2_MASK
},
235 {1, 0, R_02804C_CB_COLOR3_BASE
},
236 {0, 0, R_0280AC_CB_COLOR3_INFO
},
237 {0, 0, R_02806C_CB_COLOR3_SIZE
},
238 {0, 0, R_02808C_CB_COLOR3_VIEW
},
239 {1, 0, R_0280EC_CB_COLOR3_FRAG
},
240 {1, 0, R_0280CC_CB_COLOR3_TILE
},
241 {0, 0, R_02810C_CB_COLOR3_MASK
},
242 {1, 0, R_028050_CB_COLOR4_BASE
},
243 {0, 0, R_0280B0_CB_COLOR4_INFO
},
244 {0, 0, R_028070_CB_COLOR4_SIZE
},
245 {0, 0, R_028090_CB_COLOR4_VIEW
},
246 {1, 0, R_0280F0_CB_COLOR4_FRAG
},
247 {1, 0, R_0280D0_CB_COLOR4_TILE
},
248 {0, 0, R_028110_CB_COLOR4_MASK
},
249 {1, 0, R_028054_CB_COLOR5_BASE
},
250 {0, 0, R_0280B4_CB_COLOR5_INFO
},
251 {0, 0, R_028074_CB_COLOR5_SIZE
},
252 {0, 0, R_028094_CB_COLOR5_VIEW
},
253 {1, 0, R_0280F4_CB_COLOR5_FRAG
},
254 {1, 0, R_0280D4_CB_COLOR5_TILE
},
255 {0, 0, R_028114_CB_COLOR5_MASK
},
256 {1, 0, R_028058_CB_COLOR6_BASE
},
257 {0, 0, R_0280B8_CB_COLOR6_INFO
},
258 {0, 0, R_028078_CB_COLOR6_SIZE
},
259 {0, 0, R_028098_CB_COLOR6_VIEW
},
260 {1, 0, R_0280F8_CB_COLOR6_FRAG
},
261 {1, 0, R_0280D8_CB_COLOR6_TILE
},
262 {0, 0, R_028118_CB_COLOR6_MASK
},
263 {1, 0, R_02805C_CB_COLOR7_BASE
},
264 {0, 0, R_0280BC_CB_COLOR7_INFO
},
265 {0, 0, R_02807C_CB_COLOR7_SIZE
},
266 {0, 0, R_02809C_CB_COLOR7_VIEW
},
267 {1, 0, R_0280FC_CB_COLOR7_FRAG
},
268 {1, 0, R_0280DC_CB_COLOR7_TILE
},
269 {0, 0, R_02811C_CB_COLOR7_MASK
},
270 {0, 0, R_028120_CB_CLEAR_RED
},
271 {0, 0, R_028124_CB_CLEAR_GREEN
},
272 {0, 0, R_028128_CB_CLEAR_BLUE
},
273 {0, 0, R_02812C_CB_CLEAR_ALPHA
},
274 {0, 0, R_02823C_CB_SHADER_MASK
},
275 {0, 0, R_028238_CB_TARGET_MASK
},
276 {0, 0, R_028410_SX_ALPHA_TEST_CONTROL
},
277 {0, 0, R_028414_CB_BLEND_RED
},
278 {0, 0, R_028418_CB_BLEND_GREEN
},
279 {0, 0, R_02841C_CB_BLEND_BLUE
},
280 {0, 0, R_028420_CB_BLEND_ALPHA
},
281 {0, 0, R_028424_CB_FOG_RED
},
282 {0, 0, R_028428_CB_FOG_GREEN
},
283 {0, 0, R_02842C_CB_FOG_BLUE
},
284 {0, 0, R_028430_DB_STENCILREFMASK
},
285 {0, 0, R_028434_DB_STENCILREFMASK_BF
},
286 {0, 0, R_028438_SX_ALPHA_REF
},
287 {0, 0, R_0286DC_SPI_FOG_CNTL
},
288 {0, 0, R_0286E0_SPI_FOG_FUNC_SCALE
},
289 {0, 0, R_0286E4_SPI_FOG_FUNC_BIAS
},
290 {0, 0, R_028780_CB_BLEND0_CONTROL
},
291 {0, 0, R_028784_CB_BLEND1_CONTROL
},
292 {0, 0, R_028788_CB_BLEND2_CONTROL
},
293 {0, 0, R_02878C_CB_BLEND3_CONTROL
},
294 {0, 0, R_028790_CB_BLEND4_CONTROL
},
295 {0, 0, R_028794_CB_BLEND5_CONTROL
},
296 {0, 0, R_028798_CB_BLEND6_CONTROL
},
297 {0, 0, R_02879C_CB_BLEND7_CONTROL
},
298 {0, 0, R_0287A0_CB_SHADER_CONTROL
},
299 {0, 0, R_028800_DB_DEPTH_CONTROL
},
300 {0, 0, R_028804_CB_BLEND_CONTROL
},
301 {0, 0, R_028808_CB_COLOR_CONTROL
},
302 {0, 0, R_02880C_DB_SHADER_CONTROL
},
303 {0, 0, R_028C04_PA_SC_AA_CONFIG
},
304 {0, 0, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX
},
305 {0, 0, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX
},
306 {0, 0, R_028C30_CB_CLRCMP_CONTROL
},
307 {0, 0, R_028C34_CB_CLRCMP_SRC
},
308 {0, 0, R_028C38_CB_CLRCMP_DST
},
309 {0, 0, R_028C3C_CB_CLRCMP_MSK
},
310 {0, 0, R_028C48_PA_SC_AA_MASK
},
311 {0, 0, R_028D2C_DB_SRESULTS_COMPARE_STATE1
},
312 {0, 0, R_028D44_DB_ALPHA_TO_MASK
},
313 {1, 0, R_02800C_DB_DEPTH_BASE
},
314 {0, 0, R_028000_DB_DEPTH_SIZE
},
315 {0, 0, R_028004_DB_DEPTH_VIEW
},
316 {0, 0, R_028010_DB_DEPTH_INFO
},
317 {0, 0, R_028D0C_DB_RENDER_CONTROL
},
318 {0, 0, R_028D10_DB_RENDER_OVERRIDE
},
319 {0, 0, R_028D24_DB_HTILE_SURFACE
},
320 {0, 0, R_028D30_DB_PRELOAD_CONTROL
},
321 {0, 0, R_028D34_DB_PREFETCH_LIMIT
},
322 {0, 0, R_028030_PA_SC_SCREEN_SCISSOR_TL
},
323 {0, 0, R_028034_PA_SC_SCREEN_SCISSOR_BR
},
324 {0, 0, R_028200_PA_SC_WINDOW_OFFSET
},
325 {0, 0, R_028204_PA_SC_WINDOW_SCISSOR_TL
},
326 {0, 0, R_028208_PA_SC_WINDOW_SCISSOR_BR
},
327 {0, 0, R_02820C_PA_SC_CLIPRECT_RULE
},
328 {0, 0, R_028210_PA_SC_CLIPRECT_0_TL
},
329 {0, 0, R_028214_PA_SC_CLIPRECT_0_BR
},
330 {0, 0, R_028218_PA_SC_CLIPRECT_1_TL
},
331 {0, 0, R_02821C_PA_SC_CLIPRECT_1_BR
},
332 {0, 0, R_028220_PA_SC_CLIPRECT_2_TL
},
333 {0, 0, R_028224_PA_SC_CLIPRECT_2_BR
},
334 {0, 0, R_028228_PA_SC_CLIPRECT_3_TL
},
335 {0, 0, R_02822C_PA_SC_CLIPRECT_3_BR
},
336 {0, 0, R_028230_PA_SC_EDGERULE
},
337 {0, 0, R_028240_PA_SC_GENERIC_SCISSOR_TL
},
338 {0, 0, R_028244_PA_SC_GENERIC_SCISSOR_BR
},
339 {0, 0, R_028250_PA_SC_VPORT_SCISSOR_0_TL
},
340 {0, 0, R_028254_PA_SC_VPORT_SCISSOR_0_BR
},
341 {0, 0, R_0282D0_PA_SC_VPORT_ZMIN_0
},
342 {0, 0, R_0282D4_PA_SC_VPORT_ZMAX_0
},
343 {0, 0, R_02843C_PA_CL_VPORT_XSCALE_0
},
344 {0, 0, R_028440_PA_CL_VPORT_XOFFSET_0
},
345 {0, 0, R_028444_PA_CL_VPORT_YSCALE_0
},
346 {0, 0, R_028448_PA_CL_VPORT_YOFFSET_0
},
347 {0, 0, R_02844C_PA_CL_VPORT_ZSCALE_0
},
348 {0, 0, R_028450_PA_CL_VPORT_ZOFFSET_0
},
349 {0, 0, R_0286D4_SPI_INTERP_CONTROL_0
},
350 {0, 0, R_028810_PA_CL_CLIP_CNTL
},
351 {0, 0, R_028814_PA_SU_SC_MODE_CNTL
},
352 {0, 0, R_028818_PA_CL_VTE_CNTL
},
353 {0, 0, R_02881C_PA_CL_VS_OUT_CNTL
},
354 {0, 0, R_028820_PA_CL_NANINF_CNTL
},
355 {0, 0, R_028A00_PA_SU_POINT_SIZE
},
356 {0, 0, R_028A04_PA_SU_POINT_MINMAX
},
357 {0, 0, R_028A08_PA_SU_LINE_CNTL
},
358 {0, 0, R_028A0C_PA_SC_LINE_STIPPLE
},
359 {0, 0, R_028A48_PA_SC_MPASS_PS_CNTL
},
360 {0, 0, R_028C00_PA_SC_LINE_CNTL
},
361 {0, 0, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ
},
362 {0, 0, R_028C10_PA_CL_GB_VERT_DISC_ADJ
},
363 {0, 0, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ
},
364 {0, 0, R_028C18_PA_CL_GB_HORZ_DISC_ADJ
},
365 {0, 0, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL
},
366 {0, 0, R_028DFC_PA_SU_POLY_OFFSET_CLAMP
},
367 {0, 0, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE
},
368 {0, 0, R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET
},
369 {0, 0, R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE
},
370 {0, 0, R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET
},
371 {0, 0, R_028E20_PA_CL_UCP0_X
},
372 {0, 0, R_028E24_PA_CL_UCP0_Y
},
373 {0, 0, R_028E28_PA_CL_UCP0_Z
},
374 {0, 0, R_028E2C_PA_CL_UCP0_W
},
375 {0, 0, R_028E30_PA_CL_UCP1_X
},
376 {0, 0, R_028E34_PA_CL_UCP1_Y
},
377 {0, 0, R_028E38_PA_CL_UCP1_Z
},
378 {0, 0, R_028E3C_PA_CL_UCP1_W
},
379 {0, 0, R_028E40_PA_CL_UCP2_X
},
380 {0, 0, R_028E44_PA_CL_UCP2_Y
},
381 {0, 0, R_028E48_PA_CL_UCP2_Z
},
382 {0, 0, R_028E4C_PA_CL_UCP2_W
},
383 {0, 0, R_028E50_PA_CL_UCP3_X
},
384 {0, 0, R_028E54_PA_CL_UCP3_Y
},
385 {0, 0, R_028E58_PA_CL_UCP3_Z
},
386 {0, 0, R_028E5C_PA_CL_UCP3_W
},
387 {0, 0, R_028E60_PA_CL_UCP4_X
},
388 {0, 0, R_028E64_PA_CL_UCP4_Y
},
389 {0, 0, R_028E68_PA_CL_UCP4_Z
},
390 {0, 0, R_028E6C_PA_CL_UCP4_W
},
391 {0, 0, R_028E70_PA_CL_UCP5_X
},
392 {0, 0, R_028E74_PA_CL_UCP5_Y
},
393 {0, 0, R_028E78_PA_CL_UCP5_Z
},
394 {0, 0, R_028E7C_PA_CL_UCP5_W
},
395 {0, 0, R_028380_SQ_VTX_SEMANTIC_0
},
396 {0, 0, R_028384_SQ_VTX_SEMANTIC_1
},
397 {0, 0, R_028388_SQ_VTX_SEMANTIC_2
},
398 {0, 0, R_02838C_SQ_VTX_SEMANTIC_3
},
399 {0, 0, R_028390_SQ_VTX_SEMANTIC_4
},
400 {0, 0, R_028394_SQ_VTX_SEMANTIC_5
},
401 {0, 0, R_028398_SQ_VTX_SEMANTIC_6
},
402 {0, 0, R_02839C_SQ_VTX_SEMANTIC_7
},
403 {0, 0, R_0283A0_SQ_VTX_SEMANTIC_8
},
404 {0, 0, R_0283A4_SQ_VTX_SEMANTIC_9
},
405 {0, 0, R_0283A8_SQ_VTX_SEMANTIC_10
},
406 {0, 0, R_0283AC_SQ_VTX_SEMANTIC_11
},
407 {0, 0, R_0283B0_SQ_VTX_SEMANTIC_12
},
408 {0, 0, R_0283B4_SQ_VTX_SEMANTIC_13
},
409 {0, 0, R_0283B8_SQ_VTX_SEMANTIC_14
},
410 {0, 0, R_0283BC_SQ_VTX_SEMANTIC_15
},
411 {0, 0, R_0283C0_SQ_VTX_SEMANTIC_16
},
412 {0, 0, R_0283C4_SQ_VTX_SEMANTIC_17
},
413 {0, 0, R_0283C8_SQ_VTX_SEMANTIC_18
},
414 {0, 0, R_0283CC_SQ_VTX_SEMANTIC_19
},
415 {0, 0, R_0283D0_SQ_VTX_SEMANTIC_20
},
416 {0, 0, R_0283D4_SQ_VTX_SEMANTIC_21
},
417 {0, 0, R_0283D8_SQ_VTX_SEMANTIC_22
},
418 {0, 0, R_0283DC_SQ_VTX_SEMANTIC_23
},
419 {0, 0, R_0283E0_SQ_VTX_SEMANTIC_24
},
420 {0, 0, R_0283E4_SQ_VTX_SEMANTIC_25
},
421 {0, 0, R_0283E8_SQ_VTX_SEMANTIC_26
},
422 {0, 0, R_0283EC_SQ_VTX_SEMANTIC_27
},
423 {0, 0, R_0283F0_SQ_VTX_SEMANTIC_28
},
424 {0, 0, R_0283F4_SQ_VTX_SEMANTIC_29
},
425 {0, 0, R_0283F8_SQ_VTX_SEMANTIC_30
},
426 {0, 0, R_0283FC_SQ_VTX_SEMANTIC_31
},
427 {0, 0, R_028614_SPI_VS_OUT_ID_0
},
428 {0, 0, R_028618_SPI_VS_OUT_ID_1
},
429 {0, 0, R_02861C_SPI_VS_OUT_ID_2
},
430 {0, 0, R_028620_SPI_VS_OUT_ID_3
},
431 {0, 0, R_028624_SPI_VS_OUT_ID_4
},
432 {0, 0, R_028628_SPI_VS_OUT_ID_5
},
433 {0, 0, R_02862C_SPI_VS_OUT_ID_6
},
434 {0, 0, R_028630_SPI_VS_OUT_ID_7
},
435 {0, 0, R_028634_SPI_VS_OUT_ID_8
},
436 {0, 0, R_028638_SPI_VS_OUT_ID_9
},
437 {0, 0, R_0286C4_SPI_VS_OUT_CONFIG
},
438 {1, 0, R_028858_SQ_PGM_START_VS
},
439 {0, S_0085F0_SH_ACTION_ENA(1), R_028868_SQ_PGM_RESOURCES_VS
},
440 {1, 0, R_028894_SQ_PGM_START_FS
},
441 {0, S_0085F0_SH_ACTION_ENA(1), R_0288A4_SQ_PGM_RESOURCES_FS
},
442 {0, 0, R_0288D0_SQ_PGM_CF_OFFSET_VS
},
443 {0, 0, R_0288DC_SQ_PGM_CF_OFFSET_FS
},
444 {0, 0, R_028644_SPI_PS_INPUT_CNTL_0
},
445 {0, 0, R_028648_SPI_PS_INPUT_CNTL_1
},
446 {0, 0, R_02864C_SPI_PS_INPUT_CNTL_2
},
447 {0, 0, R_028650_SPI_PS_INPUT_CNTL_3
},
448 {0, 0, R_028654_SPI_PS_INPUT_CNTL_4
},
449 {0, 0, R_028658_SPI_PS_INPUT_CNTL_5
},
450 {0, 0, R_02865C_SPI_PS_INPUT_CNTL_6
},
451 {0, 0, R_028660_SPI_PS_INPUT_CNTL_7
},
452 {0, 0, R_028664_SPI_PS_INPUT_CNTL_8
},
453 {0, 0, R_028668_SPI_PS_INPUT_CNTL_9
},
454 {0, 0, R_02866C_SPI_PS_INPUT_CNTL_10
},
455 {0, 0, R_028670_SPI_PS_INPUT_CNTL_11
},
456 {0, 0, R_028674_SPI_PS_INPUT_CNTL_12
},
457 {0, 0, R_028678_SPI_PS_INPUT_CNTL_13
},
458 {0, 0, R_02867C_SPI_PS_INPUT_CNTL_14
},
459 {0, 0, R_028680_SPI_PS_INPUT_CNTL_15
},
460 {0, 0, R_028684_SPI_PS_INPUT_CNTL_16
},
461 {0, 0, R_028688_SPI_PS_INPUT_CNTL_17
},
462 {0, 0, R_02868C_SPI_PS_INPUT_CNTL_18
},
463 {0, 0, R_028690_SPI_PS_INPUT_CNTL_19
},
464 {0, 0, R_028694_SPI_PS_INPUT_CNTL_20
},
465 {0, 0, R_028698_SPI_PS_INPUT_CNTL_21
},
466 {0, 0, R_02869C_SPI_PS_INPUT_CNTL_22
},
467 {0, 0, R_0286A0_SPI_PS_INPUT_CNTL_23
},
468 {0, 0, R_0286A4_SPI_PS_INPUT_CNTL_24
},
469 {0, 0, R_0286A8_SPI_PS_INPUT_CNTL_25
},
470 {0, 0, R_0286AC_SPI_PS_INPUT_CNTL_26
},
471 {0, 0, R_0286B0_SPI_PS_INPUT_CNTL_27
},
472 {0, 0, R_0286B4_SPI_PS_INPUT_CNTL_28
},
473 {0, 0, R_0286B8_SPI_PS_INPUT_CNTL_29
},
474 {0, 0, R_0286BC_SPI_PS_INPUT_CNTL_30
},
475 {0, 0, R_0286C0_SPI_PS_INPUT_CNTL_31
},
476 {0, 0, R_0286CC_SPI_PS_IN_CONTROL_0
},
477 {0, 0, R_0286D0_SPI_PS_IN_CONTROL_1
},
478 {0, 0, R_0286D8_SPI_INPUT_Z
},
479 {1, S_0085F0_SH_ACTION_ENA(1), R_028840_SQ_PGM_START_PS
},
480 {0, 0, R_028850_SQ_PGM_RESOURCES_PS
},
481 {0, 0, R_028854_SQ_PGM_EXPORTS_PS
},
482 {0, 0, R_0288CC_SQ_PGM_CF_OFFSET_PS
},
483 {0, 0, R_008958_VGT_PRIMITIVE_TYPE
},
484 {0, 0, R_028400_VGT_MAX_VTX_INDX
},
485 {0, 0, R_028404_VGT_MIN_VTX_INDX
},
486 {0, 0, R_028408_VGT_INDX_OFFSET
},
487 {0, 0, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX
},
488 {0, 0, R_028A84_VGT_PRIMITIVEID_EN
},
489 {0, 0, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN
},
490 {0, 0, R_028AA0_VGT_INSTANCE_STEP_RATE_0
},
491 {0, 0, R_028AA4_VGT_INSTANCE_STEP_RATE_1
},
494 /* SHADER CONSTANT R600/R700 */
495 static int r600_state_constant_init(struct r600_context
*ctx
, u32 offset
)
497 struct r600_reg r600_shader_constant
[] = {
498 {0, 0, R_030000_SQ_ALU_CONSTANT0_0
},
499 {0, 0, R_030004_SQ_ALU_CONSTANT1_0
},
500 {0, 0, R_030008_SQ_ALU_CONSTANT2_0
},
501 {0, 0, R_03000C_SQ_ALU_CONSTANT3_0
},
503 unsigned nreg
= sizeof(r600_shader_constant
)/sizeof(struct r600_reg
);
505 for (int i
= 0; i
< nreg
; i
++) {
506 r600_shader_constant
[i
].offset
+= offset
;
508 return r600_context_add_block(ctx
, r600_shader_constant
, nreg
);
511 /* SHADER RESOURCE R600/R700 */
512 static int r600_state_resource_init(struct r600_context
*ctx
, u32 offset
)
514 struct r600_reg r600_shader_resource
[] = {
515 {0, 0, R_038000_RESOURCE0_WORD0
},
516 {0, 0, R_038004_RESOURCE0_WORD1
},
517 {1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), R_038008_RESOURCE0_WORD2
},
518 {1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), R_03800C_RESOURCE0_WORD3
},
519 {0, 0, R_038010_RESOURCE0_WORD4
},
520 {0, 0, R_038014_RESOURCE0_WORD5
},
521 {0, 0, R_038018_RESOURCE0_WORD6
},
523 unsigned nreg
= sizeof(r600_shader_resource
)/sizeof(struct r600_reg
);
525 for (int i
= 0; i
< nreg
; i
++) {
526 r600_shader_resource
[i
].offset
+= offset
;
528 return r600_context_add_block(ctx
, r600_shader_resource
, nreg
);
531 /* SHADER SAMPLER R600/R700 */
532 static int r600_state_sampler_init(struct r600_context
*ctx
, u32 offset
)
534 struct r600_reg r600_shader_sampler
[] = {
535 {0, 0, R_03C000_SQ_TEX_SAMPLER_WORD0_0
},
536 {0, 0, R_03C004_SQ_TEX_SAMPLER_WORD1_0
},
537 {0, 0, R_03C008_SQ_TEX_SAMPLER_WORD2_0
},
539 unsigned nreg
= sizeof(r600_shader_sampler
)/sizeof(struct r600_reg
);
541 for (int i
= 0; i
< nreg
; i
++) {
542 r600_shader_sampler
[i
].offset
+= offset
;
544 return r600_context_add_block(ctx
, r600_shader_sampler
, nreg
);
547 /* SHADER SAMPLER BORDER R600/R700 */
548 static int r600_state_sampler_border_init(struct r600_context
*ctx
, u32 offset
)
550 struct r600_reg r600_shader_sampler_border
[] = {
551 {0, 0, R_00A400_TD_PS_SAMPLER0_BORDER_RED
},
552 {0, 0, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN
},
553 {0, 0, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE
},
554 {0, 0, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA
},
556 unsigned nreg
= sizeof(r600_shader_sampler_border
)/sizeof(struct r600_reg
);
558 for (int i
= 0; i
< nreg
; i
++) {
559 r600_shader_sampler_border
[i
].offset
+= offset
;
561 return r600_context_add_block(ctx
, r600_shader_sampler_border
, nreg
);
565 void r600_context_fini(struct r600_context
*ctx
)
567 for (int i
= 0; i
< ctx
->ngroups
; i
++) {
568 r600_group_fini(&ctx
->groups
[i
]);
572 memset(ctx
, 0, sizeof(struct r600_context
));
575 int r600_context_init(struct r600_context
*ctx
, struct radeon
*radeon
)
579 memset(ctx
, 0, sizeof(struct r600_context
));
580 ctx
->radeon
= radeon
;
581 LIST_INITHEAD(&ctx
->query_list
);
582 /* initialize groups */
583 r
= r600_group_init(&ctx
->groups
[R600_GROUP_CONFIG
], R600_CONFIG_REG_OFFSET
, R600_CONFIG_REG_END
);
587 r
= r600_group_init(&ctx
->groups
[R600_GROUP_CTL_CONST
], R600_CTL_CONST_OFFSET
, R600_CTL_CONST_END
);
591 r
= r600_group_init(&ctx
->groups
[R600_GROUP_LOOP_CONST
], R600_LOOP_CONST_OFFSET
, R600_LOOP_CONST_END
);
595 r
= r600_group_init(&ctx
->groups
[R600_GROUP_BOOL_CONST
], R600_BOOL_CONST_OFFSET
, R600_BOOL_CONST_END
);
599 r
= r600_group_init(&ctx
->groups
[R600_GROUP_SAMPLER
], R600_SAMPLER_OFFSET
, R600_SAMPLER_END
);
603 r
= r600_group_init(&ctx
->groups
[R600_GROUP_RESOURCE
], R600_RESOURCE_OFFSET
, R600_RESOURCE_END
);
607 r
= r600_group_init(&ctx
->groups
[R600_GROUP_ALU_CONST
], R600_ALU_CONST_OFFSET
, R600_ALU_CONST_END
);
611 r
= r600_group_init(&ctx
->groups
[R600_GROUP_CONTEXT
], R600_CONTEXT_REG_OFFSET
, R600_CONTEXT_REG_END
);
615 ctx
->ngroups
= R600_NGROUPS
;
618 r
= r600_context_add_block(ctx
, r600_reg_list
, sizeof(r600_reg_list
)/sizeof(struct r600_reg
));
622 /* PS SAMPLER BORDER */
623 for (int j
= 0, offset
= 0; j
< 18; j
++, offset
+= 0x10) {
624 r
= r600_state_sampler_border_init(ctx
, offset
);
629 /* VS SAMPLER BORDER */
630 for (int j
= 0, offset
= 0x200; j
< 18; j
++, offset
+= 0x10) {
631 r
= r600_state_sampler_border_init(ctx
, offset
);
636 for (int j
= 0, offset
= 0; j
< 18; j
++, offset
+= 0xC) {
637 r
= r600_state_sampler_init(ctx
, offset
);
642 for (int j
= 0, offset
= 0xD8; j
< 18; j
++, offset
+= 0xC) {
643 r
= r600_state_sampler_init(ctx
, offset
);
648 for (int j
= 0, offset
= 0; j
< 160; j
++, offset
+= 0x1C) {
649 r
= r600_state_resource_init(ctx
, offset
);
654 for (int j
= 0, offset
= 0x1180; j
< 160; j
++, offset
+= 0x1C) {
655 r
= r600_state_resource_init(ctx
, offset
);
660 for (int j
= 0, offset
= 0; j
< 256; j
++, offset
+= 0x10) {
661 r
= r600_state_constant_init(ctx
, offset
);
666 for (int j
= 0, offset
= 0x1000; j
< 256; j
++, offset
+= 0x10) {
667 r
= r600_state_constant_init(ctx
, offset
);
672 /* allocate cs variables */
673 ctx
->nreloc
= RADEON_CTX_MAX_PM4
;
674 ctx
->reloc
= calloc(ctx
->nreloc
, sizeof(struct r600_reloc
));
675 if (ctx
->reloc
== NULL
) {
679 ctx
->bo
= calloc(ctx
->nreloc
, sizeof(void *));
680 if (ctx
->bo
== NULL
) {
684 ctx
->pm4_ndwords
= RADEON_CTX_MAX_PM4
;
685 ctx
->pm4
= calloc(ctx
->pm4_ndwords
, 4);
686 if (ctx
->pm4
== NULL
) {
692 r600_context_fini(ctx
);
696 void r600_context_bo_reloc(struct r600_context
*ctx
, u32
*pm4
, struct radeon_bo
*bo
)
701 for (i
= 0, reloc_id
= -1; i
< ctx
->creloc
; i
++) {
702 if (ctx
->reloc
[i
].handle
== bo
->handle
) {
703 reloc_id
= i
* sizeof(struct r600_reloc
) / 4;
704 /* set PKT3 to point to proper reloc */
708 if (reloc_id
== -1) {
709 /* add new relocation */
710 if (ctx
->creloc
>= ctx
->nreloc
) {
711 r600_context_flush(ctx
);
713 reloc_id
= ctx
->creloc
* sizeof(struct r600_reloc
) / 4;
714 ctx
->reloc
[ctx
->creloc
].handle
= bo
->handle
;
715 ctx
->reloc
[ctx
->creloc
].read_domain
= RADEON_GEM_DOMAIN_GTT
;
716 ctx
->reloc
[ctx
->creloc
].write_domain
= RADEON_GEM_DOMAIN_GTT
;
717 ctx
->reloc
[ctx
->creloc
].flags
= 0;
718 radeon_bo_reference(ctx
->radeon
, &ctx
->bo
[ctx
->creloc
], bo
);
720 /* set PKT3 to point to proper reloc */
725 void r600_context_pipe_state_set(struct r600_context
*ctx
, struct r600_pipe_state
*state
)
727 struct r600_group
*group
;
728 struct r600_group_block
*block
;
730 for (int i
= 0; i
< state
->nregs
; i
++) {
732 group
= &ctx
->groups
[state
->regs
[i
].group_id
];
733 id
= group
->offset_block_id
[(state
->regs
[i
].offset
- group
->start_offset
) >> 2];
734 block
= &group
->blocks
[id
];
735 id
= (state
->regs
[i
].offset
- block
->start_offset
) >> 2;
736 block
->pm4
[id
] &= ~state
->regs
[i
].mask
;
737 block
->pm4
[id
] |= state
->regs
[i
].value
;
738 if (block
->pm4_bo_index
[id
]) {
739 /* find relocation */
740 id
= block
->pm4_bo_index
[id
];
741 radeon_ws_bo_reference(ctx
->radeon
, &block
->reloc
[id
].bo
, state
->regs
[i
].bo
);
743 block
->status
|= R600_BLOCK_STATUS_ENABLED
;
744 block
->status
|= R600_BLOCK_STATUS_DIRTY
;
745 ctx
->pm4_dirty_cdwords
+= 2 + block
->pm4_ndwords
;
749 static inline void r600_context_pipe_state_set_resource(struct r600_context
*ctx
, struct r600_pipe_state
*state
, unsigned offset
)
751 struct r600_group_block
*block
;
754 offset
-= ctx
->groups
[R600_GROUP_RESOURCE
].start_offset
;
755 id
= ctx
->groups
[R600_GROUP_RESOURCE
].offset_block_id
[offset
>> 2];
756 block
= &ctx
->groups
[R600_GROUP_RESOURCE
].blocks
[id
];
757 block
->pm4
[0] = state
->regs
[0].value
;
758 block
->pm4
[1] = state
->regs
[1].value
;
759 block
->pm4
[2] = state
->regs
[2].value
;
760 block
->pm4
[3] = state
->regs
[3].value
;
761 block
->pm4
[4] = state
->regs
[4].value
;
762 block
->pm4
[5] = state
->regs
[5].value
;
763 block
->pm4
[6] = state
->regs
[6].value
;
764 radeon_ws_bo_reference(ctx
->radeon
, &block
->reloc
[1].bo
, NULL
);
765 radeon_ws_bo_reference(ctx
->radeon
, &block
->reloc
[2].bo
, NULL
);
766 if (state
->regs
[0].bo
) {
767 /* VERTEX RESOURCE, we preted there is 2 bo to relocate so
768 * we have single case btw VERTEX & TEXTURE resource
770 radeon_ws_bo_reference(ctx
->radeon
, &block
->reloc
[1].bo
, state
->regs
[0].bo
);
771 radeon_ws_bo_reference(ctx
->radeon
, &block
->reloc
[2].bo
, state
->regs
[0].bo
);
773 /* TEXTURE RESOURCE */
774 radeon_ws_bo_reference(ctx
->radeon
, &block
->reloc
[1].bo
, state
->regs
[2].bo
);
775 radeon_ws_bo_reference(ctx
->radeon
, &block
->reloc
[2].bo
, state
->regs
[3].bo
);
777 block
->status
|= R600_BLOCK_STATUS_ENABLED
;
778 block
->status
|= R600_BLOCK_STATUS_DIRTY
;
779 ctx
->pm4_dirty_cdwords
+= 2 + block
->pm4_ndwords
;
782 void r600_context_pipe_state_set_ps_resource(struct r600_context
*ctx
, struct r600_pipe_state
*state
, unsigned rid
)
784 unsigned offset
= R_038000_SQ_TEX_RESOURCE_WORD0_0
+ 0x1C * rid
;
786 r600_context_pipe_state_set_resource(ctx
, state
, offset
);
789 void r600_context_pipe_state_set_vs_resource(struct r600_context
*ctx
, struct r600_pipe_state
*state
, unsigned rid
)
791 unsigned offset
= R_038000_SQ_TEX_RESOURCE_WORD0_0
+ 0x1180 + 0x1C * rid
;
793 r600_context_pipe_state_set_resource(ctx
, state
, offset
);
796 static inline void r600_context_pipe_state_set_sampler(struct r600_context
*ctx
, struct r600_pipe_state
*state
, unsigned offset
)
798 struct r600_group_block
*block
;
801 offset
-= ctx
->groups
[R600_GROUP_SAMPLER
].start_offset
;
802 id
= ctx
->groups
[R600_GROUP_SAMPLER
].offset_block_id
[offset
>> 2];
803 block
= &ctx
->groups
[R600_GROUP_SAMPLER
].blocks
[id
];
804 block
->pm4
[0] = state
->regs
[0].value
;
805 block
->pm4
[1] = state
->regs
[1].value
;
806 block
->pm4
[2] = state
->regs
[2].value
;
807 block
->status
|= R600_BLOCK_STATUS_ENABLED
;
808 block
->status
|= R600_BLOCK_STATUS_DIRTY
;
809 ctx
->pm4_dirty_cdwords
+= 2 + block
->pm4_ndwords
;
812 static inline void r600_context_pipe_state_set_sampler_border(struct r600_context
*ctx
, struct r600_pipe_state
*state
, unsigned offset
)
814 struct r600_group_block
*block
;
817 offset
-= ctx
->groups
[R600_GROUP_CONFIG
].start_offset
;
818 id
= ctx
->groups
[R600_GROUP_CONFIG
].offset_block_id
[offset
>> 2];
819 block
= &ctx
->groups
[R600_GROUP_CONFIG
].blocks
[id
];
820 block
->pm4
[0] = state
->regs
[3].value
;
821 block
->pm4
[1] = state
->regs
[4].value
;
822 block
->pm4
[2] = state
->regs
[5].value
;
823 block
->pm4
[3] = state
->regs
[6].value
;
824 block
->status
|= R600_BLOCK_STATUS_ENABLED
;
825 block
->status
|= R600_BLOCK_STATUS_DIRTY
;
826 ctx
->pm4_dirty_cdwords
+= 2 + block
->pm4_ndwords
;
829 void r600_context_pipe_state_set_ps_sampler(struct r600_context
*ctx
, struct r600_pipe_state
*state
, unsigned id
)
833 offset
= 0x0003C000 + id
* 0xc;
834 r600_context_pipe_state_set_sampler(ctx
, state
, offset
);
835 if (state
->nregs
> 3) {
836 offset
= 0x0000A400 + id
* 0x10;
837 r600_context_pipe_state_set_sampler_border(ctx
, state
, offset
);
841 void r600_context_pipe_state_set_vs_sampler(struct r600_context
*ctx
, struct r600_pipe_state
*state
, unsigned id
)
845 offset
= 0x0003C0D8 + id
* 0xc;
846 r600_context_pipe_state_set_sampler(ctx
, state
, offset
);
847 if (state
->nregs
> 3) {
848 offset
= 0x0000A600 + id
* 0x10;
849 r600_context_pipe_state_set_sampler_border(ctx
, state
, offset
);
853 void r600_context_group_emit_dirty(struct r600_context
*ctx
, struct r600_group
*group
, unsigned opcode
)
855 struct radeon_bo
*bo
;
858 for (int i
= 0; i
< group
->nblocks
; i
++) {
859 struct r600_group_block
*block
= &group
->blocks
[i
];
860 if (block
->status
& R600_BLOCK_STATUS_DIRTY
) {
861 for (int j
= 0; j
< block
->nreg
; j
++) {
862 if (block
->pm4_bo_index
[j
]) {
863 /* find relocation */
864 id
= block
->pm4_bo_index
[j
];
865 bo
= radeon_bo_pb_get_bo(block
->reloc
[id
].bo
->pb
);
866 for (int k
= 0; k
< block
->reloc
[id
].nreloc
; k
++) {
867 r600_context_bo_reloc(ctx
, &block
->pm4
[block
->reloc
[id
].bo_pm4_index
[k
]], bo
);
872 ctx
->pm4
[ctx
->pm4_cdwords
++] = PKT3(opcode
, block
->nreg
);
873 ctx
->pm4
[ctx
->pm4_cdwords
++] = (block
->start_offset
- group
->start_offset
) >> 2;
874 memcpy(&ctx
->pm4
[ctx
->pm4_cdwords
], block
->pm4
, block
->pm4_ndwords
* 4);
875 ctx
->pm4_cdwords
+= block
->pm4_ndwords
;
876 block
->status
^= R600_BLOCK_STATUS_DIRTY
;
881 struct radeon_bo
*r600_context_reg_bo(struct r600_context
*ctx
, unsigned group_id
, unsigned offset
)
883 struct r600_group_block
*block
;
886 id
= ctx
->groups
[group_id
].offset_block_id
[(offset
- ctx
->groups
[group_id
].start_offset
) >> 2];
887 block
= &ctx
->groups
[group_id
].blocks
[id
];
888 offset
-= block
->start_offset
;
889 id
= block
->pm4_bo_index
[offset
>> 2];
890 if (block
->reloc
[id
].bo
) {
891 return radeon_bo_pb_get_bo(block
->reloc
[id
].bo
->pb
);
896 void r600_context_draw(struct r600_context
*ctx
, const struct r600_draw
*draw
)
898 struct radeon_bo
*cb
[8];
899 unsigned ndwords
= 9;
903 /* make sure there is enough relocation space before scheduling draw */
904 if (ctx
->creloc
>= (ctx
->nreloc
- 1)) {
905 r600_context_flush(ctx
);
909 /* find number of color buffer */
910 for (int i
= 0; i
< 8; i
++) {
911 cb
[i
] = r600_context_reg_bo(ctx
, R600_GROUP_CONTEXT
, R_028040_CB_COLOR0_BASE
+ (i
<< 2));
917 if ((ctx
->pm4_dirty_cdwords
+ ndwords
+ ctx
->pm4_cdwords
) > ctx
->pm4_ndwords
) {
919 r600_context_flush(ctx
);
921 /* at that point everythings is flushed and ctx->pm4_cdwords = 0 */
922 if ((ctx
->pm4_dirty_cdwords
+ ndwords
) > ctx
->pm4_ndwords
) {
923 R600_ERR("context is too big to be scheduled\n");
927 /* enough room to copy packet */
928 r600_context_group_emit_dirty(ctx
, &ctx
->groups
[R600_GROUP_CONFIG
], PKT3_SET_CONFIG_REG
);
929 r600_context_group_emit_dirty(ctx
, &ctx
->groups
[R600_GROUP_CONTEXT
], PKT3_SET_CONTEXT_REG
);
930 r600_context_group_emit_dirty(ctx
, &ctx
->groups
[R600_GROUP_ALU_CONST
], PKT3_SET_ALU_CONST
);
931 r600_context_group_emit_dirty(ctx
, &ctx
->groups
[R600_GROUP_RESOURCE
], PKT3_SET_RESOURCE
);
932 r600_context_group_emit_dirty(ctx
, &ctx
->groups
[R600_GROUP_SAMPLER
], PKT3_SET_SAMPLER
);
935 ctx
->pm4
[ctx
->pm4_cdwords
++] = PKT3(PKT3_INDEX_TYPE
, 0);
936 ctx
->pm4
[ctx
->pm4_cdwords
++] = draw
->vgt_index_type
;
937 ctx
->pm4
[ctx
->pm4_cdwords
++] = PKT3(PKT3_NUM_INSTANCES
, 0);
938 ctx
->pm4
[ctx
->pm4_cdwords
++] = draw
->vgt_num_instances
;
940 ctx
->pm4
[ctx
->pm4_cdwords
++] = PKT3(PKT3_DRAW_INDEX
, 3);
941 ctx
->pm4
[ctx
->pm4_cdwords
++] = draw
->indices_bo_offset
;
942 ctx
->pm4
[ctx
->pm4_cdwords
++] = 0;
943 ctx
->pm4
[ctx
->pm4_cdwords
++] = draw
->vgt_num_indices
;
944 ctx
->pm4
[ctx
->pm4_cdwords
++] = draw
->vgt_draw_initiator
;
945 ctx
->pm4
[ctx
->pm4_cdwords
++] = PKT3(PKT3_NOP
, 0);
946 ctx
->pm4
[ctx
->pm4_cdwords
++] = 0;
947 r600_context_bo_reloc(ctx
, &ctx
->pm4
[ctx
->pm4_cdwords
- 1], radeon_bo_pb_get_bo(draw
->indices
->pb
));
949 ctx
->pm4
[ctx
->pm4_cdwords
++] = PKT3(PKT3_DRAW_INDEX_AUTO
, 1);
950 ctx
->pm4
[ctx
->pm4_cdwords
++] = draw
->vgt_num_indices
;
951 ctx
->pm4
[ctx
->pm4_cdwords
++] = draw
->vgt_draw_initiator
;
953 ctx
->pm4
[ctx
->pm4_cdwords
++] = PKT3(PKT3_EVENT_WRITE
, 0);
954 ctx
->pm4
[ctx
->pm4_cdwords
++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT
;
956 /* flush color buffer */
957 for (int i
= 0; i
< 8; i
++) {
959 ctx
->pm4
[ctx
->pm4_cdwords
++] = PKT3(PKT3_SURFACE_SYNC
, 3);
960 ctx
->pm4
[ctx
->pm4_cdwords
++] = (S_0085F0_CB0_DEST_BASE_ENA(1) << i
) |
961 S_0085F0_CB_ACTION_ENA(1);
962 ctx
->pm4
[ctx
->pm4_cdwords
++] = (cb
[i
]->size
+ 255) >> 8;
963 ctx
->pm4
[ctx
->pm4_cdwords
++] = 0x00000000;
964 ctx
->pm4
[ctx
->pm4_cdwords
++] = 0x0000000A;
965 ctx
->pm4
[ctx
->pm4_cdwords
++] = PKT3(PKT3_NOP
, 0);
966 ctx
->pm4
[ctx
->pm4_cdwords
++] = 0;
967 r600_context_bo_reloc(ctx
, &ctx
->pm4
[ctx
->pm4_cdwords
- 1], cb
[i
]);
971 /* all dirty state have been scheduled in current cs */
972 ctx
->pm4_dirty_cdwords
= 0;
975 void r600_context_flush(struct r600_context
*ctx
)
977 struct drm_radeon_cs drmib
;
978 struct drm_radeon_cs_chunk chunks
[2];
979 uint64_t chunk_array
[2];
980 struct r600_group_block
*block
;
983 if (!ctx
->pm4_cdwords
)
986 /* suspend queries */
987 r600_context_queries_suspend(ctx
);
991 drmib
.num_chunks
= 2;
992 drmib
.chunks
= (uint64_t)(uintptr_t)chunk_array
;
993 chunks
[0].chunk_id
= RADEON_CHUNK_ID_IB
;
994 chunks
[0].length_dw
= ctx
->pm4_cdwords
;
995 chunks
[0].chunk_data
= (uint64_t)(uintptr_t)ctx
->pm4
;
996 chunks
[1].chunk_id
= RADEON_CHUNK_ID_RELOCS
;
997 chunks
[1].length_dw
= ctx
->creloc
* sizeof(struct r600_reloc
) / 4;
998 chunks
[1].chunk_data
= (uint64_t)(uintptr_t)ctx
->reloc
;
999 chunk_array
[0] = (uint64_t)(uintptr_t)&chunks
[0];
1000 chunk_array
[1] = (uint64_t)(uintptr_t)&chunks
[1];
1001 r
= drmCommandWriteRead(ctx
->radeon
->fd
, DRM_RADEON_CS
, &drmib
,
1002 sizeof(struct drm_radeon_cs
));
1005 for (int i
= 0; i
< ctx
->creloc
; i
++) {
1006 radeon_bo_reference(ctx
->radeon
, &ctx
->bo
[i
], NULL
);
1009 ctx
->pm4_dirty_cdwords
= 0;
1010 ctx
->pm4_cdwords
= 0;
1012 /* resume queries */
1013 r600_context_queries_resume(ctx
);
1015 /* set all valid group as dirty so they get reemited on
1018 for (int i
= 0; i
< ctx
->ngroups
; i
++) {
1019 for (int j
= 0; j
< ctx
->groups
[i
].nblocks
; j
++) {
1020 /* mark enabled block as dirty */
1021 block
= &ctx
->groups
[i
].blocks
[j
];
1022 if (block
->status
& R600_BLOCK_STATUS_ENABLED
) {
1023 ctx
->pm4_dirty_cdwords
+= 2 + block
->pm4_ndwords
;
1024 block
->status
|= R600_BLOCK_STATUS_DIRTY
;
1030 void r600_context_dump_bof(struct r600_context
*ctx
, const char *file
)
1032 bof_t
*bcs
, *blob
, *array
, *bo
, *size
, *handle
, *device_id
, *root
;
1035 root
= device_id
= bcs
= blob
= array
= bo
= size
= handle
= NULL
;
1036 root
= bof_object();
1039 device_id
= bof_int32(ctx
->radeon
->device
);
1040 if (device_id
== NULL
)
1042 if (bof_object_set(root
, "device_id", device_id
))
1044 bof_decref(device_id
);
1047 blob
= bof_blob(ctx
->creloc
* 16, ctx
->reloc
);
1050 if (bof_object_set(root
, "reloc", blob
))
1055 blob
= bof_blob(ctx
->pm4_cdwords
* 4, ctx
->pm4
);
1058 if (bof_object_set(root
, "pm4", blob
))
1063 array
= bof_array();
1066 for (i
= 0; i
< ctx
->creloc
; i
++) {
1067 struct radeon_bo
*rbo
= ctx
->bo
[i
];
1071 size
= bof_int32(rbo
->size
);
1074 if (bof_object_set(bo
, "size", size
))
1078 handle
= bof_int32(rbo
->handle
);
1081 if (bof_object_set(bo
, "handle", handle
))
1085 radeon_bo_map(ctx
->radeon
, rbo
);
1086 blob
= bof_blob(rbo
->size
, rbo
->data
);
1087 radeon_bo_unmap(ctx
->radeon
, rbo
);
1090 if (bof_object_set(bo
, "data", blob
))
1094 if (bof_array_append(array
, bo
))
1099 if (bof_object_set(root
, "bo", array
))
1101 bof_dump_file(root
, file
);
1108 bof_decref(device_id
);
1112 static void r600_query_result(struct r600_context
*ctx
, struct r600_query
*query
)
1118 results
= radeon_ws_bo_map(ctx
->radeon
, query
->buffer
, 0, NULL
);
1119 for (i
= 0; i
< query
->num_results
; i
+= 4) {
1120 start
= (u64
)results
[i
] | (u64
)results
[i
+ 1] << 32;
1121 end
= (u64
)results
[i
+ 2] | (u64
)results
[i
+ 3] << 32;
1122 if ((start
& 0x8000000000000000UL
) && (end
& 0x8000000000000000UL
)) {
1123 query
->result
+= end
- start
;
1126 radeon_ws_bo_unmap(ctx
->radeon
, query
->buffer
);
1127 query
->num_results
= 0;
1130 void r600_query_begin(struct r600_context
*ctx
, struct r600_query
*query
)
1132 /* query request needs 6 dwords for begin + 6 dwords for end */
1133 if ((12 + ctx
->pm4_cdwords
) > ctx
->pm4_ndwords
) {
1135 r600_context_flush(ctx
);
1138 /* if query buffer is full force a flush */
1139 if (query
->num_results
>= ((query
->buffer_size
>> 2) - 2)) {
1140 r600_context_flush(ctx
);
1141 r600_query_result(ctx
, query
);
1144 /* emit begin query */
1145 ctx
->pm4
[ctx
->pm4_cdwords
++] = PKT3(PKT3_EVENT_WRITE
, 2);
1146 ctx
->pm4
[ctx
->pm4_cdwords
++] = EVENT_TYPE_ZPASS_DONE
;
1147 ctx
->pm4
[ctx
->pm4_cdwords
++] = query
->num_results
;
1148 ctx
->pm4
[ctx
->pm4_cdwords
++] = 0;
1149 ctx
->pm4
[ctx
->pm4_cdwords
++] = PKT3(PKT3_NOP
, 0);
1150 ctx
->pm4
[ctx
->pm4_cdwords
++] = 0;
1151 r600_context_bo_reloc(ctx
, &ctx
->pm4
[ctx
->pm4_cdwords
- 1], radeon_bo_pb_get_bo(query
->buffer
->pb
));
1153 query
->state
|= R600_QUERY_STATE_STARTED
;
1154 query
->state
^= R600_QUERY_STATE_ENDED
;
1157 void r600_query_end(struct r600_context
*ctx
, struct r600_query
*query
)
1159 /* emit begin query */
1160 ctx
->pm4
[ctx
->pm4_cdwords
++] = PKT3(PKT3_EVENT_WRITE
, 2);
1161 ctx
->pm4
[ctx
->pm4_cdwords
++] = EVENT_TYPE_ZPASS_DONE
;
1162 ctx
->pm4
[ctx
->pm4_cdwords
++] = query
->num_results
+ 8;
1163 ctx
->pm4
[ctx
->pm4_cdwords
++] = 0;
1164 ctx
->pm4
[ctx
->pm4_cdwords
++] = PKT3(PKT3_NOP
, 0);
1165 ctx
->pm4
[ctx
->pm4_cdwords
++] = 0;
1166 r600_context_bo_reloc(ctx
, &ctx
->pm4
[ctx
->pm4_cdwords
- 1], radeon_bo_pb_get_bo(query
->buffer
->pb
));
1168 query
->num_results
+= 16;
1169 query
->state
^= R600_QUERY_STATE_STARTED
;
1170 query
->state
|= R600_QUERY_STATE_ENDED
;
1173 struct r600_query
*r600_context_query_create(struct r600_context
*ctx
, unsigned query_type
)
1175 struct r600_query
*query
;
1177 if (query_type
!= PIPE_QUERY_OCCLUSION_COUNTER
)
1180 query
= calloc(1, sizeof(struct r600_query
));
1184 query
->type
= query_type
;
1185 query
->buffer_size
= 4096;
1187 query
->buffer
= radeon_ws_bo(ctx
->radeon
, query
->buffer_size
, 1, 0);
1188 if (!query
->buffer
) {
1193 LIST_ADDTAIL(&query
->list
, &ctx
->query_list
);
1198 void r600_context_query_destroy(struct r600_context
*ctx
, struct r600_query
*query
)
1200 radeon_ws_bo_reference(ctx
->radeon
, &query
->buffer
, NULL
);
1201 LIST_DEL(&query
->list
);
1205 boolean
r600_context_query_result(struct r600_context
*ctx
,
1206 struct r600_query
*query
,
1207 boolean wait
, void *vresult
)
1209 uint64_t *result
= (uint64_t*)vresult
;
1211 if (query
->num_results
) {
1212 r600_context_flush(ctx
);
1214 r600_query_result(ctx
, query
);
1215 *result
= query
->result
;
1220 static void r600_context_queries_suspend(struct r600_context
*ctx
)
1222 struct r600_query
*query
;
1224 LIST_FOR_EACH_ENTRY(query
, &ctx
->query_list
, list
) {
1225 if (query
->state
& R600_QUERY_STATE_STARTED
) {
1226 r600_query_end(ctx
, query
);
1227 query
->state
|= R600_QUERY_STATE_SUSPENDED
;
1232 static void r600_context_queries_resume(struct r600_context
*ctx
)
1234 struct r600_query
*query
;
1236 LIST_FOR_EACH_ENTRY(query
, &ctx
->query_list
, list
) {
1237 if (query
->state
& R600_QUERY_STATE_SUSPENDED
) {
1238 r600_query_begin(ctx
, query
);
1239 query
->state
^= R600_QUERY_STATE_SUSPENDED
;