r600g: occlusion query for new design
[mesa.git] / src / gallium / winsys / r600 / drm / r600_state2.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Jerome Glisse
25 */
26 #include <errno.h>
27 #include <stdint.h>
28 #include <string.h>
29 #include <stdlib.h>
30 #include <assert.h>
31 #include "xf86drm.h"
32 #include "r600.h"
33 #include "r600d.h"
34 #include "r600_priv.h"
35 #include "radeon_drm.h"
36 #include "bof.h"
37 #include "pipe/p_compiler.h"
38 #include "util/u_inlines.h"
39 #include <pipebuffer/pb_bufmgr.h>
40
/* Winsys-level buffer object: reference-counted wrapper around a
 * pipebuffer buffer.  Managed through radeon_ws_bo_reference(). */
struct radeon_ws_bo {
	struct pipe_reference reference;  /* refcount (util/u_inlines.h semantics) */
	struct pb_buffer *pb;             /* underlying pipebuffer buffer */
};
45
/* Kernel-visible buffer object.  Mirrors the layout used by the
 * radeon_bo_* helpers declared below. */
struct radeon_bo {
	struct pipe_reference reference;  /* refcount */
	unsigned handle;                  /* GEM handle, used as relocation key */
	unsigned size;
	unsigned alignment;
	unsigned map_count;               /* nested map depth */
	void *data;                       /* CPU mapping when mapped */
};
/* Helpers implemented in the bo/winsys code; declared here because this
 * file has no matching private header for them. */
struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf);
int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo);
void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo);
/* Reference src into *dst (releasing the previous *dst reference). */
void radeon_bo_reference(struct radeon *radeon,
			 struct radeon_bo **dst,
			 struct radeon_bo *src);

unsigned radeon_ws_bo_get_handle(struct radeon_ws_bo *pb_bo);

/* queries */
static void r600_context_queries_suspend(struct r600_context *ctx);
static void r600_context_queries_resume(struct r600_context *ctx);
66
67 static int r600_group_id_register_offset(unsigned offset)
68 {
69 if (offset >= R600_CONFIG_REG_OFFSET && offset < R600_CONFIG_REG_END) {
70 return R600_GROUP_CONFIG;
71 }
72 if (offset >= R600_CONTEXT_REG_OFFSET && offset < R600_CONTEXT_REG_END) {
73 return R600_GROUP_CONTEXT;
74 }
75 if (offset >= R600_ALU_CONST_OFFSET && offset < R600_ALU_CONST_END) {
76 return R600_GROUP_ALU_CONST;
77 }
78 if (offset >= R600_RESOURCE_OFFSET && offset < R600_RESOURCE_END) {
79 return R600_GROUP_RESOURCE;
80 }
81 if (offset >= R600_SAMPLER_OFFSET && offset < R600_SAMPLER_END) {
82 return R600_GROUP_SAMPLER;
83 }
84 if (offset >= R600_CTL_CONST_OFFSET && offset < R600_CTL_CONST_END) {
85 return R600_GROUP_CTL_CONST;
86 }
87 if (offset >= R600_LOOP_CONST_OFFSET && offset < R600_LOOP_CONST_END) {
88 return R600_GROUP_LOOP_CONST;
89 }
90 if (offset >= R600_BOOL_CONST_OFFSET && offset < R600_BOOL_CONST_END) {
91 return R600_GROUP_BOOL_CONST;
92 }
93 return -1;
94 }
95
96 static int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg)
97 {
98 struct r600_group_block *block, *tmp;
99 struct r600_group *group;
100 int group_id, id;
101
102 for (unsigned i = 0, n = 0; i < nreg; i += n) {
103 u32 j, r;
104 /* find number of consecutive registers */
105 for (j = i + 1, r = reg[i].offset + 4, n = 1; j < (nreg - i); j++, n++, r+=4) {
106 if (r != reg[j].offset) {
107 break;
108 }
109 }
110
111 /* find into which group this block is */
112 group_id = r600_group_id_register_offset(reg[i].offset);
113 assert(group_id >= 0);
114 group = &ctx->groups[group_id];
115
116 /* allocate new block */
117 tmp = realloc(group->blocks, (group->nblocks + 1) * sizeof(struct r600_group_block));
118 if (tmp == NULL) {
119 return -ENOMEM;
120 }
121 group->blocks = tmp;
122 block = &group->blocks[group->nblocks++];
123 for (int j = 0; j < n; j++) {
124 group->offset_block_id[((reg[i].offset - group->start_offset) >> 2) + j] = group->nblocks - 1;
125 }
126
127 /* initialize block */
128 memset(block, 0, sizeof(struct r600_group_block));
129 block->start_offset = reg[i].offset;
130 block->pm4_ndwords = n;
131 block->nreg = n;
132 for (j = 0; j < n; j++) {
133 if (reg[i+j].need_bo) {
134 block->nbo++;
135 assert(block->nbo < R600_BLOCK_MAX_BO);
136 block->pm4_bo_index[j] = block->nbo;
137 block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0);
138 block->pm4[block->pm4_ndwords++] = 0x00000000;
139 block->reloc[block->nbo].bo_pm4_index[block->reloc[block->nbo].nreloc++] = block->pm4_ndwords - 1;
140 }
141 }
142 for (j = 0; j < n; j++) {
143 if (reg[i+j].flush_flags) {
144 block->pm4[block->pm4_ndwords++] = PKT3(PKT3_SURFACE_SYNC, 3);
145 block->pm4[block->pm4_ndwords++] = reg[i+j].flush_flags;
146 block->pm4[block->pm4_ndwords++] = 0xFFFFFFFF;
147 block->pm4[block->pm4_ndwords++] = 0x00000000;
148 block->pm4[block->pm4_ndwords++] = 0x0000000A;
149 block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0);
150 block->pm4[block->pm4_ndwords++] = 0x00000000;
151 id = block->pm4_bo_index[j];
152 block->reloc[id].bo_pm4_index[block->reloc[id].nreloc++] = block->pm4_ndwords - 1;
153 }
154 }
155 /* check that we stay in limit */
156 assert(block->pm4_ndwords < R600_BLOCK_MAX_REG);
157 }
158 return 0;
159 }
160
161 static int r600_group_init(struct r600_group *group, unsigned start_offset, unsigned end_offset)
162 {
163 group->start_offset = start_offset;
164 group->end_offset = end_offset;
165 group->nblocks = 0;
166 group->blocks = NULL;
167 group->offset_block_id = calloc((end_offset - start_offset) >> 2, sizeof(unsigned));
168 if (group->offset_block_id == NULL)
169 return -ENOMEM;
170 return 0;
171 }
172
173 static void r600_group_fini(struct r600_group *group)
174 {
175 free(group->offset_block_id);
176 free(group->blocks);
177 }
178
179 /* R600/R700 configuration */
/* R600/R700 configuration.
 * Register list split into blocks by r600_context_add_block().
 * Entry fields: {need_bo, flush_flags, offset} — need_bo adds a
 * relocation NOP for the register, flush_flags adds a SURFACE_SYNC. */
static const struct r600_reg r600_reg_list[] = {
	{0, 0, R_008C00_SQ_CONFIG},
	{0, 0, R_008C04_SQ_GPR_RESOURCE_MGMT_1},
	{0, 0, R_008C08_SQ_GPR_RESOURCE_MGMT_2},
	{0, 0, R_008C0C_SQ_THREAD_RESOURCE_MGMT},
	{0, 0, R_008C10_SQ_STACK_RESOURCE_MGMT_1},
	{0, 0, R_008C14_SQ_STACK_RESOURCE_MGMT_2},
	{0, 0, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ},
	{0, 0, R_009508_TA_CNTL_AUX},
	{0, 0, R_009714_VC_ENHANCE},
	{0, 0, R_009830_DB_DEBUG},
	{0, 0, R_009838_DB_WATERMARKS},
	{0, 0, R_028350_SX_MISC},
	{0, 0, R_0286C8_SPI_THREAD_GROUPING},
	{0, 0, R_0288A8_SQ_ESGS_RING_ITEMSIZE},
	{0, 0, R_0288AC_SQ_GSVS_RING_ITEMSIZE},
	{0, 0, R_0288B0_SQ_ESTMP_RING_ITEMSIZE},
	{0, 0, R_0288B4_SQ_GSTMP_RING_ITEMSIZE},
	{0, 0, R_0288B8_SQ_VSTMP_RING_ITEMSIZE},
	{0, 0, R_0288BC_SQ_PSTMP_RING_ITEMSIZE},
	{0, 0, R_0288C0_SQ_FBUF_RING_ITEMSIZE},
	{0, 0, R_0288C4_SQ_REDUC_RING_ITEMSIZE},
	{0, 0, R_0288C8_SQ_GS_VERT_ITEMSIZE},
	{0, 0, R_028A10_VGT_OUTPUT_PATH_CNTL},
	{0, 0, R_028A14_VGT_HOS_CNTL},
	{0, 0, R_028A18_VGT_HOS_MAX_TESS_LEVEL},
	{0, 0, R_028A1C_VGT_HOS_MIN_TESS_LEVEL},
	{0, 0, R_028A20_VGT_HOS_REUSE_DEPTH},
	{0, 0, R_028A24_VGT_GROUP_PRIM_TYPE},
	{0, 0, R_028A28_VGT_GROUP_FIRST_DECR},
	{0, 0, R_028A2C_VGT_GROUP_DECR},
	{0, 0, R_028A30_VGT_GROUP_VECT_0_CNTL},
	{0, 0, R_028A34_VGT_GROUP_VECT_1_CNTL},
	{0, 0, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL},
	{0, 0, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL},
	{0, 0, R_028A40_VGT_GS_MODE},
	{0, 0, R_028A4C_PA_SC_MODE_CNTL},
	{0, 0, R_028AB0_VGT_STRMOUT_EN},
	{0, 0, R_028AB4_VGT_REUSE_OFF},
	{0, 0, R_028AB8_VGT_VTX_CNT_EN},
	{0, 0, R_028B20_VGT_STRMOUT_BUFFER_EN},
	{0, 0, R_028028_DB_STENCIL_CLEAR},
	{0, 0, R_02802C_DB_DEPTH_CLEAR},
	/* CB_COLOR* BASE/FRAG/TILE registers reference the color buffer bo */
	{1, 0, R_028040_CB_COLOR0_BASE},
	{0, 0, R_0280A0_CB_COLOR0_INFO},
	{0, 0, R_028060_CB_COLOR0_SIZE},
	{0, 0, R_028080_CB_COLOR0_VIEW},
	{1, 0, R_0280E0_CB_COLOR0_FRAG},
	{1, 0, R_0280C0_CB_COLOR0_TILE},
	{0, 0, R_028100_CB_COLOR0_MASK},
	{1, 0, R_028044_CB_COLOR1_BASE},
	{0, 0, R_0280A4_CB_COLOR1_INFO},
	{0, 0, R_028064_CB_COLOR1_SIZE},
	{0, 0, R_028084_CB_COLOR1_VIEW},
	{1, 0, R_0280E4_CB_COLOR1_FRAG},
	{1, 0, R_0280C4_CB_COLOR1_TILE},
	{0, 0, R_028104_CB_COLOR1_MASK},
	{1, 0, R_028048_CB_COLOR2_BASE},
	{0, 0, R_0280A8_CB_COLOR2_INFO},
	{0, 0, R_028068_CB_COLOR2_SIZE},
	{0, 0, R_028088_CB_COLOR2_VIEW},
	{1, 0, R_0280E8_CB_COLOR2_FRAG},
	{1, 0, R_0280C8_CB_COLOR2_TILE},
	{0, 0, R_028108_CB_COLOR2_MASK},
	{1, 0, R_02804C_CB_COLOR3_BASE},
	{0, 0, R_0280AC_CB_COLOR3_INFO},
	{0, 0, R_02806C_CB_COLOR3_SIZE},
	{0, 0, R_02808C_CB_COLOR3_VIEW},
	{1, 0, R_0280EC_CB_COLOR3_FRAG},
	{1, 0, R_0280CC_CB_COLOR3_TILE},
	{0, 0, R_02810C_CB_COLOR3_MASK},
	{1, 0, R_028050_CB_COLOR4_BASE},
	{0, 0, R_0280B0_CB_COLOR4_INFO},
	{0, 0, R_028070_CB_COLOR4_SIZE},
	{0, 0, R_028090_CB_COLOR4_VIEW},
	{1, 0, R_0280F0_CB_COLOR4_FRAG},
	{1, 0, R_0280D0_CB_COLOR4_TILE},
	{0, 0, R_028110_CB_COLOR4_MASK},
	{1, 0, R_028054_CB_COLOR5_BASE},
	{0, 0, R_0280B4_CB_COLOR5_INFO},
	{0, 0, R_028074_CB_COLOR5_SIZE},
	{0, 0, R_028094_CB_COLOR5_VIEW},
	{1, 0, R_0280F4_CB_COLOR5_FRAG},
	{1, 0, R_0280D4_CB_COLOR5_TILE},
	{0, 0, R_028114_CB_COLOR5_MASK},
	{1, 0, R_028058_CB_COLOR6_BASE},
	{0, 0, R_0280B8_CB_COLOR6_INFO},
	{0, 0, R_028078_CB_COLOR6_SIZE},
	{0, 0, R_028098_CB_COLOR6_VIEW},
	{1, 0, R_0280F8_CB_COLOR6_FRAG},
	{1, 0, R_0280D8_CB_COLOR6_TILE},
	{0, 0, R_028118_CB_COLOR6_MASK},
	{1, 0, R_02805C_CB_COLOR7_BASE},
	{0, 0, R_0280BC_CB_COLOR7_INFO},
	{0, 0, R_02807C_CB_COLOR7_SIZE},
	{0, 0, R_02809C_CB_COLOR7_VIEW},
	{1, 0, R_0280FC_CB_COLOR7_FRAG},
	{1, 0, R_0280DC_CB_COLOR7_TILE},
	{0, 0, R_02811C_CB_COLOR7_MASK},
	{0, 0, R_028120_CB_CLEAR_RED},
	{0, 0, R_028124_CB_CLEAR_GREEN},
	{0, 0, R_028128_CB_CLEAR_BLUE},
	{0, 0, R_02812C_CB_CLEAR_ALPHA},
	{0, 0, R_02823C_CB_SHADER_MASK},
	{0, 0, R_028238_CB_TARGET_MASK},
	{0, 0, R_028410_SX_ALPHA_TEST_CONTROL},
	{0, 0, R_028414_CB_BLEND_RED},
	{0, 0, R_028418_CB_BLEND_GREEN},
	{0, 0, R_02841C_CB_BLEND_BLUE},
	{0, 0, R_028420_CB_BLEND_ALPHA},
	{0, 0, R_028424_CB_FOG_RED},
	{0, 0, R_028428_CB_FOG_GREEN},
	{0, 0, R_02842C_CB_FOG_BLUE},
	{0, 0, R_028430_DB_STENCILREFMASK},
	{0, 0, R_028434_DB_STENCILREFMASK_BF},
	{0, 0, R_028438_SX_ALPHA_REF},
	{0, 0, R_0286DC_SPI_FOG_CNTL},
	{0, 0, R_0286E0_SPI_FOG_FUNC_SCALE},
	{0, 0, R_0286E4_SPI_FOG_FUNC_BIAS},
	{0, 0, R_028780_CB_BLEND0_CONTROL},
	{0, 0, R_028784_CB_BLEND1_CONTROL},
	{0, 0, R_028788_CB_BLEND2_CONTROL},
	{0, 0, R_02878C_CB_BLEND3_CONTROL},
	{0, 0, R_028790_CB_BLEND4_CONTROL},
	{0, 0, R_028794_CB_BLEND5_CONTROL},
	{0, 0, R_028798_CB_BLEND6_CONTROL},
	{0, 0, R_02879C_CB_BLEND7_CONTROL},
	{0, 0, R_0287A0_CB_SHADER_CONTROL},
	{0, 0, R_028800_DB_DEPTH_CONTROL},
	{0, 0, R_028804_CB_BLEND_CONTROL},
	{0, 0, R_028808_CB_COLOR_CONTROL},
	{0, 0, R_02880C_DB_SHADER_CONTROL},
	{0, 0, R_028C04_PA_SC_AA_CONFIG},
	{0, 0, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX},
	{0, 0, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX},
	{0, 0, R_028C30_CB_CLRCMP_CONTROL},
	{0, 0, R_028C34_CB_CLRCMP_SRC},
	{0, 0, R_028C38_CB_CLRCMP_DST},
	{0, 0, R_028C3C_CB_CLRCMP_MSK},
	{0, 0, R_028C48_PA_SC_AA_MASK},
	{0, 0, R_028D2C_DB_SRESULTS_COMPARE_STATE1},
	{0, 0, R_028D44_DB_ALPHA_TO_MASK},
	/* depth buffer base references the depth bo */
	{1, 0, R_02800C_DB_DEPTH_BASE},
	{0, 0, R_028000_DB_DEPTH_SIZE},
	{0, 0, R_028004_DB_DEPTH_VIEW},
	{0, 0, R_028010_DB_DEPTH_INFO},
	{0, 0, R_028D0C_DB_RENDER_CONTROL},
	{0, 0, R_028D10_DB_RENDER_OVERRIDE},
	{0, 0, R_028D24_DB_HTILE_SURFACE},
	{0, 0, R_028D30_DB_PRELOAD_CONTROL},
	{0, 0, R_028D34_DB_PREFETCH_LIMIT},
	{0, 0, R_028030_PA_SC_SCREEN_SCISSOR_TL},
	{0, 0, R_028034_PA_SC_SCREEN_SCISSOR_BR},
	{0, 0, R_028200_PA_SC_WINDOW_OFFSET},
	{0, 0, R_028204_PA_SC_WINDOW_SCISSOR_TL},
	{0, 0, R_028208_PA_SC_WINDOW_SCISSOR_BR},
	{0, 0, R_02820C_PA_SC_CLIPRECT_RULE},
	{0, 0, R_028210_PA_SC_CLIPRECT_0_TL},
	{0, 0, R_028214_PA_SC_CLIPRECT_0_BR},
	{0, 0, R_028218_PA_SC_CLIPRECT_1_TL},
	{0, 0, R_02821C_PA_SC_CLIPRECT_1_BR},
	{0, 0, R_028220_PA_SC_CLIPRECT_2_TL},
	{0, 0, R_028224_PA_SC_CLIPRECT_2_BR},
	{0, 0, R_028228_PA_SC_CLIPRECT_3_TL},
	{0, 0, R_02822C_PA_SC_CLIPRECT_3_BR},
	{0, 0, R_028230_PA_SC_EDGERULE},
	{0, 0, R_028240_PA_SC_GENERIC_SCISSOR_TL},
	{0, 0, R_028244_PA_SC_GENERIC_SCISSOR_BR},
	{0, 0, R_028250_PA_SC_VPORT_SCISSOR_0_TL},
	{0, 0, R_028254_PA_SC_VPORT_SCISSOR_0_BR},
	{0, 0, R_0282D0_PA_SC_VPORT_ZMIN_0},
	{0, 0, R_0282D4_PA_SC_VPORT_ZMAX_0},
	{0, 0, R_02843C_PA_CL_VPORT_XSCALE_0},
	{0, 0, R_028440_PA_CL_VPORT_XOFFSET_0},
	{0, 0, R_028444_PA_CL_VPORT_YSCALE_0},
	{0, 0, R_028448_PA_CL_VPORT_YOFFSET_0},
	{0, 0, R_02844C_PA_CL_VPORT_ZSCALE_0},
	{0, 0, R_028450_PA_CL_VPORT_ZOFFSET_0},
	{0, 0, R_0286D4_SPI_INTERP_CONTROL_0},
	{0, 0, R_028810_PA_CL_CLIP_CNTL},
	{0, 0, R_028814_PA_SU_SC_MODE_CNTL},
	{0, 0, R_028818_PA_CL_VTE_CNTL},
	{0, 0, R_02881C_PA_CL_VS_OUT_CNTL},
	{0, 0, R_028820_PA_CL_NANINF_CNTL},
	{0, 0, R_028A00_PA_SU_POINT_SIZE},
	{0, 0, R_028A04_PA_SU_POINT_MINMAX},
	{0, 0, R_028A08_PA_SU_LINE_CNTL},
	{0, 0, R_028A0C_PA_SC_LINE_STIPPLE},
	{0, 0, R_028A48_PA_SC_MPASS_PS_CNTL},
	{0, 0, R_028C00_PA_SC_LINE_CNTL},
	{0, 0, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ},
	{0, 0, R_028C10_PA_CL_GB_VERT_DISC_ADJ},
	{0, 0, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ},
	{0, 0, R_028C18_PA_CL_GB_HORZ_DISC_ADJ},
	{0, 0, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL},
	{0, 0, R_028DFC_PA_SU_POLY_OFFSET_CLAMP},
	{0, 0, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE},
	{0, 0, R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET},
	{0, 0, R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE},
	{0, 0, R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET},
	{0, 0, R_028E20_PA_CL_UCP0_X},
	{0, 0, R_028E24_PA_CL_UCP0_Y},
	{0, 0, R_028E28_PA_CL_UCP0_Z},
	{0, 0, R_028E2C_PA_CL_UCP0_W},
	{0, 0, R_028E30_PA_CL_UCP1_X},
	{0, 0, R_028E34_PA_CL_UCP1_Y},
	{0, 0, R_028E38_PA_CL_UCP1_Z},
	{0, 0, R_028E3C_PA_CL_UCP1_W},
	{0, 0, R_028E40_PA_CL_UCP2_X},
	{0, 0, R_028E44_PA_CL_UCP2_Y},
	{0, 0, R_028E48_PA_CL_UCP2_Z},
	{0, 0, R_028E4C_PA_CL_UCP2_W},
	{0, 0, R_028E50_PA_CL_UCP3_X},
	{0, 0, R_028E54_PA_CL_UCP3_Y},
	{0, 0, R_028E58_PA_CL_UCP3_Z},
	{0, 0, R_028E5C_PA_CL_UCP3_W},
	{0, 0, R_028E60_PA_CL_UCP4_X},
	{0, 0, R_028E64_PA_CL_UCP4_Y},
	{0, 0, R_028E68_PA_CL_UCP4_Z},
	{0, 0, R_028E6C_PA_CL_UCP4_W},
	{0, 0, R_028E70_PA_CL_UCP5_X},
	{0, 0, R_028E74_PA_CL_UCP5_Y},
	{0, 0, R_028E78_PA_CL_UCP5_Z},
	{0, 0, R_028E7C_PA_CL_UCP5_W},
	{0, 0, R_028380_SQ_VTX_SEMANTIC_0},
	{0, 0, R_028384_SQ_VTX_SEMANTIC_1},
	{0, 0, R_028388_SQ_VTX_SEMANTIC_2},
	{0, 0, R_02838C_SQ_VTX_SEMANTIC_3},
	{0, 0, R_028390_SQ_VTX_SEMANTIC_4},
	{0, 0, R_028394_SQ_VTX_SEMANTIC_5},
	{0, 0, R_028398_SQ_VTX_SEMANTIC_6},
	{0, 0, R_02839C_SQ_VTX_SEMANTIC_7},
	{0, 0, R_0283A0_SQ_VTX_SEMANTIC_8},
	{0, 0, R_0283A4_SQ_VTX_SEMANTIC_9},
	{0, 0, R_0283A8_SQ_VTX_SEMANTIC_10},
	{0, 0, R_0283AC_SQ_VTX_SEMANTIC_11},
	{0, 0, R_0283B0_SQ_VTX_SEMANTIC_12},
	{0, 0, R_0283B4_SQ_VTX_SEMANTIC_13},
	{0, 0, R_0283B8_SQ_VTX_SEMANTIC_14},
	{0, 0, R_0283BC_SQ_VTX_SEMANTIC_15},
	{0, 0, R_0283C0_SQ_VTX_SEMANTIC_16},
	{0, 0, R_0283C4_SQ_VTX_SEMANTIC_17},
	{0, 0, R_0283C8_SQ_VTX_SEMANTIC_18},
	{0, 0, R_0283CC_SQ_VTX_SEMANTIC_19},
	{0, 0, R_0283D0_SQ_VTX_SEMANTIC_20},
	{0, 0, R_0283D4_SQ_VTX_SEMANTIC_21},
	{0, 0, R_0283D8_SQ_VTX_SEMANTIC_22},
	{0, 0, R_0283DC_SQ_VTX_SEMANTIC_23},
	{0, 0, R_0283E0_SQ_VTX_SEMANTIC_24},
	{0, 0, R_0283E4_SQ_VTX_SEMANTIC_25},
	{0, 0, R_0283E8_SQ_VTX_SEMANTIC_26},
	{0, 0, R_0283EC_SQ_VTX_SEMANTIC_27},
	{0, 0, R_0283F0_SQ_VTX_SEMANTIC_28},
	{0, 0, R_0283F4_SQ_VTX_SEMANTIC_29},
	{0, 0, R_0283F8_SQ_VTX_SEMANTIC_30},
	{0, 0, R_0283FC_SQ_VTX_SEMANTIC_31},
	{0, 0, R_028614_SPI_VS_OUT_ID_0},
	{0, 0, R_028618_SPI_VS_OUT_ID_1},
	{0, 0, R_02861C_SPI_VS_OUT_ID_2},
	{0, 0, R_028620_SPI_VS_OUT_ID_3},
	{0, 0, R_028624_SPI_VS_OUT_ID_4},
	{0, 0, R_028628_SPI_VS_OUT_ID_5},
	{0, 0, R_02862C_SPI_VS_OUT_ID_6},
	{0, 0, R_028630_SPI_VS_OUT_ID_7},
	{0, 0, R_028634_SPI_VS_OUT_ID_8},
	{0, 0, R_028638_SPI_VS_OUT_ID_9},
	{0, 0, R_0286C4_SPI_VS_OUT_CONFIG},
	/* shader program start addresses reference the shader bo */
	{1, 0, R_028858_SQ_PGM_START_VS},
	{0, S_0085F0_SH_ACTION_ENA(1), R_028868_SQ_PGM_RESOURCES_VS},
	{1, 0, R_028894_SQ_PGM_START_FS},
	{0, S_0085F0_SH_ACTION_ENA(1), R_0288A4_SQ_PGM_RESOURCES_FS},
	{0, 0, R_0288D0_SQ_PGM_CF_OFFSET_VS},
	{0, 0, R_0288DC_SQ_PGM_CF_OFFSET_FS},
	{0, 0, R_028644_SPI_PS_INPUT_CNTL_0},
	{0, 0, R_028648_SPI_PS_INPUT_CNTL_1},
	{0, 0, R_02864C_SPI_PS_INPUT_CNTL_2},
	{0, 0, R_028650_SPI_PS_INPUT_CNTL_3},
	{0, 0, R_028654_SPI_PS_INPUT_CNTL_4},
	{0, 0, R_028658_SPI_PS_INPUT_CNTL_5},
	{0, 0, R_02865C_SPI_PS_INPUT_CNTL_6},
	{0, 0, R_028660_SPI_PS_INPUT_CNTL_7},
	{0, 0, R_028664_SPI_PS_INPUT_CNTL_8},
	{0, 0, R_028668_SPI_PS_INPUT_CNTL_9},
	{0, 0, R_02866C_SPI_PS_INPUT_CNTL_10},
	{0, 0, R_028670_SPI_PS_INPUT_CNTL_11},
	{0, 0, R_028674_SPI_PS_INPUT_CNTL_12},
	{0, 0, R_028678_SPI_PS_INPUT_CNTL_13},
	{0, 0, R_02867C_SPI_PS_INPUT_CNTL_14},
	{0, 0, R_028680_SPI_PS_INPUT_CNTL_15},
	{0, 0, R_028684_SPI_PS_INPUT_CNTL_16},
	{0, 0, R_028688_SPI_PS_INPUT_CNTL_17},
	{0, 0, R_02868C_SPI_PS_INPUT_CNTL_18},
	{0, 0, R_028690_SPI_PS_INPUT_CNTL_19},
	{0, 0, R_028694_SPI_PS_INPUT_CNTL_20},
	{0, 0, R_028698_SPI_PS_INPUT_CNTL_21},
	{0, 0, R_02869C_SPI_PS_INPUT_CNTL_22},
	{0, 0, R_0286A0_SPI_PS_INPUT_CNTL_23},
	{0, 0, R_0286A4_SPI_PS_INPUT_CNTL_24},
	{0, 0, R_0286A8_SPI_PS_INPUT_CNTL_25},
	{0, 0, R_0286AC_SPI_PS_INPUT_CNTL_26},
	{0, 0, R_0286B0_SPI_PS_INPUT_CNTL_27},
	{0, 0, R_0286B4_SPI_PS_INPUT_CNTL_28},
	{0, 0, R_0286B8_SPI_PS_INPUT_CNTL_29},
	{0, 0, R_0286BC_SPI_PS_INPUT_CNTL_30},
	{0, 0, R_0286C0_SPI_PS_INPUT_CNTL_31},
	{0, 0, R_0286CC_SPI_PS_IN_CONTROL_0},
	{0, 0, R_0286D0_SPI_PS_IN_CONTROL_1},
	{0, 0, R_0286D8_SPI_INPUT_Z},
	{1, S_0085F0_SH_ACTION_ENA(1), R_028840_SQ_PGM_START_PS},
	{0, 0, R_028850_SQ_PGM_RESOURCES_PS},
	{0, 0, R_028854_SQ_PGM_EXPORTS_PS},
	{0, 0, R_0288CC_SQ_PGM_CF_OFFSET_PS},
	{0, 0, R_008958_VGT_PRIMITIVE_TYPE},
	{0, 0, R_028400_VGT_MAX_VTX_INDX},
	{0, 0, R_028404_VGT_MIN_VTX_INDX},
	{0, 0, R_028408_VGT_INDX_OFFSET},
	{0, 0, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX},
	{0, 0, R_028A84_VGT_PRIMITIVEID_EN},
	{0, 0, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN},
	{0, 0, R_028AA0_VGT_INSTANCE_STEP_RATE_0},
	{0, 0, R_028AA4_VGT_INSTANCE_STEP_RATE_1},
};
502
503 /* SHADER CONSTANT R600/R700 */
504 static int r600_state_constant_init(struct r600_context *ctx, u32 offset)
505 {
506 struct r600_reg r600_shader_constant[] = {
507 {0, 0, R_030000_SQ_ALU_CONSTANT0_0},
508 {0, 0, R_030004_SQ_ALU_CONSTANT1_0},
509 {0, 0, R_030008_SQ_ALU_CONSTANT2_0},
510 {0, 0, R_03000C_SQ_ALU_CONSTANT3_0},
511 };
512 unsigned nreg = sizeof(r600_shader_constant)/sizeof(struct r600_reg);
513
514 for (int i = 0; i < nreg; i++) {
515 r600_shader_constant[i].offset += offset;
516 }
517 return r600_context_add_block(ctx, r600_shader_constant, nreg);
518 }
519
520 /* SHADER RESOURCE R600/R700 */
521 static int r600_state_resource_init(struct r600_context *ctx, u32 offset)
522 {
523 struct r600_reg r600_shader_resource[] = {
524 {0, 0, R_038000_RESOURCE0_WORD0},
525 {0, 0, R_038004_RESOURCE0_WORD1},
526 {1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), R_038008_RESOURCE0_WORD2},
527 {1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), R_03800C_RESOURCE0_WORD3},
528 {0, 0, R_038010_RESOURCE0_WORD4},
529 {0, 0, R_038014_RESOURCE0_WORD5},
530 {0, 0, R_038018_RESOURCE0_WORD6},
531 };
532 unsigned nreg = sizeof(r600_shader_resource)/sizeof(struct r600_reg);
533
534 for (int i = 0; i < nreg; i++) {
535 r600_shader_resource[i].offset += offset;
536 }
537 return r600_context_add_block(ctx, r600_shader_resource, nreg);
538 }
539
540 /* SHADER SAMPLER R600/R700 */
541 static int r600_state_sampler_init(struct r600_context *ctx, u32 offset)
542 {
543 struct r600_reg r600_shader_sampler[] = {
544 {0, 0, R_03C000_SQ_TEX_SAMPLER_WORD0_0},
545 {0, 0, R_03C004_SQ_TEX_SAMPLER_WORD1_0},
546 {0, 0, R_03C008_SQ_TEX_SAMPLER_WORD2_0},
547 };
548 unsigned nreg = sizeof(r600_shader_sampler)/sizeof(struct r600_reg);
549
550 for (int i = 0; i < nreg; i++) {
551 r600_shader_sampler[i].offset += offset;
552 }
553 return r600_context_add_block(ctx, r600_shader_sampler, nreg);
554 }
555
556 /* SHADER SAMPLER BORDER R600/R700 */
557 static int r600_state_sampler_border_init(struct r600_context *ctx, u32 offset)
558 {
559 struct r600_reg r600_shader_sampler_border[] = {
560 {0, 0, R_00A400_TD_PS_SAMPLER0_BORDER_RED},
561 {0, 0, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN},
562 {0, 0, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE},
563 {0, 0, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA},
564 };
565 unsigned nreg = sizeof(r600_shader_sampler_border)/sizeof(struct r600_reg);
566
567 for (int i = 0; i < nreg; i++) {
568 r600_shader_sampler_border[i].offset += offset;
569 }
570 return r600_context_add_block(ctx, r600_shader_sampler_border, nreg);
571 }
572
573 /* initialize */
574 void r600_context_fini(struct r600_context *ctx)
575 {
576 for (int i = 0; i < ctx->ngroups; i++) {
577 r600_group_fini(&ctx->groups[i]);
578 }
579 free(ctx->reloc);
580 free(ctx->pm4);
581 memset(ctx, 0, sizeof(struct r600_context));
582 }
583
/*
 * Initialize a context: set up the eight register groups, register every
 * known R600/R700 state block, then allocate the command stream (pm4),
 * relocation and bo-reference arrays.
 *
 * Returns 0 on success, negative errno on failure; on failure everything
 * allocated so far is released via r600_context_fini().
 */
int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
{
	int r;

	memset(ctx, 0, sizeof(struct r600_context));
	ctx->radeon = radeon;
	LIST_INITHEAD(&ctx->query_list);
	/* initialize groups */
	r = r600_group_init(&ctx->groups[R600_GROUP_CONFIG], R600_CONFIG_REG_OFFSET, R600_CONFIG_REG_END);
	if (r) {
		goto out_err;
	}
	r = r600_group_init(&ctx->groups[R600_GROUP_CTL_CONST], R600_CTL_CONST_OFFSET, R600_CTL_CONST_END);
	if (r) {
		goto out_err;
	}
	r = r600_group_init(&ctx->groups[R600_GROUP_LOOP_CONST], R600_LOOP_CONST_OFFSET, R600_LOOP_CONST_END);
	if (r) {
		goto out_err;
	}
	r = r600_group_init(&ctx->groups[R600_GROUP_BOOL_CONST], R600_BOOL_CONST_OFFSET, R600_BOOL_CONST_END);
	if (r) {
		goto out_err;
	}
	r = r600_group_init(&ctx->groups[R600_GROUP_SAMPLER], R600_SAMPLER_OFFSET, R600_SAMPLER_END);
	if (r) {
		goto out_err;
	}
	r = r600_group_init(&ctx->groups[R600_GROUP_RESOURCE], R600_RESOURCE_OFFSET, R600_RESOURCE_END);
	if (r) {
		goto out_err;
	}
	r = r600_group_init(&ctx->groups[R600_GROUP_ALU_CONST], R600_ALU_CONST_OFFSET, R600_ALU_CONST_END);
	if (r) {
		goto out_err;
	}
	r = r600_group_init(&ctx->groups[R600_GROUP_CONTEXT], R600_CONTEXT_REG_OFFSET, R600_CONTEXT_REG_END);
	if (r) {
		goto out_err;
	}
	ctx->ngroups = R600_NGROUPS;

	/* add blocks */
	r = r600_context_add_block(ctx, r600_reg_list, sizeof(r600_reg_list)/sizeof(struct r600_reg));
	if (r)
		goto out_err;

	/* PS SAMPLER BORDER: 18 slots, 4 dwords (0x10 bytes) each */
	for (int j = 0, offset = 0; j < 18; j++, offset += 0x10) {
		r = r600_state_sampler_border_init(ctx, offset);
		if (r)
			goto out_err;
	}

	/* VS SAMPLER BORDER: same layout, rebased at 0x200 */
	for (int j = 0, offset = 0x200; j < 18; j++, offset += 0x10) {
		r = r600_state_sampler_border_init(ctx, offset);
		if (r)
			goto out_err;
	}
	/* PS SAMPLER: 18 slots, 3 dwords (0xC bytes) each */
	for (int j = 0, offset = 0; j < 18; j++, offset += 0xC) {
		r = r600_state_sampler_init(ctx, offset);
		if (r)
			goto out_err;
	}
	/* VS SAMPLER: same layout, rebased at 0xD8 (= 18 * 0xC) */
	for (int j = 0, offset = 0xD8; j < 18; j++, offset += 0xC) {
		r = r600_state_sampler_init(ctx, offset);
		if (r)
			goto out_err;
	}
	/* PS RESOURCE: 160 slots, 7 dwords (0x1C bytes) each */
	for (int j = 0, offset = 0; j < 160; j++, offset += 0x1C) {
		r = r600_state_resource_init(ctx, offset);
		if (r)
			goto out_err;
	}
	/* VS RESOURCE: same layout, rebased at 0x1180 (= 160 * 0x1C) */
	for (int j = 0, offset = 0x1180; j < 160; j++, offset += 0x1C) {
		r = r600_state_resource_init(ctx, offset);
		if (r)
			goto out_err;
	}
	/* PS CONSTANT: 256 slots, 4 dwords (0x10 bytes) each */
	for (int j = 0, offset = 0; j < 256; j++, offset += 0x10) {
		r = r600_state_constant_init(ctx, offset);
		if (r)
			goto out_err;
	}
	/* VS CONSTANT: same layout, rebased at 0x1000 (= 256 * 0x10) */
	for (int j = 0, offset = 0x1000; j < 256; j++, offset += 0x10) {
		r = r600_state_constant_init(ctx, offset);
		if (r)
			goto out_err;
	}

	/* allocate cs variables */
	ctx->nreloc = RADEON_CTX_MAX_PM4;
	ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc));
	if (ctx->reloc == NULL) {
		r = -ENOMEM;
		goto out_err;
	}
	/* per-relocation bo references, parallel to ctx->reloc */
	ctx->bo = calloc(ctx->nreloc, sizeof(void *));
	if (ctx->bo == NULL) {
		r = -ENOMEM;
		goto out_err;
	}
	ctx->pm4_ndwords = RADEON_CTX_MAX_PM4;
	ctx->pm4 = calloc(ctx->pm4_ndwords, 4);  /* 4 bytes per dword */
	if (ctx->pm4 == NULL) {
		r = -ENOMEM;
		goto out_err;
	}
	return 0;
out_err:
	r600_context_fini(ctx);
	return r;
}
704
705 static void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct radeon_bo *bo)
706 {
707 int i, reloc_id;
708
709 assert(bo != NULL);
710 for (i = 0, reloc_id = -1; i < ctx->creloc; i++) {
711 if (ctx->reloc[i].handle == bo->handle) {
712 reloc_id = i * sizeof(struct r600_reloc) / 4;
713 /* set PKT3 to point to proper reloc */
714 *pm4 = reloc_id;
715 }
716 }
717 if (reloc_id == -1) {
718 /* add new relocation */
719 if (ctx->creloc >= ctx->nreloc) {
720 r600_context_flush(ctx);
721 }
722 reloc_id = ctx->creloc * sizeof(struct r600_reloc) / 4;
723 ctx->reloc[ctx->creloc].handle = bo->handle;
724 ctx->reloc[ctx->creloc].read_domain = RADEON_GEM_DOMAIN_GTT;
725 ctx->reloc[ctx->creloc].write_domain = RADEON_GEM_DOMAIN_GTT;
726 ctx->reloc[ctx->creloc].flags = 0;
727 radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo);
728 ctx->creloc++;
729 /* set PKT3 to point to proper reloc */
730 *pm4 = reloc_id;
731 }
732 }
733
/*
 * Apply a pipe state: for each register in `state`, locate the owning
 * block, merge the new value into the cached PM4 dword (masked), attach
 * the bo reference when the register needs one, and mark the block
 * dirty so it is re-emitted on the next draw.
 */
void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state)
{
	struct r600_group *group;
	struct r600_group_block *block;

	for (int i = 0; i < state->nregs; i++) {
		unsigned id;
		group = &ctx->groups[state->regs[i].group_id];
		/* id is reused: first the block id within the group... */
		id = group->offset_block_id[(state->regs[i].offset - group->start_offset) >> 2];
		block = &group->blocks[id];
		/* ...then the dword index of this register inside the block */
		id = (state->regs[i].offset - block->start_offset) >> 2;
		block->pm4[id] &= ~state->regs[i].mask;
		block->pm4[id] |= state->regs[i].value;
		if (block->pm4_bo_index[id]) {
			/* find relocation (id becomes the bo slot, 1-based) */
			id = block->pm4_bo_index[id];
			radeon_ws_bo_reference(ctx->radeon, &block->reloc[id].bo, state->regs[i].bo);
		}
		block->status |= R600_BLOCK_STATUS_ENABLED;
		block->status |= R600_BLOCK_STATUS_DIRTY;
		/* 2 = PKT3 header + register offset dword */
		ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
	}
}
757
/*
 * Update the 7-dword resource descriptor block covering `offset` with
 * the values from `state` and record the bo relocations for WORD2/WORD3.
 */
static inline void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
{
	struct r600_group_block *block;
	unsigned id;

	offset -= ctx->groups[R600_GROUP_RESOURCE].start_offset;
	id = ctx->groups[R600_GROUP_RESOURCE].offset_block_id[offset >> 2];
	block = &ctx->groups[R600_GROUP_RESOURCE].blocks[id];
	block->pm4[0] = state->regs[0].value;
	block->pm4[1] = state->regs[1].value;
	block->pm4[2] = state->regs[2].value;
	block->pm4[3] = state->regs[3].value;
	block->pm4[4] = state->regs[4].value;
	block->pm4[5] = state->regs[5].value;
	block->pm4[6] = state->regs[6].value;
	/* NOTE(review): these two self-references (src == dst) look like
	 * no-ops under pipe_reference semantics and are immediately
	 * overwritten below — presumably leftovers; confirm before removing. */
	radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, block->reloc[1].bo);
	radeon_ws_bo_reference(ctx->radeon , &block->reloc[2].bo, block->reloc[2].bo);
	if (state->regs[0].bo) {
		/* VERTEX RESOURCE: we pretend there are 2 bo to relocate so
		 * the VERTEX & TEXTURE resource cases share a single path
		 */
		radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo);
		radeon_ws_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo);
	} else {
		/* TEXTURE RESOURCE */
		radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo);
		radeon_ws_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo);
	}
	block->status |= R600_BLOCK_STATUS_ENABLED;
	block->status |= R600_BLOCK_STATUS_DIRTY;
	ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
}
790
791 void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
792 {
793 unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1C * rid;
794
795 r600_context_pipe_state_set_resource(ctx, state, offset);
796 }
797
798 void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
799 {
800 unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1180 + 0x1C * rid;
801
802 r600_context_pipe_state_set_resource(ctx, state, offset);
803 }
804
805 static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
806 {
807 struct r600_group_block *block;
808 unsigned id;
809
810 offset -= ctx->groups[R600_GROUP_SAMPLER].start_offset;
811 id = ctx->groups[R600_GROUP_SAMPLER].offset_block_id[offset >> 2];
812 block = &ctx->groups[R600_GROUP_SAMPLER].blocks[id];
813 block->pm4[0] = state->regs[0].value;
814 block->pm4[1] = state->regs[1].value;
815 block->pm4[2] = state->regs[2].value;
816 block->status |= R600_BLOCK_STATUS_ENABLED;
817 block->status |= R600_BLOCK_STATUS_DIRTY;
818 ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
819 }
820
821 static inline void r600_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
822 {
823 struct r600_group_block *block;
824 unsigned id;
825
826 offset -= ctx->groups[R600_GROUP_CONFIG].start_offset;
827 id = ctx->groups[R600_GROUP_CONFIG].offset_block_id[offset >> 2];
828 block = &ctx->groups[R600_GROUP_CONFIG].blocks[id];
829 block->pm4[0] = state->regs[3].value;
830 block->pm4[1] = state->regs[4].value;
831 block->pm4[2] = state->regs[5].value;
832 block->pm4[3] = state->regs[6].value;
833 block->status |= R600_BLOCK_STATUS_ENABLED;
834 block->status |= R600_BLOCK_STATUS_DIRTY;
835 ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
836 }
837
838 void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
839 {
840 unsigned offset;
841
842 offset = 0x0003C000 + id * 0xc;
843 r600_context_pipe_state_set_sampler(ctx, state, offset);
844 if (state->nregs > 3) {
845 offset = 0x0000A400 + id * 0x10;
846 r600_context_pipe_state_set_sampler_border(ctx, state, offset);
847 }
848 }
849
850 void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
851 {
852 unsigned offset;
853
854 offset = 0x0003C0D8 + id * 0xc;
855 r600_context_pipe_state_set_sampler(ctx, state, offset);
856 if (state->nregs > 3) {
857 offset = 0x0000A600 + id * 0x10;
858 r600_context_pipe_state_set_sampler_border(ctx, state, offset);
859 }
860 }
861
/*
 * Emit every dirty block of `group` into the command stream: first
 * patch the per-block relocation NOPs with current reloc ids, then copy
 * the pre-built PM4 words behind a PKT3 `opcode` set-register header.
 * Clears each emitted block's dirty bit.
 */
static inline void r600_context_group_emit_dirty(struct r600_context *ctx, struct r600_group *group, unsigned opcode)
{
	struct radeon_bo *bo;
	int id;

	for (int i = 0; i < group->nblocks; i++) {
		struct r600_group_block *block = &group->blocks[i];
		if (block->status & R600_BLOCK_STATUS_DIRTY) {
			for (int j = 0; j < block->nreg; j++) {
				if (block->pm4_bo_index[j]) {
					/* find relocation; patch every PM4 slot that
					 * references this bo */
					id = block->pm4_bo_index[j];
					bo = radeon_bo_pb_get_bo(block->reloc[id].bo->pb);
					for (int k = 0; k < block->reloc[id].nreloc; k++) {
						r600_context_bo_reloc(ctx, &block->pm4[block->reloc[id].bo_pm4_index[k]], bo);
					}
				}
			}

			/* header + relative register offset + cached dwords */
			ctx->pm4[ctx->pm4_cdwords++] = PKT3(opcode, block->nreg);
			ctx->pm4[ctx->pm4_cdwords++] = (block->start_offset - group->start_offset) >> 2;
			memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, block->pm4_ndwords * 4);
			ctx->pm4_cdwords += block->pm4_ndwords;
			/* block is dirty here, so XOR clears the flag */
			block->status ^= R600_BLOCK_STATUS_DIRTY;
		}
	}
}
889
/* Schedule one draw: flush if the command stream or relocation table is
 * too full, emit all dirty state groups, then the draw packets themselves.
 */
void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
{
	/* dwords the draw packets below need: 9 for auto-index draws,
	 * 13 when an index buffer (and its relocation NOP) is emitted */
	unsigned ndwords = 9;

	if (draw->indices) {
		ndwords = 13;
		/* make sure there is enough relocation space before scheduling draw */
		if (ctx->creloc >= (ctx->nreloc - 1)) {
			r600_context_flush(ctx);
		}
	}
	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
		/* need to flush */
		r600_context_flush(ctx);
	}
	/* at that point everythings is flushed and ctx->pm4_cdwords = 0 */
	if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) {
		R600_ERR("context is too big to be scheduled\n");
		return;
	}
	/* enough room to copy packet */
	r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_CONFIG], PKT3_SET_CONFIG_REG);
	r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_CONTEXT], PKT3_SET_CONTEXT_REG);
	r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_ALU_CONST], PKT3_SET_ALU_CONST);
	r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_SAMPLER], PKT3_SET_SAMPLER);
	r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_RESOURCE], PKT3_SET_RESOURCE);
	/* draw packet */
	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0);
	ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type;
	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0);
	ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances;
	if (draw->indices) {
		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3);
		ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset;
		ctx->pm4[ctx->pm4_cdwords++] = 0;
		ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
		ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
		/* NOP carries the relocation for the index buffer address */
		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
		ctx->pm4[ctx->pm4_cdwords++] = 0;
		r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], radeon_bo_pb_get_bo(draw->indices->pb));
	} else {
		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1);
		ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
		ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
	}
	/* flush & invalidate caches after the draw */
	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0);
	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT;
	/* all dirty state have been scheduled in current cs */
	ctx->pm4_dirty_cdwords = 0;
}
940
941 void r600_context_flush(struct r600_context *ctx)
942 {
943 struct drm_radeon_cs drmib;
944 struct drm_radeon_cs_chunk chunks[2];
945 uint64_t chunk_array[2];
946 struct r600_group_block *block;
947 int r;
948
949 if (!ctx->pm4_cdwords)
950 return;
951
952 /* suspend queries */
953 r600_context_queries_suspend(ctx);
954
955 #if 1
956 /* emit cs */
957 drmib.num_chunks = 2;
958 drmib.chunks = (uint64_t)(uintptr_t)chunk_array;
959 chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
960 chunks[0].length_dw = ctx->pm4_cdwords;
961 chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4;
962 chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
963 chunks[1].length_dw = ctx->creloc * sizeof(struct r600_reloc) / 4;
964 chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc;
965 chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0];
966 chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1];
967 r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib,
968 sizeof(struct drm_radeon_cs));
969 #endif
970 /* restart */
971 for (int i = 0; i < ctx->creloc; i++) {
972 radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL);
973 }
974 ctx->creloc = 0;
975 ctx->pm4_dirty_cdwords = 0;
976 ctx->pm4_cdwords = 0;
977
978 /* resume queries */
979 r600_context_queries_resume(ctx);
980
981 /* set all valid group as dirty so they get reemited on
982 * next draw command
983 */
984 for (int i = 0; i < ctx->ngroups; i++) {
985 for (int j = 0; j < ctx->groups[i].nblocks; j++) {
986 /* mark enabled block as dirty */
987 block = &ctx->groups[i].blocks[j];
988 if (block->status & R600_BLOCK_STATUS_ENABLED) {
989 ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
990 block->status |= R600_BLOCK_STATUS_DIRTY;
991 }
992 }
993 }
994 }
995
/* Serialize the current command stream into a bof (binary object format)
 * file for offline debug/replay: device id, relocation table, pm4 stream,
 * and size/handle/contents of every bo in the relocation table.
 * On any allocation or insert failure, control jumps to out_err which
 * decrefs whatever was created; members already consumed are NULLed first
 * (this relies on bof_decref(NULL) being a no-op -- verify in bof.c).
 */
void r600_context_dump_bof(struct r600_context *ctx, const char *file)
{
	bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root;
	unsigned i;

	root = device_id = bcs = blob = array = bo = size = handle = NULL;
	root = bof_object();
	if (root == NULL)
		goto out_err;
	device_id = bof_int32(ctx->radeon->device);
	if (device_id == NULL)
		goto out_err;
	if (bof_object_set(root, "device_id", device_id))
		goto out_err;
	bof_decref(device_id);
	device_id = NULL;
	/* dump relocs (16 bytes per relocation entry) */
	blob = bof_blob(ctx->creloc * 16, ctx->reloc);
	if (blob == NULL)
		goto out_err;
	if (bof_object_set(root, "reloc", blob))
		goto out_err;
	bof_decref(blob);
	blob = NULL;
	/* dump cs */
	blob = bof_blob(ctx->pm4_cdwords * 4, ctx->pm4);
	if (blob == NULL)
		goto out_err;
	if (bof_object_set(root, "pm4", blob))
		goto out_err;
	bof_decref(blob);
	blob = NULL;
	/* dump bo */
	array = bof_array();
	if (array == NULL)
		goto out_err;
	for (i = 0; i < ctx->creloc; i++) {
		struct radeon_bo *rbo = ctx->bo[i];
		bo = bof_object();
		if (bo == NULL)
			goto out_err;
		size = bof_int32(rbo->size);
		if (size == NULL)
			goto out_err;
		if (bof_object_set(bo, "size", size))
			goto out_err;
		bof_decref(size);
		size = NULL;
		handle = bof_int32(rbo->handle);
		if (handle == NULL)
			goto out_err;
		if (bof_object_set(bo, "handle", handle))
			goto out_err;
		bof_decref(handle);
		handle = NULL;
		/* map just long enough to snapshot the bo contents */
		radeon_bo_map(ctx->radeon, rbo);
		blob = bof_blob(rbo->size, rbo->data);
		radeon_bo_unmap(ctx->radeon, rbo);
		if (blob == NULL)
			goto out_err;
		if (bof_object_set(bo, "data", blob))
			goto out_err;
		bof_decref(blob);
		blob = NULL;
		if (bof_array_append(array, bo))
			goto out_err;
		bof_decref(bo);
		bo = NULL;
	}
	if (bof_object_set(root, "bo", array))
		goto out_err;
	bof_dump_file(root, file);
out_err:
	/* shared cleanup for both success and failure paths */
	bof_decref(blob);
	bof_decref(array);
	bof_decref(bo);
	bof_decref(size);
	bof_decref(handle);
	bof_decref(device_id);
	bof_decref(root);
}
1077
1078 static void r600_query_result(struct r600_context *ctx, struct r600_query *query)
1079 {
1080 u64 start, end;
1081 u32 *results;
1082 int i;
1083
1084 results = radeon_ws_bo_map(ctx->radeon, query->buffer, 0, NULL);
1085 for (i = 0; i < query->num_results; i += 4) {
1086 start = (u64)results[i] | (u64)results[i + 1] << 32;
1087 end = (u64)results[i + 2] | (u64)results[i + 3] << 32;
1088 if ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) {
1089 query->result += end - start;
1090 }
1091 }
1092 radeon_ws_bo_unmap(ctx->radeon, query->buffer);
1093 query->num_results = 0;
1094 }
1095
1096 void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
1097 {
1098 /* query request needs 6 dwords for begin + 6 dwords for end */
1099 if ((12 + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
1100 /* need to flush */
1101 r600_context_flush(ctx);
1102 }
1103
1104 /* if query buffer is full force a flush */
1105 if (query->num_results >= ((query->buffer_size >> 2) - 2)) {
1106 r600_context_flush(ctx);
1107 r600_query_result(ctx, query);
1108 }
1109
1110 /* emit begin query */
1111 ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2);
1112 ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_ZPASS_DONE;
1113 ctx->pm4[ctx->pm4_cdwords++] = query->num_results;
1114 ctx->pm4[ctx->pm4_cdwords++] = 0;
1115 ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
1116 ctx->pm4[ctx->pm4_cdwords++] = 0;
1117 r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], radeon_bo_pb_get_bo(query->buffer->pb));
1118
1119 query->state |= R600_QUERY_STATE_STARTED;
1120 query->state ^= R600_QUERY_STATE_ENDED;
1121 }
1122
1123 void r600_query_end(struct r600_context *ctx, struct r600_query *query)
1124 {
1125 /* emit begin query */
1126 ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2);
1127 ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_ZPASS_DONE;
1128 ctx->pm4[ctx->pm4_cdwords++] = query->num_results + 8;
1129 ctx->pm4[ctx->pm4_cdwords++] = 0;
1130 ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
1131 ctx->pm4[ctx->pm4_cdwords++] = 0;
1132 r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], radeon_bo_pb_get_bo(query->buffer->pb));
1133
1134 query->num_results += 16;
1135 query->state ^= R600_QUERY_STATE_STARTED;
1136 query->state |= R600_QUERY_STATE_ENDED;
1137 }
1138
1139 struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type)
1140 {
1141 struct r600_query *query;
1142
1143 if (query_type != PIPE_QUERY_OCCLUSION_COUNTER)
1144 return NULL;
1145
1146 query = calloc(1, sizeof(struct r600_query));
1147 if (query == NULL)
1148 return NULL;
1149
1150 query->type = query_type;
1151 query->buffer_size = 4096;
1152
1153 query->buffer = radeon_ws_bo(ctx->radeon, query->buffer_size, 1, 0);
1154 if (!query->buffer) {
1155 free(query);
1156 return NULL;
1157 }
1158
1159 LIST_ADDTAIL(&query->list, &ctx->query_list);
1160
1161 return query;
1162 }
1163
1164 void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query)
1165 {
1166 radeon_ws_bo_reference(ctx->radeon, &query->buffer, NULL);
1167 LIST_DEL(&query->list);
1168 free(query);
1169 }
1170
1171 boolean r600_context_query_result(struct r600_context *ctx,
1172 struct r600_query *query,
1173 boolean wait, void *vresult)
1174 {
1175 uint64_t *result = (uint64_t*)vresult;
1176
1177 if (query->num_results) {
1178 r600_context_flush(ctx);
1179 }
1180 r600_query_result(ctx, query);
1181 *result = query->result;
1182 query->result = 0;
1183 return TRUE;
1184 }
1185
1186 static void r600_context_queries_suspend(struct r600_context *ctx)
1187 {
1188 struct r600_query *query;
1189
1190 LIST_FOR_EACH_ENTRY(query, &ctx->query_list, list) {
1191 if (query->state & R600_QUERY_STATE_STARTED) {
1192 r600_query_end(ctx, query);
1193 query->state |= R600_QUERY_STATE_SUSPENDED;
1194 }
1195 }
1196 }
1197
1198 static void r600_context_queries_resume(struct r600_context *ctx)
1199 {
1200 struct r600_query *query;
1201
1202 LIST_FOR_EACH_ENTRY(query, &ctx->query_list, list) {
1203 if (query->state & R600_QUERY_STATE_SUSPENDED) {
1204 r600_query_begin(ctx, query);
1205 query->state ^= R600_QUERY_STATE_SUSPENDED;
1206 }
1207 }
1208 }