c29616c9cbc03d5b8ec8373ed0bc330448409d82
[mesa.git] / src / gallium / winsys / r600 / drm / r600_state2.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Jerome Glisse
25 */
26 #include <errno.h>
27 #include <stdint.h>
28 #include <string.h>
29 #include <stdlib.h>
30 #include <assert.h>
31 #include "xf86drm.h"
32 #include "r600.h"
33 #include "r600d.h"
34 #include "r600_priv.h"
35 #include "radeon_drm.h"
36 #include "bof.h"
37 #include "pipe/p_compiler.h"
38 #include "util/u_inlines.h"
39 #include <pipebuffer/pb_bufmgr.h>
40
41 #define GROUP_FORCE_NEW_BLOCK 0
42 struct radeon_ws_bo {
43 struct pipe_reference reference;
44 struct pb_buffer *pb;
45 };
46
47 struct radeon_bo {
48 struct pipe_reference reference;
49 unsigned handle;
50 unsigned size;
51 unsigned alignment;
52 unsigned map_count;
53 void *data;
54 };
55 struct radeon_bo *radeon_bo_pb_get_bo(struct pb_buffer *_buf);
56 int radeon_bo_map(struct radeon *radeon, struct radeon_bo *bo);
57 void radeon_bo_unmap(struct radeon *radeon, struct radeon_bo *bo);
58 void radeon_bo_reference(struct radeon *radeon,
59 struct radeon_bo **dst,
60 struct radeon_bo *src);
61
62 unsigned radeon_ws_bo_get_handle(struct radeon_ws_bo *pb_bo);
63
64 /* queries */
65 static void r600_context_queries_suspend(struct r600_context *ctx);
66 static void r600_context_queries_resume(struct r600_context *ctx);
67
68 static int r600_group_id_register_offset(struct r600_context *ctx, unsigned offset)
69 {
70 for (int i = 0; i < ctx->ngroups; i++) {
71 if (offset >= ctx->groups[i].start_offset && offset <= ctx->groups[i].end_offset) {
72 return i;
73 }
74 }
75 return -1;
76 }
77
78 int r600_context_add_block(struct r600_context *ctx, const struct r600_reg *reg, unsigned nreg)
79 {
80 struct r600_group_block *block, *tmp;
81 struct r600_group *group;
82 int group_id, id;
83
84 for (unsigned i = 0, n = 0; i < nreg; i += n) {
85 u32 j, r;
86
87 /* register that need relocation are in their own group */
88 n = 1;
89 if (!reg[i].need_bo) {
90 /* find number of consecutive registers */
91 for (j = i + 1, r = reg[i].offset + 4, n = 1; j < (nreg - i); j++, n++, r+=4) {
92 if (reg[i].need_bo || r != reg[j].offset) {
93 break;
94 }
95 }
96 }
97
98 /* ignore new block balise */
99 if (reg[i].offset == GROUP_FORCE_NEW_BLOCK)
100 continue;
101
102 /* find into which group this block is */
103 group_id = r600_group_id_register_offset(ctx, reg[i].offset);
104 assert(group_id >= 0);
105 group = &ctx->groups[group_id];
106
107 /* allocate new block */
108 tmp = realloc(group->blocks, (group->nblocks + 1) * sizeof(struct r600_group_block));
109 if (tmp == NULL) {
110 return -ENOMEM;
111 }
112 group->blocks = tmp;
113 block = &group->blocks[group->nblocks++];
114 for (int j = 0; j < n; j++) {
115 group->offset_block_id[((reg[i].offset - group->start_offset) >> 2) + j] = group->nblocks - 1;
116 }
117
118 /* initialize block */
119 memset(block, 0, sizeof(struct r600_group_block));
120 block->start_offset = reg[i].offset;
121 block->pm4_ndwords = n;
122 block->nreg = n;
123 for (j = 0; j < n; j++) {
124 if (reg[i+j].need_bo) {
125 block->nbo++;
126 assert(block->nbo < R600_BLOCK_MAX_BO);
127 block->pm4_bo_index[j] = block->nbo;
128 block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0);
129 block->pm4[block->pm4_ndwords++] = 0x00000000;
130 block->reloc[block->nbo].bo_pm4_index[block->reloc[block->nbo].nreloc++] = block->pm4_ndwords - 1;
131 }
132 }
133 for (j = 0; j < n; j++) {
134 if (reg[i+j].flush_flags) {
135 block->pm4[block->pm4_ndwords++] = PKT3(PKT3_SURFACE_SYNC, 3);
136 block->pm4[block->pm4_ndwords++] = reg[i+j].flush_flags;
137 block->pm4[block->pm4_ndwords++] = 0xFFFFFFFF;
138 block->pm4[block->pm4_ndwords++] = 0x00000000;
139 block->pm4[block->pm4_ndwords++] = 0x0000000A;
140 block->pm4[block->pm4_ndwords++] = PKT3(PKT3_NOP, 0);
141 block->pm4[block->pm4_ndwords++] = 0x00000000;
142 id = block->pm4_bo_index[j];
143 block->reloc[id].bo_pm4_index[block->reloc[id].nreloc++] = block->pm4_ndwords - 1;
144 }
145 }
146 /* check that we stay in limit */
147 assert(block->pm4_ndwords < R600_BLOCK_MAX_REG);
148 }
149 return 0;
150 }
151
152 int r600_group_init(struct r600_group *group, unsigned start_offset, unsigned end_offset)
153 {
154 group->start_offset = start_offset;
155 group->end_offset = end_offset;
156 group->nblocks = 0;
157 group->blocks = NULL;
158 group->offset_block_id = calloc((end_offset - start_offset) >> 2, sizeof(unsigned));
159 if (group->offset_block_id == NULL)
160 return -ENOMEM;
161 return 0;
162 }
163
164 static void r600_group_fini(struct r600_group *group)
165 {
166 free(group->offset_block_id);
167 free(group->blocks);
168 }
169
170 /* R600/R700 configuration */
171 static const struct r600_reg r600_reg_list[] = {
172 {0, 0, R_008C00_SQ_CONFIG},
173 {0, 0, R_008C04_SQ_GPR_RESOURCE_MGMT_1},
174 {0, 0, R_008C08_SQ_GPR_RESOURCE_MGMT_2},
175 {0, 0, R_008C0C_SQ_THREAD_RESOURCE_MGMT},
176 {0, 0, R_008C10_SQ_STACK_RESOURCE_MGMT_1},
177 {0, 0, R_008C14_SQ_STACK_RESOURCE_MGMT_2},
178 {0, 0, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ},
179 {0, 0, R_009508_TA_CNTL_AUX},
180 {0, 0, R_009714_VC_ENHANCE},
181 {0, 0, R_009830_DB_DEBUG},
182 {0, 0, R_009838_DB_WATERMARKS},
183 {0, 0, R_028350_SX_MISC},
184 {0, 0, R_0286C8_SPI_THREAD_GROUPING},
185 {0, 0, R_0288A8_SQ_ESGS_RING_ITEMSIZE},
186 {0, 0, R_0288AC_SQ_GSVS_RING_ITEMSIZE},
187 {0, 0, R_0288B0_SQ_ESTMP_RING_ITEMSIZE},
188 {0, 0, R_0288B4_SQ_GSTMP_RING_ITEMSIZE},
189 {0, 0, R_0288B8_SQ_VSTMP_RING_ITEMSIZE},
190 {0, 0, R_0288BC_SQ_PSTMP_RING_ITEMSIZE},
191 {0, 0, R_0288C0_SQ_FBUF_RING_ITEMSIZE},
192 {0, 0, R_0288C4_SQ_REDUC_RING_ITEMSIZE},
193 {0, 0, R_0288C8_SQ_GS_VERT_ITEMSIZE},
194 {0, 0, R_028A10_VGT_OUTPUT_PATH_CNTL},
195 {0, 0, R_028A14_VGT_HOS_CNTL},
196 {0, 0, R_028A18_VGT_HOS_MAX_TESS_LEVEL},
197 {0, 0, R_028A1C_VGT_HOS_MIN_TESS_LEVEL},
198 {0, 0, R_028A20_VGT_HOS_REUSE_DEPTH},
199 {0, 0, R_028A24_VGT_GROUP_PRIM_TYPE},
200 {0, 0, R_028A28_VGT_GROUP_FIRST_DECR},
201 {0, 0, R_028A2C_VGT_GROUP_DECR},
202 {0, 0, R_028A30_VGT_GROUP_VECT_0_CNTL},
203 {0, 0, R_028A34_VGT_GROUP_VECT_1_CNTL},
204 {0, 0, R_028A38_VGT_GROUP_VECT_0_FMT_CNTL},
205 {0, 0, R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL},
206 {0, 0, R_028A40_VGT_GS_MODE},
207 {0, 0, R_028A4C_PA_SC_MODE_CNTL},
208 {0, 0, R_028AB0_VGT_STRMOUT_EN},
209 {0, 0, R_028AB4_VGT_REUSE_OFF},
210 {0, 0, R_028AB8_VGT_VTX_CNT_EN},
211 {0, 0, R_028B20_VGT_STRMOUT_BUFFER_EN},
212 {0, 0, R_028028_DB_STENCIL_CLEAR},
213 {0, 0, R_02802C_DB_DEPTH_CLEAR},
214 {1, 0, R_028040_CB_COLOR0_BASE},
215 {0, 0, R_0280A0_CB_COLOR0_INFO},
216 {0, 0, R_028060_CB_COLOR0_SIZE},
217 {0, 0, R_028080_CB_COLOR0_VIEW},
218 {1, 0, R_0280E0_CB_COLOR0_FRAG},
219 {1, 0, R_0280C0_CB_COLOR0_TILE},
220 {0, 0, R_028100_CB_COLOR0_MASK},
221 {1, 0, R_028044_CB_COLOR1_BASE},
222 {0, 0, R_0280A4_CB_COLOR1_INFO},
223 {0, 0, R_028064_CB_COLOR1_SIZE},
224 {0, 0, R_028084_CB_COLOR1_VIEW},
225 {1, 0, R_0280E4_CB_COLOR1_FRAG},
226 {1, 0, R_0280C4_CB_COLOR1_TILE},
227 {0, 0, R_028104_CB_COLOR1_MASK},
228 {1, 0, R_028048_CB_COLOR2_BASE},
229 {0, 0, R_0280A8_CB_COLOR2_INFO},
230 {0, 0, R_028068_CB_COLOR2_SIZE},
231 {0, 0, R_028088_CB_COLOR2_VIEW},
232 {1, 0, R_0280E8_CB_COLOR2_FRAG},
233 {1, 0, R_0280C8_CB_COLOR2_TILE},
234 {0, 0, R_028108_CB_COLOR2_MASK},
235 {1, 0, R_02804C_CB_COLOR3_BASE},
236 {0, 0, R_0280AC_CB_COLOR3_INFO},
237 {0, 0, R_02806C_CB_COLOR3_SIZE},
238 {0, 0, R_02808C_CB_COLOR3_VIEW},
239 {1, 0, R_0280EC_CB_COLOR3_FRAG},
240 {1, 0, R_0280CC_CB_COLOR3_TILE},
241 {0, 0, R_02810C_CB_COLOR3_MASK},
242 {1, 0, R_028050_CB_COLOR4_BASE},
243 {0, 0, R_0280B0_CB_COLOR4_INFO},
244 {0, 0, R_028070_CB_COLOR4_SIZE},
245 {0, 0, R_028090_CB_COLOR4_VIEW},
246 {1, 0, R_0280F0_CB_COLOR4_FRAG},
247 {1, 0, R_0280D0_CB_COLOR4_TILE},
248 {0, 0, R_028110_CB_COLOR4_MASK},
249 {1, 0, R_028054_CB_COLOR5_BASE},
250 {0, 0, R_0280B4_CB_COLOR5_INFO},
251 {0, 0, R_028074_CB_COLOR5_SIZE},
252 {0, 0, R_028094_CB_COLOR5_VIEW},
253 {1, 0, R_0280F4_CB_COLOR5_FRAG},
254 {1, 0, R_0280D4_CB_COLOR5_TILE},
255 {0, 0, R_028114_CB_COLOR5_MASK},
256 {1, 0, R_028058_CB_COLOR6_BASE},
257 {0, 0, R_0280B8_CB_COLOR6_INFO},
258 {0, 0, R_028078_CB_COLOR6_SIZE},
259 {0, 0, R_028098_CB_COLOR6_VIEW},
260 {1, 0, R_0280F8_CB_COLOR6_FRAG},
261 {1, 0, R_0280D8_CB_COLOR6_TILE},
262 {0, 0, R_028118_CB_COLOR6_MASK},
263 {1, 0, R_02805C_CB_COLOR7_BASE},
264 {0, 0, R_0280BC_CB_COLOR7_INFO},
265 {0, 0, R_02807C_CB_COLOR7_SIZE},
266 {0, 0, R_02809C_CB_COLOR7_VIEW},
267 {1, 0, R_0280FC_CB_COLOR7_FRAG},
268 {1, 0, R_0280DC_CB_COLOR7_TILE},
269 {0, 0, R_02811C_CB_COLOR7_MASK},
270 {0, 0, R_028120_CB_CLEAR_RED},
271 {0, 0, R_028124_CB_CLEAR_GREEN},
272 {0, 0, R_028128_CB_CLEAR_BLUE},
273 {0, 0, R_02812C_CB_CLEAR_ALPHA},
274 {0, 0, R_02823C_CB_SHADER_MASK},
275 {0, 0, R_028238_CB_TARGET_MASK},
276 {0, 0, R_028410_SX_ALPHA_TEST_CONTROL},
277 {0, 0, R_028414_CB_BLEND_RED},
278 {0, 0, R_028418_CB_BLEND_GREEN},
279 {0, 0, R_02841C_CB_BLEND_BLUE},
280 {0, 0, R_028420_CB_BLEND_ALPHA},
281 {0, 0, R_028424_CB_FOG_RED},
282 {0, 0, R_028428_CB_FOG_GREEN},
283 {0, 0, R_02842C_CB_FOG_BLUE},
284 {0, 0, R_028430_DB_STENCILREFMASK},
285 {0, 0, R_028434_DB_STENCILREFMASK_BF},
286 {0, 0, R_028438_SX_ALPHA_REF},
287 {0, 0, R_0286DC_SPI_FOG_CNTL},
288 {0, 0, R_0286E0_SPI_FOG_FUNC_SCALE},
289 {0, 0, R_0286E4_SPI_FOG_FUNC_BIAS},
290 {0, 0, R_028780_CB_BLEND0_CONTROL},
291 {0, 0, R_028784_CB_BLEND1_CONTROL},
292 {0, 0, R_028788_CB_BLEND2_CONTROL},
293 {0, 0, R_02878C_CB_BLEND3_CONTROL},
294 {0, 0, R_028790_CB_BLEND4_CONTROL},
295 {0, 0, R_028794_CB_BLEND5_CONTROL},
296 {0, 0, R_028798_CB_BLEND6_CONTROL},
297 {0, 0, R_02879C_CB_BLEND7_CONTROL},
298 {0, 0, R_0287A0_CB_SHADER_CONTROL},
299 {0, 0, R_028800_DB_DEPTH_CONTROL},
300 {0, 0, R_028804_CB_BLEND_CONTROL},
301 {0, 0, R_028808_CB_COLOR_CONTROL},
302 {0, 0, R_02880C_DB_SHADER_CONTROL},
303 {0, 0, R_028C04_PA_SC_AA_CONFIG},
304 {0, 0, R_028C1C_PA_SC_AA_SAMPLE_LOCS_MCTX},
305 {0, 0, R_028C20_PA_SC_AA_SAMPLE_LOCS_8S_WD1_MCTX},
306 {0, 0, R_028C30_CB_CLRCMP_CONTROL},
307 {0, 0, R_028C34_CB_CLRCMP_SRC},
308 {0, 0, R_028C38_CB_CLRCMP_DST},
309 {0, 0, R_028C3C_CB_CLRCMP_MSK},
310 {0, 0, R_028C48_PA_SC_AA_MASK},
311 {0, 0, R_028D2C_DB_SRESULTS_COMPARE_STATE1},
312 {0, 0, R_028D44_DB_ALPHA_TO_MASK},
313 {1, 0, R_02800C_DB_DEPTH_BASE},
314 {0, 0, R_028000_DB_DEPTH_SIZE},
315 {0, 0, R_028004_DB_DEPTH_VIEW},
316 {0, 0, R_028010_DB_DEPTH_INFO},
317 {0, 0, R_028D0C_DB_RENDER_CONTROL},
318 {0, 0, R_028D10_DB_RENDER_OVERRIDE},
319 {0, 0, R_028D24_DB_HTILE_SURFACE},
320 {0, 0, R_028D30_DB_PRELOAD_CONTROL},
321 {0, 0, R_028D34_DB_PREFETCH_LIMIT},
322 {0, 0, R_028030_PA_SC_SCREEN_SCISSOR_TL},
323 {0, 0, R_028034_PA_SC_SCREEN_SCISSOR_BR},
324 {0, 0, R_028200_PA_SC_WINDOW_OFFSET},
325 {0, 0, R_028204_PA_SC_WINDOW_SCISSOR_TL},
326 {0, 0, R_028208_PA_SC_WINDOW_SCISSOR_BR},
327 {0, 0, R_02820C_PA_SC_CLIPRECT_RULE},
328 {0, 0, R_028210_PA_SC_CLIPRECT_0_TL},
329 {0, 0, R_028214_PA_SC_CLIPRECT_0_BR},
330 {0, 0, R_028218_PA_SC_CLIPRECT_1_TL},
331 {0, 0, R_02821C_PA_SC_CLIPRECT_1_BR},
332 {0, 0, R_028220_PA_SC_CLIPRECT_2_TL},
333 {0, 0, R_028224_PA_SC_CLIPRECT_2_BR},
334 {0, 0, R_028228_PA_SC_CLIPRECT_3_TL},
335 {0, 0, R_02822C_PA_SC_CLIPRECT_3_BR},
336 {0, 0, R_028230_PA_SC_EDGERULE},
337 {0, 0, R_028240_PA_SC_GENERIC_SCISSOR_TL},
338 {0, 0, R_028244_PA_SC_GENERIC_SCISSOR_BR},
339 {0, 0, R_028250_PA_SC_VPORT_SCISSOR_0_TL},
340 {0, 0, R_028254_PA_SC_VPORT_SCISSOR_0_BR},
341 {0, 0, R_0282D0_PA_SC_VPORT_ZMIN_0},
342 {0, 0, R_0282D4_PA_SC_VPORT_ZMAX_0},
343 {0, 0, R_02843C_PA_CL_VPORT_XSCALE_0},
344 {0, 0, R_028440_PA_CL_VPORT_XOFFSET_0},
345 {0, 0, R_028444_PA_CL_VPORT_YSCALE_0},
346 {0, 0, R_028448_PA_CL_VPORT_YOFFSET_0},
347 {0, 0, R_02844C_PA_CL_VPORT_ZSCALE_0},
348 {0, 0, R_028450_PA_CL_VPORT_ZOFFSET_0},
349 {0, 0, R_0286D4_SPI_INTERP_CONTROL_0},
350 {0, 0, R_028810_PA_CL_CLIP_CNTL},
351 {0, 0, R_028814_PA_SU_SC_MODE_CNTL},
352 {0, 0, R_028818_PA_CL_VTE_CNTL},
353 {0, 0, R_02881C_PA_CL_VS_OUT_CNTL},
354 {0, 0, R_028820_PA_CL_NANINF_CNTL},
355 {0, 0, R_028A00_PA_SU_POINT_SIZE},
356 {0, 0, R_028A04_PA_SU_POINT_MINMAX},
357 {0, 0, R_028A08_PA_SU_LINE_CNTL},
358 {0, 0, R_028A0C_PA_SC_LINE_STIPPLE},
359 {0, 0, R_028A48_PA_SC_MPASS_PS_CNTL},
360 {0, 0, R_028C00_PA_SC_LINE_CNTL},
361 {0, 0, R_028C0C_PA_CL_GB_VERT_CLIP_ADJ},
362 {0, 0, R_028C10_PA_CL_GB_VERT_DISC_ADJ},
363 {0, 0, R_028C14_PA_CL_GB_HORZ_CLIP_ADJ},
364 {0, 0, R_028C18_PA_CL_GB_HORZ_DISC_ADJ},
365 {0, 0, R_028DF8_PA_SU_POLY_OFFSET_DB_FMT_CNTL},
366 {0, 0, R_028DFC_PA_SU_POLY_OFFSET_CLAMP},
367 {0, 0, R_028E00_PA_SU_POLY_OFFSET_FRONT_SCALE},
368 {0, 0, R_028E04_PA_SU_POLY_OFFSET_FRONT_OFFSET},
369 {0, 0, R_028E08_PA_SU_POLY_OFFSET_BACK_SCALE},
370 {0, 0, R_028E0C_PA_SU_POLY_OFFSET_BACK_OFFSET},
371 {0, 0, R_028E20_PA_CL_UCP0_X},
372 {0, 0, R_028E24_PA_CL_UCP0_Y},
373 {0, 0, R_028E28_PA_CL_UCP0_Z},
374 {0, 0, R_028E2C_PA_CL_UCP0_W},
375 {0, 0, R_028E30_PA_CL_UCP1_X},
376 {0, 0, R_028E34_PA_CL_UCP1_Y},
377 {0, 0, R_028E38_PA_CL_UCP1_Z},
378 {0, 0, R_028E3C_PA_CL_UCP1_W},
379 {0, 0, R_028E40_PA_CL_UCP2_X},
380 {0, 0, R_028E44_PA_CL_UCP2_Y},
381 {0, 0, R_028E48_PA_CL_UCP2_Z},
382 {0, 0, R_028E4C_PA_CL_UCP2_W},
383 {0, 0, R_028E50_PA_CL_UCP3_X},
384 {0, 0, R_028E54_PA_CL_UCP3_Y},
385 {0, 0, R_028E58_PA_CL_UCP3_Z},
386 {0, 0, R_028E5C_PA_CL_UCP3_W},
387 {0, 0, R_028E60_PA_CL_UCP4_X},
388 {0, 0, R_028E64_PA_CL_UCP4_Y},
389 {0, 0, R_028E68_PA_CL_UCP4_Z},
390 {0, 0, R_028E6C_PA_CL_UCP4_W},
391 {0, 0, R_028E70_PA_CL_UCP5_X},
392 {0, 0, R_028E74_PA_CL_UCP5_Y},
393 {0, 0, R_028E78_PA_CL_UCP5_Z},
394 {0, 0, R_028E7C_PA_CL_UCP5_W},
395 {0, 0, R_028380_SQ_VTX_SEMANTIC_0},
396 {0, 0, R_028384_SQ_VTX_SEMANTIC_1},
397 {0, 0, R_028388_SQ_VTX_SEMANTIC_2},
398 {0, 0, R_02838C_SQ_VTX_SEMANTIC_3},
399 {0, 0, R_028390_SQ_VTX_SEMANTIC_4},
400 {0, 0, R_028394_SQ_VTX_SEMANTIC_5},
401 {0, 0, R_028398_SQ_VTX_SEMANTIC_6},
402 {0, 0, R_02839C_SQ_VTX_SEMANTIC_7},
403 {0, 0, R_0283A0_SQ_VTX_SEMANTIC_8},
404 {0, 0, R_0283A4_SQ_VTX_SEMANTIC_9},
405 {0, 0, R_0283A8_SQ_VTX_SEMANTIC_10},
406 {0, 0, R_0283AC_SQ_VTX_SEMANTIC_11},
407 {0, 0, R_0283B0_SQ_VTX_SEMANTIC_12},
408 {0, 0, R_0283B4_SQ_VTX_SEMANTIC_13},
409 {0, 0, R_0283B8_SQ_VTX_SEMANTIC_14},
410 {0, 0, R_0283BC_SQ_VTX_SEMANTIC_15},
411 {0, 0, R_0283C0_SQ_VTX_SEMANTIC_16},
412 {0, 0, R_0283C4_SQ_VTX_SEMANTIC_17},
413 {0, 0, R_0283C8_SQ_VTX_SEMANTIC_18},
414 {0, 0, R_0283CC_SQ_VTX_SEMANTIC_19},
415 {0, 0, R_0283D0_SQ_VTX_SEMANTIC_20},
416 {0, 0, R_0283D4_SQ_VTX_SEMANTIC_21},
417 {0, 0, R_0283D8_SQ_VTX_SEMANTIC_22},
418 {0, 0, R_0283DC_SQ_VTX_SEMANTIC_23},
419 {0, 0, R_0283E0_SQ_VTX_SEMANTIC_24},
420 {0, 0, R_0283E4_SQ_VTX_SEMANTIC_25},
421 {0, 0, R_0283E8_SQ_VTX_SEMANTIC_26},
422 {0, 0, R_0283EC_SQ_VTX_SEMANTIC_27},
423 {0, 0, R_0283F0_SQ_VTX_SEMANTIC_28},
424 {0, 0, R_0283F4_SQ_VTX_SEMANTIC_29},
425 {0, 0, R_0283F8_SQ_VTX_SEMANTIC_30},
426 {0, 0, R_0283FC_SQ_VTX_SEMANTIC_31},
427 {0, 0, R_028614_SPI_VS_OUT_ID_0},
428 {0, 0, R_028618_SPI_VS_OUT_ID_1},
429 {0, 0, R_02861C_SPI_VS_OUT_ID_2},
430 {0, 0, R_028620_SPI_VS_OUT_ID_3},
431 {0, 0, R_028624_SPI_VS_OUT_ID_4},
432 {0, 0, R_028628_SPI_VS_OUT_ID_5},
433 {0, 0, R_02862C_SPI_VS_OUT_ID_6},
434 {0, 0, R_028630_SPI_VS_OUT_ID_7},
435 {0, 0, R_028634_SPI_VS_OUT_ID_8},
436 {0, 0, R_028638_SPI_VS_OUT_ID_9},
437 {0, 0, R_0286C4_SPI_VS_OUT_CONFIG},
438 {1, 0, R_028858_SQ_PGM_START_VS},
439 {0, S_0085F0_SH_ACTION_ENA(1), R_028868_SQ_PGM_RESOURCES_VS},
440 {1, 0, R_028894_SQ_PGM_START_FS},
441 {0, S_0085F0_SH_ACTION_ENA(1), R_0288A4_SQ_PGM_RESOURCES_FS},
442 {0, 0, R_0288D0_SQ_PGM_CF_OFFSET_VS},
443 {0, 0, R_0288DC_SQ_PGM_CF_OFFSET_FS},
444 {0, 0, R_028644_SPI_PS_INPUT_CNTL_0},
445 {0, 0, R_028648_SPI_PS_INPUT_CNTL_1},
446 {0, 0, R_02864C_SPI_PS_INPUT_CNTL_2},
447 {0, 0, R_028650_SPI_PS_INPUT_CNTL_3},
448 {0, 0, R_028654_SPI_PS_INPUT_CNTL_4},
449 {0, 0, R_028658_SPI_PS_INPUT_CNTL_5},
450 {0, 0, R_02865C_SPI_PS_INPUT_CNTL_6},
451 {0, 0, R_028660_SPI_PS_INPUT_CNTL_7},
452 {0, 0, R_028664_SPI_PS_INPUT_CNTL_8},
453 {0, 0, R_028668_SPI_PS_INPUT_CNTL_9},
454 {0, 0, R_02866C_SPI_PS_INPUT_CNTL_10},
455 {0, 0, R_028670_SPI_PS_INPUT_CNTL_11},
456 {0, 0, R_028674_SPI_PS_INPUT_CNTL_12},
457 {0, 0, R_028678_SPI_PS_INPUT_CNTL_13},
458 {0, 0, R_02867C_SPI_PS_INPUT_CNTL_14},
459 {0, 0, R_028680_SPI_PS_INPUT_CNTL_15},
460 {0, 0, R_028684_SPI_PS_INPUT_CNTL_16},
461 {0, 0, R_028688_SPI_PS_INPUT_CNTL_17},
462 {0, 0, R_02868C_SPI_PS_INPUT_CNTL_18},
463 {0, 0, R_028690_SPI_PS_INPUT_CNTL_19},
464 {0, 0, R_028694_SPI_PS_INPUT_CNTL_20},
465 {0, 0, R_028698_SPI_PS_INPUT_CNTL_21},
466 {0, 0, R_02869C_SPI_PS_INPUT_CNTL_22},
467 {0, 0, R_0286A0_SPI_PS_INPUT_CNTL_23},
468 {0, 0, R_0286A4_SPI_PS_INPUT_CNTL_24},
469 {0, 0, R_0286A8_SPI_PS_INPUT_CNTL_25},
470 {0, 0, R_0286AC_SPI_PS_INPUT_CNTL_26},
471 {0, 0, R_0286B0_SPI_PS_INPUT_CNTL_27},
472 {0, 0, R_0286B4_SPI_PS_INPUT_CNTL_28},
473 {0, 0, R_0286B8_SPI_PS_INPUT_CNTL_29},
474 {0, 0, R_0286BC_SPI_PS_INPUT_CNTL_30},
475 {0, 0, R_0286C0_SPI_PS_INPUT_CNTL_31},
476 {0, 0, R_0286CC_SPI_PS_IN_CONTROL_0},
477 {0, 0, R_0286D0_SPI_PS_IN_CONTROL_1},
478 {0, 0, R_0286D8_SPI_INPUT_Z},
479 {1, S_0085F0_SH_ACTION_ENA(1), R_028840_SQ_PGM_START_PS},
480 {0, 0, R_028850_SQ_PGM_RESOURCES_PS},
481 {0, 0, R_028854_SQ_PGM_EXPORTS_PS},
482 {0, 0, R_0288CC_SQ_PGM_CF_OFFSET_PS},
483 {0, 0, R_008958_VGT_PRIMITIVE_TYPE},
484 {0, 0, R_028400_VGT_MAX_VTX_INDX},
485 {0, 0, R_028404_VGT_MIN_VTX_INDX},
486 {0, 0, R_028408_VGT_INDX_OFFSET},
487 {0, 0, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX},
488 {0, 0, R_028A84_VGT_PRIMITIVEID_EN},
489 {0, 0, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN},
490 {0, 0, R_028AA0_VGT_INSTANCE_STEP_RATE_0},
491 {0, 0, R_028AA4_VGT_INSTANCE_STEP_RATE_1},
492 };
493
494 /* SHADER CONSTANT R600/R700 */
495 static int r600_state_constant_init(struct r600_context *ctx, u32 offset)
496 {
497 struct r600_reg r600_shader_constant[] = {
498 {0, 0, R_030000_SQ_ALU_CONSTANT0_0},
499 {0, 0, R_030004_SQ_ALU_CONSTANT1_0},
500 {0, 0, R_030008_SQ_ALU_CONSTANT2_0},
501 {0, 0, R_03000C_SQ_ALU_CONSTANT3_0},
502 };
503 unsigned nreg = sizeof(r600_shader_constant)/sizeof(struct r600_reg);
504
505 for (int i = 0; i < nreg; i++) {
506 r600_shader_constant[i].offset += offset;
507 }
508 return r600_context_add_block(ctx, r600_shader_constant, nreg);
509 }
510
511 /* SHADER RESOURCE R600/R700 */
512 static int r600_state_resource_init(struct r600_context *ctx, u32 offset)
513 {
514 struct r600_reg r600_shader_resource[] = {
515 {0, 0, R_038000_RESOURCE0_WORD0},
516 {0, 0, R_038004_RESOURCE0_WORD1},
517 {1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), R_038008_RESOURCE0_WORD2},
518 {1, S_0085F0_TC_ACTION_ENA(1) | S_0085F0_VC_ACTION_ENA(1), R_03800C_RESOURCE0_WORD3},
519 {0, 0, R_038010_RESOURCE0_WORD4},
520 {0, 0, R_038014_RESOURCE0_WORD5},
521 {0, 0, R_038018_RESOURCE0_WORD6},
522 };
523 unsigned nreg = sizeof(r600_shader_resource)/sizeof(struct r600_reg);
524
525 for (int i = 0; i < nreg; i++) {
526 r600_shader_resource[i].offset += offset;
527 }
528 return r600_context_add_block(ctx, r600_shader_resource, nreg);
529 }
530
531 /* SHADER SAMPLER R600/R700 */
532 static int r600_state_sampler_init(struct r600_context *ctx, u32 offset)
533 {
534 struct r600_reg r600_shader_sampler[] = {
535 {0, 0, R_03C000_SQ_TEX_SAMPLER_WORD0_0},
536 {0, 0, R_03C004_SQ_TEX_SAMPLER_WORD1_0},
537 {0, 0, R_03C008_SQ_TEX_SAMPLER_WORD2_0},
538 };
539 unsigned nreg = sizeof(r600_shader_sampler)/sizeof(struct r600_reg);
540
541 for (int i = 0; i < nreg; i++) {
542 r600_shader_sampler[i].offset += offset;
543 }
544 return r600_context_add_block(ctx, r600_shader_sampler, nreg);
545 }
546
547 /* SHADER SAMPLER BORDER R600/R700 */
548 static int r600_state_sampler_border_init(struct r600_context *ctx, u32 offset)
549 {
550 struct r600_reg r600_shader_sampler_border[] = {
551 {0, 0, R_00A400_TD_PS_SAMPLER0_BORDER_RED},
552 {0, 0, R_00A404_TD_PS_SAMPLER0_BORDER_GREEN},
553 {0, 0, R_00A408_TD_PS_SAMPLER0_BORDER_BLUE},
554 {0, 0, R_00A40C_TD_PS_SAMPLER0_BORDER_ALPHA},
555 };
556 unsigned nreg = sizeof(r600_shader_sampler_border)/sizeof(struct r600_reg);
557
558 for (int i = 0; i < nreg; i++) {
559 r600_shader_sampler_border[i].offset += offset;
560 }
561 return r600_context_add_block(ctx, r600_shader_sampler_border, nreg);
562 }
563
564 /* initialize */
565 void r600_context_fini(struct r600_context *ctx)
566 {
567 for (int i = 0; i < ctx->ngroups; i++) {
568 r600_group_fini(&ctx->groups[i]);
569 }
570 free(ctx->reloc);
571 free(ctx->pm4);
572 memset(ctx, 0, sizeof(struct r600_context));
573 }
574
575 int r600_context_init(struct r600_context *ctx, struct radeon *radeon)
576 {
577 int r;
578
579 memset(ctx, 0, sizeof(struct r600_context));
580 ctx->radeon = radeon;
581 LIST_INITHEAD(&ctx->query_list);
582 /* initialize groups */
583 r = r600_group_init(&ctx->groups[R600_GROUP_CONFIG], R600_CONFIG_REG_OFFSET, R600_CONFIG_REG_END);
584 if (r) {
585 goto out_err;
586 }
587 r = r600_group_init(&ctx->groups[R600_GROUP_CTL_CONST], R600_CTL_CONST_OFFSET, R600_CTL_CONST_END);
588 if (r) {
589 goto out_err;
590 }
591 r = r600_group_init(&ctx->groups[R600_GROUP_LOOP_CONST], R600_LOOP_CONST_OFFSET, R600_LOOP_CONST_END);
592 if (r) {
593 goto out_err;
594 }
595 r = r600_group_init(&ctx->groups[R600_GROUP_BOOL_CONST], R600_BOOL_CONST_OFFSET, R600_BOOL_CONST_END);
596 if (r) {
597 goto out_err;
598 }
599 r = r600_group_init(&ctx->groups[R600_GROUP_SAMPLER], R600_SAMPLER_OFFSET, R600_SAMPLER_END);
600 if (r) {
601 goto out_err;
602 }
603 r = r600_group_init(&ctx->groups[R600_GROUP_RESOURCE], R600_RESOURCE_OFFSET, R600_RESOURCE_END);
604 if (r) {
605 goto out_err;
606 }
607 r = r600_group_init(&ctx->groups[R600_GROUP_ALU_CONST], R600_ALU_CONST_OFFSET, R600_ALU_CONST_END);
608 if (r) {
609 goto out_err;
610 }
611 r = r600_group_init(&ctx->groups[R600_GROUP_CONTEXT], R600_CONTEXT_REG_OFFSET, R600_CONTEXT_REG_END);
612 if (r) {
613 goto out_err;
614 }
615 ctx->ngroups = R600_NGROUPS;
616
617 /* add blocks */
618 r = r600_context_add_block(ctx, r600_reg_list, sizeof(r600_reg_list)/sizeof(struct r600_reg));
619 if (r)
620 goto out_err;
621
622 /* PS SAMPLER BORDER */
623 for (int j = 0, offset = 0; j < 18; j++, offset += 0x10) {
624 r = r600_state_sampler_border_init(ctx, offset);
625 if (r)
626 goto out_err;
627 }
628
629 /* VS SAMPLER BORDER */
630 for (int j = 0, offset = 0x200; j < 18; j++, offset += 0x10) {
631 r = r600_state_sampler_border_init(ctx, offset);
632 if (r)
633 goto out_err;
634 }
635 /* PS SAMPLER */
636 for (int j = 0, offset = 0; j < 18; j++, offset += 0xC) {
637 r = r600_state_sampler_init(ctx, offset);
638 if (r)
639 goto out_err;
640 }
641 /* VS SAMPLER */
642 for (int j = 0, offset = 0xD8; j < 18; j++, offset += 0xC) {
643 r = r600_state_sampler_init(ctx, offset);
644 if (r)
645 goto out_err;
646 }
647 /* PS RESOURCE */
648 for (int j = 0, offset = 0; j < 160; j++, offset += 0x1C) {
649 r = r600_state_resource_init(ctx, offset);
650 if (r)
651 goto out_err;
652 }
653 /* VS RESOURCE */
654 for (int j = 0, offset = 0x1180; j < 160; j++, offset += 0x1C) {
655 r = r600_state_resource_init(ctx, offset);
656 if (r)
657 goto out_err;
658 }
659 /* PS CONSTANT */
660 for (int j = 0, offset = 0; j < 256; j++, offset += 0x10) {
661 r = r600_state_constant_init(ctx, offset);
662 if (r)
663 goto out_err;
664 }
665 /* VS CONSTANT */
666 for (int j = 0, offset = 0x1000; j < 256; j++, offset += 0x10) {
667 r = r600_state_constant_init(ctx, offset);
668 if (r)
669 goto out_err;
670 }
671
672 /* allocate cs variables */
673 ctx->nreloc = RADEON_CTX_MAX_PM4;
674 ctx->reloc = calloc(ctx->nreloc, sizeof(struct r600_reloc));
675 if (ctx->reloc == NULL) {
676 r = -ENOMEM;
677 goto out_err;
678 }
679 ctx->bo = calloc(ctx->nreloc, sizeof(void *));
680 if (ctx->bo == NULL) {
681 r = -ENOMEM;
682 goto out_err;
683 }
684 ctx->pm4_ndwords = RADEON_CTX_MAX_PM4;
685 ctx->pm4 = calloc(ctx->pm4_ndwords, 4);
686 if (ctx->pm4 == NULL) {
687 r = -ENOMEM;
688 goto out_err;
689 }
690 return 0;
691 out_err:
692 r600_context_fini(ctx);
693 return r;
694 }
695
696 void r600_context_bo_reloc(struct r600_context *ctx, u32 *pm4, struct radeon_bo *bo)
697 {
698 int i, reloc_id;
699
700 assert(bo != NULL);
701 for (i = 0, reloc_id = -1; i < ctx->creloc; i++) {
702 if (ctx->reloc[i].handle == bo->handle) {
703 reloc_id = i * sizeof(struct r600_reloc) / 4;
704 /* set PKT3 to point to proper reloc */
705 *pm4 = reloc_id;
706 }
707 }
708 if (reloc_id == -1) {
709 /* add new relocation */
710 if (ctx->creloc >= ctx->nreloc) {
711 r600_context_flush(ctx);
712 }
713 reloc_id = ctx->creloc * sizeof(struct r600_reloc) / 4;
714 ctx->reloc[ctx->creloc].handle = bo->handle;
715 ctx->reloc[ctx->creloc].read_domain = RADEON_GEM_DOMAIN_GTT;
716 ctx->reloc[ctx->creloc].write_domain = RADEON_GEM_DOMAIN_GTT;
717 ctx->reloc[ctx->creloc].flags = 0;
718 radeon_bo_reference(ctx->radeon, &ctx->bo[ctx->creloc], bo);
719 ctx->creloc++;
720 /* set PKT3 to point to proper reloc */
721 *pm4 = reloc_id;
722 }
723 }
724
/* Apply a pipe state vector: for each register in the state, locate its
 * block, merge the new value under the register's mask, update the bo
 * relocation if the register carries one, and mark the block dirty so it is
 * re-emitted on the next draw. */
void r600_context_pipe_state_set(struct r600_context *ctx, struct r600_pipe_state *state)
{
	struct r600_group *group;
	struct r600_group_block *block;

	for (int i = 0; i < state->nregs; i++) {
		unsigned id;
		group = &ctx->groups[state->regs[i].group_id];
		/* offset_block_id maps a dword offset within the group to its block */
		id = group->offset_block_id[(state->regs[i].offset - group->start_offset) >> 2];
		block = &group->blocks[id];
		/* dword index of this register inside the block */
		id = (state->regs[i].offset - block->start_offset) >> 2;
		block->pm4[id] &= ~state->regs[i].mask;
		block->pm4[id] |= state->regs[i].value;
		if (block->pm4_bo_index[id]) {
			/* find relocation */
			id = block->pm4_bo_index[id];
			radeon_ws_bo_reference(ctx->radeon, &block->reloc[id].bo, state->regs[i].bo);
		}
		block->status |= R600_BLOCK_STATUS_ENABLED;
		block->status |= R600_BLOCK_STATUS_DIRTY;
		/* +2 accounts for the PKT3 header and offset dword emitted per block */
		ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
	}
}
748
/* Write the 7 RESOURCE words of one shader resource slot and install the
 * buffer-object references for its two relocation entries.
 * offset is the absolute address of the slot's WORD0 register. */
static inline void r600_context_pipe_state_set_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
{
	struct r600_group_block *block;
	unsigned id;

	offset -= ctx->groups[R600_GROUP_RESOURCE].start_offset;
	id = ctx->groups[R600_GROUP_RESOURCE].offset_block_id[offset >> 2];
	block = &ctx->groups[R600_GROUP_RESOURCE].blocks[id];
	block->pm4[0] = state->regs[0].value;
	block->pm4[1] = state->regs[1].value;
	block->pm4[2] = state->regs[2].value;
	block->pm4[3] = state->regs[3].value;
	block->pm4[4] = state->regs[4].value;
	block->pm4[5] = state->regs[5].value;
	block->pm4[6] = state->regs[6].value;
	/* drop any previously held references before installing the new ones */
	radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, NULL);
	radeon_ws_bo_reference(ctx->radeon , &block->reloc[2].bo, NULL);
	if (state->regs[0].bo) {
		/* VERTEX RESOURCE: we pretend there are 2 bos to relocate so
		 * we have a single code path for both VERTEX & TEXTURE resources
		 */
		radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[0].bo);
		radeon_ws_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[0].bo);
	} else {
		/* TEXTURE RESOURCE: WORD2/WORD3 carry separate bos */
		radeon_ws_bo_reference(ctx->radeon, &block->reloc[1].bo, state->regs[2].bo);
		radeon_ws_bo_reference(ctx->radeon, &block->reloc[2].bo, state->regs[3].bo);
	}
	block->status |= R600_BLOCK_STATUS_ENABLED;
	block->status |= R600_BLOCK_STATUS_DIRTY;
	/* +2 accounts for the PKT3 header and offset dword emitted per block */
	ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
}
781
782 void r600_context_pipe_state_set_ps_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
783 {
784 unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1C * rid;
785
786 r600_context_pipe_state_set_resource(ctx, state, offset);
787 }
788
789 void r600_context_pipe_state_set_vs_resource(struct r600_context *ctx, struct r600_pipe_state *state, unsigned rid)
790 {
791 unsigned offset = R_038000_SQ_TEX_RESOURCE_WORD0_0 + 0x1180 + 0x1C * rid;
792
793 r600_context_pipe_state_set_resource(ctx, state, offset);
794 }
795
796 static inline void r600_context_pipe_state_set_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
797 {
798 struct r600_group_block *block;
799 unsigned id;
800
801 offset -= ctx->groups[R600_GROUP_SAMPLER].start_offset;
802 id = ctx->groups[R600_GROUP_SAMPLER].offset_block_id[offset >> 2];
803 block = &ctx->groups[R600_GROUP_SAMPLER].blocks[id];
804 block->pm4[0] = state->regs[0].value;
805 block->pm4[1] = state->regs[1].value;
806 block->pm4[2] = state->regs[2].value;
807 block->status |= R600_BLOCK_STATUS_ENABLED;
808 block->status |= R600_BLOCK_STATUS_DIRTY;
809 ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
810 }
811
812 static inline void r600_context_pipe_state_set_sampler_border(struct r600_context *ctx, struct r600_pipe_state *state, unsigned offset)
813 {
814 struct r600_group_block *block;
815 unsigned id;
816
817 offset -= ctx->groups[R600_GROUP_CONFIG].start_offset;
818 id = ctx->groups[R600_GROUP_CONFIG].offset_block_id[offset >> 2];
819 block = &ctx->groups[R600_GROUP_CONFIG].blocks[id];
820 block->pm4[0] = state->regs[3].value;
821 block->pm4[1] = state->regs[4].value;
822 block->pm4[2] = state->regs[5].value;
823 block->pm4[3] = state->regs[6].value;
824 block->status |= R600_BLOCK_STATUS_ENABLED;
825 block->status |= R600_BLOCK_STATUS_DIRTY;
826 ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
827 }
828
829 void r600_context_pipe_state_set_ps_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
830 {
831 unsigned offset;
832
833 offset = 0x0003C000 + id * 0xc;
834 r600_context_pipe_state_set_sampler(ctx, state, offset);
835 if (state->nregs > 3) {
836 offset = 0x0000A400 + id * 0x10;
837 r600_context_pipe_state_set_sampler_border(ctx, state, offset);
838 }
839 }
840
841 void r600_context_pipe_state_set_vs_sampler(struct r600_context *ctx, struct r600_pipe_state *state, unsigned id)
842 {
843 unsigned offset;
844
845 offset = 0x0003C0D8 + id * 0xc;
846 r600_context_pipe_state_set_sampler(ctx, state, offset);
847 if (state->nregs > 3) {
848 offset = 0x0000A600 + id * 0x10;
849 r600_context_pipe_state_set_sampler_border(ctx, state, offset);
850 }
851 }
852
/* Emit every dirty block of a group into the command stream using the given
 * SET_* packet opcode, patching the blocks' relocation NOPs first. */
void r600_context_group_emit_dirty(struct r600_context *ctx, struct r600_group *group, unsigned opcode)
{
	struct radeon_bo *bo;
	int id;

	for (int i = 0; i < group->nblocks; i++) {
		struct r600_group_block *block = &group->blocks[i];
		if (block->status & R600_BLOCK_STATUS_DIRTY) {
			for (int j = 0; j < block->nreg; j++) {
				if (block->pm4_bo_index[j]) {
					/* find relocation */
					id = block->pm4_bo_index[j];
					/* NOTE(review): assumes reloc[id].bo is non-NULL for every
					 * register flagged with a bo index — confirm a state set
					 * always precedes the first emit of such a block */
					bo = radeon_bo_pb_get_bo(block->reloc[id].bo->pb);
					for (int k = 0; k < block->reloc[id].nreloc; k++) {
						r600_context_bo_reloc(ctx, &block->pm4[block->reloc[id].bo_pm4_index[k]], bo);
					}
				}
			}

			/* packet header + dword offset of the block within the group,
			 * then the block's pm4 payload */
			ctx->pm4[ctx->pm4_cdwords++] = PKT3(opcode, block->nreg);
			ctx->pm4[ctx->pm4_cdwords++] = (block->start_offset - group->start_offset) >> 2;
			memcpy(&ctx->pm4[ctx->pm4_cdwords], block->pm4, block->pm4_ndwords * 4);
			ctx->pm4_cdwords += block->pm4_ndwords;
			/* clear only the dirty bit; the block stays enabled */
			block->status ^= R600_BLOCK_STATUS_DIRTY;
		}
	}
}
880
881 struct radeon_bo *r600_context_reg_bo(struct r600_context *ctx, unsigned group_id, unsigned offset)
882 {
883 struct r600_group_block *block;
884 unsigned id;
885
886 id = ctx->groups[group_id].offset_block_id[(offset - ctx->groups[group_id].start_offset) >> 2];
887 block = &ctx->groups[group_id].blocks[id];
888 offset -= block->start_offset;
889 id = block->pm4_bo_index[offset >> 2];
890 if (block->reloc[id].bo) {
891 return radeon_bo_pb_get_bo(block->reloc[id].bo->pb);
892 }
893 return NULL;
894 }
895
/* Emit all dirty state groups plus one draw packet into the current
 * command stream, flushing the cs first when there is not enough room
 * (relocation slots or dwords) for everything to fit atomically.
 */
void r600_context_draw(struct r600_context *ctx, const struct r600_draw *draw)
{
	struct radeon_bo *cb[8];
	/* base cost: INDEX_TYPE(2) + NUM_INSTANCES(2) + DRAW_INDEX_AUTO(3)
	 * + EVENT_WRITE(2) dwords */
	unsigned ndwords = 9;

	if (draw->indices) {
		/* indexed path: DRAW_INDEX(5) + NOP reloc(2) replace DRAW_INDEX_AUTO */
		ndwords = 13;
		/* make sure there is enough relocation space before scheduling draw */
		if (ctx->creloc >= (ctx->nreloc - 1)) {
			r600_context_flush(ctx);
		}
	}

	/* find number of color buffer */
	for (int i = 0; i < 8; i++) {
		cb[i] = r600_context_reg_bo(ctx, R600_GROUP_CONTEXT, R_028040_CB_COLOR0_BASE + (i << 2));
		if (cb[i]) {
			/* each bound color buffer needs a SURFACE_SYNC(5) + NOP reloc(2) */
			ndwords += 7;
		}
	}

	if ((ctx->pm4_dirty_cdwords + ndwords + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
		/* need to flush */
		r600_context_flush(ctx);
	}
	/* at that point everythings is flushed and ctx->pm4_cdwords = 0 */
	if ((ctx->pm4_dirty_cdwords + ndwords) > ctx->pm4_ndwords) {
		R600_ERR("context is too big to be scheduled\n");
		return;
	}

	/* enough room to copy packet */
	r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_CONFIG], PKT3_SET_CONFIG_REG);
	r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_CONTEXT], PKT3_SET_CONTEXT_REG);
	r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_ALU_CONST], PKT3_SET_ALU_CONST);
	r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_RESOURCE], PKT3_SET_RESOURCE);
	r600_context_group_emit_dirty(ctx, &ctx->groups[R600_GROUP_SAMPLER], PKT3_SET_SAMPLER);

	/* draw packet */
	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_INDEX_TYPE, 0);
	ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_index_type;
	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NUM_INSTANCES, 0);
	ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_instances;
	if (draw->indices) {
		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX, 3);
		ctx->pm4[ctx->pm4_cdwords++] = draw->indices_bo_offset;
		ctx->pm4[ctx->pm4_cdwords++] = 0;
		ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
		ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
		/* NOP packet carries the relocation for the index buffer */
		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
		ctx->pm4[ctx->pm4_cdwords++] = 0;
		r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], radeon_bo_pb_get_bo(draw->indices->pb));
	} else {
		ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_DRAW_INDEX_AUTO, 1);
		ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_num_indices;
		ctx->pm4[ctx->pm4_cdwords++] = draw->vgt_draw_initiator;
	}
	ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0);
	ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_CACHE_FLUSH_AND_INV_EVENT;

	/* flush color buffer */
	for (int i = 0; i < 8; i++) {
		if (cb[i]) {
			ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3);
			ctx->pm4[ctx->pm4_cdwords++] = (S_0085F0_CB0_DEST_BASE_ENA(1) << i) |
							S_0085F0_CB_ACTION_ENA(1);
			/* surface size in units of 256 bytes, rounded up */
			ctx->pm4[ctx->pm4_cdwords++] = (cb[i]->size + 255) >> 8;
			ctx->pm4[ctx->pm4_cdwords++] = 0x00000000;
			ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A;
			/* NOP packet carries the relocation for this color buffer */
			ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
			ctx->pm4[ctx->pm4_cdwords++] = 0;
			r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], cb[i]);
		}
	}

	/* all dirty state have been scheduled in current cs */
	ctx->pm4_dirty_cdwords = 0;
}
974
/* Submit the current command stream and reset it.  Queries running
 * across the flush are suspended before submission and resumed after;
 * every enabled state block is re-marked dirty so it gets re-emitted
 * on the next draw.
 */
void r600_context_flush(struct r600_context *ctx)
{
	/* NOTE(review): drmib/chunks/chunk_array/r are only used by the
	 * '#if 0' submission path below and are otherwise unused. */
	struct drm_radeon_cs drmib;
	struct drm_radeon_cs_chunk chunks[2];
	uint64_t chunk_array[2];
	struct r600_group_block *block;
	int r;

	/* nothing recorded, nothing to do */
	if (!ctx->pm4_cdwords)
		return;

	/* suspend queries */
	r600_context_queries_suspend(ctx);

#if 0
	/* emit cs */
	drmib.num_chunks = 2;
	drmib.chunks = (uint64_t)(uintptr_t)chunk_array;
	chunks[0].chunk_id = RADEON_CHUNK_ID_IB;
	chunks[0].length_dw = ctx->pm4_cdwords;
	chunks[0].chunk_data = (uint64_t)(uintptr_t)ctx->pm4;
	chunks[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
	chunks[1].length_dw = ctx->creloc * sizeof(struct r600_reloc) / 4;
	chunks[1].chunk_data = (uint64_t)(uintptr_t)ctx->reloc;
	chunk_array[0] = (uint64_t)(uintptr_t)&chunks[0];
	chunk_array[1] = (uint64_t)(uintptr_t)&chunks[1];
	r = drmCommandWriteRead(ctx->radeon->fd, DRM_RADEON_CS, &drmib,
				sizeof(struct drm_radeon_cs));
#endif
	/* restart */
	/* drop the references taken for each scheduled relocation */
	for (int i = 0; i < ctx->creloc; i++) {
		radeon_bo_reference(ctx->radeon, &ctx->bo[i], NULL);
	}
	ctx->creloc = 0;
	ctx->pm4_dirty_cdwords = 0;
	ctx->pm4_cdwords = 0;

	/* resume queries */
	r600_context_queries_resume(ctx);

	/* set all valid group as dirty so they get reemited on
	 * next draw command
	 */
	for (int i = 0; i < ctx->ngroups; i++) {
		for (int j = 0; j < ctx->groups[i].nblocks; j++) {
			/* mark enabled block as dirty */
			block = &ctx->groups[i].blocks[j];
			if (block->status & R600_BLOCK_STATUS_ENABLED) {
				/* 2 dwords packet header + payload per block */
				ctx->pm4_dirty_cdwords += 2 + block->pm4_ndwords;
				block->status |= R600_BLOCK_STATUS_DIRTY;
			}
		}
	}
}
1029
/* Dump the current command stream to 'file' in BOF format for offline
 * replay/debugging: device id, relocation table, pm4 stream and the
 * full contents of every referenced buffer object.
 *
 * On success control falls through into out_err on purpose: every
 * temporary bof object is either NULL or still needs its reference
 * dropped, and bof_decref is assumed safe on NULL.
 */
void r600_context_dump_bof(struct r600_context *ctx, const char *file)
{
	bof_t *bcs, *blob, *array, *bo, *size, *handle, *device_id, *root;
	unsigned i;

	root = device_id = bcs = blob = array = bo = size = handle = NULL;
	root = bof_object();
	if (root == NULL)
		goto out_err;
	device_id = bof_int32(ctx->radeon->device);
	if (device_id == NULL)
		goto out_err;
	if (bof_object_set(root, "device_id", device_id))
		goto out_err;
	bof_decref(device_id);
	device_id = NULL;
	/* dump relocs */
	/* NOTE(review): 16 assumed to be sizeof(struct r600_reloc) — verify */
	blob = bof_blob(ctx->creloc * 16, ctx->reloc);
	if (blob == NULL)
		goto out_err;
	if (bof_object_set(root, "reloc", blob))
		goto out_err;
	bof_decref(blob);
	blob = NULL;
	/* dump cs */
	blob = bof_blob(ctx->pm4_cdwords * 4, ctx->pm4);
	if (blob == NULL)
		goto out_err;
	if (bof_object_set(root, "pm4", blob))
		goto out_err;
	bof_decref(blob);
	blob = NULL;
	/* dump bo */
	array = bof_array();
	if (array == NULL)
		goto out_err;
	for (i = 0; i < ctx->creloc; i++) {
		struct radeon_bo *rbo = ctx->bo[i];
		bo = bof_object();
		if (bo == NULL)
			goto out_err;
		size = bof_int32(rbo->size);
		if (size == NULL)
			goto out_err;
		if (bof_object_set(bo, "size", size))
			goto out_err;
		bof_decref(size);
		size = NULL;
		handle = bof_int32(rbo->handle);
		if (handle == NULL)
			goto out_err;
		if (bof_object_set(bo, "handle", handle))
			goto out_err;
		bof_decref(handle);
		handle = NULL;
		/* map the bo just long enough to snapshot its contents */
		radeon_bo_map(ctx->radeon, rbo);
		blob = bof_blob(rbo->size, rbo->data);
		radeon_bo_unmap(ctx->radeon, rbo);
		if (blob == NULL)
			goto out_err;
		if (bof_object_set(bo, "data", blob))
			goto out_err;
		bof_decref(blob);
		blob = NULL;
		if (bof_array_append(array, bo))
			goto out_err;
		bof_decref(bo);
		bo = NULL;
	}
	if (bof_object_set(root, "bo", array))
		goto out_err;
	bof_dump_file(root, file);
out_err:
	/* unified cleanup for both success and failure paths */
	bof_decref(blob);
	bof_decref(array);
	bof_decref(bo);
	bof_decref(size);
	bof_decref(handle);
	bof_decref(device_id);
	bof_decref(root);
}
1111
1112 static void r600_query_result(struct r600_context *ctx, struct r600_query *query)
1113 {
1114 u64 start, end;
1115 u32 *results;
1116 int i;
1117
1118 results = radeon_ws_bo_map(ctx->radeon, query->buffer, 0, NULL);
1119 for (i = 0; i < query->num_results; i += 4) {
1120 start = (u64)results[i] | (u64)results[i + 1] << 32;
1121 end = (u64)results[i + 2] | (u64)results[i + 3] << 32;
1122 if ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL)) {
1123 query->result += end - start;
1124 }
1125 }
1126 radeon_ws_bo_unmap(ctx->radeon, query->buffer);
1127 query->num_results = 0;
1128 }
1129
1130 void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
1131 {
1132 /* query request needs 6 dwords for begin + 6 dwords for end */
1133 if ((12 + ctx->pm4_cdwords) > ctx->pm4_ndwords) {
1134 /* need to flush */
1135 r600_context_flush(ctx);
1136 }
1137
1138 /* if query buffer is full force a flush */
1139 if (query->num_results >= ((query->buffer_size >> 2) - 2)) {
1140 r600_context_flush(ctx);
1141 r600_query_result(ctx, query);
1142 }
1143
1144 /* emit begin query */
1145 ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2);
1146 ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_ZPASS_DONE;
1147 ctx->pm4[ctx->pm4_cdwords++] = query->num_results;
1148 ctx->pm4[ctx->pm4_cdwords++] = 0;
1149 ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
1150 ctx->pm4[ctx->pm4_cdwords++] = 0;
1151 r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], radeon_bo_pb_get_bo(query->buffer->pb));
1152
1153 query->state |= R600_QUERY_STATE_STARTED;
1154 query->state ^= R600_QUERY_STATE_ENDED;
1155 }
1156
1157 void r600_query_end(struct r600_context *ctx, struct r600_query *query)
1158 {
1159 /* emit begin query */
1160 ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2);
1161 ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE_ZPASS_DONE;
1162 ctx->pm4[ctx->pm4_cdwords++] = query->num_results + 8;
1163 ctx->pm4[ctx->pm4_cdwords++] = 0;
1164 ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0);
1165 ctx->pm4[ctx->pm4_cdwords++] = 0;
1166 r600_context_bo_reloc(ctx, &ctx->pm4[ctx->pm4_cdwords - 1], radeon_bo_pb_get_bo(query->buffer->pb));
1167
1168 query->num_results += 16;
1169 query->state ^= R600_QUERY_STATE_STARTED;
1170 query->state |= R600_QUERY_STATE_ENDED;
1171 }
1172
1173 struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type)
1174 {
1175 struct r600_query *query;
1176
1177 if (query_type != PIPE_QUERY_OCCLUSION_COUNTER)
1178 return NULL;
1179
1180 query = calloc(1, sizeof(struct r600_query));
1181 if (query == NULL)
1182 return NULL;
1183
1184 query->type = query_type;
1185 query->buffer_size = 4096;
1186
1187 query->buffer = radeon_ws_bo(ctx->radeon, query->buffer_size, 1, 0);
1188 if (!query->buffer) {
1189 free(query);
1190 return NULL;
1191 }
1192
1193 LIST_ADDTAIL(&query->list, &ctx->query_list);
1194
1195 return query;
1196 }
1197
1198 void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query)
1199 {
1200 radeon_ws_bo_reference(ctx->radeon, &query->buffer, NULL);
1201 LIST_DEL(&query->list);
1202 free(query);
1203 }
1204
1205 boolean r600_context_query_result(struct r600_context *ctx,
1206 struct r600_query *query,
1207 boolean wait, void *vresult)
1208 {
1209 uint64_t *result = (uint64_t*)vresult;
1210
1211 if (query->num_results) {
1212 r600_context_flush(ctx);
1213 }
1214 r600_query_result(ctx, query);
1215 *result = query->result;
1216 query->result = 0;
1217 return TRUE;
1218 }
1219
1220 static void r600_context_queries_suspend(struct r600_context *ctx)
1221 {
1222 struct r600_query *query;
1223
1224 LIST_FOR_EACH_ENTRY(query, &ctx->query_list, list) {
1225 if (query->state & R600_QUERY_STATE_STARTED) {
1226 r600_query_end(ctx, query);
1227 query->state |= R600_QUERY_STATE_SUSPENDED;
1228 }
1229 }
1230 }
1231
1232 static void r600_context_queries_resume(struct r600_context *ctx)
1233 {
1234 struct r600_query *query;
1235
1236 LIST_FOR_EACH_ENTRY(query, &ctx->query_list, list) {
1237 if (query->state & R600_QUERY_STATE_SUSPENDED) {
1238 r600_query_begin(ctx, query);
1239 query->state ^= R600_QUERY_STATE_SUSPENDED;
1240 }
1241 }
1242 }