2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #include "r600_hw_context_priv.h"
28 #include "radeonsi_pipe.h"
30 #include "util/u_memory.h"
/*
 * Marker value stored in the register-offset slot of some entries in the
 * si_*_reg_list tables in this file, always paired with a flags value of 0.
 *
 * NOTE(review): judging by the name, r600_context_add_block() presumably
 * starts a new register block when it meets such an entry -- confirm in
 * that function.
 */
#define GROUP_FORCE_NEW_BLOCK 0
35 static const struct r600_reg si_config_reg_list
[] = {
36 {R_0088B0_VGT_VTX_VECT_EJECT_REG
, REG_FLAG_FLUSH_CHANGE
},
37 {R_0088C8_VGT_ESGS_RING_SIZE
, REG_FLAG_FLUSH_CHANGE
},
38 {R_0088CC_VGT_GSVS_RING_SIZE
, REG_FLAG_FLUSH_CHANGE
},
39 {R_008A14_PA_CL_ENHANCE
, REG_FLAG_FLUSH_CHANGE
},
40 {R_009100_SPI_CONFIG_CNTL
, REG_FLAG_ENABLE_ALWAYS
| REG_FLAG_FLUSH_CHANGE
},
41 {R_00913C_SPI_CONFIG_CNTL_1
, REG_FLAG_ENABLE_ALWAYS
| REG_FLAG_FLUSH_CHANGE
},
44 static const struct r600_reg si_context_reg_list
[] = {
45 {R_028004_DB_COUNT_CONTROL
, 0},
46 {R_028010_DB_RENDER_OVERRIDE2
, 0},
47 {GROUP_FORCE_NEW_BLOCK
, 0},
48 {R_028014_DB_HTILE_DATA_BASE
, REG_FLAG_NEED_BO
},
49 {GROUP_FORCE_NEW_BLOCK
, 0},
50 {R_028080_TA_BC_BASE_ADDR
, REG_FLAG_NEED_BO
},
51 {GROUP_FORCE_NEW_BLOCK
, 0},
52 {R_028234_PA_SU_HARDWARE_SCREEN_OFFSET
, 0},
53 {GROUP_FORCE_NEW_BLOCK
, 0},
54 {R_0286C4_SPI_VS_OUT_CONFIG
, 0},
55 {R_0286CC_SPI_PS_INPUT_ENA
, 0},
56 {R_0286D0_SPI_PS_INPUT_ADDR
, 0},
57 {R_0286D8_SPI_PS_IN_CONTROL
, 0},
58 {R_0286E0_SPI_BARYC_CNTL
, 0},
59 {R_02870C_SPI_SHADER_POS_FORMAT
, 0},
60 {R_028710_SPI_SHADER_Z_FORMAT
, 0},
61 {R_028714_SPI_SHADER_COL_FORMAT
, 0},
62 {R_0287D4_PA_CL_POINT_X_RAD
, 0},
63 {R_0287D8_PA_CL_POINT_Y_RAD
, 0},
64 {R_0287DC_PA_CL_POINT_SIZE
, 0},
65 {R_0287E0_PA_CL_POINT_CULL_RAD
, 0},
66 {R_028804_DB_EQAA
, 0},
67 {R_02880C_DB_SHADER_CONTROL
, 0},
68 {R_028824_PA_SU_LINE_STIPPLE_CNTL
, 0},
69 {R_028828_PA_SU_LINE_STIPPLE_SCALE
, 0},
70 {R_02882C_PA_SU_PRIM_FILTER_CNTL
, 0},
71 {R_028A10_VGT_OUTPUT_PATH_CNTL
, 0},
72 {R_028A14_VGT_HOS_CNTL
, 0},
73 {R_028A18_VGT_HOS_MAX_TESS_LEVEL
, 0},
74 {R_028A1C_VGT_HOS_MIN_TESS_LEVEL
, 0},
75 {R_028A20_VGT_HOS_REUSE_DEPTH
, 0},
76 {R_028A24_VGT_GROUP_PRIM_TYPE
, 0},
77 {R_028A28_VGT_GROUP_FIRST_DECR
, 0},
78 {R_028A2C_VGT_GROUP_DECR
, 0},
79 {R_028A30_VGT_GROUP_VECT_0_CNTL
, 0},
80 {R_028A34_VGT_GROUP_VECT_1_CNTL
, 0},
81 {R_028A38_VGT_GROUP_VECT_0_FMT_CNTL
, 0},
82 {R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL
, 0},
83 {R_028A40_VGT_GS_MODE
, 0},
84 {R_028A4C_PA_SC_MODE_CNTL_1
, 0},
85 {R_028A50_VGT_ENHANCE
, 0},
86 {R_028A54_VGT_GS_PER_ES
, 0},
87 {R_028A58_VGT_ES_PER_GS
, 0},
88 {R_028A5C_VGT_GS_PER_VS
, 0},
89 {R_028A60_VGT_GSVS_RING_OFFSET_1
, 0},
90 {R_028A64_VGT_GSVS_RING_OFFSET_2
, 0},
91 {R_028A68_VGT_GSVS_RING_OFFSET_3
, 0},
92 {R_028A6C_VGT_GS_OUT_PRIM_TYPE
, 0},
93 {R_028A70_IA_ENHANCE
, 0},
94 {R_028A84_VGT_PRIMITIVEID_EN
, 0},
95 {R_028A8C_VGT_PRIMITIVEID_RESET
, 0},
96 {R_028AA0_VGT_INSTANCE_STEP_RATE_0
, 0},
97 {R_028AA4_VGT_INSTANCE_STEP_RATE_1
, 0},
98 {R_028AA8_IA_MULTI_VGT_PARAM
, 0},
99 {R_028AAC_VGT_ESGS_RING_ITEMSIZE
, 0},
100 {R_028AB0_VGT_GSVS_RING_ITEMSIZE
, 0},
101 {R_028AB4_VGT_REUSE_OFF
, 0},
102 {R_028AB8_VGT_VTX_CNT_EN
, 0},
103 {R_028ABC_DB_HTILE_SURFACE
, 0},
104 {R_028B54_VGT_SHADER_STAGES_EN
, 0},
105 {R_028B94_VGT_STRMOUT_CONFIG
, 0},
106 {R_028B98_VGT_STRMOUT_BUFFER_CONFIG
, 0},
107 {R_028BD4_PA_SC_CENTROID_PRIORITY_0
, 0},
108 {R_028BD8_PA_SC_CENTROID_PRIORITY_1
, 0},
109 {R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0
, 0},
110 {R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1
, 0},
111 {R_028C00_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2
, 0},
112 {R_028C04_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3
, 0},
113 {R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0
, 0},
114 {R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1
, 0},
115 {R_028C10_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2
, 0},
116 {R_028C14_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3
, 0},
117 {R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0
, 0},
118 {R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1
, 0},
119 {R_028C20_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2
, 0},
120 {R_028C24_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3
, 0},
121 {R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0
, 0},
122 {R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1
, 0},
123 {R_028C30_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2
, 0},
124 {R_028C34_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3
, 0},
127 static const struct r600_reg si_sh_reg_list
[] = {
128 {R_00B020_SPI_SHADER_PGM_LO_PS
, REG_FLAG_NEED_BO
},
129 {R_00B024_SPI_SHADER_PGM_HI_PS
, REG_FLAG_NEED_BO
},
130 {R_00B028_SPI_SHADER_PGM_RSRC1_PS
, 0},
131 {R_00B02C_SPI_SHADER_PGM_RSRC2_PS
, 0},
132 {GROUP_FORCE_NEW_BLOCK
, 0},
133 {R_00B030_SPI_SHADER_USER_DATA_PS_0
, REG_FLAG_NEED_BO
},
134 {R_00B034_SPI_SHADER_USER_DATA_PS_1
, 0},
135 {GROUP_FORCE_NEW_BLOCK
, 0},
136 {R_00B038_SPI_SHADER_USER_DATA_PS_2
, REG_FLAG_NEED_BO
},
137 {R_00B03C_SPI_SHADER_USER_DATA_PS_3
, 0},
138 {GROUP_FORCE_NEW_BLOCK
, 0},
139 {R_00B040_SPI_SHADER_USER_DATA_PS_4
, REG_FLAG_NEED_BO
},
140 {R_00B044_SPI_SHADER_USER_DATA_PS_5
, 0},
141 {GROUP_FORCE_NEW_BLOCK
, 0},
142 {R_00B048_SPI_SHADER_USER_DATA_PS_6
, REG_FLAG_NEED_BO
},
143 {R_00B04C_SPI_SHADER_USER_DATA_PS_7
, 0},
144 {GROUP_FORCE_NEW_BLOCK
, 0},
145 {R_00B050_SPI_SHADER_USER_DATA_PS_8
, REG_FLAG_NEED_BO
},
146 {R_00B054_SPI_SHADER_USER_DATA_PS_9
, 0},
147 {GROUP_FORCE_NEW_BLOCK
, 0},
148 {R_00B058_SPI_SHADER_USER_DATA_PS_10
, REG_FLAG_NEED_BO
},
149 {R_00B05C_SPI_SHADER_USER_DATA_PS_11
, 0},
150 {GROUP_FORCE_NEW_BLOCK
, 0},
151 {R_00B060_SPI_SHADER_USER_DATA_PS_12
, REG_FLAG_NEED_BO
},
152 {R_00B064_SPI_SHADER_USER_DATA_PS_13
, 0},
153 {GROUP_FORCE_NEW_BLOCK
, 0},
154 {R_00B068_SPI_SHADER_USER_DATA_PS_14
, REG_FLAG_NEED_BO
},
155 {R_00B06C_SPI_SHADER_USER_DATA_PS_15
, 0},
156 {GROUP_FORCE_NEW_BLOCK
, 0},
157 {R_00B120_SPI_SHADER_PGM_LO_VS
, REG_FLAG_NEED_BO
},
158 {R_00B124_SPI_SHADER_PGM_HI_VS
, REG_FLAG_NEED_BO
},
159 {R_00B128_SPI_SHADER_PGM_RSRC1_VS
, 0},
160 {R_00B12C_SPI_SHADER_PGM_RSRC2_VS
, 0},
161 {GROUP_FORCE_NEW_BLOCK
, 0},
162 {R_00B130_SPI_SHADER_USER_DATA_VS_0
, REG_FLAG_NEED_BO
},
163 {R_00B134_SPI_SHADER_USER_DATA_VS_1
, 0},
164 {GROUP_FORCE_NEW_BLOCK
, 0},
165 {R_00B138_SPI_SHADER_USER_DATA_VS_2
, REG_FLAG_NEED_BO
},
166 {R_00B13C_SPI_SHADER_USER_DATA_VS_3
, 0},
167 {GROUP_FORCE_NEW_BLOCK
, 0},
168 {R_00B140_SPI_SHADER_USER_DATA_VS_4
, REG_FLAG_NEED_BO
},
169 {R_00B144_SPI_SHADER_USER_DATA_VS_5
, 0},
170 {GROUP_FORCE_NEW_BLOCK
, 0},
171 {R_00B148_SPI_SHADER_USER_DATA_VS_6
, REG_FLAG_NEED_BO
},
172 {R_00B14C_SPI_SHADER_USER_DATA_VS_7
, 0},
173 {GROUP_FORCE_NEW_BLOCK
, 0},
174 {R_00B150_SPI_SHADER_USER_DATA_VS_8
, REG_FLAG_NEED_BO
},
175 {R_00B154_SPI_SHADER_USER_DATA_VS_9
, 0},
176 {GROUP_FORCE_NEW_BLOCK
, 0},
177 {R_00B158_SPI_SHADER_USER_DATA_VS_10
, REG_FLAG_NEED_BO
},
178 {R_00B15C_SPI_SHADER_USER_DATA_VS_11
, 0},
179 {GROUP_FORCE_NEW_BLOCK
, 0},
180 {R_00B160_SPI_SHADER_USER_DATA_VS_12
, REG_FLAG_NEED_BO
},
181 {R_00B164_SPI_SHADER_USER_DATA_VS_13
, 0},
182 {GROUP_FORCE_NEW_BLOCK
, 0},
183 {R_00B168_SPI_SHADER_USER_DATA_VS_14
, REG_FLAG_NEED_BO
},
184 {R_00B16C_SPI_SHADER_USER_DATA_VS_15
, 0},
/*
 * si_context_init - set up the hardware-context state held in ctx.
 *
 * Steps visible in this listing, in order:
 *  - initialise the active_query_list, dirty and enable_list list heads;
 *  - allocate ctx->range as NUM_RANGES zeroed struct r600_range entries;
 *  - register the config, context and SH register tables through
 *    r600_context_add_block(), each with its packet opcode and
 *    register-space offset, storing the result in r;
 *  - create the command stream with ctx->ws->cs_create(ctx->ws);
 *  - call r600_context_fini(ctx).
 *
 * Returns int.
 *
 * NOTE(review): this listing shows no braces, no declaration of r, no
 * check of the calloc() result or of r, and no return statement.  The
 * r600_context_fini() call is presumably the error-unwind path rather
 * than part of the normal flow -- confirm against the complete file.
 */
187 int si_context_init(struct r600_context
*ctx
)
191 LIST_INITHEAD(&ctx
->active_query_list
);
193 /* init dirty list */
194 LIST_INITHEAD(&ctx
->dirty
);
195 LIST_INITHEAD(&ctx
->enable_list
);
/* Zero-initialised range table with NUM_RANGES entries. */
197 ctx
->range
= calloc(NUM_RANGES
, sizeof(struct r600_range
));
/* Config-space registers. */
204 r
= r600_context_add_block(ctx
, si_config_reg_list
,
205 Elements(si_config_reg_list
), PKT3_SET_CONFIG_REG
, SI_CONFIG_REG_OFFSET
);
/* Context-space registers. */
208 r
= r600_context_add_block(ctx
, si_context_reg_list
,
209 Elements(si_context_reg_list
), PKT3_SET_CONTEXT_REG
, SI_CONTEXT_REG_OFFSET
);
/* Shader (SH) registers. */
212 r
= r600_context_add_block(ctx
, si_sh_reg_list
,
213 Elements(si_sh_reg_list
), PKT3_SET_SH_REG
, SI_SH_REG_OFFSET
);
221 /* PS SAMPLER BORDER */
222 /* VS SAMPLER BORDER */
/* The command stream comes from the winsys attached to this context. */
227 ctx
->cs
= ctx
->ws
->cs_create(ctx
->ws
);
233 r600_context_fini(ctx
);
/*
 * evergreen_context_ps_partial_flush - append a PS_PARTIAL_FLUSH event to
 * the command stream of ctx and clear the draw-pending flag.
 *
 * Writes two dwords at cs->buf[cs->cdw], advancing cs->cdw: an EVENT_WRITE
 * packet header and the event dword
 * EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4).  Afterwards
 * R600_CONTEXT_DRAW_PENDING is cleared in ctx->flags.
 *
 * NOTE(review): the statement controlled by the DRAW_PENDING test is not
 * visible in this listing; presumably it is an early return, so nothing
 * is emitted when no draw is pending -- confirm against the complete
 * file.  No check that the command stream has room is visible here.
 */
237 static inline void evergreen_context_ps_partial_flush(struct r600_context
*ctx
)
239 struct radeon_winsys_cs
*cs
= ctx
->cs
;
241 if (!(ctx
->flags
& R600_CONTEXT_DRAW_PENDING
))
/* Packet header, then the event selector dword. */
244 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_EVENT_WRITE
, 0, 0);
245 cs
->buf
[cs
->cdw
++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH
) | EVENT_INDEX(4);
247 ctx
->flags
&= ~R600_CONTEXT_DRAW_PENDING
;
/*
 * si_context_draw - write the PM4 packets for one draw into ctx->cs.
 *
 * ctx:  context whose command stream (ctx->cs) receives the packets; the
 *       code also reads num_cs_dw_queries_suspend, predicate_drawing,
 *       screen and index_buffer.index_size from it.
 * draw: draw parameters; the code reads indices, indices_bo_offset,
 *       vgt_index_type, vgt_num_instances, vgt_num_indices,
 *       vgt_draw_initiator and db_render_override.
 *
 * Visible sequence:
 *  - ndwords starts at 7 and is asserted to be <= SI_MAX_DRAW_CS_DWORDS.
 *  - When ctx->num_cs_dw_queries_suspend is non-zero, two SET_CONTEXT_REG
 *    packets (six dwords) set DB_COUNT_CONTROL to PERFECT_ZPASS_COUNTS(1)
 *    and DB_RENDER_OVERRIDE to
 *    draw->db_render_override | NOOP_CULL_DISABLE(1).
 *  - INDEX_TYPE and NUM_INSTANCES packets fill pm4[0..3].
 *  - pm4[4] onwards holds either a DRAW_INDEX_2 packet followed by a NOP
 *    packet carrying the index-buffer relocation (up to pm4[11]), or a
 *    DRAW_INDEX_AUTO packet (up to pm4[6]).
 *
 * NOTE(review): this listing omits lines.  pm4 and va are never declared,
 * the first num_cs_dw_queries_suspend test has no visible body, pm4[6] of
 * the DRAW_INDEX_2 packet is never written, the branch choosing between
 * the indexed and auto packets is missing, and nothing advances cs->cdw
 * or changes ndwords.  Presumably the choice is made on draw->indices and
 * pm4[6] holds the low 32 bits of va -- confirm against the complete file.
 */
250 void si_context_draw(struct r600_context
*ctx
, const struct r600_draw
*draw
)
252 struct radeon_winsys_cs
*cs
= ctx
->cs
;
/* 7 matches the INDEX_TYPE + NUM_INSTANCES + DRAW_INDEX_AUTO dwords below. */
253 unsigned ndwords
= 7;
260 if (ctx
->num_cs_dw_queries_suspend
)
263 /* when increasing ndwords, bump the max limit too */
264 assert(ndwords
<= SI_MAX_DRAW_CS_DWORDS
);
266 /* queries need some special values
267 * (this is non-zero if any query is active) */
268 if (ctx
->num_cs_dw_queries_suspend
) {
269 pm4
= &cs
->buf
[cs
->cdw
];
/* Register index is the dword offset from the start of context space. */
270 pm4
[0] = PKT3(PKT3_SET_CONTEXT_REG
, 1, 0);
271 pm4
[1] = (R_028004_DB_COUNT_CONTROL
- SI_CONTEXT_REG_OFFSET
) >> 2;
272 pm4
[2] = S_028004_PERFECT_ZPASS_COUNTS(1);
273 pm4
[3] = PKT3(PKT3_SET_CONTEXT_REG
, 1, 0);
274 pm4
[4] = (R_02800C_DB_RENDER_OVERRIDE
- SI_CONTEXT_REG_OFFSET
) >> 2;
275 pm4
[5] = draw
->db_render_override
| S_02800C_NOOP_CULL_DISABLE(1);
/* Draw packets start at the current write position of the stream. */
281 pm4
= &cs
->buf
[cs
->cdw
];
282 pm4
[0] = PKT3(PKT3_INDEX_TYPE
, 0, ctx
->predicate_drawing
);
283 pm4
[1] = draw
->vgt_index_type
;
284 pm4
[2] = PKT3(PKT3_NUM_INSTANCES
, 0, ctx
->predicate_drawing
);
285 pm4
[3] = draw
->vgt_num_instances
;
/* Indexed draw: va is the index buffer address plus the start offset. */
287 va
= r600_resource_va(&ctx
->screen
->screen
, (void*)draw
->indices
);
288 va
+= draw
->indices_bo_offset
;
289 pm4
[4] = PKT3(PKT3_DRAW_INDEX_2
, 4, ctx
->predicate_drawing
);
/* Buffer width remaining past the offset, divided by the index size. */
290 pm4
[5] = (draw
->indices
->b
.b
.width0
- draw
->indices_bo_offset
) /
291 ctx
->index_buffer
.index_size
;
/* Only bits 32..39 of the address are kept in this dword. */
293 pm4
[7] = (va
>> 32UL) & 0xFF;
294 pm4
[8] = draw
->vgt_num_indices
;
295 pm4
[9] = draw
->vgt_draw_initiator
;
/* NOP packet whose payload is the relocation for the index buffer. */
296 pm4
[10] = PKT3(PKT3_NOP
, 0, ctx
->predicate_drawing
);
297 pm4
[11] = r600_context_bo_reloc(ctx
, draw
->indices
, RADEON_USAGE_READ
);
/* Non-indexed draw: same pm4[4] slot, shorter packet. */
299 pm4
[4] = PKT3(PKT3_DRAW_INDEX_AUTO
, 1, ctx
->predicate_drawing
);
300 pm4
[5] = draw
->vgt_num_indices
;
301 pm4
[6] = draw
->vgt_draw_initiator
;
/*
 * evergreen_flush_vgt_streamout - emit a VGT streamout flush and wait for
 * it to be reported in CP_STRMOUT_CNTL.
 *
 * Appends twelve dwords to ctx->cs, advancing cs->cdw:
 *  1. SET_CONFIG_REG writing 0 to CP_STRMOUT_CNTL;
 *  2. EVENT_WRITE with EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH, EVENT_INDEX(0);
 *  3. WAIT_REG_MEM polling CP_STRMOUT_CNTL until the masked value equals
 *     S_0084FC_OFFSET_UPDATE_DONE(1).
 *
 * NOTE(review): no check that the command stream has room for these
 * dwords is visible here; presumably the caller reserves the space --
 * confirm at the call sites.
 */
306 void evergreen_flush_vgt_streamout(struct r600_context
*ctx
)
308 struct radeon_winsys_cs
*cs
= ctx
->cs
;
/* Write 0 to CP_STRMOUT_CNTL before requesting the flush. */
310 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONFIG_REG
, 1, 0);
311 cs
->buf
[cs
->cdw
++] = (R_0084FC_CP_STRMOUT_CNTL
- SI_CONFIG_REG_OFFSET
) >> 2;
312 cs
->buf
[cs
->cdw
++] = 0;
/* Request the streamout flush. */
314 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_EVENT_WRITE
, 0, 0);
315 cs
->buf
[cs
->cdw
++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH
) | EVENT_INDEX(0);
/* Wait on the register; here its index is the absolute dword offset. */
317 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_WAIT_REG_MEM
, 5, 0);
318 cs
->buf
[cs
->cdw
++] = WAIT_REG_MEM_EQUAL
; /* wait until the register is equal to the reference value */
319 cs
->buf
[cs
->cdw
++] = R_0084FC_CP_STRMOUT_CNTL
>> 2; /* register */
320 cs
->buf
[cs
->cdw
++] = 0;
321 cs
->buf
[cs
->cdw
++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* reference value */
322 cs
->buf
[cs
->cdw
++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* mask */
323 cs
->buf
[cs
->cdw
++] = 4; /* poll interval */
/*
 * evergreen_set_streamout_enable - emit register writes that turn
 * streamout on or off.
 *
 * ctx:               context whose command stream (ctx->cs) is appended to.
 * buffer_enable_bit: when non-zero, passed to S_028B98_STREAM_0_BUFFER_EN()
 *                    as the value for VGT_STRMOUT_BUFFER_CONFIG.
 *
 * Inside the buffer_enable_bit test, two SET_CONTEXT_REG packets set
 * VGT_STRMOUT_CONFIG to STREAMOUT_0_EN(1) and VGT_STRMOUT_BUFFER_CONFIG to
 * STREAM_0_BUFFER_EN(buffer_enable_bit).  The last packet sets
 * VGT_STRMOUT_CONFIG to STREAMOUT_0_EN(0).
 *
 * NOTE(review): this listing shows neither the end of the if block nor an
 * else, and the function may continue past the visible text.  Presumably
 * the STREAMOUT_0_EN(0) write is the else branch, taken only when
 * buffer_enable_bit is zero -- confirm against the complete file.
 */
326 void evergreen_set_streamout_enable(struct r600_context
*ctx
, unsigned buffer_enable_bit
)
328 struct radeon_winsys_cs
*cs
= ctx
->cs
;
330 if (buffer_enable_bit
) {
/* Enable stream 0. */
331 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONTEXT_REG
, 1, 0);
332 cs
->buf
[cs
->cdw
++] = (R_028B94_VGT_STRMOUT_CONFIG
- SI_CONTEXT_REG_OFFSET
) >> 2;
333 cs
->buf
[cs
->cdw
++] = S_028B94_STREAMOUT_0_EN(1);
/* Select the buffers used by stream 0. */
335 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONTEXT_REG
, 1, 0);
336 cs
->buf
[cs
->cdw
++] = (R_028B98_VGT_STRMOUT_BUFFER_CONFIG
- SI_CONTEXT_REG_OFFSET
) >> 2;
337 cs
->buf
[cs
->cdw
++] = S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit
);
/* Disable stream 0. */
339 cs
->buf
[cs
->cdw
++] = PKT3(PKT3_SET_CONTEXT_REG
, 1, 0);
340 cs
->buf
[cs
->cdw
++] = (R_028B94_VGT_STRMOUT_CONFIG
- SI_CONTEXT_REG_OFFSET
) >> 2;
341 cs
->buf
[cs
->cdw
++] = S_028B94_STREAMOUT_0_EN(0);