r600g: always map uninitialized buffer range as unsynchronized
[mesa.git] / src / gallium / drivers / r600 / evergreen_hw_context.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Jerome Glisse
25 */
26 #include "r600_hw_context_priv.h"
27 #include "evergreend.h"
28 #include "util/u_memory.h"
29 #include "util/u_math.h"
30
31 static const struct r600_reg evergreen_context_reg_list[] = {
32 {R_02861C_SPI_VS_OUT_ID_0, 0, 0},
33 {R_028620_SPI_VS_OUT_ID_1, 0, 0},
34 {R_028624_SPI_VS_OUT_ID_2, 0, 0},
35 {R_028628_SPI_VS_OUT_ID_3, 0, 0},
36 {R_02862C_SPI_VS_OUT_ID_4, 0, 0},
37 {R_028630_SPI_VS_OUT_ID_5, 0, 0},
38 {R_028634_SPI_VS_OUT_ID_6, 0, 0},
39 {R_028638_SPI_VS_OUT_ID_7, 0, 0},
40 {R_02863C_SPI_VS_OUT_ID_8, 0, 0},
41 {R_028640_SPI_VS_OUT_ID_9, 0, 0},
42 {GROUP_FORCE_NEW_BLOCK, 0, 0},
43 {R_028644_SPI_PS_INPUT_CNTL_0, 0, 0},
44 {R_028648_SPI_PS_INPUT_CNTL_1, 0, 0},
45 {R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0},
46 {R_028650_SPI_PS_INPUT_CNTL_3, 0, 0},
47 {R_028654_SPI_PS_INPUT_CNTL_4, 0, 0},
48 {R_028658_SPI_PS_INPUT_CNTL_5, 0, 0},
49 {R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0},
50 {R_028660_SPI_PS_INPUT_CNTL_7, 0, 0},
51 {R_028664_SPI_PS_INPUT_CNTL_8, 0, 0},
52 {R_028668_SPI_PS_INPUT_CNTL_9, 0, 0},
53 {R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0},
54 {R_028670_SPI_PS_INPUT_CNTL_11, 0, 0},
55 {R_028674_SPI_PS_INPUT_CNTL_12, 0, 0},
56 {R_028678_SPI_PS_INPUT_CNTL_13, 0, 0},
57 {R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0},
58 {R_028680_SPI_PS_INPUT_CNTL_15, 0, 0},
59 {R_028684_SPI_PS_INPUT_CNTL_16, 0, 0},
60 {R_028688_SPI_PS_INPUT_CNTL_17, 0, 0},
61 {R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0},
62 {R_028690_SPI_PS_INPUT_CNTL_19, 0, 0},
63 {R_028694_SPI_PS_INPUT_CNTL_20, 0, 0},
64 {R_028698_SPI_PS_INPUT_CNTL_21, 0, 0},
65 {R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0},
66 {R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0},
67 {R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0},
68 {R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0},
69 {R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0},
70 {R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0},
71 {R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0},
72 {R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0},
73 {R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0},
74 {R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0},
75 {GROUP_FORCE_NEW_BLOCK, 0, 0},
76 {R_0286C4_SPI_VS_OUT_CONFIG, 0, 0},
77 {R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0},
78 {R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0},
79 {R_0286D8_SPI_INPUT_Z, 0, 0},
80 {R_0286E0_SPI_BARYC_CNTL, 0, 0},
81 {R_0286E4_SPI_PS_IN_CONTROL_2, 0, 0},
82 {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0},
83 {R_028844_SQ_PGM_RESOURCES_PS, 0, 0},
84 {R_02884C_SQ_PGM_EXPORTS_PS, 0, 0},
85 {R_02885C_SQ_PGM_START_VS, REG_FLAG_NEED_BO, 0},
86 {R_028860_SQ_PGM_RESOURCES_VS, 0, 0},
87 };
88
89 static const struct r600_reg cayman_context_reg_list[] = {
90 {R_02861C_SPI_VS_OUT_ID_0, 0, 0},
91 {R_028620_SPI_VS_OUT_ID_1, 0, 0},
92 {R_028624_SPI_VS_OUT_ID_2, 0, 0},
93 {R_028628_SPI_VS_OUT_ID_3, 0, 0},
94 {R_02862C_SPI_VS_OUT_ID_4, 0, 0},
95 {R_028630_SPI_VS_OUT_ID_5, 0, 0},
96 {R_028634_SPI_VS_OUT_ID_6, 0, 0},
97 {R_028638_SPI_VS_OUT_ID_7, 0, 0},
98 {R_02863C_SPI_VS_OUT_ID_8, 0, 0},
99 {R_028640_SPI_VS_OUT_ID_9, 0, 0},
100 {R_028644_SPI_PS_INPUT_CNTL_0, 0, 0},
101 {R_028648_SPI_PS_INPUT_CNTL_1, 0, 0},
102 {R_02864C_SPI_PS_INPUT_CNTL_2, 0, 0},
103 {R_028650_SPI_PS_INPUT_CNTL_3, 0, 0},
104 {R_028654_SPI_PS_INPUT_CNTL_4, 0, 0},
105 {R_028658_SPI_PS_INPUT_CNTL_5, 0, 0},
106 {R_02865C_SPI_PS_INPUT_CNTL_6, 0, 0},
107 {R_028660_SPI_PS_INPUT_CNTL_7, 0, 0},
108 {R_028664_SPI_PS_INPUT_CNTL_8, 0, 0},
109 {R_028668_SPI_PS_INPUT_CNTL_9, 0, 0},
110 {R_02866C_SPI_PS_INPUT_CNTL_10, 0, 0},
111 {R_028670_SPI_PS_INPUT_CNTL_11, 0, 0},
112 {R_028674_SPI_PS_INPUT_CNTL_12, 0, 0},
113 {R_028678_SPI_PS_INPUT_CNTL_13, 0, 0},
114 {R_02867C_SPI_PS_INPUT_CNTL_14, 0, 0},
115 {R_028680_SPI_PS_INPUT_CNTL_15, 0, 0},
116 {R_028684_SPI_PS_INPUT_CNTL_16, 0, 0},
117 {R_028688_SPI_PS_INPUT_CNTL_17, 0, 0},
118 {R_02868C_SPI_PS_INPUT_CNTL_18, 0, 0},
119 {R_028690_SPI_PS_INPUT_CNTL_19, 0, 0},
120 {R_028694_SPI_PS_INPUT_CNTL_20, 0, 0},
121 {R_028698_SPI_PS_INPUT_CNTL_21, 0, 0},
122 {R_02869C_SPI_PS_INPUT_CNTL_22, 0, 0},
123 {R_0286A0_SPI_PS_INPUT_CNTL_23, 0, 0},
124 {R_0286A4_SPI_PS_INPUT_CNTL_24, 0, 0},
125 {R_0286A8_SPI_PS_INPUT_CNTL_25, 0, 0},
126 {R_0286AC_SPI_PS_INPUT_CNTL_26, 0, 0},
127 {R_0286B0_SPI_PS_INPUT_CNTL_27, 0, 0},
128 {R_0286B4_SPI_PS_INPUT_CNTL_28, 0, 0},
129 {R_0286B8_SPI_PS_INPUT_CNTL_29, 0, 0},
130 {R_0286BC_SPI_PS_INPUT_CNTL_30, 0, 0},
131 {R_0286C0_SPI_PS_INPUT_CNTL_31, 0, 0},
132 {R_0286C4_SPI_VS_OUT_CONFIG, 0, 0},
133 {R_0286CC_SPI_PS_IN_CONTROL_0, 0, 0},
134 {R_0286D0_SPI_PS_IN_CONTROL_1, 0, 0},
135 {R_0286D8_SPI_INPUT_Z, 0, 0},
136 {R_0286E0_SPI_BARYC_CNTL, 0, 0},
137 {R_0286E4_SPI_PS_IN_CONTROL_2, 0, 0},
138 {R_028840_SQ_PGM_START_PS, REG_FLAG_NEED_BO, 0},
139 {R_028844_SQ_PGM_RESOURCES_PS, 0, 0},
140 {R_02884C_SQ_PGM_EXPORTS_PS, 0, 0},
141 {R_02885C_SQ_PGM_START_VS, REG_FLAG_NEED_BO, 0},
142 {R_028860_SQ_PGM_RESOURCES_VS, 0, 0},
143 };
144
145 int evergreen_context_init(struct r600_context *ctx)
146 {
147 int r = 0;
148
149 /* add blocks */
150 if (ctx->family >= CHIP_CAYMAN)
151 r = r600_context_add_block(ctx, cayman_context_reg_list,
152 Elements(cayman_context_reg_list), PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET);
153 else
154 r = r600_context_add_block(ctx, evergreen_context_reg_list,
155 Elements(evergreen_context_reg_list), PKT3_SET_CONTEXT_REG, EVERGREEN_CONTEXT_REG_OFFSET);
156 if (r)
157 goto out_err;
158
159 r = r600_setup_block_table(ctx);
160 if (r)
161 goto out_err;
162
163 ctx->max_db = 8;
164 return 0;
165 out_err:
166 r600_context_fini(ctx);
167 return r;
168 }
169
170 void evergreen_flush_vgt_streamout(struct r600_context *ctx)
171 {
172 struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
173
174 r600_write_config_reg(cs, R_0084FC_CP_STRMOUT_CNTL, 0);
175
176 cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
177 cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0);
178
179 cs->buf[cs->cdw++] = PKT3(PKT3_WAIT_REG_MEM, 5, 0);
180 cs->buf[cs->cdw++] = WAIT_REG_MEM_EQUAL; /* wait until the register is equal to the reference value */
181 cs->buf[cs->cdw++] = R_0084FC_CP_STRMOUT_CNTL >> 2; /* register */
182 cs->buf[cs->cdw++] = 0;
183 cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* reference value */
184 cs->buf[cs->cdw++] = S_0084FC_OFFSET_UPDATE_DONE(1); /* mask */
185 cs->buf[cs->cdw++] = 4; /* poll interval */
186 }
187
188 void evergreen_set_streamout_enable(struct r600_context *ctx, unsigned buffer_enable_bit)
189 {
190 struct radeon_winsys_cs *cs = ctx->rings.gfx.cs;
191
192 if (buffer_enable_bit) {
193 r600_write_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
194 r600_write_value(cs, S_028B94_STREAMOUT_0_EN(1)); /* R_028B94_VGT_STRMOUT_CONFIG */
195 r600_write_value(cs, S_028B98_STREAM_0_BUFFER_EN(buffer_enable_bit)); /* R_028B98_VGT_STRMOUT_BUFFER_CONFIG */
196 } else {
197 r600_write_context_reg(cs, R_028B94_VGT_STRMOUT_CONFIG, S_028B94_STREAMOUT_0_EN(0));
198 }
199 }
200
201 void evergreen_dma_copy(struct r600_context *rctx,
202 struct pipe_resource *dst,
203 struct pipe_resource *src,
204 uint64_t dst_offset,
205 uint64_t src_offset,
206 uint64_t size)
207 {
208 struct radeon_winsys_cs *cs = rctx->rings.dma.cs;
209 unsigned i, ncopy, csize, sub_cmd, shift;
210 struct r600_resource *rdst = (struct r600_resource*)dst;
211 struct r600_resource *rsrc = (struct r600_resource*)src;
212
213 /* make sure that the dma ring is only one active */
214 rctx->rings.gfx.flush(rctx, RADEON_FLUSH_ASYNC);
215 dst_offset += r600_resource_va(&rctx->screen->screen, dst);
216 src_offset += r600_resource_va(&rctx->screen->screen, src);
217
218 /* see if we use dword or byte copy */
219 if (!(dst_offset & 0x3) && !(src_offset & 0x3) && !(size & 0x3)) {
220 size >>= 2;
221 sub_cmd = 0x00;
222 shift = 2;
223 } else {
224 sub_cmd = 0x40;
225 shift = 0;
226 }
227 ncopy = (size / 0x000fffff) + !!(size % 0x000fffff);
228
229 r600_need_dma_space(rctx, ncopy * 5);
230 for (i = 0; i < ncopy; i++) {
231 csize = size < 0x000fffff ? size : 0x000fffff;
232 /* emit reloc before writting cs so that cs is always in consistent state */
233 r600_context_bo_reloc(rctx, &rctx->rings.dma, rsrc, RADEON_USAGE_READ);
234 r600_context_bo_reloc(rctx, &rctx->rings.dma, rdst, RADEON_USAGE_WRITE);
235 cs->buf[cs->cdw++] = DMA_PACKET(DMA_PACKET_COPY, sub_cmd, csize);
236 cs->buf[cs->cdw++] = dst_offset & 0xffffffff;
237 cs->buf[cs->cdw++] = src_offset & 0xffffffff;
238 cs->buf[cs->cdw++] = (dst_offset >> 32UL) & 0xff;
239 cs->buf[cs->cdw++] = (src_offset >> 32UL) & 0xff;
240 dst_offset += csize << shift;
241 src_offset += csize << shift;
242 size -= csize;
243 }
244
245 util_range_add(&rdst->valid_buffer_range, dst_offset,
246 dst_offset + size);
247 }