/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Marek Olšák <marek.olsak@amd.com>
 */
27 #include "radeonsi_pipe.h"
28 #include "radeonsi_resource.h"
29 #include "radeonsi_shader.h"
30 #include "r600_hw_context_priv.h"
32 #include "util/u_memory.h"
/* Number of descriptor-array copies ("contexts") stored back to back in one
 * descriptor buffer. The buffer is sized for this many copies and the current
 * copy index advances modulo this value on every descriptor update. */
#define SI_NUM_CONTEXTS 256

/* All-zero descriptor (8 dwords) that stands in for an unbound slot. */
static const uint32_t null_desc[8] = {0};

/* Set this if you want the 3D engine to wait until CP DMA is done.
 * It should be set on the last CP DMA packet. */
#define R600_CP_DMA_SYNC	(1 << 0) /* R600+ */

/* Set this if the source data was used as a destination in a previous CP DMA
 * packet. It's for preventing a read-after-write (RAW) hazard between two
 * CP DMA packets. */
#define SI_CP_DMA_RAW_WAIT	(1 << 1) /* SI+ */
47 /* Emit a CP DMA packet to do a copy from one buffer to another.
48 * The size must fit in bits [20:0]. Notes:
50 static void si_emit_cp_dma_copy_buffer(struct r600_context
*rctx
,
51 uint64_t dst_va
, uint64_t src_va
,
52 unsigned size
, unsigned flags
)
54 struct radeon_winsys_cs
*cs
= rctx
->cs
;
55 uint32_t sync_flag
= flags
& R600_CP_DMA_SYNC
? PKT3_CP_DMA_CP_SYNC
: 0;
56 uint32_t raw_wait
= flags
& SI_CP_DMA_RAW_WAIT
? PKT3_CP_DMA_CMD_RAW_WAIT
: 0;
59 assert((size
& ((1<<21)-1)) == size
);
61 if (rctx
->chip_class
>= CIK
) {
62 radeon_emit(cs
, PKT3(PKT3_DMA_DATA
, 5, 0));
63 radeon_emit(cs
, sync_flag
); /* CP_SYNC [31] */
64 radeon_emit(cs
, src_va
); /* SRC_ADDR_LO [31:0] */
65 radeon_emit(cs
, src_va
>> 32); /* SRC_ADDR_HI [31:0] */
66 radeon_emit(cs
, dst_va
); /* DST_ADDR_LO [31:0] */
67 radeon_emit(cs
, dst_va
>> 32); /* DST_ADDR_HI [31:0] */
68 radeon_emit(cs
, size
| raw_wait
); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
70 radeon_emit(cs
, PKT3(PKT3_CP_DMA
, 4, 0));
71 radeon_emit(cs
, src_va
); /* SRC_ADDR_LO [31:0] */
72 radeon_emit(cs
, sync_flag
| ((src_va
>> 32) & 0xffff)); /* CP_SYNC [31] | SRC_ADDR_HI [15:0] */
73 radeon_emit(cs
, dst_va
); /* DST_ADDR_LO [31:0] */
74 radeon_emit(cs
, (dst_va
>> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
75 radeon_emit(cs
, size
| raw_wait
); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
79 /* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
80 static void si_emit_cp_dma_clear_buffer(struct r600_context
*rctx
,
81 uint64_t dst_va
, unsigned size
,
82 uint32_t clear_value
, unsigned flags
)
84 struct radeon_winsys_cs
*cs
= rctx
->cs
;
85 uint32_t sync_flag
= flags
& R600_CP_DMA_SYNC
? PKT3_CP_DMA_CP_SYNC
: 0;
86 uint32_t raw_wait
= flags
& SI_CP_DMA_RAW_WAIT
? PKT3_CP_DMA_CMD_RAW_WAIT
: 0;
89 assert((size
& ((1<<21)-1)) == size
);
91 if (rctx
->chip_class
>= CIK
) {
92 radeon_emit(cs
, PKT3(PKT3_DMA_DATA
, 5, 0));
93 radeon_emit(cs
, sync_flag
| PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
94 radeon_emit(cs
, clear_value
); /* DATA [31:0] */
96 radeon_emit(cs
, dst_va
); /* DST_ADDR_LO [31:0] */
97 radeon_emit(cs
, dst_va
>> 32); /* DST_ADDR_HI [15:0] */
98 radeon_emit(cs
, size
| raw_wait
); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
100 radeon_emit(cs
, PKT3(PKT3_CP_DMA
, 4, 0));
101 radeon_emit(cs
, clear_value
); /* DATA [31:0] */
102 radeon_emit(cs
, sync_flag
| PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
103 radeon_emit(cs
, dst_va
); /* DST_ADDR_LO [31:0] */
104 radeon_emit(cs
, (dst_va
>> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
105 radeon_emit(cs
, size
| raw_wait
); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
109 static void si_init_descriptors(struct r600_context
*rctx
,
110 struct si_descriptors
*desc
,
111 unsigned shader_userdata_reg
,
112 unsigned element_dw_size
,
113 unsigned num_elements
,
114 void (*emit_func
)(struct r600_context
*ctx
, struct si_atom
*state
))
118 assert(num_elements
<= sizeof(desc
->enabled_mask
)*8);
119 assert(num_elements
<= sizeof(desc
->dirty_mask
)*8);
121 desc
->atom
.emit
= emit_func
;
122 desc
->shader_userdata_reg
= shader_userdata_reg
;
123 desc
->element_dw_size
= element_dw_size
;
124 desc
->num_elements
= num_elements
;
125 desc
->context_size
= num_elements
* element_dw_size
* 4;
127 desc
->buffer
= (struct si_resource
*)
128 pipe_buffer_create(rctx
->context
.screen
, PIPE_BIND_CUSTOM
,
130 SI_NUM_CONTEXTS
* desc
->context_size
);
132 r600_context_bo_reloc(rctx
, desc
->buffer
, RADEON_USAGE_READWRITE
);
133 va
= r600_resource_va(rctx
->context
.screen
, &desc
->buffer
->b
.b
);
135 /* We don't check for CS space here, because this should be called
136 * only once at context initialization. */
137 si_emit_cp_dma_clear_buffer(rctx
, va
, desc
->buffer
->b
.b
.width0
, 0,
141 static void si_release_descriptors(struct si_descriptors
*desc
)
143 pipe_resource_reference((struct pipe_resource
**)&desc
->buffer
, NULL
);
146 static void si_update_descriptors(struct si_descriptors
*desc
)
148 if (desc
->dirty_mask
) {
151 (4 + desc
->element_dw_size
) * util_bitcount(desc
->dirty_mask
) + /* update */
152 4; /* pointer update */
153 desc
->atom
.dirty
= true;
155 desc
->atom
.dirty
= false;
159 static void si_emit_shader_pointer(struct r600_context
*rctx
,
160 struct si_descriptors
*desc
)
162 struct radeon_winsys_cs
*cs
= rctx
->cs
;
163 uint64_t va
= r600_resource_va(rctx
->context
.screen
, &desc
->buffer
->b
.b
) +
164 desc
->current_context_id
* desc
->context_size
;
166 radeon_emit(cs
, PKT3(PKT3_SET_SH_REG
, 2, 0));
167 radeon_emit(cs
, (desc
->shader_userdata_reg
- SI_SH_REG_OFFSET
) >> 2);
169 radeon_emit(cs
, va
>> 32);
172 static void si_emit_descriptors(struct r600_context
*rctx
,
173 struct si_descriptors
*desc
,
174 const uint32_t **descriptors
)
176 struct radeon_winsys_cs
*cs
= rctx
->cs
;
180 int last_index
= desc
->num_elements
; /* point to a non-existing element */
181 unsigned dirty_mask
= desc
->dirty_mask
;
182 unsigned new_context_id
= (desc
->current_context_id
+ 1) % SI_NUM_CONTEXTS
;
186 va_base
= r600_resource_va(rctx
->context
.screen
, &desc
->buffer
->b
.b
);
188 /* Copy the descriptors to a new context slot. */
189 si_emit_cp_dma_copy_buffer(rctx
,
190 va_base
+ new_context_id
* desc
->context_size
,
191 va_base
+ desc
->current_context_id
* desc
->context_size
,
192 desc
->context_size
, R600_CP_DMA_SYNC
);
194 va_base
+= new_context_id
* desc
->context_size
;
196 /* Update the descriptors.
197 * Updates of consecutive descriptors are merged to one WRITE_DATA packet.
199 * XXX When unbinding lots of resources, consider clearing the memory
200 * with CP DMA instead of emitting zeros.
203 int i
= u_bit_scan(&dirty_mask
);
205 assert(i
< desc
->num_elements
);
207 if (last_index
+1 == i
&& packet_size
) {
208 /* Append new data at the end of the last packet. */
209 packet_size
+= desc
->element_dw_size
;
210 cs
->buf
[packet_start
] = PKT3(PKT3_WRITE_DATA
, packet_size
, 0);
212 /* Start a new packet. */
213 uint64_t va
= va_base
+ i
* desc
->element_dw_size
* 4;
215 packet_start
= cs
->cdw
;
216 packet_size
= 2 + desc
->element_dw_size
;
218 radeon_emit(cs
, PKT3(PKT3_WRITE_DATA
, packet_size
, 0));
219 radeon_emit(cs
, PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC
) |
220 PKT3_WRITE_DATA_WR_CONFIRM
|
221 PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME
));
222 radeon_emit(cs
, va
& 0xFFFFFFFFUL
);
223 radeon_emit(cs
, (va
>> 32UL) & 0xFFFFFFFFUL
);
226 radeon_emit_array(cs
, descriptors
[i
], desc
->element_dw_size
);
231 desc
->dirty_mask
= 0;
232 desc
->current_context_id
= new_context_id
;
234 /* Now update the shader userdata pointer. */
235 si_emit_shader_pointer(rctx
, desc
);
238 static unsigned si_get_shader_user_data_base(unsigned shader
)
241 case PIPE_SHADER_VERTEX
:
242 return R_00B130_SPI_SHADER_USER_DATA_VS_0
;
243 case PIPE_SHADER_GEOMETRY
:
244 return R_00B230_SPI_SHADER_USER_DATA_GS_0
;
245 case PIPE_SHADER_FRAGMENT
:
246 return R_00B030_SPI_SHADER_USER_DATA_PS_0
;
255 static void si_emit_sampler_views(struct r600_context
*rctx
, struct si_atom
*atom
)
257 struct si_sampler_views
*views
= (struct si_sampler_views
*)atom
;
259 si_emit_descriptors(rctx
, &views
->desc
, views
->desc_data
);
262 static void si_init_sampler_views(struct r600_context
*rctx
,
263 struct si_sampler_views
*views
,
266 si_init_descriptors(rctx
, &views
->desc
,
267 si_get_shader_user_data_base(shader
) +
268 SI_SGPR_RESOURCE
* 4,
269 8, NUM_SAMPLER_VIEWS
, si_emit_sampler_views
);
272 static void si_release_sampler_views(struct si_sampler_views
*views
)
276 for (i
= 0; i
< Elements(views
->views
); i
++) {
277 pipe_sampler_view_reference(&views
->views
[i
], NULL
);
279 si_release_descriptors(&views
->desc
);
282 static void si_sampler_views_begin_new_cs(struct r600_context
*rctx
,
283 struct si_sampler_views
*views
)
285 unsigned mask
= views
->desc
.enabled_mask
;
287 /* Add relocations to the CS. */
289 int i
= u_bit_scan(&mask
);
290 struct si_pipe_sampler_view
*rview
=
291 (struct si_pipe_sampler_view
*)views
->views
[i
];
293 r600_context_bo_reloc(rctx
, rview
->resource
, RADEON_USAGE_READ
);
296 r600_context_bo_reloc(rctx
, views
->desc
.buffer
, RADEON_USAGE_READWRITE
);
298 si_emit_shader_pointer(rctx
, &views
->desc
);
301 void si_set_sampler_view(struct r600_context
*rctx
, unsigned shader
,
302 unsigned slot
, struct pipe_sampler_view
*view
,
305 struct si_sampler_views
*views
= &rctx
->samplers
[shader
].views
;
307 if (views
->views
[slot
] == view
)
311 struct si_pipe_sampler_view
*rview
=
312 (struct si_pipe_sampler_view
*)view
;
314 r600_context_bo_reloc(rctx
, rview
->resource
, RADEON_USAGE_READ
);
316 pipe_sampler_view_reference(&views
->views
[slot
], view
);
317 views
->desc_data
[slot
] = view_desc
;
318 views
->desc
.enabled_mask
|= 1 << slot
;
320 pipe_sampler_view_reference(&views
->views
[slot
], NULL
);
321 views
->desc_data
[slot
] = null_desc
;
322 views
->desc
.enabled_mask
&= ~(1 << slot
);
325 views
->desc
.dirty_mask
|= 1 << slot
;
326 si_update_descriptors(&views
->desc
);
331 void si_init_all_descriptors(struct r600_context
*rctx
)
335 for (i
= 0; i
< SI_NUM_SHADERS
; i
++) {
336 si_init_sampler_views(rctx
, &rctx
->samplers
[i
].views
, i
);
338 rctx
->atoms
.sampler_views
[i
] = &rctx
->samplers
[i
].views
.desc
.atom
;
342 void si_release_all_descriptors(struct r600_context
*rctx
)
346 for (i
= 0; i
< SI_NUM_SHADERS
; i
++) {
347 si_release_sampler_views(&rctx
->samplers
[i
].views
);
351 void si_all_descriptors_begin_new_cs(struct r600_context
*rctx
)
355 for (i
= 0; i
< SI_NUM_SHADERS
; i
++) {
356 si_sampler_views_begin_new_cs(rctx
, &rctx
->samplers
[i
].views
);