/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 *      Marek Olšák <marek.olsak@amd.com>
 */
27 #include "radeonsi_pipe.h"
28 #include "radeonsi_resource.h"
29 #include "radeonsi_shader.h"
30 #include "r600_hw_context_priv.h"
32 #include "util/u_memory.h"
/* Number of context slots held by one descriptor buffer: the buffer is sized
 * as SI_NUM_CONTEXTS * context_size and slot ids wrap modulo this value. */
#define SI_NUM_CONTEXTS 256

/* All-zero descriptor, stored in place of the descriptor of an unbound slot. */
static const uint32_t null_desc[8] = {0};

/* Set this if you want the 3D engine to wait until CP DMA is done.
 * It should be set on the last CP DMA packet. */
#define R600_CP_DMA_SYNC	(1 << 0) /* R600+ */

/* Set this if the source data was used as a destination in a previous CP DMA
 * packet. It's for preventing a read-after-write (RAW) hazard between two
 * CP DMA packets. */
#define SI_CP_DMA_RAW_WAIT	(1 << 1) /* SI+ */
47 /* Emit a CP DMA packet to do a copy from one buffer to another.
48 * The size must fit in bits [20:0]. Notes:
50 static void si_emit_cp_dma_copy_buffer(struct r600_context
*rctx
,
51 uint64_t dst_va
, uint64_t src_va
,
52 unsigned size
, unsigned flags
)
54 struct radeon_winsys_cs
*cs
= rctx
->cs
;
55 uint32_t sync_flag
= flags
& R600_CP_DMA_SYNC
? PKT3_CP_DMA_CP_SYNC
: 0;
56 uint32_t raw_wait
= flags
& SI_CP_DMA_RAW_WAIT
? PKT3_CP_DMA_CMD_RAW_WAIT
: 0;
59 assert((size
& ((1<<21)-1)) == size
);
61 if (rctx
->chip_class
>= CIK
) {
62 radeon_emit(cs
, PKT3(PKT3_DMA_DATA
, 5, 0));
63 radeon_emit(cs
, sync_flag
); /* CP_SYNC [31] */
64 radeon_emit(cs
, src_va
); /* SRC_ADDR_LO [31:0] */
65 radeon_emit(cs
, src_va
>> 32); /* SRC_ADDR_HI [31:0] */
66 radeon_emit(cs
, dst_va
); /* DST_ADDR_LO [31:0] */
67 radeon_emit(cs
, dst_va
>> 32); /* DST_ADDR_HI [31:0] */
68 radeon_emit(cs
, size
| raw_wait
); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
70 radeon_emit(cs
, PKT3(PKT3_CP_DMA
, 4, 0));
71 radeon_emit(cs
, src_va
); /* SRC_ADDR_LO [31:0] */
72 radeon_emit(cs
, sync_flag
| ((src_va
>> 32) & 0xffff)); /* CP_SYNC [31] | SRC_ADDR_HI [15:0] */
73 radeon_emit(cs
, dst_va
); /* DST_ADDR_LO [31:0] */
74 radeon_emit(cs
, (dst_va
>> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
75 radeon_emit(cs
, size
| raw_wait
); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
79 /* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
80 static void si_emit_cp_dma_clear_buffer(struct r600_context
*rctx
,
81 uint64_t dst_va
, unsigned size
,
82 uint32_t clear_value
, unsigned flags
)
84 struct radeon_winsys_cs
*cs
= rctx
->cs
;
85 uint32_t sync_flag
= flags
& R600_CP_DMA_SYNC
? PKT3_CP_DMA_CP_SYNC
: 0;
86 uint32_t raw_wait
= flags
& SI_CP_DMA_RAW_WAIT
? PKT3_CP_DMA_CMD_RAW_WAIT
: 0;
89 assert((size
& ((1<<21)-1)) == size
);
91 if (rctx
->chip_class
>= CIK
) {
92 radeon_emit(cs
, PKT3(PKT3_DMA_DATA
, 5, 0));
93 radeon_emit(cs
, sync_flag
| PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
94 radeon_emit(cs
, clear_value
); /* DATA [31:0] */
96 radeon_emit(cs
, dst_va
); /* DST_ADDR_LO [31:0] */
97 radeon_emit(cs
, dst_va
>> 32); /* DST_ADDR_HI [15:0] */
98 radeon_emit(cs
, size
| raw_wait
); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
100 radeon_emit(cs
, PKT3(PKT3_CP_DMA
, 4, 0));
101 radeon_emit(cs
, clear_value
); /* DATA [31:0] */
102 radeon_emit(cs
, sync_flag
| PKT3_CP_DMA_SRC_SEL(2)); /* CP_SYNC [31] | SRC_SEL[30:29] */
103 radeon_emit(cs
, dst_va
); /* DST_ADDR_LO [31:0] */
104 radeon_emit(cs
, (dst_va
>> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
105 radeon_emit(cs
, size
| raw_wait
); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
/* Initialize a descriptor set and allocate its backing buffer.
 *
 * Stores emit_func, shader_userdata_reg, element_dw_size and num_elements in
 * desc, derives context_size from them, creates a PIPE_BIND_CUSTOM buffer of
 * SI_NUM_CONTEXTS * context_size, adds a read/write relocation for it and
 * emits a CP DMA clear of the whole buffer (width0) with the value 0.
 *
 * NOTE(review): this listing is incomplete. The opening and closing braces,
 * the declaration of va, the argument between PIPE_BIND_CUSTOM and the size
 * in the pipe_buffer_create call, and the trailing argument(s) of the
 * si_emit_cp_dma_clear_buffer call are not visible and must be recovered
 * from the upstream file before this function can compile.
 */
109 static void si_init_descriptors(struct r600_context
*rctx
,
110 struct si_descriptors
*desc
,
111 unsigned shader_userdata_reg
,
112 unsigned element_dw_size
,
113 unsigned num_elements
,
114 void (*emit_func
)(struct r600_context
*ctx
, struct si_atom
*state
))
118 desc
->atom
.emit
= emit_func
;
119 desc
->shader_userdata_reg
= shader_userdata_reg
;
120 desc
->element_dw_size
= element_dw_size
;
121 desc
->num_elements
= num_elements
;
/* Size of one context slot; the factor 4 presumably converts dwords to bytes. */
122 desc
->context_size
= num_elements
* element_dw_size
* 4;
/* One buffer holds SI_NUM_CONTEXTS context slots back to back. */
124 desc
->buffer
= (struct si_resource
*)
125 pipe_buffer_create(rctx
->context
.screen
, PIPE_BIND_CUSTOM
,
127 SI_NUM_CONTEXTS
* desc
->context_size
);
129 r600_context_bo_reloc(rctx
, desc
->buffer
, RADEON_USAGE_READWRITE
);
130 va
= r600_resource_va(rctx
->context
.screen
, &desc
->buffer
->b
.b
);
132 /* We don't check for CS space here, because this should be called
133 * only once at context initialization. */
134 si_emit_cp_dma_clear_buffer(rctx
, va
, desc
->buffer
->b
.b
.width0
, 0,
138 static void si_release_descriptors(struct si_descriptors
*desc
)
140 pipe_resource_reference((struct pipe_resource
**)&desc
->buffer
, NULL
);
/* Refresh the dirty flag of the descriptor atom from desc->dirty_mask.
 *
 * When any bit of dirty_mask is set, a size expression of
 * (4 + element_dw_size) per dirty bit plus 4 is evaluated and the atom is
 * marked dirty; the final assignment clears the dirty flag.
 *
 * NOTE(review): this listing is incomplete. The lines that name the
 * destination of the size expression (and any leading terms of it), the
 * else that presumably guards the final assignment, and the braces are not
 * visible; recover them from the upstream file.
 */
143 static void si_update_descriptors(struct si_descriptors
*desc
)
145 if (desc
->dirty_mask
) {
148 (4 + desc
->element_dw_size
) * util_bitcount(desc
->dirty_mask
) + /* update */
149 4; /* pointer update */
150 desc
->atom
.dirty
= true;
152 desc
->atom
.dirty
= false;
156 static void si_emit_shader_pointer(struct r600_context
*rctx
,
157 struct si_descriptors
*desc
)
159 struct radeon_winsys_cs
*cs
= rctx
->cs
;
160 uint64_t va
= r600_resource_va(rctx
->context
.screen
, &desc
->buffer
->b
.b
) +
161 desc
->current_context_id
* desc
->context_size
;
163 radeon_emit(cs
, PKT3(PKT3_SET_SH_REG
, 2, 0));
164 radeon_emit(cs
, (desc
->shader_userdata_reg
- SI_SH_REG_OFFSET
) >> 2);
166 radeon_emit(cs
, va
>> 32);
169 static void si_emit_descriptors(struct r600_context
*rctx
,
170 struct si_descriptors
*desc
,
171 const uint32_t **descriptors
)
173 struct radeon_winsys_cs
*cs
= rctx
->cs
;
177 int last_index
= desc
->num_elements
; /* point to a non-existing element */
178 unsigned dirty_mask
= desc
->dirty_mask
;
179 unsigned new_context_id
= (desc
->current_context_id
+ 1) % SI_NUM_CONTEXTS
;
183 va_base
= r600_resource_va(rctx
->context
.screen
, &desc
->buffer
->b
.b
);
185 /* Copy the descriptors to a new context slot. */
186 si_emit_cp_dma_copy_buffer(rctx
,
187 va_base
+ new_context_id
* desc
->context_size
,
188 va_base
+ desc
->current_context_id
* desc
->context_size
,
189 desc
->context_size
, R600_CP_DMA_SYNC
);
191 va_base
+= new_context_id
* desc
->context_size
;
193 /* Update the descriptors.
194 * Updates of consecutive descriptors are merged to one WRITE_DATA packet.
196 * XXX When unbinding lots of resources, consider clearing the memory
197 * with CP DMA instead of emitting zeros.
200 int i
= u_bit_scan(&dirty_mask
);
202 assert(i
< desc
->num_elements
);
204 if (last_index
+1 == i
&& packet_size
) {
205 /* Append new data at the end of the last packet. */
206 packet_size
+= desc
->element_dw_size
;
207 cs
->buf
[packet_start
] = PKT3(PKT3_WRITE_DATA
, packet_size
, 0);
209 /* Start a new packet. */
210 uint64_t va
= va_base
+ i
* desc
->element_dw_size
* 4;
212 packet_start
= cs
->cdw
;
213 packet_size
= 2 + desc
->element_dw_size
;
215 radeon_emit(cs
, PKT3(PKT3_WRITE_DATA
, packet_size
, 0));
216 radeon_emit(cs
, PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC
) |
217 PKT3_WRITE_DATA_WR_CONFIRM
|
218 PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME
));
219 radeon_emit(cs
, va
& 0xFFFFFFFFUL
);
220 radeon_emit(cs
, (va
>> 32UL) & 0xFFFFFFFFUL
);
223 radeon_emit_array(cs
, descriptors
[i
], desc
->element_dw_size
);
228 desc
->dirty_mask
= 0;
229 desc
->current_context_id
= new_context_id
;
231 /* Now update the shader userdata pointer. */
232 si_emit_shader_pointer(rctx
, desc
);
235 static unsigned si_get_shader_user_data_base(unsigned shader
)
238 case PIPE_SHADER_VERTEX
:
239 return R_00B130_SPI_SHADER_USER_DATA_VS_0
;
240 case PIPE_SHADER_GEOMETRY
:
241 return R_00B230_SPI_SHADER_USER_DATA_GS_0
;
242 case PIPE_SHADER_FRAGMENT
:
243 return R_00B030_SPI_SHADER_USER_DATA_PS_0
;
252 static void si_emit_sampler_views(struct r600_context
*rctx
, struct si_atom
*atom
)
254 struct si_sampler_views
*views
= (struct si_sampler_views
*)atom
;
256 si_emit_descriptors(rctx
, &views
->desc
, views
->desc_data
);
259 static void si_init_sampler_views(struct r600_context
*rctx
,
260 struct si_sampler_views
*views
,
263 si_init_descriptors(rctx
, &views
->desc
,
264 si_get_shader_user_data_base(shader
) +
265 SI_SGPR_RESOURCE
* 4,
266 8, 16, si_emit_sampler_views
);
269 static void si_release_sampler_views(struct si_sampler_views
*views
)
273 for (i
= 0; i
< Elements(views
->views
); i
++) {
274 pipe_sampler_view_reference(&views
->views
[i
], NULL
);
276 si_release_descriptors(&views
->desc
);
279 static void si_sampler_views_begin_new_cs(struct r600_context
*rctx
,
280 struct si_sampler_views
*views
)
282 unsigned mask
= views
->desc
.enabled_mask
;
284 /* Add relocations to the CS. */
286 int i
= u_bit_scan(&mask
);
287 struct si_pipe_sampler_view
*rview
=
288 (struct si_pipe_sampler_view
*)views
->views
[i
];
290 r600_context_bo_reloc(rctx
, rview
->resource
, RADEON_USAGE_READ
);
293 r600_context_bo_reloc(rctx
, views
->desc
.buffer
, RADEON_USAGE_READWRITE
);
295 si_emit_shader_pointer(rctx
, &views
->desc
);
298 void si_set_sampler_view(struct r600_context
*rctx
, unsigned shader
,
299 unsigned slot
, struct pipe_sampler_view
*view
,
302 struct si_sampler_views
*views
= &rctx
->samplers
[shader
].views
;
304 if (views
->views
[slot
] == view
)
308 struct si_pipe_sampler_view
*rview
=
309 (struct si_pipe_sampler_view
*)view
;
311 r600_context_bo_reloc(rctx
, rview
->resource
, RADEON_USAGE_READ
);
313 pipe_sampler_view_reference(&views
->views
[slot
], view
);
314 views
->desc_data
[slot
] = view_desc
;
315 views
->desc
.enabled_mask
|= 1 << slot
;
317 pipe_sampler_view_reference(&views
->views
[slot
], NULL
);
318 views
->desc_data
[slot
] = null_desc
;
319 views
->desc
.enabled_mask
&= ~(1 << slot
);
322 views
->desc
.dirty_mask
|= 1 << slot
;
323 si_update_descriptors(&views
->desc
);
328 void si_init_all_descriptors(struct r600_context
*rctx
)
332 for (i
= 0; i
< SI_NUM_SHADERS
; i
++) {
333 si_init_sampler_views(rctx
, &rctx
->samplers
[i
].views
, i
);
335 rctx
->atoms
.sampler_views
[i
] = &rctx
->samplers
[i
].views
.desc
.atom
;
339 void si_release_all_descriptors(struct r600_context
*rctx
)
343 for (i
= 0; i
< SI_NUM_SHADERS
; i
++) {
344 si_release_sampler_views(&rctx
->samplers
[i
].views
);
348 void si_all_descriptors_begin_new_cs(struct r600_context
*rctx
)
352 for (i
= 0; i
< SI_NUM_SHADERS
; i
++) {
353 si_sampler_views_begin_new_cs(rctx
, &rctx
->samplers
[i
].views
);