2 * Copyright 2016 Red Hat.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 #include "sp_context.h"
25 #include "sp_buffer.h"
26 #include "sp_texture.h"
28 #include "util/format/u_format.h"
/*
 * get_dimensions: stores the buffer view's buffer_size in *width and then
 * compares that value against width0 of the underlying softpipe resource.
 *
 * NOTE(review): the return type, the declaration of the width parameter and
 * the statements that follow the comparison are missing from this extract.
 * Callers in this file test the result as `!get_dimensions(...)`, so it is
 * presumably a success/failure value - confirm against the full source.
 */
31 get_dimensions(const struct pipe_shader_buffer
*bview
,
32 const struct softpipe_resource
*spr
,
35 *width
= bview
->buffer_size
;
37 * Bounds check the buffer size from the view
38 * and the buffer size from the underlying buffer.
/* The view is rejected/flagged when it claims more bytes than width0. */
40 if (*width
> spr
->base
.width0
)
46 * Implement the buffer LOAD operation.
/*
 * sp_tgsi_load: reads four 32-bit values per quad lane from the shader
 * buffer selected by params->unit into rgba[channel][lane].
 *
 *   buffer - cast to struct sp_tgsi_buffer to reach the sp_bview[] array
 *   params - supplies the buffer unit and the execution mask
 *   s      - per-lane coordinates (NOTE(review): the line that derives
 *            s_coord from s[] is missing from this extract)
 *   rgba   - output, indexed [channel][lane]
 *
 * An out-of-range unit jumps to fail_write_all_zero, which zero-fills rgba.
 */
49 sp_tgsi_load(const struct tgsi_buffer
*buffer
,
50 const struct tgsi_buffer_params
*params
,
51 const int s
[TGSI_QUAD_SIZE
],
52 float rgba
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
])
54 struct sp_tgsi_buffer
*sp_buf
= (struct sp_tgsi_buffer
*)buffer
;
55 struct pipe_shader_buffer
*bview
;
56 struct softpipe_resource
*spr
;
/* Reject unit indices that would fall outside sp_bview[]. */
60 if (params
->unit
>= PIPE_MAX_SHADER_BUFFERS
)
61 goto fail_write_all_zero
;
63 bview
= &sp_buf
->sp_bview
[params
->unit
];
64 spr
= softpipe_resource(bview
->buffer
);
/* NOTE(review): the condition guarding this goto is missing from the extract. */
66 goto fail_write_all_zero
;
/* NOTE(review): the statement executed when get_dimensions() fails is not visible. */
68 if (!get_dimensions(bview
, spr
, &width
))
/* One iteration per lane of the quad. */
71 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
73 bool fill_zero
= false;
/* Lane j is tested against the execution mask. */
75 if (!(params
->execmask
& (1 << j
)))
/* NOTE(review): body of this per-channel loop is missing; presumably the zero-fill path. */
83 for (c
= 0; c
< 4; c
++)
/* Source address: resource data + view offset + s_coord, all in bytes. */
87 uint32_t *src
= (uint32_t *)((unsigned char *)spr
->data
+
88 bview
->buffer_offset
+ s_coord
);
/* memcpy moves the raw 4-byte value of each channel into the float array. */
89 for (c
= 0; c
< 4; c
++) {
90 memcpy(&rgba
[c
][j
], &src
[c
], 4);
/* Failure path: clear TGSI_NUM_CHANNELS * TGSI_QUAD_SIZE 4-byte entries. */
95 memset(rgba
, 0, TGSI_NUM_CHANNELS
* TGSI_QUAD_SIZE
* 4);
100 * Implement the buffer STORE operation.
/*
 * sp_tgsi_store: writes up to four 32-bit values per quad lane from
 * rgba[channel][lane] into the shader buffer selected by params->unit.
 *
 *   buffer - cast to struct sp_tgsi_buffer to reach the sp_bview[] array
 *   params - supplies the buffer unit, execution mask and channel writemask
 *   s      - per-lane coordinates (NOTE(review): the line that derives
 *            s_coord from s[] is missing from this extract)
 *   rgba   - input, indexed [channel][lane]
 *
 * NOTE(review): the statements taken on the failed checks below (bad unit,
 * get_dimensions failure, masked-out lane, out-of-range coordinate) are not
 * visible here.
 */
103 sp_tgsi_store(const struct tgsi_buffer
*buffer
,
104 const struct tgsi_buffer_params
*params
,
105 const int s
[TGSI_QUAD_SIZE
],
106 float rgba
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
])
108 struct sp_tgsi_buffer
*sp_buf
= (struct sp_tgsi_buffer
*)buffer
;
109 struct pipe_shader_buffer
*bview
;
110 struct softpipe_resource
*spr
;
/* Reject unit indices that would fall outside sp_bview[]. */
114 if (params
->unit
>= PIPE_MAX_SHADER_BUFFERS
)
117 bview
= &sp_buf
->sp_bview
[params
->unit
];
118 spr
= softpipe_resource(bview
->buffer
);
122 if (!get_dimensions(bview
, spr
, &width
))
/* One iteration per lane of the quad. */
125 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
/* Lane j is tested against the execution mask. */
128 if (!(params
->execmask
& (1 << j
)))
/* Coordinate is compared against the width obtained from get_dimensions(). */
132 if (s_coord
>= width
)
/* Destination address: resource data + view offset + s_coord, all in bytes. */
135 uint32_t *dst
= (uint32_t *)((unsigned char *)spr
->data
+
136 bview
->buffer_offset
+ s_coord
);
/* Only channels whose bit is set in writemask are copied (4 bytes each). */
138 for (c
= 0; c
< 4; c
++) {
139 if (params
->writemask
& (1 << c
))
140 memcpy(&dst
[c
], &rgba
[c
][j
], 4);
146 * Implement atomic operations on 32-bit unsigned, signed and float values.
/*
 * handle_op_atomic: performs one read-modify-write on four consecutive
 * 32-bit values at data_ptr for quad lane qi.
 *
 * Visible flow:
 *   1. the four values at data_ptr are copied into sdata[];
 *   2. a loop copies sdata[] into rgba[c][qi];
 *   3. a switch on opcode computes the new sdata[] from rgba (and rgba2 for
 *      ATOMCAS); in every case whose full body is visible, the value held in
 *      sdata[] before the operation is written back into rgba[c][qi];
 *   4. channels selected by writemask are copied from sdata[] to data_ptr.
 *
 * NOTE(review): several parameter lines (the call site passes just_read, a
 * lane index and a writemask), the sdata declaration, the condition around
 * step 2 and the break/return statements are missing from this extract.
 */
149 handle_op_atomic(const struct pipe_shader_buffer
*bview
,
151 unsigned char *data_ptr
,
153 enum tgsi_opcode opcode
,
155 float rgba
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
],
156 float rgba2
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
])
/* Load the current memory contents, 4 bytes per channel. */
161 for (c
= 0; c
< 4; c
++) {
162 memcpy(&sdata
[c
], data_ptr
+ (c
* 4), 4);
/*
 * Hand the loaded values back through rgba for lane qi.
 * NOTE(review): the guarding condition for this loop is not visible.
 */
166 for (c
= 0; c
< 4; c
++) {
167 ((uint32_t *)rgba
[c
])[qi
] = sdata
[c
];
/* Add: memory += operand; rgba receives the pre-add value. */
173 case TGSI_OPCODE_ATOMUADD
:
174 for (c
= 0; c
< 4; c
++) {
175 unsigned temp
= sdata
[c
];
176 sdata
[c
] += ((uint32_t *)rgba
[c
])[qi
];
177 ((uint32_t *)rgba
[c
])[qi
] = temp
;
/* Exchange: memory = operand; rgba receives the previous value. */
180 case TGSI_OPCODE_ATOMXCHG
:
181 for (c
= 0; c
< 4; c
++) {
182 unsigned temp
= sdata
[c
];
183 sdata
[c
] = ((uint32_t *)rgba
[c
])[qi
];
184 ((uint32_t *)rgba
[c
])[qi
] = temp
;
/*
 * Compare-and-swap: rgba holds the comparand, rgba2 the replacement.
 * Memory takes the replacement only when it equals the comparand;
 * rgba receives the previous value either way.
 */
187 case TGSI_OPCODE_ATOMCAS
:
188 for (c
= 0; c
< 4; c
++) {
189 unsigned dst_x
= sdata
[c
];
190 unsigned cmp_x
= ((uint32_t *)rgba
[c
])[qi
];
191 unsigned src_x
= ((uint32_t *)rgba2
[c
])[qi
];
192 unsigned temp
= sdata
[c
];
193 sdata
[c
] = (dst_x
== cmp_x
) ? src_x
: dst_x
;
194 ((uint32_t *)rgba
[c
])[qi
] = temp
;
/* Bitwise AND with the operand; rgba receives the previous value. */
197 case TGSI_OPCODE_ATOMAND
:
198 for (c
= 0; c
< 4; c
++) {
199 unsigned temp
= sdata
[c
];
200 sdata
[c
] &= ((uint32_t *)rgba
[c
])[qi
];
201 ((uint32_t *)rgba
[c
])[qi
] = temp
;
/* Bitwise OR with the operand; rgba receives the previous value. */
204 case TGSI_OPCODE_ATOMOR
:
205 for (c
= 0; c
< 4; c
++) {
206 unsigned temp
= sdata
[c
];
207 sdata
[c
] |= ((uint32_t *)rgba
[c
])[qi
];
208 ((uint32_t *)rgba
[c
])[qi
] = temp
;
/* Bitwise XOR with the operand; rgba receives the previous value. */
211 case TGSI_OPCODE_ATOMXOR
:
212 for (c
= 0; c
< 4; c
++) {
213 unsigned temp
= sdata
[c
];
214 sdata
[c
] ^= ((uint32_t *)rgba
[c
])[qi
];
215 ((uint32_t *)rgba
[c
])[qi
] = temp
;
/* Minimum, with both operands held in unsigned locals. */
218 case TGSI_OPCODE_ATOMUMIN
:
219 for (c
= 0; c
< 4; c
++) {
220 unsigned dst_x
= sdata
[c
];
221 unsigned src_x
= ((uint32_t *)rgba
[c
])[qi
];
222 sdata
[c
] = MIN2(dst_x
, src_x
);
223 ((uint32_t *)rgba
[c
])[qi
] = dst_x
;
/* Maximum, with both operands held in unsigned locals. */
226 case TGSI_OPCODE_ATOMUMAX
:
227 for (c
= 0; c
< 4; c
++) {
228 unsigned dst_x
= sdata
[c
];
229 unsigned src_x
= ((uint32_t *)rgba
[c
])[qi
];
230 sdata
[c
] = MAX2(dst_x
, src_x
);
231 ((uint32_t *)rgba
[c
])[qi
] = dst_x
;
/* Minimum, with both operands converted to int locals before MIN2. */
234 case TGSI_OPCODE_ATOMIMIN
:
235 for (c
= 0; c
< 4; c
++) {
236 int dst_x
= sdata
[c
];
237 int src_x
= ((uint32_t *)rgba
[c
])[qi
];
238 sdata
[c
] = MIN2(dst_x
, src_x
);
239 ((uint32_t *)rgba
[c
])[qi
] = dst_x
;
/* Maximum, with both operands converted to int locals before MAX2. */
242 case TGSI_OPCODE_ATOMIMAX
:
243 for (c
= 0; c
< 4; c
++) {
244 int dst_x
= sdata
[c
];
245 int src_x
= ((uint32_t *)rgba
[c
])[qi
];
246 sdata
[c
] = MAX2(dst_x
, src_x
);
247 ((uint32_t *)rgba
[c
])[qi
] = dst_x
;
/*
 * Float add: uif()/fui() are defined elsewhere and presumably reinterpret
 * the 32-bit pattern as float and back - TODO confirm.
 * NOTE(review): the remainder of this loop body is not visible.
 */
250 case TGSI_OPCODE_ATOMFADD
:
251 for (c
= 0; c
< 4; c
++) {
252 float temp
= uif(sdata
[c
]);
253 sdata
[c
] = fui(temp
+ rgba
[c
][qi
]);
/* Any opcode not handled by a case above trips this assertion. */
258 assert(!"Unexpected TGSI opcode in sp_tgsi_op");
/* Commit the results: only channels enabled in writemask reach memory. */
262 for (c
= 0; c
< 4; c
++) {
263 if (writemask
& (1 << c
)) {
264 memcpy(data_ptr
+ (c
* 4), &sdata
[c
], 4);
270 * Implement atomic buffer operations.
/*
 * sp_tgsi_op: entry point for atomic opcodes on the shader buffer selected
 * by params->unit. For each quad lane it computes the byte address of the
 * target and delegates the read-modify-write to handle_op_atomic().
 *
 *   buffer - cast to struct sp_tgsi_buffer to reach the sp_bview[] array
 *   params - supplies the buffer unit, execution mask and writemask
 *   opcode - forwarded unchanged to handle_op_atomic()
 *   s      - per-lane coordinates (NOTE(review): the line that derives
 *            s_coord from s[] is missing from this extract)
 *   rgba   - operand in / result out, indexed [channel][lane]
 *   rgba2  - second operand, forwarded to handle_op_atomic()
 *
 * A get_dimensions() failure jumps to fail_write_all_zero, which zero-fills
 * rgba.
 */
273 sp_tgsi_op(const struct tgsi_buffer
*buffer
,
274 const struct tgsi_buffer_params
*params
,
275 enum tgsi_opcode opcode
,
276 const int s
[TGSI_QUAD_SIZE
],
277 float rgba
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
],
278 float rgba2
[TGSI_NUM_CHANNELS
][TGSI_QUAD_SIZE
])
280 struct sp_tgsi_buffer
*sp_buf
= (struct sp_tgsi_buffer
*)buffer
;
281 struct pipe_shader_buffer
*bview
;
282 struct softpipe_resource
*spr
;
285 unsigned char *data_ptr
;
/* NOTE(review): the statement taken for an out-of-range unit is not visible. */
287 if (params
->unit
>= PIPE_MAX_SHADER_BUFFERS
)
290 bview
= &sp_buf
->sp_bview
[params
->unit
];
291 spr
= softpipe_resource(bview
->buffer
);
/* NOTE(review): the condition guarding this goto is missing from the extract. */
293 goto fail_write_all_zero
;
295 if (!get_dimensions(bview
, spr
, &width
))
296 goto fail_write_all_zero
;
/* One iteration per lane of the quad. */
298 for (j
= 0; j
< TGSI_QUAD_SIZE
; j
++) {
300 bool just_read
= false;
/* NOTE(review): the body of the per-channel loop for this out-of-range case is not visible. */
303 if (s_coord
>= width
) {
304 for (c
= 0; c
< 4; c
++) {
310 /* just readback the value for atomic if execmask isn't set */
311 if (!(params
->execmask
& (1 << j
))) {
/* Target address: resource data + view offset + s_coord, all in bytes. */
315 data_ptr
= (unsigned char *)spr
->data
+ bview
->buffer_offset
+ s_coord
;
316 /* we should see atomic operations on r32 formats */
318 handle_op_atomic(bview
, just_read
, data_ptr
, j
,
319 opcode
, params
->writemask
, rgba
, rgba2
);
/* Failure path: clear TGSI_NUM_CHANNELS * TGSI_QUAD_SIZE 4-byte entries. */
323 memset(rgba
, 0, TGSI_NUM_CHANNELS
* TGSI_QUAD_SIZE
* 4);
328 * Return the size of the attached buffer for the RESQ opcode.
/*
 * sp_tgsi_get_dims: writes the buffer_size of the view selected by
 * params->unit to *dim.
 *
 * NOTE(review): the declaration of the dim parameter and the statements
 * taken when the unit is out of range (or after spr is looked up) are
 * missing from this extract.
 */
331 sp_tgsi_get_dims(const struct tgsi_buffer
*buffer
,
332 const struct tgsi_buffer_params
*params
,
335 struct sp_tgsi_buffer
*sp_buf
= (struct sp_tgsi_buffer
*)buffer
;
336 struct pipe_shader_buffer
*bview
;
337 struct softpipe_resource
*spr
;
/* Reject unit indices that would fall outside sp_bview[]. */
339 if (params
->unit
>= PIPE_MAX_SHADER_BUFFERS
)
342 bview
= &sp_buf
->sp_bview
[params
->unit
];
343 spr
= softpipe_resource(bview
->buffer
);
/* The reported size is the view's buffer_size, not the resource's width0. */
347 *dim
= bview
->buffer_size
;
/*
 * sp_create_tgsi_buffer: allocates a struct sp_tgsi_buffer with
 * CALLOC_STRUCT and points the load, store, op and get_dims callbacks of
 * its base member at the handlers defined in this file.
 *
 * NOTE(review): any allocation-failure check and the return statement are
 * not visible in this extract.
 */
350 struct sp_tgsi_buffer
*
351 sp_create_tgsi_buffer(void)
353 struct sp_tgsi_buffer
*buf
= CALLOC_STRUCT(sp_tgsi_buffer
);
/* Wire up the callback table. */
357 buf
->base
.load
= sp_tgsi_load
;
358 buf
->base
.store
= sp_tgsi_store
;
359 buf
->base
.op
= sp_tgsi_op
;
360 buf
->base
.get_dims
= sp_tgsi_get_dims
;