r100/r200: add blit support for ARGB4444
[mesa.git] / src / mesa / drivers / dri / radeon / radeon_blit.c
1 /*
2 * Copyright (C) 2010 Advanced Micro Devices, Inc.
3 *
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sublicense, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial
16 * portions of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 */
27
28 #include "radeon_common.h"
29 #include "radeon_context.h"
30 #include "radeon_blit.h"
31
32 static inline uint32_t cmdpacket0(struct radeon_screen *rscrn,
33 int reg, int count)
34 {
35 if (count)
36 return CP_PACKET0(reg, count - 1);
37 return CP_PACKET2;
38 }
39
40 static inline void emit_vtx_state(struct r100_context *r100)
41 {
42 BATCH_LOCALS(&r100->radeon);
43
44 BEGIN_BATCH(8);
45 if (r100->radeon.radeonScreen->chip_flags & RADEON_CHIPSET_TCL) {
46 OUT_BATCH_REGVAL(RADEON_SE_CNTL_STATUS, 0);
47 } else {
48 OUT_BATCH_REGVAL(RADEON_SE_CNTL_STATUS, RADEON_TCL_BYPASS);
49
50 }
51 OUT_BATCH_REGVAL(RADEON_SE_COORD_FMT, (RADEON_VTX_XY_PRE_MULT_1_OVER_W0 |
52 RADEON_TEX1_W_ROUTING_USE_W0));
53 OUT_BATCH_REGVAL(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY | RADEON_SE_VTX_FMT_ST0);
54 OUT_BATCH_REGVAL(RADEON_SE_CNTL, (RADEON_DIFFUSE_SHADE_GOURAUD |
55 RADEON_BFACE_SOLID |
56 RADEON_FFACE_SOLID |
57 RADEON_VTX_PIX_CENTER_OGL |
58 RADEON_ROUND_MODE_ROUND |
59 RADEON_ROUND_PREC_4TH_PIX));
60 END_BATCH();
61 }
62
63 static void inline emit_tx_setup(struct r100_context *r100,
64 gl_format mesa_format,
65 struct radeon_bo *bo,
66 intptr_t offset,
67 unsigned width,
68 unsigned height,
69 unsigned pitch)
70 {
71 uint32_t txformat = RADEON_TXFORMAT_NON_POWER2;
72 BATCH_LOCALS(&r100->radeon);
73
74 assert(width <= 2047);
75 assert(height <= 2047);
76 assert(offset % 32 == 0);
77
78 /* XXX others? BE/LE? */
79 switch (mesa_format) {
80 case MESA_FORMAT_ARGB8888:
81 txformat |= RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP;
82 break;
83 case MESA_FORMAT_XRGB8888:
84 txformat |= RADEON_TXFORMAT_ARGB8888;
85 break;
86 case MESA_FORMAT_RGB565:
87 txformat |= RADEON_TXFORMAT_RGB565;
88 break;
89 case MESA_FORMAT_ARGB4444:
90 txformat |= RADEON_TXFORMAT_ARGB4444 | RADEON_TXFORMAT_ALPHA_IN_MAP;
91 break;
92 case MESA_FORMAT_ARGB1555:
93 txformat |= RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP;
94 break;
95 case MESA_FORMAT_A8:
96 txformat |= RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP;
97 break;
98 default:
99 break;
100 }
101
102 BEGIN_BATCH(18);
103 OUT_BATCH_REGVAL(RADEON_PP_CNTL, RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE);
104 OUT_BATCH_REGVAL(RADEON_PP_TXCBLEND_0, (RADEON_COLOR_ARG_A_ZERO |
105 RADEON_COLOR_ARG_B_ZERO |
106 RADEON_COLOR_ARG_C_T0_COLOR |
107 RADEON_BLEND_CTL_ADD |
108 RADEON_CLAMP_TX));
109 OUT_BATCH_REGVAL(RADEON_PP_TXABLEND_0, (RADEON_ALPHA_ARG_A_ZERO |
110 RADEON_ALPHA_ARG_B_ZERO |
111 RADEON_ALPHA_ARG_C_T0_ALPHA |
112 RADEON_BLEND_CTL_ADD |
113 RADEON_CLAMP_TX));
114 OUT_BATCH_REGVAL(RADEON_PP_TXFILTER_0, (RADEON_CLAMP_S_CLAMP_LAST |
115 RADEON_CLAMP_T_CLAMP_LAST |
116 RADEON_MAG_FILTER_NEAREST |
117 RADEON_MIN_FILTER_NEAREST));
118 OUT_BATCH_REGVAL(RADEON_PP_TXFORMAT_0, txformat);
119 OUT_BATCH_REGVAL(RADEON_PP_TEX_SIZE_0, ((width - 1) |
120 ((height - 1) << RADEON_TEX_VSIZE_SHIFT)));
121 OUT_BATCH_REGVAL(RADEON_PP_TEX_PITCH_0, pitch - 32);
122
123 OUT_BATCH_REGSEQ(RADEON_PP_TXOFFSET_0, 1);
124 OUT_BATCH_RELOC(0, bo, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0, 0);
125
126 END_BATCH();
127 }
128
129 static inline void emit_cb_setup(struct r100_context *r100,
130 struct radeon_bo *bo,
131 intptr_t offset,
132 gl_format mesa_format,
133 unsigned pitch,
134 unsigned width,
135 unsigned height)
136 {
137 uint32_t dst_pitch = pitch;
138 uint32_t dst_format = 0;
139 BATCH_LOCALS(&r100->radeon);
140
141 /* XXX others? BE/LE? */
142 switch (mesa_format) {
143 case MESA_FORMAT_ARGB8888:
144 case MESA_FORMAT_XRGB8888:
145 dst_format = RADEON_COLOR_FORMAT_ARGB8888;
146 break;
147 case MESA_FORMAT_RGB565:
148 dst_format = RADEON_COLOR_FORMAT_RGB565;
149 break;
150 case MESA_FORMAT_ARGB4444:
151 dst_format = RADEON_COLOR_FORMAT_ARGB4444;
152 break;
153 case MESA_FORMAT_ARGB1555:
154 dst_format = RADEON_COLOR_FORMAT_ARGB1555;
155 break;
156 case MESA_FORMAT_A8:
157 dst_format = RADEON_COLOR_FORMAT_RGB8;
158 break;
159 default:
160 break;
161 }
162
163 BEGIN_BATCH_NO_AUTOSTATE(18);
164 OUT_BATCH_REGVAL(RADEON_RE_TOP_LEFT, 0);
165 OUT_BATCH_REGVAL(RADEON_RE_WIDTH_HEIGHT, ((width << RADEON_RE_WIDTH_SHIFT) |
166 (height << RADEON_RE_HEIGHT_SHIFT)));
167 OUT_BATCH_REGVAL(RADEON_RB3D_PLANEMASK, 0xffffffff);
168 OUT_BATCH_REGVAL(RADEON_RB3D_BLENDCNTL, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO);
169 OUT_BATCH_REGVAL(RADEON_RB3D_CNTL, dst_format);
170
171 OUT_BATCH_REGSEQ(RADEON_RB3D_COLOROFFSET, 1);
172 OUT_BATCH_RELOC(0, bo, 0, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
173 OUT_BATCH_REGSEQ(RADEON_RB3D_COLORPITCH, 1);
174 OUT_BATCH_RELOC(dst_pitch, bo, dst_pitch, 0, RADEON_GEM_DOMAIN_GTT|RADEON_GEM_DOMAIN_VRAM, 0);
175
176 END_BATCH();
177 }
178
179 static GLboolean validate_buffers(struct r100_context *r100,
180 struct radeon_bo *src_bo,
181 struct radeon_bo *dst_bo)
182 {
183 int ret;
184 radeon_cs_space_add_persistent_bo(r100->radeon.cmdbuf.cs,
185 src_bo, RADEON_GEM_DOMAIN_VRAM, 0);
186
187 radeon_cs_space_add_persistent_bo(r100->radeon.cmdbuf.cs,
188 dst_bo, 0, RADEON_GEM_DOMAIN_VRAM);
189
190 ret = radeon_cs_space_check_with_bo(r100->radeon.cmdbuf.cs,
191 first_elem(&r100->radeon.dma.reserved)->bo,
192 RADEON_GEM_DOMAIN_GTT, 0);
193 if (ret)
194 return GL_FALSE;
195
196 return GL_TRUE;
197 }
198
/**
 * Compute normalized texture coordinates for a region of an image.
 *
 * The region starts at (@a x, @a y) and spans @a reg_width x @a reg_height
 * inside an image of @a img_width x @a img_height.  The result is stored in
 * @a buf as [minx, maxx, miny, maxy].  When @a flip_y is non-zero the
 * vertical range is mirrored, i.e. it becomes [1 - maxy, 1 - miny].
 */
static inline void calc_tex_coords(float img_width, float img_height,
                                   float x, float y,
                                   float reg_width, float reg_height,
                                   unsigned flip_y, float *buf)
{
    const float min_s = x / img_width;
    const float max_s = min_s + reg_width / img_width;
    const float min_t = y / img_height;
    const float max_t = min_t + reg_height / img_height;

    buf[0] = min_s;
    buf[1] = max_s;

    if (flip_y) {
        /* Mirror around the horizontal centre line of the image. */
        buf[2] = 1.0f - max_t;
        buf[3] = 1.0f - min_t;
    } else {
        buf[2] = min_t;
        buf[3] = max_t;
    }
}
219
220 static inline void emit_draw_packet(struct r100_context *r100,
221 unsigned src_width, unsigned src_height,
222 unsigned src_x_offset, unsigned src_y_offset,
223 unsigned dst_x_offset, unsigned dst_y_offset,
224 unsigned reg_width, unsigned reg_height,
225 unsigned flip_y)
226 {
227 float texcoords[4];
228 float verts[12];
229 BATCH_LOCALS(&r100->radeon);
230
231 calc_tex_coords(src_width, src_height,
232 src_x_offset, src_y_offset,
233 reg_width, reg_height,
234 flip_y, texcoords);
235
236 verts[0] = dst_x_offset;
237 verts[1] = dst_y_offset + reg_height;
238 verts[2] = texcoords[0];
239 verts[3] = texcoords[2];
240
241 verts[4] = dst_x_offset + reg_width;
242 verts[5] = dst_y_offset + reg_height;
243 verts[6] = texcoords[1];
244 verts[7] = texcoords[2];
245
246 verts[8] = dst_x_offset + reg_width;
247 verts[9] = dst_y_offset;
248 verts[10] = texcoords[1];
249 verts[11] = texcoords[3];
250
251 BEGIN_BATCH(15);
252 OUT_BATCH(RADEON_CP_PACKET3_3D_DRAW_IMMD | (13 << 16));
253 OUT_BATCH(RADEON_CP_VC_FRMT_XY | RADEON_CP_VC_FRMT_ST0);
254 OUT_BATCH(RADEON_CP_VC_CNTL_PRIM_WALK_RING |
255 RADEON_CP_VC_CNTL_PRIM_TYPE_RECT_LIST |
256 RADEON_CP_VC_CNTL_MAOS_ENABLE |
257 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
258 (3 << 16));
259 OUT_BATCH_TABLE(verts, 12);
260 END_BATCH();
261 }
262
263 /**
264 * Copy a region of [@a width x @a height] pixels from source buffer
265 * to destination buffer.
266 * @param[in] r100 r100 context
267 * @param[in] src_bo source radeon buffer object
268 * @param[in] src_offset offset of the source image in the @a src_bo
269 * @param[in] src_mesaformat source image format
270 * @param[in] src_pitch aligned source image width
271 * @param[in] src_width source image width
272 * @param[in] src_height source image height
273 * @param[in] src_x_offset x offset in the source image
274 * @param[in] src_y_offset y offset in the source image
275 * @param[in] dst_bo destination radeon buffer object
276 * @param[in] dst_offset offset of the destination image in the @a dst_bo
277 * @param[in] dst_mesaformat destination image format
278 * @param[in] dst_pitch aligned destination image width
279 * @param[in] dst_width destination image width
280 * @param[in] dst_height destination image height
281 * @param[in] dst_x_offset x offset in the destination image
282 * @param[in] dst_y_offset y offset in the destination image
283 * @param[in] width region width
284 * @param[in] height region height
285 * @param[in] flip_y set if y coords of the source image need to be flipped
286 */
287 GLboolean r100_blit(struct r100_context *r100,
288 struct radeon_bo *src_bo,
289 intptr_t src_offset,
290 gl_format src_mesaformat,
291 unsigned src_pitch,
292 unsigned src_width,
293 unsigned src_height,
294 unsigned src_x_offset,
295 unsigned src_y_offset,
296 struct radeon_bo *dst_bo,
297 intptr_t dst_offset,
298 gl_format dst_mesaformat,
299 unsigned dst_pitch,
300 unsigned dst_width,
301 unsigned dst_height,
302 unsigned dst_x_offset,
303 unsigned dst_y_offset,
304 unsigned reg_width,
305 unsigned reg_height,
306 unsigned flip_y)
307 {
308 if (_mesa_get_format_bits(src_mesaformat, GL_DEPTH_BITS) > 0)
309 return GL_FALSE;
310
311 /* Make sure that colorbuffer has even width - hw limitation */
312 if (dst_pitch % 2 > 0)
313 ++dst_pitch;
314
315 /* Rendering to small buffer doesn't work.
316 * Looks like a hw limitation.
317 */
318 if (dst_pitch < 32)
319 return GL_FALSE;
320
321 /* Need to clamp the region size to make sure
322 * we don't read outside of the source buffer
323 * or write outside of the destination buffer.
324 */
325 if (reg_width + src_x_offset > src_width)
326 reg_width = src_width - src_x_offset;
327 if (reg_height + src_y_offset > src_height)
328 reg_height = src_height - src_y_offset;
329 if (reg_width + dst_x_offset > dst_width)
330 reg_width = dst_width - dst_x_offset;
331 if (reg_height + dst_y_offset > dst_height)
332 reg_height = dst_height - dst_y_offset;
333
334 if (src_bo == dst_bo) {
335 return GL_FALSE;
336 }
337
338 if (0) {
339 fprintf(stderr, "src: size [%d x %d], pitch %d, "
340 "offset [%d x %d], format %s, bo %p\n",
341 src_width, src_height, src_pitch,
342 src_x_offset, src_y_offset,
343 _mesa_get_format_name(src_mesaformat),
344 src_bo);
345 fprintf(stderr, "dst: pitch %d, offset[%d x %d], format %s, bo %p\n",
346 dst_pitch, dst_x_offset, dst_y_offset,
347 _mesa_get_format_name(dst_mesaformat), dst_bo);
348 fprintf(stderr, "region: %d x %d\n", reg_width, reg_height);
349 }
350
351 /* Flush is needed to make sure that source buffer has correct data */
352 radeonFlush(r100->radeon.glCtx);
353
354 rcommonEnsureCmdBufSpace(&r100->radeon, 59, __FUNCTION__);
355
356 if (!validate_buffers(r100, src_bo, dst_bo))
357 return GL_FALSE;
358
359 /* 8 */
360 emit_vtx_state(r100);
361 /* 18 */
362 emit_tx_setup(r100, src_mesaformat, src_bo, src_offset, src_width, src_height, src_pitch);
363 /* 18 */
364 emit_cb_setup(r100, dst_bo, dst_offset, dst_mesaformat, dst_pitch, dst_width, dst_height);
365 /* 15 */
366 emit_draw_packet(r100, src_width, src_height,
367 src_x_offset, src_y_offset,
368 dst_x_offset, dst_y_offset,
369 reg_width, reg_height,
370 flip_y);
371
372 radeonFlush(r100->radeon.glCtx);
373
374 return GL_TRUE;
375 }