r6xx/r7xx: add sw blit for tex upload
[mesa.git] / src / mesa / drivers / dri / r600 / r600_cmdbuf.c
1 /*
2 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
3
4 The Weather Channel (TM) funded Tungsten Graphics to develop the
5 initial release of the Radeon 8500 driver under the XFree86 license.
6 This notice must be preserved.
7
8 Permission is hereby granted, free of charge, to any person obtaining
9 a copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice (including the
17 next paragraph) shall be included in all copies or substantial
18 portions of the Software.
19
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
23 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
24 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28 **************************************************************************/
29
30 /**
31 * Mostly copied from \radeon\radeon_cs_legacy.c
32 */
33
34 #include <errno.h>
35
36 #include "main/glheader.h"
37 #include "main/state.h"
38 #include "main/imports.h"
39 #include "main/macros.h"
40 #include "main/context.h"
41 #include "main/simple_list.h"
42 #include "swrast/swrast.h"
43
44 #include "drm.h"
45 #include "radeon_drm.h"
46
47 #include "r600_context.h"
48 #include "radeon_reg.h"
49 #include "r600_cmdbuf.h"
50 #include "r600_emit.h"
51 #include "radeon_bocs_wrapper.h"
52 #include "radeon_mipmap_tree.h"
53 #include "radeon_reg.h"
54
55 struct r600_cs_manager_legacy
56 {
57 struct radeon_cs_manager base;
58 struct radeon_context *ctx;
59 /* hack for scratch stuff */
60 uint32_t pending_age;
61 uint32_t pending_count;
62 };
63
64 struct r600_cs_reloc_legacy {
65 struct radeon_cs_reloc base;
66 uint32_t cindices;
67 uint32_t *indices;
68 uint32_t *reloc_indices;
69 struct offset_modifiers offset_mod;
70 };
71
72 static struct radeon_cs * r600_cs_create(struct radeon_cs_manager *csm,
73 uint32_t ndw)
74 {
75 struct radeon_cs *cs;
76
77 cs = (struct radeon_cs*)calloc(1, sizeof(struct radeon_cs));
78 if (cs == NULL) {
79 return NULL;
80 }
81 cs->csm = csm;
82 cs->ndw = (ndw + 0x3FF) & (~0x3FF);
83 cs->packets = (uint32_t*)malloc(4*cs->ndw);
84 if (cs->packets == NULL) {
85 free(cs);
86 return NULL;
87 }
88 cs->relocs_total_size = 0;
89 return cs;
90 }
91
92 int r600_cs_write_reloc(struct radeon_cs *cs,
93 struct radeon_bo *bo,
94 uint32_t read_domain,
95 uint32_t write_domain,
96 uint32_t flags,
97 offset_modifiers* poffset_mod)
98 {
99 struct r600_cs_reloc_legacy *relocs;
100 int i;
101
102 relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
103 /* check domains */
104 if ((read_domain && write_domain) || (!read_domain && !write_domain)) {
105 /* in one CS a bo can only be in read or write domain but not
106 * in read & write domain at the same sime
107 */
108 return -EINVAL;
109 }
110 if (read_domain == RADEON_GEM_DOMAIN_CPU) {
111 return -EINVAL;
112 }
113 if (write_domain == RADEON_GEM_DOMAIN_CPU) {
114 return -EINVAL;
115 }
116 /* check if bo is already referenced */
117 for(i = 0; i < cs->crelocs; i++) {
118 uint32_t *indices;
119 uint32_t *reloc_indices;
120
121 if (relocs[i].base.bo->handle == bo->handle) {
122 /* Check domains must be in read or write. As we check already
123 * checked that in argument one of the read or write domain was
124 * set we only need to check that if previous reloc as the read
125 * domain set then the read_domain should also be set for this
126 * new relocation.
127 */
128 if (relocs[i].base.read_domain && !read_domain) {
129 return -EINVAL;
130 }
131 if (relocs[i].base.write_domain && !write_domain) {
132 return -EINVAL;
133 }
134 relocs[i].base.read_domain |= read_domain;
135 relocs[i].base.write_domain |= write_domain;
136 /* save indice */
137 relocs[i].cindices++;
138 indices = (uint32_t*)realloc(relocs[i].indices,
139 relocs[i].cindices * 4);
140 reloc_indices = (uint32_t*)realloc(relocs[i].reloc_indices,
141 relocs[i].cindices * 4);
142 if ( (indices == NULL) || (reloc_indices == NULL) ) {
143 relocs[i].cindices -= 1;
144 return -ENOMEM;
145 }
146 relocs[i].indices = indices;
147 relocs[i].reloc_indices = reloc_indices;
148 relocs[i].indices[relocs[i].cindices - 1] = cs->cdw - 1;
149 relocs[i].reloc_indices[relocs[i].cindices - 1] = cs->section_cdw;
150 cs->section_ndw += 2;
151 cs->section_cdw += 2;
152
153 relocs[i].offset_mod.shift = poffset_mod->shift;
154 relocs[i].offset_mod.shiftbits = poffset_mod->shiftbits;
155 relocs[i].offset_mod.mask = poffset_mod->mask;
156
157 return 0;
158 }
159 }
160 /* add bo to reloc */
161 relocs = (struct r600_cs_reloc_legacy*)
162 realloc(cs->relocs,
163 sizeof(struct r600_cs_reloc_legacy) * (cs->crelocs + 1));
164 if (relocs == NULL) {
165 return -ENOMEM;
166 }
167 cs->relocs = relocs;
168 relocs[cs->crelocs].base.bo = bo;
169 relocs[cs->crelocs].base.read_domain = read_domain;
170 relocs[cs->crelocs].base.write_domain = write_domain;
171 relocs[cs->crelocs].base.flags = flags;
172 relocs[cs->crelocs].indices = (uint32_t*)malloc(4);
173 relocs[cs->crelocs].reloc_indices = (uint32_t*)malloc(4);
174 if ( (relocs[cs->crelocs].indices == NULL) || (relocs[cs->crelocs].reloc_indices == NULL) )
175 {
176 return -ENOMEM;
177 }
178 relocs[cs->crelocs].offset_mod.shift = poffset_mod->shift;
179 relocs[cs->crelocs].offset_mod.shiftbits = poffset_mod->shiftbits;
180 relocs[cs->crelocs].offset_mod.mask = poffset_mod->mask;
181
182 relocs[cs->crelocs].indices[0] = cs->cdw - 1;
183 relocs[cs->crelocs].reloc_indices[0] = cs->section_cdw;
184 cs->section_ndw += 2;
185 cs->section_cdw += 2;
186 relocs[cs->crelocs].cindices = 1;
187 cs->relocs_total_size += radeon_bo_legacy_relocs_size(bo);
188 cs->crelocs++;
189
190 radeon_bo_ref(bo);
191
192 return 0;
193 }
194
195 static int r600_cs_begin(struct radeon_cs *cs,
196 uint32_t ndw,
197 const char *file,
198 const char *func,
199 int line)
200 {
201 if (cs->section) {
202 fprintf(stderr, "CS already in a section(%s,%s,%d)\n",
203 cs->section_file, cs->section_func, cs->section_line);
204 fprintf(stderr, "CS can't start section(%s,%s,%d)\n",
205 file, func, line);
206 return -EPIPE;
207 }
208
209 if (cs->cdw + ndw + 32 > cs->ndw) { /* Left 32 DWORD (8 offset+pitch) spare room for reloc indices */
210 uint32_t tmp, *ptr;
211 int num = (ndw > 0x3FF) ? ndw : 0x3FF;
212
213 tmp = (cs->cdw + 1 + num) & (~num);
214 ptr = (uint32_t*)realloc(cs->packets, 4 * tmp);
215 if (ptr == NULL) {
216 return -ENOMEM;
217 }
218 cs->packets = ptr;
219 cs->ndw = tmp;
220 }
221
222 cs->section = 1;
223 cs->section_ndw = 0;
224 cs->section_cdw = cs->cdw + ndw; /* start of reloc indices. */
225 cs->section_file = file;
226 cs->section_func = func;
227 cs->section_line = line;
228
229 return 0;
230 }
231
232 static int r600_cs_end(struct radeon_cs *cs,
233 const char *file,
234 const char *func,
235 int line)
236
237 {
238 if (!cs->section) {
239 fprintf(stderr, "CS no section to end at (%s,%s,%d)\n",
240 file, func, line);
241 return -EPIPE;
242 }
243 cs->section = 0;
244
245 if ( (cs->section_ndw + cs->cdw) != cs->section_cdw )
246 {
247 fprintf(stderr, "CS section size missmatch start at (%s,%s,%d) %d vs %d\n",
248 cs->section_file, cs->section_func, cs->section_line, cs->section_ndw, cs->section_cdw);
249 fprintf(stderr, "cs->section_ndw = %d, cs->cdw = %d, cs->section_cdw = %d \n",
250 cs->section_ndw, cs->cdw, cs->section_cdw);
251 fprintf(stderr, "CS section end at (%s,%s,%d)\n",
252 file, func, line);
253 return -EPIPE;
254 }
255
256 cs->cdw = cs->section_cdw;
257 return 0;
258 }
259
260 static int r600_cs_process_relocs(struct radeon_cs *cs,
261 uint32_t * reloc_chunk,
262 uint32_t * length_dw_reloc_chunk)
263 {
264 struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
265 struct r600_cs_reloc_legacy *relocs;
266 int i, j, r;
267
268 uint32_t offset_dw = 0;
269
270 csm = (struct r600_cs_manager_legacy*)cs->csm;
271 relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
272 restart:
273 for (i = 0; i < cs->crelocs; i++)
274 {
275 for (j = 0; j < relocs[i].cindices; j++)
276 {
277 uint32_t soffset, eoffset, asicoffset;
278
279 r = radeon_bo_legacy_validate(relocs[i].base.bo,
280 &soffset, &eoffset);
281 if (r == -EAGAIN)
282 {
283 goto restart;
284 }
285 if (r)
286 {
287 fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
288 relocs[i].base.bo, soffset, eoffset);
289 return r;
290 }
291 asicoffset = soffset;
292 if (asicoffset >= eoffset)
293 {
294 /* radeon_bo_debug(relocs[i].base.bo, 12); */
295 fprintf(stderr, "validated %p [0x%08X, 0x%08X]\n",
296 relocs[i].base.bo, soffset, eoffset);
297 fprintf(stderr, "above end: %p 0x%08X 0x%08X\n",
298 relocs[i].base.bo,
299 cs->packets[relocs[i].indices[j]],
300 eoffset);
301 exit(0);
302 return -EINVAL;
303 }
304 /* apply offset operator */
305 switch (relocs[i].offset_mod.shift)
306 {
307 case NO_SHIFT:
308 asicoffset = asicoffset & relocs[i].offset_mod.mask;
309 break;
310 case LEFT_SHIFT:
311 asicoffset = (asicoffset << relocs[i].offset_mod.shiftbits) & relocs[i].offset_mod.mask;
312 break;
313 case RIGHT_SHIFT:
314 asicoffset = (asicoffset >> relocs[i].offset_mod.shiftbits) & relocs[i].offset_mod.mask;
315 break;
316 default:
317 break;
318 };
319
320 /* pkt3 nop header in ib chunk */
321 cs->packets[relocs[i].reloc_indices[j]] = 0xC0001000;
322
323 /* reloc index in ib chunk */
324 cs->packets[relocs[i].reloc_indices[j] + 1] = offset_dw;
325
326 /* asic offset in reloc chunk */ /* see alex drm r600_nomm_relocate */
327 reloc_chunk[offset_dw] = asicoffset;
328 reloc_chunk[offset_dw + 3] = 0;
329
330 offset_dw += 4;
331 }
332 }
333
334 *length_dw_reloc_chunk = offset_dw;
335
336 return 0;
337 }
338
339 static int r600_cs_set_age(struct radeon_cs *cs) /* -------------- */
340 {
341 struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
342 struct r600_cs_reloc_legacy *relocs;
343 int i;
344
345 relocs = (struct r600_cs_reloc_legacy *)cs->relocs;
346 for (i = 0; i < cs->crelocs; i++) {
347 radeon_bo_legacy_pending(relocs[i].base.bo, csm->pending_age);
348 radeon_bo_unref(relocs[i].base.bo);
349 }
350 return 0;
351 }
352
353 static void dump_cmdbuf(struct radeon_cs *cs)
354 {
355 int i;
356 fprintf(stderr,"--start--\n");
357 for (i = 0; i < cs->cdw; i++){
358 fprintf(stderr,"0x%08x\n", cs->packets[i]);
359 }
360 fprintf(stderr,"--end--\n");
361
362 }
363
364 static int r600_cs_emit(struct radeon_cs *cs)
365 {
366 struct r600_cs_manager_legacy *csm = (struct r600_cs_manager_legacy*)cs->csm;
367 struct drm_radeon_cs cs_cmd;
368 struct drm_radeon_cs_chunk cs_chunk[2];
369 drm_radeon_cmd_buffer_t cmd;
370 /* drm_r300_cmd_header_t age; */
371 uint32_t length_dw_reloc_chunk;
372 uint64_t ull;
373 uint64_t chunk_ptrs[2];
374 uint32_t reloc_chunk[128];
375 int r;
376 int retry = 0;
377
378 /* TODO : put chip level things here if need. */
379 /* csm->ctx->vtbl.emit_cs_header(cs, csm->ctx); */
380
381 /* TODO : append buffer age */
382
383 r = r600_cs_process_relocs(cs, &(reloc_chunk[0]), &length_dw_reloc_chunk);
384 if (r) {
385 return 0;
386 }
387
388 /* raw ib chunk */
389 cs_chunk[0].chunk_id = RADEON_CHUNK_ID_IB;
390 cs_chunk[0].length_dw = cs->cdw;
391 cs_chunk[0].chunk_data = (unsigned long)(cs->packets);
392
393 /* reloc chaunk */
394 cs_chunk[1].chunk_id = RADEON_CHUNK_ID_RELOCS;
395 cs_chunk[1].length_dw = length_dw_reloc_chunk;
396 cs_chunk[1].chunk_data = (unsigned long)&(reloc_chunk[0]);
397
398 chunk_ptrs[0] = (uint64_t)(unsigned long)&(cs_chunk[0]);
399 chunk_ptrs[1] = (uint64_t)(unsigned long)&(cs_chunk[1]);
400
401 cs_cmd.num_chunks = 2;
402 /* cs_cmd.cs_id = 0; */
403 cs_cmd.chunks = (uint64_t)(unsigned long)chunk_ptrs;
404
405 dump_cmdbuf(cs);
406
407 do
408 {
409 r = drmCommandWriteRead(cs->csm->fd, DRM_RADEON_CS, &cs_cmd, sizeof(cs_cmd));
410 retry++;
411 } while (r == -EAGAIN && retry < 1000);
412
413 if (r) {
414 return r;
415 }
416
417 r600_cs_set_age(cs);
418
419 cs->csm->read_used = 0;
420 cs->csm->vram_write_used = 0;
421 cs->csm->gart_write_used = 0;
422
423 return 0;
424 }
425
426 static void inline r600_cs_free_reloc(void *relocs_p, int crelocs)
427 {
428 struct r600_cs_reloc_legacy *relocs = relocs_p;
429 int i;
430 if (!relocs_p)
431 return;
432 for (i = 0; i < crelocs; i++)
433 {
434 free(relocs[i].indices);
435 free(relocs[i].reloc_indices);
436 }
437 }
438
439 static int r600_cs_destroy(struct radeon_cs *cs)
440 {
441 r600_cs_free_reloc(cs->relocs, cs->crelocs);
442 free(cs->relocs);
443 free(cs->packets);
444 free(cs);
445 return 0;
446 }
447
448 static int r600_cs_erase(struct radeon_cs *cs)
449 {
450 r600_cs_free_reloc(cs->relocs, cs->crelocs);
451 free(cs->relocs);
452 cs->relocs_total_size = 0;
453 cs->relocs = NULL;
454 cs->crelocs = 0;
455 cs->cdw = 0;
456 cs->section = 0;
457 return 0;
458 }
459
/**
 * Never requests a flush.
 *
 * This function used to flush when the BO usage got to a certain size;
 * now the higher levels handle this better.
 */
static int r600_cs_need_flush(struct radeon_cs *cs)
{
    (void)cs;
    return 0;
}
466
/** Print hook of the CS function table; intentionally does nothing. */
static void r600_cs_print(struct radeon_cs *cs, FILE *file)
{
    (void)cs;
    (void)file;
}
470
471 static int r600_cs_check_space(struct radeon_cs *cs, struct radeon_cs_space_check *bos, int num_bo)
472 {
473 struct radeon_cs_manager *csm = cs->csm;
474 int this_op_read = 0, this_op_gart_write = 0, this_op_vram_write = 0;
475 uint32_t read_domains, write_domain;
476 int i;
477 struct radeon_bo *bo;
478
479 /* check the totals for this operation */
480
481 if (num_bo == 0)
482 return 0;
483
484 /* prepare */
485 for (i = 0; i < num_bo; i++)
486 {
487 bo = bos[i].bo;
488
489 bos[i].new_accounted = 0;
490 read_domains = bos[i].read_domains;
491 write_domain = bos[i].write_domain;
492
493 /* pinned bos don't count */
494 if (radeon_legacy_bo_is_static(bo))
495 continue;
496
497 /* already accounted this bo */
498 if (write_domain && (write_domain == bo->space_accounted))
499 continue;
500
501 if (read_domains && ((read_domains << 16) == bo->space_accounted))
502 continue;
503
504 if (bo->space_accounted == 0)
505 {
506 if (write_domain == RADEON_GEM_DOMAIN_VRAM)
507 this_op_vram_write += bo->size;
508 else if (write_domain == RADEON_GEM_DOMAIN_GTT)
509 this_op_gart_write += bo->size;
510 else
511 this_op_read += bo->size;
512 bos[i].new_accounted = (read_domains << 16) | write_domain;
513 }
514 else
515 {
516 uint16_t old_read, old_write;
517
518 old_read = bo->space_accounted >> 16;
519 old_write = bo->space_accounted & 0xffff;
520
521 if (write_domain && (old_read & write_domain))
522 {
523 bos[i].new_accounted = write_domain;
524 /* moving from read to a write domain */
525 if (write_domain == RADEON_GEM_DOMAIN_VRAM)
526 {
527 this_op_read -= bo->size;
528 this_op_vram_write += bo->size;
529 }
530 else if (write_domain == RADEON_GEM_DOMAIN_VRAM)
531 {
532 this_op_read -= bo->size;
533 this_op_gart_write += bo->size;
534 }
535 }
536 else if (read_domains & old_write)
537 {
538 bos[i].new_accounted = bo->space_accounted & 0xffff;
539 }
540 else
541 {
542 /* rewrite the domains */
543 if (write_domain != old_write)
544 fprintf(stderr,"WRITE DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, write_domain, old_write);
545 if (read_domains != old_read)
546 fprintf(stderr,"READ DOMAIN RELOC FAILURE 0x%x %d %d\n", bo->handle, read_domains, old_read);
547 return RADEON_CS_SPACE_FLUSH;
548 }
549 }
550 }
551
552 if (this_op_read < 0)
553 this_op_read = 0;
554
555 /* check sizes - operation first */
556 if ((this_op_read + this_op_gart_write > csm->gart_limit) ||
557 (this_op_vram_write > csm->vram_limit)) {
558 return RADEON_CS_SPACE_OP_TO_BIG;
559 }
560
561 if (((csm->vram_write_used + this_op_vram_write) > csm->vram_limit) ||
562 ((csm->read_used + csm->gart_write_used + this_op_gart_write + this_op_read) > csm->gart_limit)) {
563 return RADEON_CS_SPACE_FLUSH;
564 }
565
566 csm->gart_write_used += this_op_gart_write;
567 csm->vram_write_used += this_op_vram_write;
568 csm->read_used += this_op_read;
569 /* commit */
570 for (i = 0; i < num_bo; i++) {
571 bo = bos[i].bo;
572 bo->space_accounted = bos[i].new_accounted;
573 }
574
575 return RADEON_CS_SPACE_OK;
576 }
577
578 static struct radeon_cs_funcs r600_cs_funcs = {
579 r600_cs_create,
580 r600_cs_write_reloc,
581 r600_cs_begin,
582 r600_cs_end,
583 r600_cs_emit,
584 r600_cs_destroy,
585 r600_cs_erase,
586 r600_cs_need_flush,
587 r600_cs_print,
588 r600_cs_check_space
589 };
590
591 struct radeon_cs_manager * r600_radeon_cs_manager_legacy_ctor(struct radeon_context *ctx)
592 {
593 struct r600_cs_manager_legacy *csm;
594
595 csm = (struct r600_cs_manager_legacy*)
596 calloc(1, sizeof(struct r600_cs_manager_legacy));
597 if (csm == NULL) {
598 return NULL;
599 }
600 csm->base.funcs = &r600_cs_funcs;
601 csm->base.fd = ctx->dri.fd;
602 csm->ctx = ctx;
603 csm->pending_age = 1;
604 return (struct radeon_cs_manager*)csm;
605 }
606
607 void r600InitCmdBuf(context_t *r600) /* from rcommonInitCmdBuf */
608 {
609 radeonContextPtr rmesa = &r600->radeon;
610
611 GLuint size;
612 /* Initialize command buffer */
613 size = 256 * driQueryOptioni(&rmesa->optionCache,
614 "command_buffer_size");
615 if (size < 2 * rmesa->hw.max_state_size) {
616 size = 2 * rmesa->hw.max_state_size + 65535;
617 }
618 if (size > 64 * 256)
619 size = 64 * 256;
620
621 if (rmesa->radeonScreen->kernel_mm) {
622 int fd = rmesa->radeonScreen->driScreen->fd;
623 rmesa->cmdbuf.csm = radeon_cs_manager_gem_ctor(fd);
624 } else {
625 rmesa->cmdbuf.csm = r600_radeon_cs_manager_legacy_ctor(rmesa);
626 }
627 if (rmesa->cmdbuf.csm == NULL) {
628 /* FIXME: fatal error */
629 return;
630 }
631 rmesa->cmdbuf.cs = radeon_cs_create(rmesa->cmdbuf.csm, size);
632 assert(rmesa->cmdbuf.cs != NULL);
633 rmesa->cmdbuf.size = size;
634
635 if (!rmesa->radeonScreen->kernel_mm) {
636 radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, rmesa->radeonScreen->texSize[0]);
637 radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, rmesa->radeonScreen->gartTextures.size);
638 } else {
639 struct drm_radeon_gem_info mminfo;
640
641 if (!drmCommandWriteRead(rmesa->dri.fd, DRM_RADEON_GEM_INFO, &mminfo, sizeof(mminfo)))
642 {
643 radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_VRAM, mminfo.vram_visible);
644 radeon_cs_set_limit(rmesa->cmdbuf.cs, RADEON_GEM_DOMAIN_GTT, mminfo.gart_size);
645 }
646 }
647 }
648
/**
 * Copy a rectangle between two linear images with the CPU.
 *
 * The rectangle sits at the same (x, y) position in both images.
 *
 * \param srcp      first byte of the source image
 * \param src_pitch distance in bytes between two source rows
 * \param dstp      first byte of the destination image
 * \param dst_pitch distance in bytes between two destination rows
 * \param x, y      top-left corner of the rectangle, in pixels
 * \param w, h      size of the rectangle, in pixels
 * \param cpp       bytes per pixel
 *
 * Nothing is copied when a pointer is NULL or when w, h or cpp is not
 * positive.  Rows are copied with memcpy(), so source and destination
 * rows must not overlap.
 */
void r600_sw_blit(char *srcp, int src_pitch, char *dstp, int dst_pitch,
                  int x, int y, int w, int h, int cpp)
{
    const char *src;
    char *dst;
    size_t row_bytes;

    /* a negative extent would turn into a huge memcpy length or a loop
     * that practically never terminates */
    if (srcp == NULL || dstp == NULL || w <= 0 || h <= 0 || cpp <= 0)
        return;

    /* widen before multiplying so large images cannot overflow int */
    row_bytes = (size_t)w * (size_t)cpp;
    src = srcp + (intptr_t)y * src_pitch + (intptr_t)x * cpp;
    dst = dstp + (intptr_t)y * dst_pitch + (intptr_t)x * cpp;

    while (h--) {
        memcpy(dst, src, row_bytes);
        src += src_pitch;
        dst += dst_pitch;
    }
}
664