-#else
- // There will be 8 4x2 simd tiles in an 8x8 raster tile.
- uint8_t* pDst = (uint8_t*)ComputeSurfaceAddress<false, false>(x, y, pDstSurface->arrayIndex + renderTargetArrayIndex,
- pDstSurface->arrayIndex + renderTargetArrayIndex, sampleNum, pDstSurface->lod, pDstSurface);
- struct DstPtrs
- {
- uint8_t* ppDsts[8];
- } ptrs;
-
- // Need 8 pointers, 4 columns of 2 rows each
- for (uint32_t y = 0; y < 2; ++y)
- {
- for (uint32_t x = 0; x < 4; ++x)
- {
- ptrs.ppDsts[x * 2 + y] = pDst + y * TILE_Y_COL_WIDTH_BYTES + x * TILE_Y_COL_BYTES;
- }
- }
-
- for (uint32_t row = 0; row < KNOB_TILE_Y_DIM / SIMD_TILE_Y_DIM; ++row)
- {
- DstPtrs startPtrs = ptrs;
-
- for (uint32_t col = 0; col < KNOB_TILE_X_DIM / SIMD_TILE_X_DIM; ++col)
- {
- // Format conversion and convert from SOA to AOS, and store the rows.
- ConvertPixelsSOAtoAOS<SrcFormat, DstFormat>::Convert(pSrc, ptrs.ppDsts);
-
- ptrs.ppDsts[0] += DST_COLUMN_BYTES_PER_SRC;
- ptrs.ppDsts[1] += DST_COLUMN_BYTES_PER_SRC;
- ptrs.ppDsts[2] += DST_COLUMN_BYTES_PER_SRC;
- ptrs.ppDsts[3] += DST_COLUMN_BYTES_PER_SRC;
- ptrs.ppDsts[4] += DST_COLUMN_BYTES_PER_SRC;
- ptrs.ppDsts[5] += DST_COLUMN_BYTES_PER_SRC;
- ptrs.ppDsts[6] += DST_COLUMN_BYTES_PER_SRC;
- ptrs.ppDsts[7] += DST_COLUMN_BYTES_PER_SRC;
- pSrc += SRC_COLUMN_BYTES;
- }
-
- ptrs.ppDsts[0] = startPtrs.ppDsts[0] + 2 * TILE_Y_COL_WIDTH_BYTES;
- ptrs.ppDsts[1] = startPtrs.ppDsts[1] + 2 * TILE_Y_COL_WIDTH_BYTES;
- ptrs.ppDsts[2] = startPtrs.ppDsts[2] + 2 * TILE_Y_COL_WIDTH_BYTES;
- ptrs.ppDsts[3] = startPtrs.ppDsts[3] + 2 * TILE_Y_COL_WIDTH_BYTES;
- ptrs.ppDsts[4] = startPtrs.ppDsts[4] + 2 * TILE_Y_COL_WIDTH_BYTES;
- ptrs.ppDsts[5] = startPtrs.ppDsts[5] + 2 * TILE_Y_COL_WIDTH_BYTES;
- ptrs.ppDsts[6] = startPtrs.ppDsts[6] + 2 * TILE_Y_COL_WIDTH_BYTES;
- ptrs.ppDsts[7] = startPtrs.ppDsts[7] + 2 * TILE_Y_COL_WIDTH_BYTES;
- }
-#endif