For a separate source/dest SUBVL (again, no elwidth overrides):
# yield an outer-SUBVL, inner VL loop with SRC SUBVL
- def index_src():
- for j in range(SRC_SUBVL):
+ def index_src(outer):
+ if outer:
+ # outer on *dest* subvl, to match inner dest
+ for j in range(SUBVL):
+ for i in range(VL):
+ yield i+VL*j
+ else:
+ # inner on *src* subvl, to match *outer* src
for i in range(VL):
- yield i+VL*j
+ for j in range(SRC_SUBVL):
+ yield i*SRC_SUBVL+j
# yield an outer-SUBVL, inner VL loop with DEST SUBVL
- def index_dest():
- for j in range(SUBVL):
+ def index_dest(outer):
+ if outer:
+ # outer on *src* subvl, to match inner src
+ for j in range(SRC_SUBVL):
+ for i in range(VL):
+ yield i+VL*j
+ else:
+ # inner on *dest* subvl, to match *outer* dest
for i in range(VL):
- yield i+VL*j
+ for j in range(SUBVL):
+ yield i*SUBVL+j
# inner looping when SUBVLs are equal
if SRC_SUBVL == SUBVL: