vld1_dup_f32 (const float32_t * a)
{
float32x2_t result;
- __asm__ ("ld1r {%0.2s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.2s}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1_dup_f64 (const float64_t * a)
{
float64x1_t result;
- __asm__ ("ld1 {%0.1d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.1d}, %1" /* %0 = result, %1 = *a; mnemonic changes from ld1 to ld1r */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1_dup_p8 (const poly8_t * a)
{
poly8x8_t result;
- __asm__ ("ld1r {%0.8b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.8b}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1_dup_p16 (const poly16_t * a)
{
poly16x4_t result;
- __asm__ ("ld1r {%0.4h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.4h}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1_dup_s8 (const int8_t * a)
{
int8x8_t result;
- __asm__ ("ld1r {%0.8b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.8b}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1_dup_s16 (const int16_t * a)
{
int16x4_t result;
- __asm__ ("ld1r {%0.4h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.4h}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1_dup_s32 (const int32_t * a)
{
int32x2_t result;
- __asm__ ("ld1r {%0.2s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.2s}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1_dup_s64 (const int64_t * a)
{
int64x1_t result;
- __asm__ ("ld1 {%0.1d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.1d}, %1" /* %0 = result, %1 = *a; mnemonic changes from ld1 to ld1r */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1_dup_u8 (const uint8_t * a)
{
uint8x8_t result;
- __asm__ ("ld1r {%0.8b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.8b}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1_dup_u16 (const uint16_t * a)
{
uint16x4_t result;
- __asm__ ("ld1r {%0.4h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.4h}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1_dup_u32 (const uint32_t * a)
{
uint32x2_t result;
- __asm__ ("ld1r {%0.2s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.2s}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1_dup_u64 (const uint64_t * a)
{
uint64x1_t result;
- __asm__ ("ld1 {%0.1d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.1d}, %1" /* %0 = result, %1 = *a; mnemonic changes from ld1 to ld1r */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1_f32 (const float32_t * a)
{
float32x2_t result;
- __asm__ ("ld1 {%0.2s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.2s}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole float32x2_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const float32x2_t *_a = (const float32x2_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
vld1_f64 (const float64_t * a)
{
float64x1_t result;
- __asm__ ("ld1 {%0.1d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.1d}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
float32x2_t b_ = (b); \
const float32_t * a_ = (a); \
float32x2_t result; \
- __asm__ ("ld1 {%0.s}[%3],[%1]" \
+ __asm__ ("ld1 {%0.s}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
float64x1_t b_ = (b); \
const float64_t * a_ = (a); \
float64x1_t result; \
- __asm__ ("ld1 {%0.d}[%3],[%1]" \
+ __asm__ ("ld1 {%0.d}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
poly8x8_t b_ = (b); \
const poly8_t * a_ = (a); \
poly8x8_t result; \
- __asm__ ("ld1 {%0.b}[%3],[%1]" \
+ __asm__ ("ld1 {%0.b}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
poly16x4_t b_ = (b); \
const poly16_t * a_ = (a); \
poly16x4_t result; \
- __asm__ ("ld1 {%0.h}[%3],[%1]" \
+ __asm__ ("ld1 {%0.h}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
int8x8_t b_ = (b); \
const int8_t * a_ = (a); \
int8x8_t result; \
- __asm__ ("ld1 {%0.b}[%3],[%1]" \
+ __asm__ ("ld1 {%0.b}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
int16x4_t b_ = (b); \
const int16_t * a_ = (a); \
int16x4_t result; \
- __asm__ ("ld1 {%0.h}[%3],[%1]" \
+ __asm__ ("ld1 {%0.h}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
int32x2_t b_ = (b); \
const int32_t * a_ = (a); \
int32x2_t result; \
- __asm__ ("ld1 {%0.s}[%3],[%1]" \
+ __asm__ ("ld1 {%0.s}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
int64x1_t b_ = (b); \
const int64_t * a_ = (a); \
int64x1_t result; \
- __asm__ ("ld1 {%0.d}[%3],[%1]" \
+ __asm__ ("ld1 {%0.d}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
uint8x8_t b_ = (b); \
const uint8_t * a_ = (a); \
uint8x8_t result; \
- __asm__ ("ld1 {%0.b}[%3],[%1]" \
+ __asm__ ("ld1 {%0.b}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
uint16x4_t b_ = (b); \
const uint16_t * a_ = (a); \
uint16x4_t result; \
- __asm__ ("ld1 {%0.h}[%3],[%1]" \
+ __asm__ ("ld1 {%0.h}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
uint32x2_t b_ = (b); \
const uint32_t * a_ = (a); \
uint32x2_t result; \
- __asm__ ("ld1 {%0.s}[%3],[%1]" \
+ __asm__ ("ld1 {%0.s}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
uint64x1_t b_ = (b); \
const uint64_t * a_ = (a); \
uint64x1_t result; \
- __asm__ ("ld1 {%0.d}[%3],[%1]" \
+ __asm__ ("ld1 {%0.d}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i" (c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
vld1_p8 (const poly8_t * a)
{
poly8x8_t result;
- __asm__ ("ld1 {%0.8b}, [%1]"
- : "=w"(result)
- : "r"(a)
- : /* No clobbers */);
+ __asm__ ("ld1 {%0.8b}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole poly8x8_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const poly8x8_t *_a = (const poly8x8_t *) a; *_a;}))
+ : /* No clobbers: the load is described by the %1 input operand. */);
return result;
}
vld1_p16 (const poly16_t * a)
{
poly16x4_t result;
- __asm__ ("ld1 {%0.4h}, [%1]"
- : "=w"(result)
- : "r"(a)
- : /* No clobbers */);
+ __asm__ ("ld1 {%0.4h}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole poly16x4_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const poly16x4_t *_a = (const poly16x4_t *) a; *_a;}))
+ : /* No clobbers: the load is described by the %1 input operand. */);
return result;
}
vld1_s8 (const int8_t * a)
{
int8x8_t result;
- __asm__ ("ld1 {%0.8b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.8b}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole int8x8_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const int8x8_t *_a = (const int8x8_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
vld1_s16 (const int16_t * a)
{
int16x4_t result;
- __asm__ ("ld1 {%0.4h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.4h}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole int16x4_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const int16x4_t *_a = (const int16x4_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
vld1_s32 (const int32_t * a)
{
int32x2_t result;
- __asm__ ("ld1 {%0.2s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.2s}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole int32x2_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const int32x2_t *_a = (const int32x2_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
vld1_s64 (const int64_t * a)
{
int64x1_t result;
- __asm__ ("ld1 {%0.1d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.1d}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1_u8 (const uint8_t * a)
{
uint8x8_t result;
- __asm__ ("ld1 {%0.8b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.8b}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole uint8x8_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const uint8x8_t *_a = (const uint8x8_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
vld1_u16 (const uint16_t * a)
{
uint16x4_t result;
- __asm__ ("ld1 {%0.4h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.4h}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole uint16x4_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const uint16x4_t *_a = (const uint16x4_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
vld1_u32 (const uint32_t * a)
{
uint32x2_t result;
- __asm__ ("ld1 {%0.2s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.2s}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole uint32x2_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const uint32x2_t *_a = (const uint32x2_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
vld1_u64 (const uint64_t * a)
{
uint64x1_t result;
- __asm__ ("ld1 {%0.1d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.1d}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1q_dup_f32 (const float32_t * a)
{
float32x4_t result;
- __asm__ ("ld1r {%0.4s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.4s}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1q_dup_f64 (const float64_t * a)
{
float64x2_t result;
- __asm__ ("ld1r {%0.2d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.2d}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1q_dup_p8 (const poly8_t * a)
{
poly8x16_t result;
- __asm__ ("ld1r {%0.16b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.16b}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1q_dup_p16 (const poly16_t * a)
{
poly16x8_t result;
- __asm__ ("ld1r {%0.8h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.8h}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1q_dup_s8 (const int8_t * a)
{
int8x16_t result;
- __asm__ ("ld1r {%0.16b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.16b}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1q_dup_s16 (const int16_t * a)
{
int16x8_t result;
- __asm__ ("ld1r {%0.8h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.8h}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1q_dup_s32 (const int32_t * a)
{
int32x4_t result;
- __asm__ ("ld1r {%0.4s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.4s}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1q_dup_s64 (const int64_t * a)
{
int64x2_t result;
- __asm__ ("ld1r {%0.2d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.2d}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1q_dup_u8 (const uint8_t * a)
{
uint8x16_t result;
- __asm__ ("ld1r {%0.16b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.16b}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1q_dup_u16 (const uint16_t * a)
{
uint16x8_t result;
- __asm__ ("ld1r {%0.8h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.8h}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1q_dup_u32 (const uint32_t * a)
{
uint32x4_t result;
- __asm__ ("ld1r {%0.4s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.4s}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1q_dup_u64 (const uint64_t * a)
{
uint64x2_t result;
- __asm__ ("ld1r {%0.2d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1r {%0.2d}, %1" /* %0 = result, %1 = *a */
+ : "=w"(result)
+ : "Utv"(*a)
+ : /* No clobbers: the old "memory" clobber is replaced by the *a input operand. */);
return result;
}
vld1q_f32 (const float32_t * a)
{
float32x4_t result;
- __asm__ ("ld1 {%0.4s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.4s}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole float32x4_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const float32x4_t *_a = (const float32x4_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
vld1q_f64 (const float64_t * a)
{
float64x2_t result;
- __asm__ ("ld1 {%0.2d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.2d}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole float64x2_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const float64x2_t *_a = (const float64x2_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
float32x4_t b_ = (b); \
const float32_t * a_ = (a); \
float32x4_t result; \
- __asm__ ("ld1 {%0.s}[%3],[%1]" \
+ __asm__ ("ld1 {%0.s}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
float64x2_t b_ = (b); \
const float64_t * a_ = (a); \
float64x2_t result; \
- __asm__ ("ld1 {%0.d}[%3],[%1]" \
+ __asm__ ("ld1 {%0.d}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
poly8x16_t b_ = (b); \
const poly8_t * a_ = (a); \
poly8x16_t result; \
- __asm__ ("ld1 {%0.b}[%3],[%1]" \
+ __asm__ ("ld1 {%0.b}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
poly16x8_t b_ = (b); \
const poly16_t * a_ = (a); \
poly16x8_t result; \
- __asm__ ("ld1 {%0.h}[%3],[%1]" \
+ __asm__ ("ld1 {%0.h}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
int8x16_t b_ = (b); \
const int8_t * a_ = (a); \
int8x16_t result; \
- __asm__ ("ld1 {%0.b}[%3],[%1]" \
+ __asm__ ("ld1 {%0.b}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
int16x8_t b_ = (b); \
const int16_t * a_ = (a); \
int16x8_t result; \
- __asm__ ("ld1 {%0.h}[%3],[%1]" \
+ __asm__ ("ld1 {%0.h}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
int32x4_t b_ = (b); \
const int32_t * a_ = (a); \
int32x4_t result; \
- __asm__ ("ld1 {%0.s}[%3],[%1]" \
+ __asm__ ("ld1 {%0.s}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
int64x2_t b_ = (b); \
const int64_t * a_ = (a); \
int64x2_t result; \
- __asm__ ("ld1 {%0.d}[%3],[%1]" \
+ __asm__ ("ld1 {%0.d}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
uint8x16_t b_ = (b); \
const uint8_t * a_ = (a); \
uint8x16_t result; \
- __asm__ ("ld1 {%0.b}[%3],[%1]" \
+ __asm__ ("ld1 {%0.b}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
uint16x8_t b_ = (b); \
const uint16_t * a_ = (a); \
uint16x8_t result; \
- __asm__ ("ld1 {%0.h}[%3],[%1]" \
+ __asm__ ("ld1 {%0.h}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
uint32x4_t b_ = (b); \
const uint32_t * a_ = (a); \
uint32x4_t result; \
- __asm__ ("ld1 {%0.s}[%3],[%1]" \
+ __asm__ ("ld1 {%0.s}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
uint64x2_t b_ = (b); \
const uint64_t * a_ = (a); \
uint64x2_t result; \
- __asm__ ("ld1 {%0.d}[%3],[%1]" \
+ __asm__ ("ld1 {%0.d}[%1], %2" /* %1 = lane index c, %2 = *a_ */ \
: "=w"(result) \
- : "r"(a_), "0"(b_), "i"(c) \
+ : "i"(c), "Utv"(*a_), "0"(b_) /* "0" ties b_ to output operand %0 */ \
: /* No clobbers */); \
result; \
})
vld1q_p8 (const poly8_t * a)
{
poly8x16_t result;
- __asm__ ("ld1 {%0.16b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.16b}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole poly8x16_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const poly8x16_t *_a = (const poly8x16_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
vld1q_p16 (const poly16_t * a)
{
poly16x8_t result;
- __asm__ ("ld1 {%0.8h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ /* Keep the .8h arrangement of the old form: the result is a poly16x8_t, and
+ the sibling 16-bit loads (vld1q_s16, vld1q_u16) use .8h as well. */
+ __asm__ ("ld1 {%0.8h}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole poly16x8_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const poly16x8_t *_a = (const poly16x8_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
vld1q_s8 (const int8_t * a)
{
int8x16_t result;
- __asm__ ("ld1 {%0.16b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.16b}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole int8x16_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const int8x16_t *_a = (const int8x16_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
vld1q_s16 (const int16_t * a)
{
int16x8_t result;
- __asm__ ("ld1 {%0.8h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.8h}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole int16x8_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const int16x8_t *_a = (const int16x8_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
vld1q_s32 (const int32_t * a)
{
int32x4_t result;
- __asm__ ("ld1 {%0.4s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.4s}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole int32x4_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const int32x4_t *_a = (const int32x4_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
vld1q_s64 (const int64_t * a)
{
int64x2_t result;
- __asm__ ("ld1 {%0.2d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.2d}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole int64x2_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const int64x2_t *_a = (const int64x2_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
vld1q_u8 (const uint8_t * a)
{
uint8x16_t result;
- __asm__ ("ld1 {%0.16b},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.16b}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole uint8x16_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const uint8x16_t *_a = (const uint8x16_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
vld1q_u16 (const uint16_t * a)
{
uint16x8_t result;
- __asm__ ("ld1 {%0.8h},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.8h}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole uint16x8_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const uint16x8_t *_a = (const uint16x8_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
vld1q_u32 (const uint32_t * a)
{
uint32x4_t result;
- __asm__ ("ld1 {%0.4s},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.4s}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole uint32x4_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const uint32x4_t *_a = (const uint32x4_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}
vld1q_u64 (const uint64_t * a)
{
uint64x2_t result;
- __asm__ ("ld1 {%0.2d},[%1]"
- : "=w"(result)
- : "r"(a)
- : "memory");
+ __asm__ ("ld1 {%0.2d}, %1" /* %0 = result, %1 = the vector read through a */
+ : "=w"(result)
+ /* View a as a whole uint64x2_t; the cast keeps a's const qualifier. */
+ : "Utv"(({const uint64x2_t *_a = (const uint64x2_t *) a; *_a;}))
+ : /* No clobbers: the old "memory" clobber is replaced by the input operand. */);
return result;
}