Revision control
Copy as Markdown
Other Tools
use core::{↩
arch::{↩
aarch64::{float32x4_t, float64x2_t, uint16x4_t},↩
asm,↩
},↩
mem::MaybeUninit,↩
ptr,↩
};↩
↩
#[target_feature(enable = "fp16")]↩
#[inline]↩
pub(super) unsafe fn f16_to_f32_fp16(i: u16) -> f32 {↩
let result: f32;↩
asm!(↩
"fcvt {0:s}, {1:h}",↩
out(vreg) result,↩
in(vreg) i,↩
options(pure, nomem, nostack, preserves_flags));↩
result↩
}↩
↩
#[target_feature(enable = "fp16")]↩
#[inline]↩
pub(super) unsafe fn f16_to_f64_fp16(i: u16) -> f64 {↩
let result: f64;↩
asm!(↩
"fcvt {0:d}, {1:h}",↩
out(vreg) result,↩
in(vreg) i,↩
options(pure, nomem, nostack, preserves_flags));↩
result↩
}↩
↩
#[target_feature(enable = "fp16")]↩
#[inline]↩
pub(super) unsafe fn f32_to_f16_fp16(f: f32) -> u16 {↩
let result: u16;↩
asm!(↩
"fcvt {0:h}, {1:s}",↩
out(vreg) result,↩
in(vreg) f,↩
options(pure, nomem, nostack, preserves_flags));↩
result↩
}↩
↩
#[target_feature(enable = "fp16")]↩
#[inline]↩
pub(super) unsafe fn f64_to_f16_fp16(f: f64) -> u16 {↩
let result: u16;↩
asm!(↩
"fcvt {0:h}, {1:d}",↩
out(vreg) result,↩
in(vreg) f,↩
options(pure, nomem, nostack, preserves_flags));↩
result↩
}↩
↩
#[target_feature(enable = "fp16")]↩
#[inline]↩
pub(super) unsafe fn f16x4_to_f32x4_fp16(v: &[u16; 4]) -> [f32; 4] {↩
let mut vec = MaybeUninit::<uint16x4_t>::uninit();↩
ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4);↩
let result: float32x4_t;↩
asm!(↩
"fcvtl {0:v}.4s, {1:v}.4h",↩
out(vreg) result,↩
in(vreg) vec.assume_init(),↩
options(pure, nomem, nostack));↩
*(&result as *const float32x4_t).cast()↩
}↩
↩
#[target_feature(enable = "fp16")]↩
#[inline]↩
pub(super) unsafe fn f32x4_to_f16x4_fp16(v: &[f32; 4]) -> [u16; 4] {↩
let mut vec = MaybeUninit::<float32x4_t>::uninit();↩
ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4);↩
let result: uint16x4_t;↩
asm!(↩
"fcvtn {0:v}.4h, {1:v}.4s",↩
out(vreg) result,↩
in(vreg) vec.assume_init(),↩
options(pure, nomem, nostack));↩
*(&result as *const uint16x4_t).cast()↩
}↩
↩
#[target_feature(enable = "fp16")]↩
#[inline]↩
pub(super) unsafe fn f16x4_to_f64x4_fp16(v: &[u16; 4]) -> [f64; 4] {↩
let mut vec = MaybeUninit::<uint16x4_t>::uninit();↩
ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4);↩
let low: float64x2_t;↩
let high: float64x2_t;↩
asm!(↩
"fcvtl {2:v}.4s, {3:v}.4h", // Convert to f32↩
"fcvtl {0:v}.2d, {2:v}.2s", // Convert low part to f64↩
"fcvtl2 {1:v}.2d, {2:v}.4s", // Convert high part to f64↩
lateout(vreg) low,↩
lateout(vreg) high,↩
out(vreg) _,↩
in(vreg) vec.assume_init(),↩
options(pure, nomem, nostack));↩
*[low, high].as_ptr().cast()↩
}↩
↩
#[target_feature(enable = "fp16")]↩
#[inline]↩
pub(super) unsafe fn f64x4_to_f16x4_fp16(v: &[f64; 4]) -> [u16; 4] {↩
let mut low = MaybeUninit::<float64x2_t>::uninit();↩
let mut high = MaybeUninit::<float64x2_t>::uninit();↩
ptr::copy_nonoverlapping(v.as_ptr(), low.as_mut_ptr().cast(), 2);↩
ptr::copy_nonoverlapping(v[2..].as_ptr(), high.as_mut_ptr().cast(), 2);↩
let result: uint16x4_t;↩
asm!(↩
"fcvtn {1:v}.2s, {2:v}.2d", // Convert low to f32↩
"fcvtn2 {1:v}.4s, {3:v}.2d", // Convert high to f32↩
"fcvtn {0:v}.4h, {1:v}.4s", // Convert to f16↩
lateout(vreg) result,↩
out(vreg) _,↩
in(vreg) low.assume_init(),↩
in(vreg) high.assume_init(),↩
options(pure, nomem, nostack));↩
*(&result as *const uint16x4_t).cast()↩
}↩
↩
#[target_feature(enable = "fp16")]↩
#[inline]↩
pub(super) unsafe fn add_f16_fp16(a: u16, b: u16) -> u16 {↩
let result: u16;↩
asm!(↩
"fadd {0:h}, {1:h}, {2:h}",↩
out(vreg) result,↩
in(vreg) a,↩
in(vreg) b,↩
options(pure, nomem, nostack));↩
result↩
}↩
↩
#[target_feature(enable = "fp16")]↩
#[inline]↩
pub(super) unsafe fn subtract_f16_fp16(a: u16, b: u16) -> u16 {↩
let result: u16;↩
asm!(↩
"fsub {0:h}, {1:h}, {2:h}",↩
out(vreg) result,↩
in(vreg) a,↩
in(vreg) b,↩
options(pure, nomem, nostack));↩
result↩
}↩
↩
#[target_feature(enable = "fp16")]↩
#[inline]↩
pub(super) unsafe fn multiply_f16_fp16(a: u16, b: u16) -> u16 {↩
let result: u16;↩
asm!(↩
"fmul {0:h}, {1:h}, {2:h}",↩
out(vreg) result,↩
in(vreg) a,↩
in(vreg) b,↩
options(pure, nomem, nostack));↩
result↩
}↩
↩
#[target_feature(enable = "fp16")]↩
#[inline]↩
pub(super) unsafe fn divide_f16_fp16(a: u16, b: u16) -> u16 {↩
let result: u16;↩
asm!(↩
"fdiv {0:h}, {1:h}, {2:h}",↩
out(vreg) result,↩
in(vreg) a,↩
in(vreg) b,↩
options(pure, nomem, nostack));↩
result↩
}↩