diff --git a/src/abi.rs b/src/abi.rs
index 2f5c555b702..d7b8afa65ab 100644
--- a/src/abi.rs
+++ b/src/abi.rs
@@ -86,6 +86,7 @@ impl GccType for Reg {
         match self.kind {
             RegKind::Integer => cx.type_ix(self.size.bits()),
             RegKind::Float => match self.size.bits() {
+                16 => cx.f16_abi_type,
                 32 => cx.type_f32(),
                 64 => cx.type_f64(),
                 _ => bug!("unsupported float: {:?}", self),
diff --git a/src/builder.rs b/src/builder.rs
index 33f0f6fc2f8..5e6e3f1156b 100644
--- a/src/builder.rs
+++ b/src/builder.rs
@@ -446,6 +446,55 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
         );
         result.to_rvalue()
     }
+
+    fn f16_to_float_ext(&self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> Option<RValue<'gcc>> {
+        if !self.cx.is_f16_abi_storage_type(value.get_type()) { // widen f16 values only
+            return None; // not f16 storage: callers keep their generic path
+        }
+
+        self.cx.f16_to_float_libcall(value, dest_ty, self.location) // None if no ext libcall
+    }
+
+    fn float_to_f16_trunc(&self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> Option<RValue<'gcc>> {
+        if !self.cx.is_f16_abi_storage_type(dest_ty) { // narrow only into f16 storage
+            return None; // other destinations are left to the caller's generic path
+        }
+
+        self.cx.float_to_f16_libcall(value, dest_ty, self.location) // None if no trunc libcall
+    }
+
+    fn int_to_f16_trunc(
+        &self,
+        value: RValue<'gcc>,
+        dest_ty: Type<'gcc>,
+        signed: bool, // selects the signed or the unsigned int-to-float cast below
+    ) -> Option<RValue<'gcc>> { // integer -> f32 -> f16; None unless `dest_ty` is f16 storage
+        if !self.cx.is_f16_abi_storage_type(dest_ty) {
+            return None; // destination is not f16 storage: nothing emitted
+        }
+
+        let value = if signed { // first convert the integer to f32
+            self.gcc_int_to_float_cast(value, self.cx.type_f32())
+        } else {
+            self.gcc_uint_to_float_cast(value, self.cx.type_f32())
+        };
+        self.float_to_f16_trunc(value, dest_ty) // then narrow the f32 to f16
+    }
+
+    fn f16_arithmetic_binary_op(
+        &mut self,
+        a: RValue<'gcc>,
+        b: RValue<'gcc>,
+        op: impl FnOnce(&mut Self, RValue<'gcc>, RValue<'gcc>) -> RValue<'gcc>,
+    ) -> Option<RValue<'gcc>> { // runs `op` on f32 copies of f16 operands; None if `a` is not f16
+        let dest_ty = a.get_type(); // the result is narrowed back to the type of `a`
+        let a = self.f16_to_float_ext(a, self.cx.type_f32())?; // `?`: not f16, caller falls back
+        let b = self
+            .f16_to_float_ext(b, self.cx.type_f32())
+            .expect("f16 binary operands should have the same type"); // panics if only `a` is f16
+        let result = op(self, a, b); // `op` receives the widened f32 operands
+        Some(self.float_to_f16_trunc(result, dest_ty).expect("f32 should truncate to f16"))
+    }
 }
 
 impl<'tcx> HasTyCtxt<'tcx> for Builder<'_, '_, 'tcx> {
@@ -675,6 +724,11 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn fadd(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(a, b, |this, a, b| this.assign_to_var(a + b))
+        {
+            return value; // f16 operands: added as f32, result narrowed back to f16
+        }
         self.assign_to_var(a + b)
     }
 
@@ -684,6 +738,11 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn fsub(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(a, b, |this, a, b| this.assign_to_var(a - b))
+        {
+            return value; // f16 operands: subtracted as f32, result narrowed back to f16
+        }
         self.assign_to_var(a - b)
     }
 
@@ -692,6 +751,17 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn fmul(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
+        if let Some(value) = self.f16_arithmetic_binary_op(a, b, |this, a, b| {
+            this.assign_to_var(this.cx.context.new_binary_op(
+                this.location,
+                BinaryOp::Mult,
+                a.get_type(),
+                a,
+                b,
+            ))
+        }) {
+            return value;
+        }
         self.assign_to_var(self.cx.context.new_binary_op(
             self.location,
             BinaryOp::Mult,
@@ -728,6 +798,11 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn fdiv(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(a, b, |this, a, b| this.assign_to_var(a / b))
+        {
+            return value; // f16 operands: divided as f32, result narrowed back to f16
+        }
         self.assign_to_var(a / b)
     }
 
@@ -740,6 +815,12 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn frem(&mut self, a: RValue<'gcc>, b: RValue<'gcc>) -> RValue<'gcc> {
+        if let Some(value) = self.f16_arithmetic_binary_op(a, b, |this, a, b| {
+            let fmodf = this.context.get_builtin_function("fmodf");
+            this.context.new_call(this.location, fmodf, &[a, b])
+        }) {
+            return value;
+        }
         // FIXME(antoyo): add check in libgccjit since using the binary operator % causes the following error:
         // during RTL pass: expand
         // libgccjit.so: error: in expmed_mode_index, at expmed.h:240
@@ -775,14 +856,6 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
 
         #[cfg(feature = "master")]
         match self.cx.type_kind(a_type) {
-            TypeKind::Half => {
-                let fmodf = self.context.get_builtin_function("fmodf");
-                let f32_type = self.type_f32();
-                let a = self.context.new_cast(self.location, a, f32_type);
-                let b = self.context.new_cast(self.location, b, f32_type);
-                let result = self.context.new_call(self.location, fmodf, &[a, b]);
-                return self.context.new_cast(self.location, result, a_type);
-            }
             TypeKind::Float => {
                 let fmodf = self.context.get_builtin_function("fmodf");
                 return self.context.new_call(self.location, fmodf, &[a, b]);
@@ -862,6 +935,10 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn fneg(&mut self, a: RValue<'gcc>) -> RValue<'gcc> {
+        let dest_ty = a.get_type();
+        if self.cx.is_f16_abi_storage_type(dest_ty) {
+            return self.cx.f16_neg(a, dest_ty);
+        }
         set_rvalue_location(
             self,
             self.cx.context.new_unary_op(self.location, UnaryOp::Minus, a.get_type(), a),
@@ -874,24 +951,44 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
 
     fn fadd_fast(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
         // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(lhs, rhs, |this, lhs, rhs| this.assign_to_var(lhs + rhs))
+        {
+            return value; // NOTE(review): fallback applies set_rvalue_location, this path not
+        }
         let result = set_rvalue_location(self, lhs + rhs);
         self.assign_to_var(result)
     }
 
     fn fsub_fast(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
         // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(lhs, rhs, |this, lhs, rhs| this.assign_to_var(lhs - rhs))
+        {
+            return value; // NOTE(review): fallback applies set_rvalue_location, this path not
+        }
         let result = set_rvalue_location(self, lhs - rhs);
         self.assign_to_var(result)
     }
 
     fn fmul_fast(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
         // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(lhs, rhs, |this, lhs, rhs| this.assign_to_var(lhs * rhs))
+        {
+            return value; // NOTE(review): fallback applies set_rvalue_location, this path not
+        }
         let result = set_rvalue_location(self, lhs * rhs);
         self.assign_to_var(result)
     }
 
     fn fdiv_fast(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
         // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(lhs, rhs, |this, lhs, rhs| this.assign_to_var(lhs / rhs))
+        {
+            return value; // NOTE(review): fallback applies set_rvalue_location, this path not
+        }
         let result = set_rvalue_location(self, lhs / rhs);
         self.assign_to_var(result)
     }
@@ -905,21 +1002,41 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
 
     fn fadd_algebraic(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
         // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(lhs, rhs, |this, a, b| this.assign_to_var(a + b))
+        {
+            return value; // f16 operands: added as f32, result narrowed back to f16
+        }
         self.assign_to_var(lhs + rhs)
     }
 
     fn fsub_algebraic(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
         // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(lhs, rhs, |this, a, b| this.assign_to_var(a - b))
+        {
+            return value; // f16 operands: subtracted as f32, result narrowed back to f16
+        }
         self.assign_to_var(lhs - rhs)
     }
 
     fn fmul_algebraic(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
         // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(lhs, rhs, |this, a, b| this.assign_to_var(a * b))
+        {
+            return value; // f16 operands: multiplied as f32, result narrowed back to f16
+        }
         self.assign_to_var(lhs * rhs)
     }
 
     fn fdiv_algebraic(&mut self, lhs: RValue<'gcc>, rhs: RValue<'gcc>) -> RValue<'gcc> {
         // NOTE: it seems like we cannot enable fast-mode for a single operation in GCC.
+        if let Some(value) =
+            self.f16_arithmetic_binary_op(lhs, rhs, |this, a, b| this.assign_to_var(a / b))
+        {
+            return value; // f16 operands: divided as f32, result narrowed back to f16
+        }
         self.assign_to_var(lhs / rhs)
     }
 
@@ -1262,27 +1379,41 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
     }
 
     fn fptoui(&mut self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> RValue<'gcc> {
+        let value = self.f16_to_float_ext(value, self.cx.type_f32()).unwrap_or(value); // f16->f32
         set_rvalue_location(self, self.gcc_float_to_uint_cast(value, dest_ty))
     }
 
     fn fptosi(&mut self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> RValue<'gcc> {
+        let value = self.f16_to_float_ext(value, self.cx.type_f32()).unwrap_or(value); // f16->f32
         set_rvalue_location(self, self.gcc_float_to_int_cast(value, dest_ty))
     }
 
     fn uitofp(&mut self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> RValue<'gcc> {
+        if let Some(value) = self.int_to_f16_trunc(value, dest_ty, false) { // false = unsigned
+            return set_rvalue_location(self, value); // f16 destination handled via f32
+        }
         set_rvalue_location(self, self.gcc_uint_to_float_cast(value, dest_ty))
     }
 
     fn sitofp(&mut self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> RValue<'gcc> {
+        if let Some(value) = self.int_to_f16_trunc(value, dest_ty, true) { // true = signed
+            return set_rvalue_location(self, value); // f16 destination handled via f32
+        }
         set_rvalue_location(self, self.gcc_int_to_float_cast(value, dest_ty))
     }
 
     fn fptrunc(&mut self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> RValue<'gcc> {
+        if let Some(value) = self.float_to_f16_trunc(value, dest_ty) { // f16 destination
+            return set_rvalue_location(self, value); // narrowed by the truncation libcall
+        }
         // FIXME(antoyo): make sure it truncates.
         set_rvalue_location(self, self.context.new_cast(self.location, value, dest_ty))
     }
 
     fn fpext(&mut self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> RValue<'gcc> {
+        if let Some(value) = self.f16_to_float_ext(value, dest_ty) { // f16 source
+            return set_rvalue_location(self, value); // widened by the extension libcall
+        }
         set_rvalue_location(self, self.context.new_cast(self.location, value, dest_ty))
     }
 
@@ -1348,6 +1479,10 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         // LLVM has a concept of "unordered compares", where eg ULT returns true if either the two
         // arguments are unordered (i.e. either is NaN), or the lhs is less than the rhs. GCC does
         // not natively have this concept, so in some cases we must manually handle NaNs
+
+        let lhs = self.f16_to_float_ext(lhs, self.cx.type_f32()).unwrap_or(lhs);
+        let rhs = self.f16_to_float_ext(rhs, self.cx.type_f32()).unwrap_or(rhs);
+
         let must_handle_nan = match op {
             RealPredicate::RealPredicateFalse => unreachable!(),
             RealPredicate::RealOEQ => false,
@@ -1828,6 +1963,9 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
         if scalar.is_bool() {
             return self.unchecked_utrunc(val, self.cx().type_i1());
         }
+        if let abi::Primitive::Float(abi::Float::F16) = scalar.primitive() {
+            return self.cx.bitcast_if_needed(val, self.cx.f16_abi_type);
+        }
         val
     }
 
@@ -1844,9 +1982,12 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
     fn fptoint_sat(
         &mut self,
         signed: bool,
-        val: RValue<'gcc>,
+        mut val: RValue<'gcc>,
         dest_ty: Type<'gcc>,
     ) -> RValue<'gcc> {
+        if let Some(extended) = self.f16_to_float_ext(val, self.cx.type_f32()) {
+            val = extended;
+        }
         let src_ty = self.cx.val_ty(val);
         let (float_ty, int_ty) = if self.cx.type_kind(src_ty) == TypeKind::Vector {
             assert_eq!(self.cx.vector_length(src_ty), self.cx.vector_length(dest_ty));
diff --git a/src/context.rs b/src/context.rs
index e0810a35b04..1dbc120b344 100644
--- a/src/context.rs
+++ b/src/context.rs
@@ -1,9 +1,12 @@
 use std::cell::{Cell, RefCell};
 use std::collections::HashMap;
 
-use gccjit::{Block, CType, Context, Function, FunctionType, LValue, Location, RValue, Type};
+use gccjit::{
+    BinaryOp, Block, CType, Context, Function, FunctionType, LValue, Location, RValue, Type,
+};
 use rustc_abi::{Align, HasDataLayout, PointeeInfo, Size, TargetDataLayout, VariantIdx};
 use rustc_codegen_ssa::base::wants_msvc_seh;
+use rustc_codegen_ssa::common::TypeKind;
 use rustc_codegen_ssa::errors as ssa_errors;
 use rustc_codegen_ssa::traits::{BackendTypes, BaseTypeCodegenMethods, MiscCodegenMethods};
 use rustc_data_structures::base_n::{ALPHANUMERIC_ONLY, ToBaseN};
@@ -59,6 +62,8 @@ pub struct CodegenCx<'gcc, 'tcx> {
     pub u128_type: Type<'gcc>,
     pub usize_type: Type<'gcc>,
 
+    pub f16_abi_type: Type<'gcc>,
+
     pub char_type: Type<'gcc>,
     pub uchar_type: Type<'gcc>,
     pub short_type: Type<'gcc>,
@@ -184,6 +189,8 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
         let u32_type = create_type(CType::UInt32t, tcx.types.u32);
         let u64_type = create_type(CType::UInt64t, tcx.types.u64);
 
+        let f16_abi_type = context.new_int_type(2, false);
+
         let (i128_type, u128_type) = if supports_128bit_integers {
             let i128_type = create_type(CType::Int128t, tcx.types.i128);
             let u128_type = create_type(CType::UInt128t, tcx.types.u128);
@@ -260,6 +267,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
             u32_type,
             u64_type,
             u128_type,
+            f16_abi_type,
             char_type,
             uchar_type,
             short_type,
@@ -371,6 +379,82 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> {
             value
         }
     }
+
+    fn call_unary_fn(
+        &self,
+        name: &str, // symbol name given to the extern declaration
+        value: RValue<'gcc>, // the single argument passed to the call
+        param_ty: Type<'gcc>,
+        return_ty: Type<'gcc>,
+        location: Option<Location<'gcc>>, // attached to the call expression only
+    ) -> RValue<'gcc> { // declares a one-parameter extern function `name` and calls it
+        let param = self.context.new_parameter(None, param_ty, "a");
+        let func = // NOTE(review): a fresh declaration is built on every call - confirm OK
+            self.context.new_function(None, FunctionType::Extern, return_ty, &[param], name, false);
+        self.context.new_call(location, func, &[value])
+    }
+
+    fn f16_ext_fn_name(&self, dest_ty: Type<'gcc>) -> Option<&'static str> {
+        match self.type_kind(dest_ty) { // routine name chosen by the destination type kind
+            TypeKind::Float => Some("__extendhfsf2"),
+            TypeKind::Double => Some("__extendhfdf2"),
+            _ => None, // NOTE(review): no routine for other kinds (e.g. f128) - confirm intent
+        }
+    }
+
+    fn f16_trunc_fn_name(&self, src_ty: Type<'gcc>) -> Option<&'static str> {
+        match self.type_kind(src_ty) { // routine name chosen by the source type kind
+            TypeKind::Float => Some("__truncsfhf2"),
+            TypeKind::Double => Some("__truncdfhf2"),
+            _ => None, // NOTE(review): no routine for other kinds (e.g. f128) - confirm intent
+        }
+    }
+
+    pub fn is_f16_abi_storage_type(&self, typ: Type<'gcc>) -> bool {
+        // Callers use this only for Rust f16 operations. The compatibility arm handles
+        // GCC versions that hand back an equivalent u16 storage type instead of the exact handle.
+        let kind = self.type_kind(typ);
+        kind == TypeKind::Half // a type reported as half always qualifies
+            || (kind == TypeKind::Integer // otherwise it must be an integer type ...
+                && typ.get_size() == 2 // ... of size 2 ...
+                && typ.is_compatible_with(self.f16_abi_type)) // ... compatible with the storage type
+    }
+
+    pub fn f16_to_float_libcall(
+        &self,
+        value: RValue<'gcc>,
+        dest_ty: Type<'gcc>,
+        location: Option<Location<'gcc>>,
+    ) -> Option<RValue<'gcc>> { // f16 -> `dest_ty` through an extern call; None if no routine
+        let name = self.f16_ext_fn_name(dest_ty)?; // return None before emitting anything
+        let value = self.bitcast_if_needed(value, self.f16_abi_type); // argument as storage type
+        Some(self.call_unary_fn(name, value, self.f16_abi_type, dest_ty, location))
+    }
+
+    pub fn float_to_f16_libcall(
+        &self,
+        value: RValue<'gcc>,
+        dest_ty: Type<'gcc>,
+        location: Option<Location<'gcc>>,
+    ) -> Option<RValue<'gcc>> { // float -> f16 through an extern call; None if no routine
+        let value_type = value.get_type(); // the routine is picked from the source type
+        let name = self.f16_trunc_fn_name(value_type)?; // return None before emitting anything
+        let value = self.call_unary_fn(name, value, value_type, self.f16_abi_type, location);
+        Some(self.bitcast_if_needed(value, dest_ty)) // call yields storage type; match `dest_ty`
+    }
+
+    pub fn f16_neg(&self, value: RValue<'gcc>, dest_ty: Type<'gcc>) -> RValue<'gcc> {
+        let value = self.bitcast_if_needed(value, self.f16_abi_type); // operate on the raw bits
+        let sign_bit = self.gcc_uint(self.f16_abi_type, 0x8000); // mask with only bit 15 set
+        let value = self.context.new_binary_op( // XOR toggles that one bit, no float arithmetic
+            None, // NOTE(review): no source location is attached to this operation
+            BinaryOp::BitwiseXor,
+            self.f16_abi_type,
+            value,
+            sign_bit,
+        );
+        self.bitcast_if_needed(value, dest_ty) // hand the result back as `dest_ty`
+    }
 }
 
 impl<'gcc, 'tcx> BackendTypes for CodegenCx<'gcc, 'tcx> {
diff --git a/src/intrinsic/llvm.rs b/src/intrinsic/llvm.rs
index d58697f1bf2..587670608b6 100644
--- a/src/intrinsic/llvm.rs
+++ b/src/intrinsic/llvm.rs
@@ -1,6 +1,6 @@
 use std::borrow::Cow;
 
-use gccjit::{CType, Context, Field, Function, FunctionPtrType, RValue, ToRValue, Type};
+use gccjit::{Context, Field, Function, FunctionPtrType, RValue, ToRValue, Type};
 use rustc_codegen_ssa::traits::BuilderMethods;
 
 use crate::builder::Builder;
@@ -62,6 +62,25 @@ fn aes_output_type<'a, 'gcc, 'tcx>(
     (typ, field1, field2)
 }
 
+fn splat_scalar_to_vector<'a, 'gcc, 'tcx>(
+    builder: &Builder<'a, 'gcc, 'tcx>,
+    vector_ty: Type<'gcc>,
+    value: RValue<'gcc>,
+) -> RValue<'gcc> { // builds a `vector_ty` rvalue whose every lane holds `value`
+    let vector_ty_info = vector_ty.dyncast_vector().expect("vector type"); // panics otherwise
+    let element_ty = vector_ty_info.get_element_type();
+    let value = if value.get_type() != element_ty {
+        builder.context.new_bitcast(None, value, element_ty) // make the scalar match the lanes
+    } else {
+        value
+    };
+    builder.context.new_rvalue_from_vector(
+        None,
+        vector_ty,
+        &vec![value; vector_ty_info.get_num_units()], // one copy per lane of `vector_ty`
+    )
+}
+
 fn wide_aes_output_type<'a, 'gcc, 'tcx>(
     builder: &Builder<'a, 'gcc, 'tcx>,
 ) -> (Type<'gcc>, Field<'gcc>, Field<'gcc>) {
@@ -576,9 +595,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
                 let arg2_type = gcc_func.get_param_type(1);
                 let arg3_type = gcc_func.get_param_type(2);
                 let arg4_type = gcc_func.get_param_type(3);
-                let a = builder.context.new_rvalue_from_vector(None, arg1_type, &[new_args[0]; 8]);
-                let b = builder.context.new_rvalue_from_vector(None, arg2_type, &[new_args[1]; 8]);
-                let c = builder.context.new_rvalue_from_vector(None, arg3_type, &[new_args[2]; 8]);
+                let a = splat_scalar_to_vector(builder, arg1_type, new_args[0]);
+                let b = splat_scalar_to_vector(builder, arg2_type, new_args[1]);
+                let c = splat_scalar_to_vector(builder, arg3_type, new_args[2]);
                 let arg4 = builder.context.new_rvalue_from_int(arg4_type, -1);
                 args = vec![a, b, c, arg4, new_args[3]].into();
             }
@@ -675,9 +694,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
                 let arg1_type = gcc_func.get_param_type(0);
                 let arg2_type = gcc_func.get_param_type(1);
                 let arg3_type = gcc_func.get_param_type(2);
-                let a = builder.context.new_rvalue_from_vector(None, arg1_type, &[new_args[0]; 4]);
-                let b = builder.context.new_rvalue_from_vector(None, arg2_type, &[new_args[1]; 4]);
-                let c = builder.context.new_rvalue_from_vector(None, arg3_type, &[new_args[2]; 4]);
+                let a = splat_scalar_to_vector(builder, arg1_type, new_args[0]);
+                let b = splat_scalar_to_vector(builder, arg2_type, new_args[1]);
+                let c = splat_scalar_to_vector(builder, arg3_type, new_args[2]);
                 args = vec![a, b, c, new_args[3]].into();
             }
             "__builtin_ia32_vfmaddsd3_round" => {
@@ -685,9 +704,9 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
                 let arg1_type = gcc_func.get_param_type(0);
                 let arg2_type = gcc_func.get_param_type(1);
                 let arg3_type = gcc_func.get_param_type(2);
-                let a = builder.context.new_rvalue_from_vector(None, arg1_type, &[new_args[0]; 2]);
-                let b = builder.context.new_rvalue_from_vector(None, arg2_type, &[new_args[1]; 2]);
-                let c = builder.context.new_rvalue_from_vector(None, arg3_type, &[new_args[2]; 2]);
+                let a = splat_scalar_to_vector(builder, arg1_type, new_args[0]);
+                let b = splat_scalar_to_vector(builder, arg2_type, new_args[1]);
+                let c = splat_scalar_to_vector(builder, arg3_type, new_args[2]);
                 args = vec![a, b, c, new_args[3]].into();
             }
             "__builtin_ia32_ldmxcsr" => {
@@ -750,13 +769,6 @@ pub fn adjust_intrinsic_arguments<'a, 'b, 'gcc, 'tcx>(
                 ]
                 .into();
             }
-            "fma" => {
-                let mut new_args = args.to_vec();
-                new_args[0] = builder.context.new_cast(None, new_args[0], builder.double_type);
-                new_args[1] = builder.context.new_cast(None, new_args[1], builder.double_type);
-                new_args[2] = builder.context.new_cast(None, new_args[2], builder.double_type);
-                args = new_args.into();
-            }
             "__builtin_ia32_sqrtsh_mask_round"
             | "__builtin_ia32_vcvtss2sh_mask_round"
             | "__builtin_ia32_vcvtsd2sh_mask_round"
@@ -801,6 +813,10 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(
                 let zero = builder.context.new_rvalue_zero(builder.int_type);
                 return_value =
                     builder.context.new_vector_access(None, return_value, zero).to_rvalue();
+                let expected_type = orig_args[0].get_type();
+                if return_value.get_type() != expected_type {
+                    return_value = builder.context.new_bitcast(None, return_value, expected_type);
+                }
             }
         }
         "__builtin_ia32_addcarryx_u64"
@@ -854,10 +870,6 @@ pub fn adjust_intrinsic_return_value<'a, 'gcc, 'tcx>(
                 &[random_number, success_variable.to_rvalue()],
             );
         }
-        "fma" => {
-            let f16_type = builder.context.new_c_type(CType::Float16);
-            return_value = builder.context.new_cast(None, return_value, f16_type);
-        }
         "__builtin_ia32_encodekey128_u32" => {
             // The builtin __builtin_ia32_encodekey128_u32 writes the result in its pointer argument while
             // llvm.x86.encodekey128 returns a value.
diff --git a/src/intrinsic/mod.rs b/src/intrinsic/mod.rs
index dc45fc49a79..eb29cf34956 100644
--- a/src/intrinsic/mod.rs
+++ b/src/intrinsic/mod.rs
@@ -144,17 +144,12 @@ fn generic_f16_builtin<'gcc, 'tcx>(
     name: Symbol,
     args: &[OperandRef<'tcx, RValue<'gcc>>],
 ) -> RValue<'gcc> {
-    let f32_type = cx.type_f32();
     let builtin_name = match name {
         sym::fabs => "fabsf",
         _ => unreachable!(),
     };
 
-    let func = cx.context.get_builtin_function(builtin_name);
-    let args: Vec<_> =
-        args.iter().map(|arg| cx.context.new_cast(None, arg.immediate(), f32_type)).collect();
-    let result = cx.context.new_call(None, func, &args);
-    cx.context.new_cast(None, result, cx.type_f16())
+    call_f32_builtin_for_f16(cx, builtin_name, args)
 }
 
 fn f16_builtin<'gcc, 'tcx>(
@@ -162,7 +157,6 @@ fn f16_builtin<'gcc, 'tcx>(
     name: Symbol,
     args: &[OperandRef<'tcx, RValue<'gcc>>],
 ) -> RValue<'gcc> {
-    let f32_type = cx.type_f32();
     let builtin_name = match name {
         sym::ceilf16 => "__builtin_ceilf",
         sym::copysignf16 => "__builtin_copysignf",
@@ -181,11 +175,41 @@ fn f16_builtin<'gcc, 'tcx>(
         _ => unreachable!(),
     };
 
+    call_f32_builtin_for_f16(cx, builtin_name, args)
+}
+
+fn call_f32_builtin_for_f16<'gcc, 'tcx>(
+    cx: &CodegenCx<'gcc, 'tcx>,
+    builtin_name: &str, // looked up with `get_builtin_function`
+    args: &[OperandRef<'tcx, RValue<'gcc>>],
+) -> RValue<'gcc> { // calls an f32 builtin on widened f16 arguments, then narrows the result
     let func = cx.context.get_builtin_function(builtin_name);
-    let args: Vec<_> =
-        args.iter().map(|arg| cx.context.new_cast(None, arg.immediate(), f32_type)).collect();
+    let args: Vec<_> = args.iter().map(|arg| f16_to_f32(cx, arg.immediate())).collect();
     let result = cx.context.new_call(None, func, &args);
-    cx.context.new_cast(None, result, cx.type_f16())
+    float_to_f16(cx, result, cx.f16_abi_type) // result is returned in the f16 storage type
+}
+
+fn f16_to_float<'gcc, 'tcx>(
+    cx: &CodegenCx<'gcc, 'tcx>,
+    value: RValue<'gcc>,
+    dest_ty: Type<'gcc>,
+) -> RValue<'gcc> { // infallible wrapper over `f16_to_float_libcall`, no source location
+    cx.f16_to_float_libcall(value, dest_ty, None)
+        .unwrap_or_else(|| bug!("cannot extend f16 to {:?}", cx.type_kind(dest_ty))) // ICE on None
+}
+
+fn f16_to_f32<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, value: RValue<'gcc>) -> RValue<'gcc> {
+    f16_to_float(cx, value, cx.type_f32()) // shorthand: widen an f16 value to f32
+}
+
+fn float_to_f16<'gcc, 'tcx>(
+    cx: &CodegenCx<'gcc, 'tcx>,
+    value: RValue<'gcc>,
+    dest_ty: Type<'gcc>,
+) -> RValue<'gcc> { // infallible wrapper over `float_to_f16_libcall`, no source location
+    let value_ty = value.get_type(); // captured up front for the diagnostic below
+    cx.float_to_f16_libcall(value, dest_ty, None)
+        .unwrap_or_else(|| bug!("cannot truncate {:?} to f16", cx.type_kind(value_ty))) // ICE
+}
 
 impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
@@ -314,10 +338,10 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
             }
             sym::powif16 => {
                 let func = self.cx.context.get_builtin_function("__builtin_powif");
-                let arg0 = self.cx.context.new_cast(None, args[0].immediate(), self.cx.type_f32());
+                let arg0 = f16_to_f32(self.cx, args[0].immediate());
                 let args = [arg0, args[1].immediate()];
                 let result = self.cx.context.new_call(None, func, &args);
-                self.cx.context.new_cast(None, result, self.cx.type_f16())
+                float_to_f16(self.cx, result, self.cx.f16_abi_type)
             }
             sym::powif128 => {
                 let f128_type = self.cx.type_f128();
@@ -591,24 +615,20 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tc
         args: &[OperandRef<'tcx, Self::Value>],
         is_cleanup: bool,
     ) -> Self::Value {
+        let sym = self.tcx.symbol_name(instance).name;
+        if sym == "llvm.fma.f16" {
+            return call_f32_builtin_for_f16(self.cx, "fmaf", args);
+        }
+
         let func = if let Some(&func) = self.intrinsic_instances.borrow().get(&instance) {
             func
         } else {
-            let sym = self.tcx.symbol_name(instance).name;
-
             let func = if let Some(func) = self.intrinsics.borrow().get(sym) {
                 *func
             } else {
                 self.linkage.set(FunctionType::Extern);
 
-                let func = match sym {
-                    "llvm.fma.f16" => {
-                        // fma is not a target builtin, but a normal builtin, so we handle it differently
-                        // here.
-                        self.context.get_builtin_function("fma")
-                    }
-                    _ => llvm::intrinsic(sym, self),
-                };
+                let func = llvm::intrinsic(sym, self);
 
                 self.intrinsics.borrow_mut().insert(sym.to_string(), func);
 
diff --git a/src/intrinsic/simd.rs b/src/intrinsic/simd.rs
index a32592b45e5..32a9f8c6812 100644
--- a/src/intrinsic/simd.rs
+++ b/src/intrinsic/simd.rs
@@ -699,7 +699,61 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             _ => Style::Unsupported,
         };
 
+        let in_is_f16 = matches!(in_elem.kind(), ty::Float(f) if f.bit_width() == 16);
+        let out_is_f16 = matches!(out_elem.kind(), ty::Float(f) if f.bit_width() == 16);
+
         match (in_style, out_style) {
+            (Style::Float, Style::Float) if in_is_f16 || out_is_f16 => {
+                let arg = args[0].immediate();
+                let result_elem_ty = bx.element_type(llret_ty);
+                let mut elements = Vec::with_capacity(in_len as usize);
+                for i in 0..in_len {
+                    let index = bx.context.new_rvalue_from_long(bx.ulong_type, i as i64);
+                    let mut element = bx.extract_element(arg, index).to_rvalue();
+                    if in_is_f16 {
+                        element = super::f16_to_float(bx.cx, element, result_elem_ty);
+                    } else if out_is_f16 {
+                        element = super::float_to_f16(bx.cx, element, result_elem_ty);
+                    } else {
+                        element = bx.context.new_cast(None, element, result_elem_ty);
+                    }
+                    elements.push(element);
+                }
+                return Ok(bx.context.new_rvalue_from_vector(None, llret_ty, &elements));
+            }
+            (Style::Int, Style::Float) if out_is_f16 => {
+                let arg = args[0].immediate();
+                let result_elem_ty = bx.element_type(llret_ty);
+                let mut elements = Vec::with_capacity(in_len as usize);
+                for i in 0..in_len {
+                    let index = bx.context.new_rvalue_from_long(bx.ulong_type, i as i64);
+                    let element = bx.extract_element(arg, index).to_rvalue();
+                    let element = match *in_elem.kind() {
+                        ty::Int(_) => bx.sitofp(element, result_elem_ty),
+                        ty::Uint(_) => bx.uitofp(element, result_elem_ty),
+                        _ => unreachable!(),
+                    };
+                    elements.push(element);
+                }
+                return Ok(bx.context.new_rvalue_from_vector(None, llret_ty, &elements));
+            }
+            (Style::Float, Style::Int) if in_is_f16 => {
+                let arg = args[0].immediate();
+                let result_elem_ty = bx.element_type(llret_ty);
+                let mut elements = Vec::with_capacity(in_len as usize);
+                for i in 0..in_len {
+                    let index = bx.context.new_rvalue_from_long(bx.ulong_type, i as i64);
+                    let element = bx.extract_element(arg, index).to_rvalue();
+                    let element = super::f16_to_f32(bx.cx, element);
+                    let element = match *out_elem.kind() {
+                        ty::Int(_) => bx.fptosi(element, result_elem_ty),
+                        ty::Uint(_) => bx.fptoui(element, result_elem_ty),
+                        _ => unreachable!(),
+                    };
+                    elements.push(element);
+                }
+                return Ok(bx.context.new_rvalue_from_vector(None, llret_ty, &elements));
+            }
             (Style::Unsupported, Style::Unsupported) => {
                 require!(
                     false,
@@ -801,12 +855,14 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         }
     }
 
+    #[expect(clippy::too_many_arguments)]
     fn simd_simple_float_intrinsic<'gcc, 'tcx>(
         name: Symbol,
         in_elem: Ty<'_>,
         in_ty: Ty<'_>,
         in_len: u64,
         bx: &mut Builder<'_, 'gcc, 'tcx>,
+        llret_ty: Type<'gcc>,
         span: Span,
         args: &[OperandRef<'tcx, RValue<'gcc>>],
     ) -> Result<RValue<'gcc>, ()> {
@@ -820,10 +876,10 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             return_error!(InvalidMonomorphization::BasicFloatType { span, name, ty: in_ty });
         };
         let elem_ty = bx.cx.type_float_from_ty(*f);
-        let (elem_ty_str, elem_ty, cast_type) = match f.bit_width() {
-            16 => ("", elem_ty, Some(bx.cx.double_type)),
-            32 => ("f", elem_ty, None),
-            64 => ("", elem_ty, None),
+        let is_f16 = f.bit_width() == 16;
+        let elem_ty_str = match f.bit_width() {
+            16 | 32 => "f",
+            64 => "",
             _ => {
                 return_error!(InvalidMonomorphization::FloatingPointVector {
                     span,
@@ -834,7 +890,9 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             }
         };
 
-        let vec_ty = bx.cx.type_vector(elem_ty, in_len);
+        let vec_ty = if is_f16 { llret_ty } else { bx.cx.type_vector(elem_ty, in_len) };
+        let result_elem_ty =
+            vec_ty.unqualified().dyncast_vector().expect("vector return type").get_element_type();
 
         let intr_name = match name {
             sym::simd_ceil => "ceil",
@@ -868,14 +926,14 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
                 let mut element = bx.extract_element(arg.immediate(), index).to_rvalue();
                 // FIXME: it would probably be better to not have casts here and use the proper
                 // instructions.
-                if let Some(typ) = cast_type {
-                    element = bx.context.new_cast(None, element, typ);
+                if is_f16 {
+                    element = super::f16_to_f32(bx.cx, element);
                 }
                 arguments.push(element);
             }
             let mut result = bx.context.new_call(None, function, &arguments);
-            if cast_type.is_some() {
-                result = bx.context.new_cast(None, result, elem_ty);
+            if is_f16 {
+                result = super::float_to_f16(bx.cx, result, result_elem_ty);
             }
             vector_elements.push(result);
         }
@@ -902,7 +960,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
             | sym::simd_round_ties_even
             | sym::simd_trunc
     ) {
-        return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, span, args);
+        return simd_simple_float_intrinsic(name, in_elem, in_ty, in_len, bx, llret_ty, span, args);
     }
 
     #[cfg(feature = "master")]
@@ -1217,19 +1275,44 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         return Ok(bx.context.new_rvalue_zero(bx.i32_type));
     }
 
-    arith_binary! {
-        simd_add: Uint, Int => add, Float => fadd;
-        simd_sub: Uint, Int => sub, Float => fsub;
-        simd_mul: Uint, Int => mul, Float => fmul;
-        simd_div: Uint => udiv, Int => sdiv, Float => fdiv;
-        simd_rem: Uint => urem, Int => srem, Float => frem;
-        simd_shl: Uint, Int => shl;
-        simd_shr: Uint => lshr, Int => ashr;
-        simd_and: Uint, Int => and;
-        simd_or: Uint, Int => or; // FIXME(antoyo): calling `or` might not work on vectors.
-        simd_xor: Uint, Int => xor;
-        simd_minimum_number_nsz: Float => vector_minimum_number_nsz;
-        simd_maximum_number_nsz: Float => vector_maximum_number_nsz;
+    /// Negates an f16 vector one lane at a time with `f16_neg`, then rebuilds a `result_ty` vector.
+    fn simd_f16_neg<'gcc, 'tcx>(
+        bx: &mut Builder<'_, 'gcc, 'tcx>,
+        value: RValue<'gcc>,
+        result_ty: Type<'gcc>,
+    ) -> RValue<'gcc> {
+        let vec_ty = result_ty.unqualified().dyncast_vector().expect("vector result type");
+        let lane_ty = vec_ty.get_element_type();
+        let mut lanes = Vec::with_capacity(vec_ty.get_num_units());
+        for lane in 0..vec_ty.get_num_units() {
+            let idx = bx.context.new_rvalue_from_long(bx.ulong_type, lane as i64);
+            let lane_value = bx.extract_element(value, idx).to_rvalue();
+            lanes.push(bx.cx.f16_neg(lane_value, lane_ty));
+        }
+        bx.context.new_rvalue_from_vector(None, result_ty, &lanes)
+    }
+
+    /// Runs `op` per lane: widen with `f16_to_f32`, combine, narrow with `float_to_f16`.
+    fn simd_f16_binary_op<'gcc, 'tcx>(
+        bx: &mut Builder<'_, 'gcc, 'tcx>,
+        lhs: RValue<'gcc>,
+        rhs: RValue<'gcc>,
+        result_ty: Type<'gcc>,
+        op: impl Fn(&mut Builder<'_, 'gcc, 'tcx>, RValue<'gcc>, RValue<'gcc>) -> RValue<'gcc>,
+    ) -> RValue<'gcc> {
+        let vec_ty = result_ty.unqualified().dyncast_vector().expect("vector result type");
+        let lane_ty = vec_ty.get_element_type();
+        let mut lanes = Vec::with_capacity(vec_ty.get_num_units());
+        for lane in 0..vec_ty.get_num_units() {
+            let idx = bx.context.new_rvalue_from_long(bx.ulong_type, lane as i64);
+            let left = bx.extract_element(lhs, idx).to_rvalue();
+            let right = bx.extract_element(rhs, idx).to_rvalue();
+            let wide_left = super::f16_to_f32(bx.cx, left);
+            let wide_right = super::f16_to_f32(bx.cx, right);
+            let combined = op(bx, wide_left, wide_right);
+            lanes.push(super::float_to_f16(bx.cx, combined, lane_ty));
+        }
+        bx.context.new_rvalue_from_vector(None, result_ty, &lanes)
     }
 
     macro_rules! arith_unary {
@@ -1246,6 +1329,80 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
         }
     }
 
+    if let ty::Float(ref f) = *in_elem.kind()
+        && f.bit_width() == 16
+    {
+        match name {
+            sym::simd_add => {
+                return Ok(simd_f16_binary_op(
+                    bx,
+                    args[0].immediate(),
+                    args[1].immediate(),
+                    llret_ty,
+                    |_, lhs, rhs| lhs + rhs,
+                ));
+            }
+            sym::simd_sub => {
+                return Ok(simd_f16_binary_op(
+                    bx,
+                    args[0].immediate(),
+                    args[1].immediate(),
+                    llret_ty,
+                    |_, lhs, rhs| lhs - rhs,
+                ));
+            }
+            sym::simd_mul => {
+                return Ok(simd_f16_binary_op(
+                    bx,
+                    args[0].immediate(),
+                    args[1].immediate(),
+                    llret_ty,
+                    |bx, lhs, rhs| {
+                        bx.context.new_binary_op(None, BinaryOp::Mult, lhs.get_type(), lhs, rhs)
+                    },
+                ));
+            }
+            sym::simd_div => {
+                return Ok(simd_f16_binary_op(
+                    bx,
+                    args[0].immediate(),
+                    args[1].immediate(),
+                    llret_ty,
+                    |_, lhs, rhs| lhs / rhs,
+                ));
+            }
+            sym::simd_rem => {
+                let fmodf = bx.context.get_builtin_function("fmodf");
+                return Ok(simd_f16_binary_op(
+                    bx,
+                    args[0].immediate(),
+                    args[1].immediate(),
+                    llret_ty,
+                    |bx, lhs, rhs| bx.context.new_call(None, fmodf, &[lhs, rhs]),
+                ));
+            }
+            sym::simd_neg => {
+                return Ok(simd_f16_neg(bx, args[0].immediate(), llret_ty));
+            }
+            _ => {}
+        }
+    }
+
+    arith_binary! {
+        simd_add: Uint, Int => add, Float => fadd;
+        simd_sub: Uint, Int => sub, Float => fsub;
+        simd_mul: Uint, Int => mul, Float => fmul;
+        simd_div: Uint => udiv, Int => sdiv, Float => fdiv;
+        simd_rem: Uint => urem, Int => srem, Float => frem;
+        simd_shl: Uint, Int => shl;
+        simd_shr: Uint => lshr, Int => ashr;
+        simd_and: Uint, Int => and;
+        simd_or: Uint, Int => or; // FIXME(antoyo): calling `or` might not work on vectors.
+        simd_xor: Uint, Int => xor;
+        simd_minimum_number_nsz: Float => vector_minimum_number_nsz;
+        simd_maximum_number_nsz: Float => vector_maximum_number_nsz;
+    }
+
     arith_unary! {
         simd_neg: Int => neg, Float => fneg;
     }
diff --git a/src/type_.rs b/src/type_.rs
index 5252f93a92e..ba33fca7c39 100644
--- a/src/type_.rs
+++ b/src/type_.rs
@@ -153,7 +153,7 @@ impl<'gcc, 'tcx> BaseTypeCodegenMethods for CodegenCx<'gcc, 'tcx> {
         if self.supports_f16_type {
             return self.context.new_c_type(CType::Float16);
         }
-        bug!("unsupported float width 16")
+        self.f16_abi_type
     }
 
     fn type_f32(&self) -> Type<'gcc> {
@@ -186,7 +186,9 @@ impl<'gcc, 'tcx> BaseTypeCodegenMethods for CodegenCx<'gcc, 'tcx> {
 
     #[cfg(feature = "master")]
     fn type_kind(&self, typ: Type<'gcc>) -> TypeKind {
-        if self.is_int_type_or_bool(typ) {
+        if typ == self.f16_abi_type {
+            TypeKind::Half
+        } else if self.is_int_type_or_bool(typ) {
             TypeKind::Integer
         } else if typ.get_pointee().is_some() {
             TypeKind::Pointer
@@ -220,7 +222,9 @@ impl<'gcc, 'tcx> BaseTypeCodegenMethods for CodegenCx<'gcc, 'tcx> {
 
     #[cfg(not(feature = "master"))]
     fn type_kind(&self, typ: Type<'gcc>) -> TypeKind {
-        if self.is_int_type_or_bool(typ) {
+        if typ == self.f16_abi_type {
+            TypeKind::Half
+        } else if self.is_int_type_or_bool(typ) {
             TypeKind::Integer
         } else if typ.is_compatible_with(self.float_type) {
             TypeKind::Float
@@ -270,6 +274,9 @@ impl<'gcc, 'tcx> BaseTypeCodegenMethods for CodegenCx<'gcc, 'tcx> {
 
     #[cfg(feature = "master")]
     fn float_width(&self, typ: Type<'gcc>) -> usize {
+        if typ == self.f16_abi_type {
+            return 16;
+        }
         if typ.is_floating_point() {
             (typ.get_size() * u8::BITS).try_into().unwrap()
         } else {
@@ -281,7 +288,9 @@ impl<'gcc, 'tcx> BaseTypeCodegenMethods for CodegenCx<'gcc, 'tcx> {
     fn float_width(&self, typ: Type<'gcc>) -> usize {
         let f32 = self.context.new_type::<f32>();
         let f64 = self.context.new_type::<f64>();
-        if typ.is_compatible_with(f32) {
+        if typ == self.f16_abi_type {
+            16
+        } else if typ.is_compatible_with(f32) {
             32
         } else if typ.is_compatible_with(f64) {
             64
diff --git a/src/type_of.rs b/src/type_of.rs
index 5b198eeaf01..cfbf797db7a 100644
--- a/src/type_of.rs
+++ b/src/type_of.rs
@@ -285,6 +285,7 @@ impl<'tcx> LayoutGccExt<'tcx> for TyAndLayout<'tcx> {
         match scalar.primitive() {
             Int(i, true) => cx.type_from_integer(i),
             Int(i, false) => cx.type_from_unsigned_integer(i),
+            Float(abi::Float::F16) => cx.f16_abi_type,
             Float(f) => cx.type_from_float(f),
             Pointer(address_space) => {
                 // If we know the alignment, pick something better than i8.
diff --git a/tests/compile/f16-abi.rs b/tests/compile/f16-abi.rs
new file mode 100644
index 00000000000..49f399361ff
--- /dev/null
+++ b/tests/compile/f16-abi.rs
@@ -0,0 +1,49 @@
+// Compiler:
+
+#![crate_type = "lib"]
+#![feature(f16)]
+
+#[unsafe(no_mangle)]
+pub extern "C" fn f16_identity(a: f16) -> f16 {
+    a // returned unchanged: only passing/returning an f16 through `extern "C"` is exercised
+}
+
+#[unsafe(no_mangle)]
+pub fn f16_to_f32(a: f16) -> f32 {
+    a as f32 // widening f16 -> f32 cast
+}
+
+#[unsafe(no_mangle)]
+pub fn f16_to_f64(a: f16) -> f64 {
+    a as f64 // widening f16 -> f64 cast
+}
+
+#[unsafe(no_mangle)]
+pub fn f16_to_i32(a: f16) -> i32 {
+    a as i32 // f16 -> signed integer cast
+}
+
+#[unsafe(no_mangle)]
+pub fn f16_to_u32(a: f16) -> u32 {
+    a as u32 // f16 -> unsigned integer cast
+}
+
+#[unsafe(no_mangle)]
+pub fn i32_to_f16(a: i32) -> f16 {
+    a as f16 // signed integer -> f16 cast
+}
+
+#[unsafe(no_mangle)]
+pub fn u32_to_f16(a: u32) -> f16 {
+    a as f16 // unsigned integer -> f16 cast
+}
+
+#[unsafe(no_mangle)]
+pub fn f32_to_f16(a: f32) -> f16 {
+    a as f16 // narrowing f32 -> f16 cast
+}
+
+#[unsafe(no_mangle)]
+pub fn f64_to_f16(a: f64) -> f16 {
+    a as f16 // narrowing f64 -> f16 cast
+}
diff --git a/tests/lang_tests.rs b/tests/lang_tests.rs
index 6afd54e1c3f..056b7aa1bb3 100644
--- a/tests/lang_tests.rs
+++ b/tests/lang_tests.rs
@@ -213,7 +213,7 @@ fn run_tests(tempdir: PathBuf, current_dir: String) {
         "[DEBUG] lang run",
         "tests/run",
         TestMode::CompileAndRun,
-        &[],
+        &["f16.rs"],
     );
     build_test_runner(
         tempdir,
@@ -222,7 +222,7 @@ fn run_tests(tempdir: PathBuf, current_dir: String) {
         "[RELEASE] lang run",
         "tests/run",
         TestMode::CompileAndRun,
-        &[],
+        &["f16.rs"],
     );
 }
 
diff --git a/tests/run/f16.rs b/tests/run/f16.rs
new file mode 100644
index 00000000000..040c442ce78
--- /dev/null
+++ b/tests/run/f16.rs
@@ -0,0 +1,64 @@
+// Compiler:
+//
+// Run-time:
+//   status: 0
+
+#![feature(core_intrinsics, f16, float_algebraic, link_llvm_intrinsics)]
+#![allow(internal_features)]
+
+use std::cmp::Ordering;
+use std::hint::black_box;
+use std::intrinsics::{fadd_fast, fdiv_fast, fmaf16, fmul_fast, fsub_fast, powif16};
+
+unsafe extern "C" {
+    #[link_name = "llvm.fma.f16"]
+    fn llvm_fma_f16(a: f16, b: f16, c: f16) -> f16; // bound by its LLVM intrinsic name
+}
+
+fn assert_f16_bits(value: f16, bits: u16) {
+    assert_eq!(value.to_bits(), bits); // bit-exact check: sign and NaN payload must match too
+}
+
+fn main() {
+    let one_and_half = black_box(f16::from_bits(0x3e00)); // 1.5
+    assert_eq!(one_and_half as f32, 1.5f32);
+    assert_eq!(one_and_half as f64, 1.5f64);
+
+    let three_and_three_quarters = black_box(f16::from_bits(0x4380)); // 3.75
+    assert_eq!(three_and_three_quarters as i32, 3);
+    assert_eq!(three_and_three_quarters as u32, 3);
+
+    let negative_two_and_half = black_box(f16::from_bits(0xc100)); // -2.5
+    assert_eq!(negative_two_and_half as i32, -2);
+    assert_eq!(negative_two_and_half as u32, 0); // negative input saturates to 0
+
+    assert_f16_bits(black_box(1.5f32) as f16, 0x3e00);
+    assert_f16_bits(black_box(-2.0f32) as f16, 0xc000);
+    assert_f16_bits(black_box(1.5f64) as f16, 0x3e00);
+    assert_f16_bits(black_box(42i32) as f16, 0x5140);
+    assert_f16_bits(black_box(42u32) as f16, 0x5140);
+
+    let one = black_box(1.0f16);
+    let two = black_box(2.0f16);
+    let three = black_box(3.0f16);
+    assert_f16_bits(one + two, 0x4200); // 3.0
+    assert_f16_bits(two * three, 0x4600); // 6.0
+    assert_f16_bits(two / one, 0x4000); // 2.0
+    assert_f16_bits(-three, 0xc200); // -3.0
+    assert_f16_bits(unsafe { fadd_fast(one, two) }, 0x4200);
+    assert_f16_bits(unsafe { fsub_fast(two, one) }, 0x3c00); // 1.0
+    assert_f16_bits(unsafe { fmul_fast(two, three) }, 0x4600);
+    assert_f16_bits(unsafe { fdiv_fast(three, two) }, 0x3e00); // 1.5
+    assert_f16_bits(fmaf16(one, two, -three), 0xbc00); // 1 * 2 - 3 = -1.0
+    assert_f16_bits(unsafe { llvm_fma_f16(one, two, -three) }, 0xbc00);
+    assert_f16_bits(powif16(two, 3), 0x4800); // 8.0
+
+    assert_f16_bits(black_box(123.0f16).algebraic_add(black_box(456.0f16)), 0x6086); // 579.0
+    assert_f16_bits(black_box(123.0f16).algebraic_rem(black_box(17.0f16)), 0x4400); // 4.0
+
+    let q_nan = black_box(f16::from_bits(0x7e00)); // quiet NaN; opaque so negation runs at runtime
+    let s_nan = black_box(f16::from_bits(0x7c2a)); // signaling NaN with payload 0x02a
+    assert_f16_bits(-q_nan, 0xfe00); // only the sign bit may change
+    assert_f16_bits(-s_nan, 0xfc2a); // payload and signaling state must survive negation
+    assert_eq!(f16::total_cmp(&-q_nan, &-s_nan), Ordering::Less);
+}
diff --git a/tools/cspell_dicts/rustc_codegen_gcc.txt b/tools/cspell_dicts/rustc_codegen_gcc.txt
index 4fb018b3ecd..09ff66eb4b2 100644
--- a/tools/cspell_dicts/rustc_codegen_gcc.txt
+++ b/tools/cspell_dicts/rustc_codegen_gcc.txt
@@ -13,6 +13,8 @@ ctlz
 ctpop
 cttz
 ctzll
+extendhfdf
+extendhfsf
 flto
 fmaximumf
 fmuladd
@@ -70,6 +72,8 @@ spir
 subo
 sysv
 tbaa
+truncdfhf
+truncsfhf
 uitofp
 unord
 uninlined