sysdeps/aarch64/fpu/coshf_sve.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64

/* Single-precision vector (SVE) cosh function

   Copyright (C) 2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include "sv_math.h"
#include "sv_expf_inline.h"

static const struct data
{
  struct sv_expf_data expf_consts;
  float special_bound;
} data = {
  .expf_consts = SV_EXPF_DATA,
  /* 0x1.5a92d8p+6: expf overflows above this, so have to use special case.  */
  .special_bound = 0x1.5a92d8p+6,
};

static svfloat32_t NOINLINE
special_case (svfloat32_t x, svfloat32_t half_e, svfloat32_t half_over_e,
	      svbool_t pg)
{
  return sv_call_f32 (coshf, x, svadd_x (svptrue_b32 (), half_e, half_over_e),
		      pg);
}

/* Single-precision vector cosh, using vector expf.
   Maximum error is 2.77 ULP:
   _ZGVsMxv_coshf(-0x1.5b38f4p+1) got 0x1.e45946p+2
				 want 0x1.e4594cp+2.  */
svfloat32_t SV_NAME_F1 (cosh) (svfloat32_t x, svbool_t pg)
{
  const struct data *d = ptr_barrier (&data);

  svbool_t special = svacge (pg, x, d->special_bound);

  /* Calculate cosh by exp(x) / 2 + exp(-x) / 2.
     Note that x is passed to exp here, rather than |x|. This is to avoid using
     destructive unary ABS for better register usage. However it means the
     routine is not exactly symmetrical, as the exp helper is slightly less
     accurate in the negative range.  */
  svfloat32_t e = expf_inline (x, pg, &d->expf_consts);
  svfloat32_t half_e = svmul_x (svptrue_b32 (), e, 0.5);
  svfloat32_t half_over_e = svdivr_x (pg, e, 0.5);

  if (__glibc_unlikely (svptest_any (pg, special)))
    return special_case (x, half_e, half_over_e, special);

  return svadd_x (svptrue_b32 (), half_e, half_over_e);
}