Skip to content

Commit 7299250

Browse files
authored
DAG: Use fast variants of fast math libcalls (#147481)
Hexagon currently has an untested global flag to control the fast math variants of libcalls. Add the fast variants as explicit libcall options so that selecting them can be a flag-based lowering decision, and implement that lowering. I have no idea which fast math flags the Hexagon case requires, so I picked the largest set of potentially relevant flags, although this can probably be refined per call. Looking in compiler-rt, I'm not sure whether the fast variants are anything more than aliases.
1 parent 433a5a7 commit 7299250

File tree

4 files changed

+501
-60
lines changed

4 files changed

+501
-60
lines changed

llvm/include/llvm/IR/RuntimeLibcalls.td

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,24 @@ foreach IntTy = ["I32", "I64", "I128"] in {
6262

6363
foreach FPTy = ["F32", "F64", "F80", "F128", "PPCF128"] in {
6464
def ADD_#FPTy : RuntimeLibcall;
65+
def FAST_ADD_#FPTy : RuntimeLibcall;
66+
6567
def SUB_#FPTy : RuntimeLibcall;
68+
def FAST_SUB_#FPTy : RuntimeLibcall;
69+
6670
def MUL_#FPTy : RuntimeLibcall;
71+
def FAST_MUL_#FPTy : RuntimeLibcall;
72+
6773
def DIV_#FPTy : RuntimeLibcall;
74+
def FAST_DIV_#FPTy : RuntimeLibcall;
75+
6876
def REM_#FPTy : RuntimeLibcall;
6977
def FMA_#FPTy : RuntimeLibcall;
7078
def POWI_#FPTy : RuntimeLibcall;
79+
7180
def SQRT_#FPTy : RuntimeLibcall;
81+
def FAST_SQRT_#FPTy : RuntimeLibcall;
82+
7283
def CBRT_#FPTy : RuntimeLibcall;
7384
def LOG_#FPTy : RuntimeLibcall;
7485
def LOG_FINITE_#FPTy : RuntimeLibcall;
@@ -1470,27 +1481,26 @@ def __hexagon_moddi3 : RuntimeLibcallImpl<SREM_I64>;
14701481
def __hexagon_umodsi3 : RuntimeLibcallImpl<UREM_I32>;
14711482
def __hexagon_umoddi3 : RuntimeLibcallImpl<UREM_I64>;
14721483

1473-
// FIXME: "Fast" versions should be treated as a separate RTLIB::FAST_* function
14741484
def __hexagon_adddf3 : RuntimeLibcallImpl<ADD_F64>;
1475-
def __hexagon_fast_adddf3 : RuntimeLibcallImpl<ADD_F64>;
1485+
def __hexagon_fast_adddf3 : RuntimeLibcallImpl<FAST_ADD_F64>;
14761486

14771487
def __hexagon_subdf3 : RuntimeLibcallImpl<SUB_F64>;
1478-
def __hexagon_fast_subdf3 : RuntimeLibcallImpl<SUB_F64>;
1488+
def __hexagon_fast_subdf3 : RuntimeLibcallImpl<FAST_SUB_F64>;
14791489

14801490
def __hexagon_muldf3 : RuntimeLibcallImpl<MUL_F64>;
1481-
def __hexagon_fast_muldf3 : RuntimeLibcallImpl<MUL_F64>;
1491+
def __hexagon_fast_muldf3 : RuntimeLibcallImpl<FAST_MUL_F64>;
14821492

14831493
def __hexagon_divdf3 : RuntimeLibcallImpl<DIV_F64>;
1484-
def __hexagon_fast_divdf3 : RuntimeLibcallImpl<DIV_F64>;
1494+
def __hexagon_fast_divdf3 : RuntimeLibcallImpl<FAST_DIV_F64>;
14851495

14861496
def __hexagon_divsf3 : RuntimeLibcallImpl<DIV_F32>;
1487-
def __hexagon_fast_divsf3 : RuntimeLibcallImpl<DIV_F32>;
1497+
def __hexagon_fast_divsf3 : RuntimeLibcallImpl<FAST_DIV_F32>;
14881498

14891499
def __hexagon_sqrtf : RuntimeLibcallImpl<SQRT_F32>;
1490-
def __hexagon_fast2_sqrtf : RuntimeLibcallImpl<SQRT_F32>;
1500+
def __hexagon_fast2_sqrtf : RuntimeLibcallImpl<FAST_SQRT_F32>;
14911501

14921502
// This is the only fast library function for sqrtd.
1493-
def __hexagon_fast2_sqrtdf2 : RuntimeLibcallImpl<SQRT_F64>;
1503+
def __hexagon_fast2_sqrtdf2 : RuntimeLibcallImpl<FAST_SQRT_F64>;
14941504

14951505
def __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
14961506
: RuntimeLibcallImpl<HEXAGON_MEMCPY_LIKELY_ALIGNED_MIN32BYTES_MULT8BYTES>;

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 99 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -140,12 +140,19 @@ class SelectionDAGLegalize {
140140
RTLIB::Libcall Call_F128,
141141
RTLIB::Libcall Call_PPCF128,
142142
SmallVectorImpl<SDValue> &Results);
143-
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
144-
RTLIB::Libcall Call_I8,
145-
RTLIB::Libcall Call_I16,
146-
RTLIB::Libcall Call_I32,
147-
RTLIB::Libcall Call_I64,
148-
RTLIB::Libcall Call_I128);
143+
144+
void
145+
ExpandFastFPLibCall(SDNode *Node, bool IsFast,
146+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
147+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
148+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
149+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
150+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
151+
SmallVectorImpl<SDValue> &Results);
152+
153+
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
154+
RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
155+
RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128);
149156
void ExpandArgFPLibCall(SDNode *Node,
150157
RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
151158
RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
@@ -2228,6 +2235,37 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
22282235
ExpandFPLibCall(Node, LC, Results);
22292236
}
22302237

2238+
void SelectionDAGLegalize::ExpandFastFPLibCall(
2239+
SDNode *Node, bool IsFast,
2240+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F32,
2241+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F64,
2242+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F80,
2243+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_F128,
2244+
std::pair<RTLIB::Libcall, RTLIB::Libcall> Call_PPCF128,
2245+
SmallVectorImpl<SDValue> &Results) {
2246+
2247+
EVT VT = Node->getSimpleValueType(0);
2248+
2249+
RTLIB::Libcall LC;
2250+
2251+
// FIXME: Probably should define fast to respect nan/inf and only be
2252+
// approximate functions.
2253+
2254+
if (IsFast) {
2255+
LC = RTLIB::getFPLibCall(VT, Call_F32.first, Call_F64.first, Call_F80.first,
2256+
Call_F128.first, Call_PPCF128.first);
2257+
}
2258+
2259+
if (!IsFast || TLI.getLibcallImpl(LC) == RTLIB::Unsupported) {
2260+
// Fall back if we don't have a fast implementation.
2261+
LC = RTLIB::getFPLibCall(VT, Call_F32.second, Call_F64.second,
2262+
Call_F80.second, Call_F128.second,
2263+
Call_PPCF128.second);
2264+
}
2265+
2266+
ExpandFPLibCall(Node, LC, Results);
2267+
}
2268+
22312269
SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
22322270
RTLIB::Libcall Call_I8,
22332271
RTLIB::Libcall Call_I16,
@@ -4514,6 +4552,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
45144552
return true;
45154553
}
45164554

4555+
/// Return true if we can use the FAST_* variant of a math libcall for the node.
4556+
/// FIXME: This is just guessing; we probably should have a unique, specific
4557+
/// set of flags required per libcall.
4558+
static bool canUseFastMathLibcall(const SDNode *Node) {
4559+
// FIXME: Probably should define fast to respect nan/inf and only be
4560+
// approximate functions.
4561+
4562+
SDNodeFlags Flags = Node->getFlags();
4563+
return Flags.hasApproximateFuncs() && Flags.hasNoNaNs() &&
4564+
Flags.hasNoInfs() && Flags.hasNoSignedZeros();
4565+
}
4566+
45174567
void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
45184568
LLVM_DEBUG(dbgs() << "Trying to convert node to libcall\n");
45194569
SmallVector<SDValue, 8> Results;
@@ -4634,11 +4684,18 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
46344684
RTLIB::FMAXIMUM_NUM_PPCF128, Results);
46354685
break;
46364686
case ISD::FSQRT:
4637-
case ISD::STRICT_FSQRT:
4638-
ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64,
4639-
RTLIB::SQRT_F80, RTLIB::SQRT_F128,
4640-
RTLIB::SQRT_PPCF128, Results);
4687+
case ISD::STRICT_FSQRT: {
4688+
// FIXME: Probably should define fast to respect nan/inf and only be
4689+
// approximate functions.
4690+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
4691+
{RTLIB::FAST_SQRT_F32, RTLIB::SQRT_F32},
4692+
{RTLIB::FAST_SQRT_F64, RTLIB::SQRT_F64},
4693+
{RTLIB::FAST_SQRT_F80, RTLIB::SQRT_F80},
4694+
{RTLIB::FAST_SQRT_F128, RTLIB::SQRT_F128},
4695+
{RTLIB::FAST_SQRT_PPCF128, RTLIB::SQRT_PPCF128},
4696+
Results);
46414697
break;
4698+
}
46424699
case ISD::FCBRT:
46434700
ExpandFPLibCall(Node, RTLIB::CBRT_F32, RTLIB::CBRT_F64,
46444701
RTLIB::CBRT_F80, RTLIB::CBRT_F128,
@@ -4875,11 +4932,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
48754932
RTLIB::LLRINT_PPCF128, Results);
48764933
break;
48774934
case ISD::FDIV:
4878-
case ISD::STRICT_FDIV:
4879-
ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
4880-
RTLIB::DIV_F80, RTLIB::DIV_F128,
4881-
RTLIB::DIV_PPCF128, Results);
4935+
case ISD::STRICT_FDIV: {
4936+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
4937+
{RTLIB::FAST_DIV_F32, RTLIB::DIV_F32},
4938+
{RTLIB::FAST_DIV_F64, RTLIB::DIV_F64},
4939+
{RTLIB::FAST_DIV_F80, RTLIB::DIV_F80},
4940+
{RTLIB::FAST_DIV_F128, RTLIB::DIV_F128},
4941+
{RTLIB::FAST_DIV_PPCF128, RTLIB::DIV_PPCF128}, Results);
48824942
break;
4943+
}
48834944
case ISD::FREM:
48844945
case ISD::STRICT_FREM:
48854946
ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
@@ -4893,17 +4954,25 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
48934954
RTLIB::FMA_PPCF128, Results);
48944955
break;
48954956
case ISD::FADD:
4896-
case ISD::STRICT_FADD:
4897-
ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
4898-
RTLIB::ADD_F80, RTLIB::ADD_F128,
4899-
RTLIB::ADD_PPCF128, Results);
4957+
case ISD::STRICT_FADD: {
4958+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
4959+
{RTLIB::FAST_ADD_F32, RTLIB::ADD_F32},
4960+
{RTLIB::FAST_ADD_F64, RTLIB::ADD_F64},
4961+
{RTLIB::FAST_ADD_F80, RTLIB::ADD_F80},
4962+
{RTLIB::FAST_ADD_F128, RTLIB::ADD_F128},
4963+
{RTLIB::FAST_ADD_PPCF128, RTLIB::ADD_PPCF128}, Results);
49004964
break;
4965+
}
49014966
case ISD::FMUL:
4902-
case ISD::STRICT_FMUL:
4903-
ExpandFPLibCall(Node, RTLIB::MUL_F32, RTLIB::MUL_F64,
4904-
RTLIB::MUL_F80, RTLIB::MUL_F128,
4905-
RTLIB::MUL_PPCF128, Results);
4967+
case ISD::STRICT_FMUL: {
4968+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
4969+
{RTLIB::FAST_MUL_F32, RTLIB::MUL_F32},
4970+
{RTLIB::FAST_MUL_F64, RTLIB::MUL_F64},
4971+
{RTLIB::FAST_MUL_F80, RTLIB::MUL_F80},
4972+
{RTLIB::FAST_MUL_F128, RTLIB::MUL_F128},
4973+
{RTLIB::FAST_MUL_PPCF128, RTLIB::MUL_PPCF128}, Results);
49064974
break;
4975+
}
49074976
case ISD::FP16_TO_FP:
49084977
if (Node->getValueType(0) == MVT::f32) {
49094978
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false).first);
@@ -5076,11 +5145,15 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
50765145
break;
50775146
}
50785147
case ISD::FSUB:
5079-
case ISD::STRICT_FSUB:
5080-
ExpandFPLibCall(Node, RTLIB::SUB_F32, RTLIB::SUB_F64,
5081-
RTLIB::SUB_F80, RTLIB::SUB_F128,
5082-
RTLIB::SUB_PPCF128, Results);
5148+
case ISD::STRICT_FSUB: {
5149+
ExpandFastFPLibCall(Node, canUseFastMathLibcall(Node),
5150+
{RTLIB::FAST_SUB_F32, RTLIB::SUB_F32},
5151+
{RTLIB::FAST_SUB_F64, RTLIB::SUB_F64},
5152+
{RTLIB::FAST_SUB_F80, RTLIB::SUB_F80},
5153+
{RTLIB::FAST_SUB_F128, RTLIB::SUB_F128},
5154+
{RTLIB::FAST_SUB_PPCF128, RTLIB::SUB_PPCF128}, Results);
50835155
break;
5156+
}
50845157
case ISD::SREM:
50855158
Results.push_back(ExpandIntLibCall(Node, true,
50865159
RTLIB::SREM_I8,

llvm/lib/IR/RuntimeLibcalls.cpp

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,6 @@ using namespace RTLIB;
1818
#undef GET_INIT_RUNTIME_LIBCALL_NAMES
1919
#undef GET_SET_TARGET_RUNTIME_LIBCALL_SETS
2020

21-
static cl::opt<bool>
22-
HexagonEnableFastMathRuntimeCalls("hexagon-fast-math", cl::Hidden,
23-
cl::desc("Enable Fast Math processing"));
24-
2521
static void setARMLibcallNames(RuntimeLibcallsInfo &Info, const Triple &TT,
2622
FloatABI::ABIType FloatABIType,
2723
EABI EABIVersion) {
@@ -268,32 +264,25 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
268264
setLibcallImpl(RTLIB::UREM_I32, RTLIB::__hexagon_umodsi3);
269265
setLibcallImpl(RTLIB::UREM_I64, RTLIB::__hexagon_umoddi3);
270266

271-
const bool FastMath = HexagonEnableFastMathRuntimeCalls;
272-
// This is the only fast library function for sqrtd.
273-
if (FastMath)
274-
setLibcallImpl(RTLIB::SQRT_F64, RTLIB::__hexagon_fast2_sqrtdf2);
275-
276267
// Prefix is: nothing for "slow-math",
277268
// "fast2_" for V5+ fast-math double-precision
278269
// (actually, keep fast-math and fast-math2 separate for now)
279-
if (FastMath) {
280-
setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_fast_adddf3);
281-
setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_fast_subdf3);
282-
setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_fast_muldf3);
283-
setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_fast_divdf3);
284-
setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_fast_divsf3);
285-
} else {
286-
setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_adddf3);
287-
setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_subdf3);
288-
setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_muldf3);
289-
setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_divdf3);
290-
setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_divsf3);
291-
}
292270

293-
if (FastMath)
294-
setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_fast2_sqrtf);
295-
else
296-
setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_sqrtf);
271+
setLibcallImpl(RTLIB::FAST_ADD_F64, RTLIB::__hexagon_fast_adddf3);
272+
setLibcallImpl(RTLIB::FAST_SUB_F64, RTLIB::__hexagon_fast_subdf3);
273+
setLibcallImpl(RTLIB::FAST_MUL_F64, RTLIB::__hexagon_fast_muldf3);
274+
setLibcallImpl(RTLIB::FAST_DIV_F64, RTLIB::__hexagon_fast_divdf3);
275+
setLibcallImpl(RTLIB::FAST_DIV_F32, RTLIB::__hexagon_fast_divsf3);
276+
setLibcallImpl(RTLIB::FAST_SQRT_F32, RTLIB::__hexagon_fast2_sqrtf);
277+
// This is the only fast library function for sqrtd.
278+
setLibcallImpl(RTLIB::FAST_SQRT_F64, RTLIB::__hexagon_fast2_sqrtdf2);
279+
280+
setLibcallImpl(RTLIB::ADD_F64, RTLIB::__hexagon_adddf3);
281+
setLibcallImpl(RTLIB::SUB_F64, RTLIB::__hexagon_subdf3);
282+
setLibcallImpl(RTLIB::MUL_F64, RTLIB::__hexagon_muldf3);
283+
setLibcallImpl(RTLIB::DIV_F64, RTLIB::__hexagon_divdf3);
284+
setLibcallImpl(RTLIB::DIV_F32, RTLIB::__hexagon_divsf3);
285+
setLibcallImpl(RTLIB::SQRT_F32, RTLIB::__hexagon_sqrtf);
297286

298287
setLibcallImpl(
299288
RTLIB::HEXAGON_MEMCPY_LIKELY_ALIGNED_MIN32BYTES_MULT8BYTES,

0 commit comments

Comments
 (0)