diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 1b2482a2363de9ef0af19107937cdb3cc0e72e4c..2dd7b7e53df749bc70fd16321b95a5c3b9054faf 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -190,6 +190,11 @@ private:
   Value *optimizePow(CallInst *CI, IRBuilderBase &B);
   Value *replacePowWithExp(CallInst *Pow, IRBuilderBase &B);
   Value *replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B);
+  Value *replaceNestedPowAndSqrtWithPow(CallInst *Pow, IRBuilderBase &B);
+  Value *replaceNestedPowAndPowWithPow(CallInst *Pow, IRBuilderBase &B);
+  Value *replaceNestedSqrtAndPowWithPow(CallInst *Sqrt, IRBuilderBase &B);
+  Value *optimizeCbrt(CallInst *CI, IRBuilderBase &B);
+
   Value *optimizeExp2(CallInst *CI, IRBuilderBase &B);
   Value *optimizeFMinFMax(CallInst *CI, IRBuilderBase &B);
   Value *optimizeLog(CallInst *CI, IRBuilderBase &B);
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 245f2d4e442a435cedaf1f596f37e3637a968203..e45fdb03c232b4e676006745c0fa100ddbee1d69 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -1971,6 +1971,333 @@ Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) {
   return Sqrt;
 }
 
+// pow(sqrt(x), y) -> pow(x, y * 0.5)
+// Fires only when both calls carry the full 'fast' flag set and the sqrt has
+// no other user. Returns the new pow call, or nullptr when nothing matched;
+// the caller substitutes the returned value for Pow.
+Value *LibCallSimplifier::replaceNestedPowAndSqrtWithPow(CallInst *Pow,
+                                                         IRBuilderBase &B) {
+  Value *NewPow = nullptr;
+  Value *Base = Pow->getArgOperand(0);
+  Value *Y = Pow->getArgOperand(1);
+  Module *Mod = Pow->getModule();
+  Type *Ty = Pow->getType();
+  CallInst *BaseFn = dyn_cast<CallInst>(Base);
+  if (BaseFn && BaseFn->hasOneUse() && BaseFn->isFast() && Pow->isFast()) {
+    Function *CalleeFn = BaseFn->getCalledFunction();
+    LibFunc LibFn;
+
+    // Check if Pow complies with the conversion rules.
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Pow)) {
+      if (II->getIntrinsicID() == Intrinsic::pow && CalleeFn &&
+          CalleeFn->getIntrinsicID() == Intrinsic::sqrt) {
+        Value *X = BaseFn->getOperand(0);
+        // Create a new node Y * 0.5.
+        Value *Mul = B.CreateFMul(Y, ConstantFP::get(Ty, 0.5));
+        NewPow = B.CreateCall(
+            Intrinsic::getDeclaration(Mod, Pow->getIntrinsicID(), Ty),
+            {X, Mul});
+      }
+    } else if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) &&
+               isLibFuncEmittable(Mod, TLI, LibFn)) {
+      LibFunc floatFn, doubleFn, longDFn;
+      switch (LibFn) {
+      case LibFunc_sqrtf:
+      case LibFunc_sqrt:
+      case LibFunc_sqrtl:
+        floatFn = LibFunc_powf;
+        doubleFn = LibFunc_pow;
+        longDFn = LibFunc_powl;
+        break;
+      case LibFunc_sqrtf_finite:
+      case LibFunc_sqrt_finite:
+      case LibFunc_sqrtl_finite:
+        floatFn = LibFunc_powf_finite;
+        doubleFn = LibFunc_pow_finite;
+        longDFn = LibFunc_powl_finite;
+        break;
+      default:
+        return nullptr;
+      }
+      Value *X = BaseFn->getOperand(0);
+      NewPow = emitBinaryFloatFnCall(
+          X, B.CreateFMul(Y, ConstantFP::get(Ty, 0.5)), TLI, doubleFn, floatFn,
+          longDFn, B, CalleeFn->getAttributes());
+    }
+    // Hand the replacement back instead of calling RAUW here: the caller
+    // performs the substitution, so a direct RAUW would only bypass it.
+    if (NewPow)
+      return NewPow;
+  }
+  return nullptr;
+}
+
+// pow(pow(x, y), z) -> pow(x, y * z)
+// Fires only when both calls carry the full 'fast' flag set and the inner pow
+// has no other user. Returns the new pow call, or nullptr when nothing
+// matched; the caller substitutes the returned value for Pow.
+Value *LibCallSimplifier::replaceNestedPowAndPowWithPow(CallInst *Pow,
+                                                        IRBuilderBase &B) {
+  Value *NewPow = nullptr;
+  Value *Base = Pow->getArgOperand(0);
+  Value *Z = Pow->getArgOperand(1);
+  Module *Mod = Pow->getModule();
+  Type *Ty = Pow->getType();
+  CallInst *BaseFn = dyn_cast<CallInst>(Base);
+
+  if (BaseFn && BaseFn->hasOneUse() && BaseFn->isFast() && Pow->isFast()) {
+    Function *CalleeFn = BaseFn->getCalledFunction();
+    LibFunc LibFn;
+    // Check if Pow complies with the conversion rules.
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Pow)) {
+      if (II->getIntrinsicID() == Intrinsic::pow && CalleeFn &&
+          CalleeFn->getIntrinsicID() == Intrinsic::pow) {
+        Value *X = BaseFn->getOperand(0);
+        Value *Y = BaseFn->getOperand(1);
+        Value *Mul = B.CreateFMul(Y, Z);
+        NewPow = B.CreateCall(
+            Intrinsic::getDeclaration(Mod, II->getIntrinsicID(), Ty), {X, Mul});
+      }
+    } else if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) &&
+               isLibFuncEmittable(Mod, TLI, LibFn)) {
+      LibFunc floatFn, doubleFn, longDFn;
+      switch (LibFn) {
+      case LibFunc_powf:
+      case LibFunc_pow:
+      case LibFunc_powl:
+        floatFn = LibFunc_powf;
+        doubleFn = LibFunc_pow;
+        longDFn = LibFunc_powl;
+        break;
+      case LibFunc_powf_finite:
+      case LibFunc_pow_finite:
+      case LibFunc_powl_finite:
+        floatFn = LibFunc_powf_finite;
+        doubleFn = LibFunc_pow_finite;
+        longDFn = LibFunc_powl_finite;
+        break;
+      default:
+        return nullptr;
+      }
+      Value *X = BaseFn->getOperand(0);
+      Value *Y = BaseFn->getOperand(1);
+      Value *Mul = B.CreateFMul(Y, Z);
+      NewPow = emitBinaryFloatFnCall(X, Mul, TLI, doubleFn, floatFn, longDFn,
+                                     B, CalleeFn->getAttributes());
+    }
+    // Hand the replacement back instead of calling RAUW here: the caller
+    // performs the substitution, so a direct RAUW would only bypass it.
+    if (NewPow)
+      return NewPow;
+  }
+  return nullptr;
+}
+
+// sqrt(pow(x, y)) -> pow(|x|, y * 0.5)
+// Fires only when both calls carry the full 'fast' flag set and the pow has
+// no other user. Returns the new pow call, or nullptr when nothing matched;
+// the caller substitutes the returned value for Sqrt.
+Value *LibCallSimplifier::replaceNestedSqrtAndPowWithPow(CallInst *Sqrt,
+                                                         IRBuilderBase &B) {
+  Value *NewPow = nullptr;
+  Value *OldPow = Sqrt->getArgOperand(0);
+  Module *Mod = Sqrt->getModule();
+  Type *Ty = Sqrt->getType();
+  CallInst *Pow = dyn_cast<CallInst>(OldPow);
+  if (Pow && Pow->hasOneUse() && Pow->isFast() && Sqrt->isFast()) {
+    Function *CalleeFn = Pow->getCalledFunction();
+    IRBuilderBase::FastMathFlagGuard Guard(B);
+    B.setFastMathFlags(Sqrt->getFastMathFlags());
+    LibFunc LibFn;
+    // Check if Sqrt complies with the conversion rules.
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Sqrt)) {
+      if (II->getIntrinsicID() == Intrinsic::sqrt && CalleeFn &&
+          CalleeFn->getIntrinsicID() == Intrinsic::pow) {
+        Value *X = Pow->getOperand(0);
+        Value *Y = Pow->getOperand(1);
+        // |x| via the fabs intrinsic rather than a compare + select.
+        Value *AbsX = B.CreateUnaryIntrinsic(Intrinsic::fabs, X);
+        Value *Mul = B.CreateFMul(Y, ConstantFP::get(Ty, 0.5));
+        NewPow = B.CreateCall(
+            Intrinsic::getDeclaration(Mod, Pow->getIntrinsicID(), Ty),
+            {AbsX, Mul});
+      }
+    } else if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) &&
+               isLibFuncEmittable(Mod, TLI, LibFn)) {
+      LibFunc floatFn, doubleFn, longDFn;
+      switch (LibFn) {
+      case LibFunc_powf:
+      case LibFunc_pow:
+      case LibFunc_powl:
+        floatFn = LibFunc_powf;
+        doubleFn = LibFunc_pow;
+        longDFn = LibFunc_powl;
+        break;
+      case LibFunc_powf_finite:
+      case LibFunc_pow_finite:
+      case LibFunc_powl_finite:
+        floatFn = LibFunc_powf_finite;
+        doubleFn = LibFunc_pow_finite;
+        longDFn = LibFunc_powl_finite;
+        break;
+      default:
+        return nullptr;
+      }
+      Value *X = Pow->getOperand(0);
+      Value *Y = Pow->getOperand(1);
+      // |x| via the fabs intrinsic rather than a compare + select.
+      Value *AbsX = B.CreateUnaryIntrinsic(Intrinsic::fabs, X);
+      Value *Mul = B.CreateFMul(Y, ConstantFP::get(Ty, 0.5));
+      NewPow = emitBinaryFloatFnCall(AbsX, Mul, TLI, doubleFn, floatFn, longDFn,
+                                     B, CalleeFn->getAttributes());
+    }
+    // Hand the replacement back instead of calling RAUW here: the caller
+    // performs the substitution, so a direct RAUW would only bypass it.
+    if (NewPow)
+      return NewPow;
+  }
+  return nullptr;
+}
+
+// Simplify a fast-math cbrt call whose only operand is another fast call:
+//   cbrt(expN(x)) -> expN(x / 3)   (exp/exp2, intrinsic or libcall)
+//   cbrt(sqrt(x)) -> pow(x, 1/6)
+//   cbrt(cbrt(x)) -> x >= 0 ? pow(x, 1/9) : -pow(-x, 1/9)
+// The cbrt(cbrt(x)) case needs the select because a plain pow(x, 1/9) would
+// be wrong for x < 0. When no pattern matches, fall back to the
+// optimizeUnaryDoubleFP handling that cbrt had before this function existed.
+// Returns the replacement value, or nullptr if the call was left unchanged.
+Value *LibCallSimplifier::optimizeCbrt(CallInst *CI, IRBuilderBase &B) {
+  Module *M = CI->getModule();
+  Value *Base = CI->getArgOperand(0);
+  CallInst *BaseFn = dyn_cast<CallInst>(Base);
+  Type *Ty = CI->getType();
+  Value *Ret = nullptr, *TempRet1 = nullptr, *TempRet2 = nullptr;
+  if (!TargetLibraryInfoImpl::isCallingConvCCompatible(CI))
+    return nullptr;
+  IRBuilderBase::FastMathFlagGuard Guard(B);
+  B.setFastMathFlags(CI->getFastMathFlags());
+  // Confirming that the internal representation of the cbrt function
+  // also involves a function call, and the fast-math flag is enabled
+  if (BaseFn && BaseFn->hasOneUse() && BaseFn->isFast() && CI->isFast()) {
+    LibFunc LibFn;
+    Function *CalleeFn = BaseFn->getCalledFunction();
+    Value *X;
+    // If the internal representation is an intrinsic call
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(BaseFn)) {
+      Intrinsic::ID IntrinsicID = II->getIntrinsicID();
+      switch (IntrinsicID) {
+      // cbrt(exp(X)) -> exp(X/3)
+      // cbrt(exp2(X)) -> exp2(X/3)
+      case Intrinsic::exp:
+      case Intrinsic::exp2:
+        X = BaseFn->getOperand(0);
+        Ret = B.CreateCall(Intrinsic::getDeclaration(M, IntrinsicID, Ty),
+                           B.CreateFDiv(X, ConstantFP::get(Ty, 3.0)));
+        break;
+      // cbrt(sqrt(X)) -> pow(X,1/6)
+      case Intrinsic::sqrt:
+        X = BaseFn->getOperand(0);
+        Ret = B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::pow, Ty),
+                           {X, B.CreateFDiv(ConstantFP::get(Ty, 1.0),
+                                            ConstantFP::get(Ty, 6.0))});
+        break;
+      default:
+        break; // unmatched: use the generic handling below
+      }
+    } else if (CalleeFn && TLI->getLibFunc(CalleeFn->getName(), LibFn) &&
+               isLibFuncEmittable(M, TLI, LibFn)) {
+      switch (LibFn) {
+      // cbrt(exp(X)) -> exp(X/3)
+      case LibFunc_exp:
+      case LibFunc_expf:
+      case LibFunc_expl:
+        X = BaseFn->getOperand(0);
+        Ret = emitUnaryFloatFnCall(B.CreateFDiv(X, ConstantFP::get(Ty, 3.0)),
+                                   TLI, LibFunc_exp, LibFunc_expf, LibFunc_expl,
+                                   B, CalleeFn->getAttributes());
+        break;
+      case LibFunc_exp_finite:
+      case LibFunc_expf_finite:
+      case LibFunc_expl_finite:
+        X = BaseFn->getOperand(0);
+        Ret = emitUnaryFloatFnCall(B.CreateFDiv(X, ConstantFP::get(Ty, 3.0)),
+                                   TLI, LibFunc_exp_finite, LibFunc_expf_finite,
+                                   LibFunc_expl_finite, B,
+                                   CalleeFn->getAttributes());
+        break;
+      // cbrt(exp2(X)) -> exp2(X/3)
+      case LibFunc_exp2:
+      case LibFunc_exp2f:
+      case LibFunc_exp2l:
+        X = BaseFn->getOperand(0);
+        Ret = emitUnaryFloatFnCall(B.CreateFDiv(X, ConstantFP::get(Ty, 3.0)),
+                                   TLI, LibFunc_exp2, LibFunc_exp2f,
+                                   LibFunc_exp2l, B, CalleeFn->getAttributes());
+        break;
+      case LibFunc_exp2_finite:
+      case LibFunc_exp2f_finite:
+      case LibFunc_exp2l_finite:
+        X = BaseFn->getOperand(0);
+        Ret = emitUnaryFloatFnCall(B.CreateFDiv(X, ConstantFP::get(Ty, 3.0)),
+                                   TLI, LibFunc_exp2_finite,
+                                   LibFunc_exp2f_finite, LibFunc_exp2l_finite,
+                                   B, CalleeFn->getAttributes());
+        break;
+      // cbrt(sqrt(X)) -> pow(X,1/6)
+      case LibFunc_sqrt:
+      case LibFunc_sqrtf:
+      case LibFunc_sqrtl:
+        X = BaseFn->getOperand(0);
+        Ret = emitBinaryFloatFnCall(
+            X, B.CreateFDiv(ConstantFP::get(Ty, 1.0), ConstantFP::get(Ty, 6.0)),
+            TLI, LibFunc_pow, LibFunc_powf, LibFunc_powl, B,
+            BaseFn->getAttributes());
+        break;
+      // cbrt(sqrt(X)) -> pow(X,1/6)
+      case LibFunc_sqrt_finite:
+      case LibFunc_sqrtf_finite:
+      case LibFunc_sqrtl_finite:
+        X = BaseFn->getOperand(0);
+        Ret = emitBinaryFloatFnCall(
+            X, B.CreateFDiv(ConstantFP::get(Ty, 1.0), ConstantFP::get(Ty, 6.0)),
+            TLI, LibFunc_pow_finite, LibFunc_powf_finite, LibFunc_powl_finite,
+            B, BaseFn->getAttributes());
+        break;
+      // cbrt(cbrt(X)) -> pow(X,1/9)
+      case LibFunc_cbrt:
+      case LibFunc_cbrtf:
+      case LibFunc_cbrtl:
+        X = BaseFn->getOperand(0);
+        // When X >= 0, it can be transformed into pow(X, 1/9)
+        TempRet1 = emitBinaryFloatFnCall(
+            X, B.CreateFDiv(ConstantFP::get(Ty, 1.0), ConstantFP::get(Ty, 9.0)),
+            TLI, LibFunc_pow, LibFunc_powf, LibFunc_powl, B,
+            BaseFn->getAttributes());
+        // When X < 0, it can be transformed into -pow(-X, 1/9)
+        TempRet2 = B.CreateFNeg(emitBinaryFloatFnCall(
+            B.CreateFNeg(X),
+            B.CreateFDiv(ConstantFP::get(Ty, 1.0), ConstantFP::get(Ty, 9.0)),
+            TLI, LibFunc_pow, LibFunc_powf, LibFunc_powl, B,
+            BaseFn->getAttributes()));
+        Ret = B.CreateSelect(B.CreateFCmpOGE(X, ConstantFP::get(Ty, 0.0)),
+                             TempRet1, TempRet2);
+        break;
+      default:
+        break; // unmatched: use the generic handling below
+      }
+    }
+    // Hand the replacement back instead of calling RAUW here: the caller
+    // performs the substitution, so a direct RAUW would only bypass it.
+    if (Ret)
+      return Ret;
+  }
+  // Reverting to the original handling of the cbrt function
+  if (UnsafeFPShrink && hasFloatVersion(M, CI->getCalledFunction()->getName()))
+    return optimizeUnaryDoubleFP(CI, B, TLI, true);
+  return nullptr;
+}
+
 static Value *createPowWithIntegerExponent(Value *Base, Value *Expo, Module *M,
                                            IRBuilderBase &B) {
   Value *Args[] = {Base, Expo};
@@ -2021,6 +2348,13 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) {
 
   if (Value *Sqrt = replacePowWithSqrt(Pow, B))
     return Sqrt;
+
+  // pow(sqrt(x), y) -> pow(x, y * 0.5)
+  if (Value *V = replaceNestedPowAndSqrtWithPow(Pow, B))
+    return V;
+  // pow(pow(x, y), z) -> pow(x, y * z)
+  if (Value *V = replaceNestedPowAndPowWithPow(Pow, B))
+    return V;
 
   // If we can approximate pow:
   // pow(x, n) -> powi(x, n) * sqrt(x) if n has exactly a 0.5 fraction
@@ -2314,6 +2648,10 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilderBase &B) {
   if (!CI->isFast())
     return Ret;
 
+  // sqrt(pow(x, y)) -> pow(|x|, y * 0.5)
+  if (Value *V = replaceNestedSqrtAndPowWithPow(CI, B))
+    return V;
+
   Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0));
   if (!I || I->getOpcode() != Instruction::FMul || !I->isFast())
     return Ret;
@@ -3274,6 +3612,9 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
   case LibFunc_powf:
   case LibFunc_pow:
   case LibFunc_powl:
+  case LibFunc_pow_finite:
+  case LibFunc_powf_finite:
+  case LibFunc_powl_finite:
     return optimizePow(CI, Builder);
   case LibFunc_exp2l:
   case LibFunc_exp2:
@@ -3286,6 +3627,9 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
   case LibFunc_sqrtf:
   case LibFunc_sqrt:
   case LibFunc_sqrtl:
+  case LibFunc_sqrtf_finite:
+  case LibFunc_sqrt_finite:
+  case LibFunc_sqrtl_finite:
     return optimizeSqrt(CI, Builder);
   case LibFunc_logf:
   case LibFunc_log:
@@ -3327,7 +3671,6 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
   case LibFunc_asinh:
   case LibFunc_atan:
   case LibFunc_atanh:
-  case LibFunc_cbrt:
   case LibFunc_cosh:
   case LibFunc_exp:
   case LibFunc_exp10:
@@ -3354,6 +3697,10 @@ Value *LibCallSimplifier::optimizeFloatingPointLibCall(CallInst *CI,
   case LibFunc_cabsf:
   case LibFunc_cabsl:
     return optimizeCAbs(CI, Builder);
+  case LibFunc_cbrtf:
+  case LibFunc_cbrt:
+  case LibFunc_cbrtl:
+    return optimizeCbrt(CI, Builder);
   default:
     return nullptr;
   }
diff --git a/llvm/test/Transforms/InstCombine/pow-sqrt-exp-cbrt.ll b/llvm/test/Transforms/InstCombine/pow-sqrt-exp-cbrt.ll
new file mode 100644
index 0000000000000000000000000000000000000000..19e5d7034a7b9d28f5c0783a239204f5c12f846f
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pow-sqrt-exp-cbrt.ll
@@ -0,0 +1,222 @@
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+; In each test case, an extra instruction is introduced during the transformation,
+; which will be eliminated in the subsequent dead code elimination optimization.
+
+define double @pow_sqrt(double %x, double %y) {
+; CHECK-LABEL: @pow_sqrt(
+; CHECK-NEXT:    [[UNUSED:%.*]] = call fast double @sqrt(double [[X:%.*]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[Y:%.*]], 5.000000e-01
+; CHECK-NEXT:    [[POW:%.*]] = call fast double @pow(double [[X]], double [[MUL]])
+; CHECK-NEXT:    ret double [[POW]]
+;
+  %call = call fast double @sqrt(double %x)
+  %pow = call fast double @pow(double %call, double %y)
+  ret double %pow
+}
+
+define float @powf_sqrtf(float %x, float %y) {
+; CHECK-LABEL: @powf_sqrtf(
+; CHECK-NEXT:    [[UNUSED:%.*]] = call fast float @sqrtf(float [[X:%.*]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[Y:%.*]], 5.000000e-01
+; CHECK-NEXT:    [[POW:%.*]] = call fast float @powf(float [[X]], float [[MUL]])
+; CHECK-NEXT:    ret float [[POW]]
+;
+  %call = call fast float @sqrtf(float %x)
+  %pow = call fast float @powf(float %call, float %y)
+  ret float %pow
+}
+
+define double @pow_pow(double %x, double %y, double %z) {
+; CHECK-LABEL: @pow_pow(
+; CHECK-NEXT:    [[UNUSED:%.*]] = call fast double @pow(double [[X:%.*]], double [[Y:%.*]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[Y]], [[Z:%.*]]
+; CHECK-NEXT:    [[POW:%.*]] = call fast double @pow(double [[X]], double [[MUL]])
+; CHECK-NEXT:    ret double [[POW]]
+;
+  %call = call fast double @pow(double %x, double %y)
+  %pow = call fast double @pow(double %call, double %z)
+  ret double %pow
+}
+
+define float @powf_powf(float %x, float %y, float %z) {
+; CHECK-LABEL: @powf_powf(
+; CHECK-NEXT:    [[UNUSED:%.*]] = call fast float @powf(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[Y]], [[Z:%.*]]
+; CHECK-NEXT:    [[POW:%.*]] = call fast float @powf(float [[X]], float [[MUL]])
+; CHECK-NEXT:    ret float [[POW]]
+;
+  %call = call fast float @powf(float %x, float %y)
+  %pow = call fast float @powf(float %call, float %z)
+  ret float %pow
+}
+
+define double @sqrt_nroot(double %x, double %n){
+; CHECK-LABEL: @sqrt_nroot(
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv double 1.000000e+00, [[N:%.*]]
+; CHECK-NEXT:    [[UNUSED:%.*]] = call fast double @pow(double [[X:%.*]], double [[DIV]])
+; CHECK-NEXT:    [[ABSX:%.*]] = call fast double @llvm.fabs.f64(double [[X]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[DIV]], 5.000000e-01
+; CHECK-NEXT:    [[POW:%.*]] = call fast double @pow(double [[ABSX]], double [[MUL]])
+; CHECK-NEXT:    ret double [[POW]]
+;
+  %div = fdiv double 1.000000e+00, %n
+  %call = call fast double @pow(double %x, double %div)
+  %call6 = call fast double @sqrt(double %call)
+  ret double %call6
+}
+
+define float @sqrtf_nroot(float %x, float %n){
+; CHECK-LABEL: @sqrtf_nroot(
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv float 1.000000e+00, [[N:%.*]]
+; CHECK-NEXT:    [[UNUSED:%.*]] = call fast float @powf(float [[X:%.*]], float [[DIV]])
+; CHECK-NEXT:    [[ABSX:%.*]] = call fast float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[DIV]], 5.000000e-01
+; CHECK-NEXT:    [[POW:%.*]] = call fast float @powf(float [[ABSX]], float [[MUL]])
+; CHECK-NEXT:    ret float [[POW]]
+;
+  %div = fdiv float 1.000000e+00, %n
+  %call = call fast float @powf(float %x, float %div)
+  %call6 = call fast float @sqrtf(float %call)
+  ret float %call6
+}
+
+define double @sqrt_pow(double %x, double %y) {
+; CHECK-LABEL: @sqrt_pow(
+; CHECK-NEXT:    [[UNUSED:%.*]] = call fast double @pow(double [[X:%.*]], double [[Y:%.*]])
+; CHECK-NEXT:    [[ABSX:%.*]] = call fast double @llvm.fabs.f64(double [[X]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[Y]], 5.000000e-01
+; CHECK-NEXT:    [[POW:%.*]] = call fast double @pow(double [[ABSX]], double [[MUL]])
+; CHECK-NEXT:    ret double [[POW]]
+;
+  %call = call fast double @pow(double %x, double %y)
+  %pow = call fast double @sqrt(double %call)
+  ret double %pow
+}
+
+define float @sqrtf_powf(float %x, float %y) {
+; CHECK-LABEL: @sqrtf_powf(
+; CHECK-NEXT:    [[UNUSED:%.*]] = call fast float @powf(float [[X:%.*]], float [[Y:%.*]])
+; CHECK-NEXT:    [[ABSX:%.*]] = call fast float @llvm.fabs.f32(float [[X]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[Y]], 5.000000e-01
+; CHECK-NEXT:    [[POW:%.*]] = call fast float @powf(float [[ABSX]], float [[MUL]])
+; CHECK-NEXT:    ret float [[POW]]
+;
+  %call = call fast float @powf(float %x, float %y)
+  %pow = call fast float @sqrtf(float %call)
+  ret float %pow
+}
+
+define double @cbrt_exp(double %x) {
+; CHECK-LABEL: @cbrt_exp(
+; CHECK-NEXT:    [[UNUSED:%.*]] = call fast double @exp(double [[X:%.*]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[X]], 0x3FD5555555555555
+; CHECK-NEXT:    [[EXP:%.*]] = call fast double @exp(double [[MUL]])
+; CHECK-NEXT:    ret double [[EXP]]
+;
+  %call = call fast double @exp(double %x)
+  %pow = call fast double @cbrt(double %call)
+  ret double %pow
+}
+
+define float @cbrtf_expf(float %x) {
+; CHECK-LABEL: @cbrtf_expf(
+; CHECK-NEXT:    [[UNUSED:%.*]] = call fast float @expf(float [[X:%.*]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[X]], 0x3FD5555560000000
+; CHECK-NEXT:    [[EXP:%.*]] = call fast float @expf(float [[MUL]])
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %call = call fast float @expf(float %x)
+  %pow = call fast float @cbrtf(float %call)
+  ret float %pow
+}
+
+define double @cbrt_exp2(double %x) {
+; CHECK-LABEL: @cbrt_exp2(
+; CHECK-NEXT:    [[UNUSED:%.*]] = call fast double @exp2(double [[X:%.*]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[X]], 0x3FD5555555555555
+; CHECK-NEXT:    [[EXP:%.*]] = call fast double @exp2(double [[MUL]])
+; CHECK-NEXT:    ret double [[EXP]]
+;
+  %call = call fast double @exp2(double %x)
+  %pow = call fast double @cbrt(double %call)
+  ret double %pow
+}
+
+define float @cbrtf_exp2f(float %x) {
+; CHECK-LABEL: @cbrtf_exp2f(
+; CHECK-NEXT:    [[UNUSED:%.*]] = call fast float @exp2f(float [[X:%.*]])
+; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[X]], 0x3FD5555560000000
+; CHECK-NEXT:    [[EXP:%.*]] = call fast float @exp2f(float [[MUL]])
+; CHECK-NEXT:    ret float [[EXP]]
+;
+  %call = call fast float @exp2f(float %x)
+  %pow = call fast float @cbrtf(float %call)
+  ret float %pow
+}
+
+define double @cbrt_sqrt(double %x) {
+; CHECK-LABEL: @cbrt_sqrt(
+; CHECK-NEXT:    [[UNUSED:%.*]] = call fast double @sqrt(double [[X:%.*]])
+; CHECK-NEXT:    [[POW:%.*]] = call fast double @pow(double [[X]], double 0x3FC5555555555555)
+; CHECK-NEXT:    ret double [[POW]]
+;
+  %call = call fast double @sqrt(double %x)
+  %pow = call fast double @cbrt(double %call)
+  ret double %pow
+}
+
+define float @cbrtf_sqrtf(float %x) {
+; CHECK-LABEL: @cbrtf_sqrtf(
+; CHECK-NEXT:    [[UNUSED:%.*]] = call fast float @sqrtf(float [[X:%.*]])
+; CHECK-NEXT:    [[POW:%.*]] = call fast float @powf(float [[X]], float 0x3FC5555560000000)
+; CHECK-NEXT:    ret float [[POW]]
+;
+  %call = call fast float @sqrtf(float %x)
+  %pow = call fast float @cbrtf(float %call)
+  ret float %pow
+}
+
+define double @cbrt_cbrt(double %x) {
+; CHECK-LABEL: @cbrt_cbrt(
+; CHECK-NEXT:    [[UNUSED:%.*]] = call fast double @cbrt(double [[X:%.*]])
+; CHECK-NEXT:    [[POW1:%.*]] = call fast double @pow(double [[X]], double 0x3FBC71C71C71C71C)
+; CHECK-NEXT:    [[NEG_X:%.*]] = fneg fast double [[X]]
+; CHECK-NEXT:    [[POW2:%.*]] = call fast double @pow(double [[NEG_X]], double 0x3FBC71C71C71C71C)
+; CHECK-NEXT:    [[NEG_POW2:%.*]] = fneg fast double [[POW2]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp fast oge double [[X]], 0.000000e+00
+; CHECK-NEXT:    [[SELECT:%.*]] = select fast i1 [[CMP]], double [[POW1]], double [[NEG_POW2]]
+; CHECK-NEXT:    ret double [[SELECT]]
+;
+  %call = call fast double @cbrt(double %x)
+  %pow = call fast double @cbrt(double %call)
+  ret double %pow
+}
+
+define float @cbrtf_cbrtf(float %x) {
+; CHECK-LABEL: @cbrtf_cbrtf(
+; CHECK-NEXT:    [[UNUSED:%.*]] = call fast float @cbrtf(float [[X:%.*]])
+; CHECK-NEXT:    [[POW1:%.*]] = call fast float @powf(float [[X]], float 0x3FBC71C720000000)
+; CHECK-NEXT:    [[NEG_X:%.*]] = fneg fast float [[X]]
+; CHECK-NEXT:    [[POW2:%.*]] = call fast float @powf(float [[NEG_X]], float 0x3FBC71C720000000)
+; CHECK-NEXT:    [[NEG_POW2:%.*]] = fneg fast float [[POW2]]
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp fast oge float [[X]], 0.000000e+00
+; CHECK-NEXT:    [[SELECT:%.*]] = select fast i1 [[CMP]], float [[POW1]], float [[NEG_POW2]]
+; CHECK-NEXT:    ret float [[SELECT]]
+;
+  %call = call fast float @cbrtf(float %x)
+  %pow = call fast float @cbrtf(float %call)
+  ret float %pow
+}
+
+declare double @pow(double,double)
+declare float @powf(float,float)
+declare double @sqrt(double)
+declare float @sqrtf(float)
+declare double @cbrt(double)
+declare float @cbrtf(float)
+declare double @exp(double)
+declare float @expf(float)
+declare double @exp2(double)
+declare float @exp2f(float)
+declare double @llvm.fabs.f64(double)
+declare float @llvm.fabs.f32(float)