diff --git a/gcc/match.pd b/gcc/match.pd
index 660d5c26840266dafd4fe7d52d4f7f709b0ea707..cfccee4fe8903cb964cae08a2e419876bd16d455 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3334,6 +3334,146 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (if (cst1 && cst2)
     (vec_cond @0 { cst1; } { cst2; })))))
 
+
+#if GIMPLE
+(if (canonicalize_math_p ())
+/* These patterns are mostly used by PHIOPT to move some operations outside of
+   the if statements.  They should be done late because it gives jump threading
+   and a few other passes a chance to reduce what is going on.  */
+/* a ? x op POW2 : x -> x op (a ? POW2 : 0). */
+ (for op (plus minus bit_ior bit_xor lshift rshift lrotate rrotate)
+  (simplify
+   (cond @0 (op:s @1 INTEGER_CST@2) @1)
+   /* powerof2cst */
+   (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2))
+    (with {
+      tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
+     }
+     (op @1 (lshift (convert (convert:boolean_type_node @0)) { shift; })))
+   )
+  )
+ )
+)
+#endif
+
+#if GIMPLE
+/* These patterns are mostly used by FORWPROP to move some operations outside of
+   the if statements.  They should be done late because it gives jump threading
+   and a few other passes a chance to reduce what is going on.  */
+/* Mul64 is defined as a multiplication algorithm which computes the 128-bit
+   product of two 64-bit integers:
+   (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) {
+     In0Lo = In0(D) & 4294967295;
+     In0Hi = In0(D) >> 32;
+     In1Lo = In1(D) & 4294967295;
+     In1Hi = In1(D) >> 32;
+     Mull_01 = In0Hi * In1Lo;
+     Addc = In0Lo * In1Hi + Mull_01;
+     addc32 = Addc << 32;
+     ResLo = In0Lo * In1Lo + addc32;
+     ResHi = ((long unsigned int) (addc32 > ResLo)) +
+             (((long unsigned int) (Mull_01 > Addc)) << 32) + (Addc >> 32) + In0Hi * In1Hi;
+   } */
+ (simplify
+  (plus
+    (plus
+      (convert
+        (gt @10
+          (plus
+            (mult @4 @6)
+            (lshift@10 @9 @3))))
+      (lshift
+        (convert
+          (gt @8 @9)) @3))
+    (plus@11
+      (rshift
+        (plus@9
+          (mult (bit_and@4 SSA_NAME@0 @2) @7)
+          (mult@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2))) @3)
+      (mult (rshift@5 SSA_NAME@0 @3)
+            (rshift@7 SSA_NAME@1 INTEGER_CST@3))))
+  (if (INTEGRAL_TYPE_P (type) && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) &&
+       TYPE_PRECISION (type) == 64)
+   (with {
+     tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type));
+     tree shift = build_int_cst (integer_type_node, 64);
+     //direct_internal_fn_supported_p (UMULH, type, OPTIMIZE_FOR_BOTH)
+    }
+    (convert:type (rshift
+      (mult (convert:i128_type @0) (convert:i128_type @1)) { shift; })))
+  )
+ )
+
+ /* (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) {
+     In0Lo = In0(D) & 4294967295;
+     In0Hi = In0(D) >> 32;
+     In1Lo = In1(D) & 4294967295;
+     In1Hi = In1(D) >> 32;
+     Mull_01 = In0Hi * In1Lo;
+     Addc = In0Lo * In1Hi + Mull_01;
+     addc32 = Addc << 32;
+     ResLo = In0(D) * In1(D);
+     ResHi = ((long unsigned int) (addc32 > ResLo)) +
+             (((long unsigned int) (Mull_01 > Addc)) << 32) + (Addc >> 32) + In0Hi * In1Hi;
+    } */
+ (simplify
+  (plus
+    (plus
+      (convert
+        (gt (lshift@10 @9 @3)
+            (mult @0 @1)))
+      (lshift
+        (convert
+          (gt @8 @9)) @3))
+    (plus@11
+      (rshift
+        (plus@9
+          (mult (bit_and@4 SSA_NAME@0 @2) @7)
+          (mult@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2))) @3)
+      (mult (rshift@5 SSA_NAME@0 @3)
+            (rshift@7 SSA_NAME@1 INTEGER_CST@3))))
+  (if (INTEGRAL_TYPE_P (type) && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) &&
+       TYPE_PRECISION (type) == 64)
+   (with {
+     tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type));
+     tree shift = build_int_cst (integer_type_node, 64);
+     //direct_internal_fn_supported_p (UMULH, type, OPTIMIZE_FOR_BOTH)
+    }
+    (convert:type (rshift
+      (mult (convert:i128_type @0) (convert:i128_type @1)) { shift; })))
+  )
+ )
+#endif
+
+#if GIMPLE
+/* These patterns are mostly used by FORWPROP to move some operations outside of
+   the if statements.  They should be done late because it gives jump threading
+   and a few other passes a chance to reduce what is going on.  */
+ /*
+   In0Lo = In0(D) & 4294967295;
+   In0Hi = In0(D) >> 32;
+   In1Lo = In1(D) & 4294967295;
+   In1Hi = In1(D) >> 32;
+   Addc = In0Lo * In1Hi + In0Hi * In1Lo;
+   addc32 = Addc << 32;
+   ResLo = In0Lo * In1Lo + addc32
+  */
+ (simplify
+  (plus (mult @4 @5)
+   (lshift
+    (plus
+     (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3))
+     (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2))) INTEGER_CST@3))
+  (if (INTEGRAL_TYPE_P (type) && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) &&
+       TYPE_PRECISION (type) == 64)
+   (with {
+     tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type));
+     tree shift = build_int_cst (integer_type_node, 64);
+     //direct_internal_fn_supported_p (UMULH, type, OPTIMIZE_FOR_BOTH)
+    }
+    (mult (convert:type @0) (convert:type @1)))
+  )
+ )
+#endif
 /* Simplification moved from fold_cond_expr_with_comparison.  It may also be
    extended.  */
 /* This pattern implements two kinds of simplification:
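
For illustration only (not part of the patch): a rough C sketch of the kind of
source the patterns above are intended to match.  mul64hi_by_parts open-codes
the high 64 bits of a 64x64 multiplication exactly as in the Mul64 comment, and
after the simplification it should be equivalent to mul64hi_widened, a single
128-bit multiply followed by a shift by 64.  cond_add_pow2 shows the PHIOPT
case.  All function names are made up for the example, and __int128 assumes a
64-bit target; a real testcase would scan the GIMPLE dumps instead.

typedef unsigned long long u64;

/* Open-coded high part of a 64x64->128 multiplication, as in the Mul64
   comment above; a candidate for the new simplifications.  */
u64
mul64hi_by_parts (u64 in0, u64 in1)
{
  u64 in0lo = in0 & 4294967295;
  u64 in0hi = in0 >> 32;
  u64 in1lo = in1 & 4294967295;
  u64 in1hi = in1 >> 32;
  u64 mull_01 = in0hi * in1lo;
  u64 addc = in0lo * in1hi + mull_01;
  u64 addc32 = addc << 32;
  u64 reslo = in0lo * in1lo + addc32;
  /* Carries from the low-part additions plus the remaining partial
     products form the high 64 bits.  */
  return (u64) (addc32 > reslo)
         + ((u64) (mull_01 > addc) << 32)
         + (addc >> 32)
         + in0hi * in1hi;
}

/* What the match.pd replacement corresponds to: widen, multiply, shift.  */
u64
mul64hi_widened (u64 in0, u64 in1)
{
  return (u64) (((unsigned __int128) in0 * in1) >> 64);
}

/* The PHIOPT pattern: a conditional add of a power of two becomes an
   unconditional add of the condition shifted left.  */
int
cond_add_pow2 (int x, _Bool a)
{
  return a ? x + 8 : x;		/* expected to become x + ((int) a << 3) */
}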