GCC Expand Mult

1. GCC Expand Mult
- 1.1. impls

1. GCC Expand Mult

expand_mult 是 gcc 对 mult 操作的一种优化, 但并不是一个单独的 pass, 而是做为一个通用功能.

例如:

int foo(int x) {
    int y = 2 * x;
    return y;
}

$> /opt/riscv/bin/riscv64-unknown-linux-gnu-gcc  test.c -O0 -c -fdump-rtl-expand-details
$> cat test.c.245r.expand
...
(insn 10 9 11 2 (set (reg:SI 76)
        (ashift:SI (reg:SI 77)
            (const_int 1 [0x1]))) "/home/sunway/download/b/test.c":3:9 -1
     (nil))
...

可见 `2*x` 在 pass_expand 时就被换成为了 `x<<1`.

另外, gcc 还会生成更复杂的 shift, 例如:

int foo(int x) {
    int y = 12 * x;
    return y;
}

$> /opt/riscv/bin/riscv64-unknown-linux-gnu-gcc  test.c -O0 -c -fdump-rtl-expand-detail
$> cat test.c.245r.expand
...
(insn 9 8 10 2 (set (reg:SI 76)
        (subreg:SI (reg:DI 77) 0)) "/home/sunway/download/b/test.c":3:9 -1
     (nil))
(insn 10 9 11 2 (set (reg:SI 78)
        (reg:SI 76)) "/home/sunway/download/b/test.c":3:9 -1
     (nil))
#NOTE: x<<1
(insn 11 10 12 2 (set (reg:SI 79)
        (ashift:SI (reg:SI 78)
            (const_int 1 [0x1]))) "/home/sunway/download/b/test.c":3:9 -1
     (nil))
(insn 12 11 13 2 (set (reg:SI 78)
        (reg:SI 79)) "/home/sunway/download/b/test.c":3:9 -1
     (expr_list:REG_EQUAL (mult:SI (reg:SI 76)
            (const_int 2 [0x2]))
        (nil)))
#NOET: x<<1+x        
(insn 13 12 14 2 (set (reg:SI 78)
        (plus:SI (reg:SI 78)
            (reg:SI 76))) "/home/sunway/download/b/test.c":3:9 -1
     (expr_list:REG_EQUAL (mult:SI (reg:SI 76)
            (const_int 3 [0x3]))
        (nil)))
#NOTE: (x<<1+x)<<2
(insn 14 13 15 2 (set (reg:SI 80)
        (ashift:SI (reg:SI 78)
            (const_int 2 [0x2]))) "/home/sunway/download/b/test.c":3:9 -1
     (nil))

...

gcc 把 `x*12` 换成了 `((x<<1)+x)<<2`

1.1. impls

这个功能的实现是在 expand_mult

1.1.1. expand_mult

rtx expand_mult(
    machine_mode mode, rtx op0, rtx op1, rtx target, int unsignedp,
    bool no_libcall) {
    enum mult_variant variant;
    struct algorithm algorithm;
    rtx scalar_op1;
    int max_cost;
    /* NOTE: ! -Os */
    bool speed = optimize_insn_for_speed_p();

    if (CONSTANT_P(op0)) std::swap(op0, op1);

    /* For vectors, there are several simplifications that can be made if
       all elements of the vector constant are identical.  */
    scalar_op1 = unwrap_const_vec_duplicate(op1);

    /* NOTE: 如果是 int, 把乘法转换成移位 */
    if (INTEGRAL_MODE_P(mode)) {
        rtx fake_reg;
        HOST_WIDE_INT coeff;
        bool is_neg;
        int mode_bitsize;

        /* NOTE: x*0=0 */
        if (op1 == CONST0_RTX(mode)) return op1;
        /* NOTE: x*1=x */
        if (op1 == CONST1_RTX(mode)) return op0;
        /* NOTE: x*(-1)=-x */
        if (op1 == CONSTM1_RTX(mode))
            return expand_unop(
                mode, do_trapv ? negv_optab : neg_optab, op0, target, 0);

        mode_bitsize = GET_MODE_UNIT_BITSIZE(mode);

        if (CONST_INT_P(scalar_op1)) {
            coeff = INTVAL(scalar_op1);
            is_neg = coeff < 0;
        } else
            goto skip_synth;

        /* NOTE: x*4=x<<2 */
        if (EXACT_POWER_OF_2_OR_ZERO_P(coeff) &&
            !(is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT))
            return expand_shift(
                LSHIFT_EXPR, mode, op0, floor_log2(coeff), target, unsignedp);

        fake_reg = gen_raw_REG(mode, LAST_VIRTUAL_REGISTER + 1);

        /* NOTE: x*(-4)=-(x<<2) */
        if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT) {
            coeff = -(unsigned HOST_WIDE_INT)coeff;

            if (EXACT_POWER_OF_2_OR_ZERO_P(coeff)) {
                rtx temp = expand_shift(
                    LSHIFT_EXPR, mode, op0, floor_log2(coeff), target,
                    unsignedp);
                return expand_unop(mode, neg_optab, temp, target, 0);
            }

            if (choose_mult_variant(
                    mode, coeff, &algorithm, &variant, max_cost)) {
                rtx temp = expand_mult_const(
                    mode, op0, coeff, NULL_RTX, &algorithm, variant);
                return expand_unop(mode, neg_optab, temp, target, 0);
            }
            goto skip_synth;
        }

        /* NOTE: x*12 是否要 expand 成 (x<<1+x)<<2 取决于两者的 cost, 而具体 mul
         * 或 shift 的 cost 多大是 arch 相关的, 需要通过 TARGET_RTX_COSTS 这个
         * target hook 来决定 */
        max_cost = set_src_cost(gen_rtx_MULT(mode, fake_reg, op1), mode, speed);
        /* NOTE: 根据 max_cost 的限制计算一个 algorithm, 例如 (x<<1+x)<<2, 然后
         * 在 expand_mult_const 中按 algorithm 生成 rtx */
        if (choose_mult_variant(mode, coeff, &algorithm, &variant, max_cost))
            return expand_mult_const(
                mode, op0, coeff, target, &algorithm, variant);
    }
skip_synth:

    /* NOTE: 如果是 float, 把 x*2 转换成 x+x */
    if (CONST_DOUBLE_AS_FLOAT_P(scalar_op1) &&
        real_equal(CONST_DOUBLE_REAL_VALUE(scalar_op1), &dconst2)) {
        op0 = force_reg(GET_MODE(op0), op0);
        return expand_binop(
            mode, add_optab, op0, op0, target, unsignedp,
            no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
    }

    /* This used to use umul_optab if unsigned, but for non-widening multiply
       there is no difference between signed and unsigned.  */
    op0 = expand_binop(
        mode, do_trapv ? smulv_optab : smul_optab, op0, op1, target, unsignedp,
        no_libcall ? OPTAB_WIDEN : OPTAB_LIB_WIDEN);
    gcc_assert(op0 || no_libcall);
    return op0;
}

1.1.2. expand_shift

expand_shift 最终会调用到 md 中的 ashiftsi3:

(define_insn "<optab>si3"
  [(set (match_operand:SI     0 "register_operand" "= r")
    (any_shift:SI
        (match_operand:SI 1 "register_operand" "  r")
        (match_operand:QI 2 "arith_operand"    " rI")))]
  ""
{
  if (GET_CODE (operands[2]) == CONST_INT)
    operands[2] = GEN_INT (INTVAL (operands[2])
               & (GET_MODE_BITSIZE (SImode) - 1));

  return TARGET_64BIT ? "<insn>%i2w\t%0,%1,%2" : "<insn>%i2\t%0,%1,%2";
}
  [(set_attr "type" "shift")
   (set_attr "mode" "SI")])

1.1.3. set_src_cost

set_src_cost 最终会调用到 TARGET_RTX_COSTS 这个 target hook

1.1.4. choose_mult_variant

1.1.5. expand_mult_const

Backlinks

GCC Pass (GCC Pass > others > GCC Expand Mult): GCC Expand Mult

TARGET_RTX_COSTS (GCC Cost > TARGET_RTX_COSTS): 以 GCC Expand Mult 为例, gcc 决定 mul 是否 expand 成 shift 时, 需要通过 TARGET_RTX_COSTS 这个来找到各个 insn 的 cost, 例如, 如果 `x*12` 的 cost 小于 `(x<<1+x)<<2` 的 cost, 则这个 expand不会进行

mtune (mtune > riscv_tune_param > add, mul, div): 表示执行 {fp,int}_add,mul,div 等操作的 cost, 例如 GCC Expand Mult 会使用这个 cost 决定 mul 是否转换成 shift