Index: lib/Analysis/ValueTracking.cpp =================================================================== --- lib/Analysis/ValueTracking.cpp (revision 248366) +++ lib/Analysis/ValueTracking.cpp (working copy) @@ -977,6 +977,24 @@ KnownOne &= KnownOne2; // Output known-0 are known to be clear if zero in either the LHS | RHS. KnownZero |= KnownZero2; + + // and(x, add (x, -1)) is a common idiom that always clears the low bit; + // here we handle the more general case of adding any odd number + Value *X = nullptr, *Y = nullptr; + if (match(I->getOperand(1), m_Add(m_Value(X), m_Value(Y)))) { + if (X == I->getOperand(0)) { + APInt KnownZero3(BitWidth, 0), KnownOne3(BitWidth, 0); + computeKnownBits(Y, KnownZero3, KnownOne3, DL, Depth + 1, Q); + if (KnownOne3.countTrailingOnes() > 0) + KnownZero |= APInt::getLowBitsSet(BitWidth, 1); + } + if (Y == I->getOperand(0)) { + APInt KnownZero3(BitWidth, 0), KnownOne3(BitWidth, 0); + computeKnownBits(X, KnownZero3, KnownOne3, DL, Depth + 1, Q); + if (KnownOne3.countTrailingOnes() > 0) + KnownZero |= APInt::getLowBitsSet(BitWidth, 1); + } + } break; } case Instruction::Or: { @@ -1093,18 +1111,35 @@ break; } case Instruction::Shl: - // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { + // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); KnownZero <<= ShiftAmt; KnownOne <<= ShiftAmt; KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 + } else { + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); + // shl can't eliminate trailing zeros + KnownZero = APInt::getLowBitsSet(BitWidth, KnownZero2.countTrailingOnes()); + // short shl will leave homogeneous leading bits intact + // here we handle the more common case of leading zeros + APInt ShiftAmtZeros(BitWidth, 0), ShiftAmtOnes(BitWidth, 0); + computeKnownBits(I->getOperand(1), ShiftAmtZeros, ShiftAmtOnes, DL, + Depth + 1, Q); + uint64_t MaxShiftAmt = ((APInt(BitWidth, 1) << + (BitWidth - ShiftAmtZeros.countLeadingOnes())) - 1). + getLimitedValue(BitWidth); + if (MaxShiftAmt < BitWidth) { + int PreservedZeros = (int)KnownZero2.countLeadingOnes() - (int)MaxShiftAmt; + if (PreservedZeros > 0) + KnownZero |= APInt::getHighBitsSet(BitWidth, PreservedZeros); + } } break; case Instruction::LShr: - // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { + // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 // Compute the new bits that are at the top now. uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); @@ -1114,11 +1149,15 @@ KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); // high bits known zero. KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt); + } else { + // impossible for lshr to eliminate leading zeros + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); + KnownZero = APInt::getHighBitsSet(BitWidth, KnownZero2.countLeadingOnes()); } break; case Instruction::AShr: - // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { + // (ashr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 // Compute the new bits that are at the top now. uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); @@ -1132,6 +1171,11 @@ KnownZero |= HighBits; else if (KnownOne[BitWidth-ShiftAmt-1]) // New bits are known one. KnownOne |= HighBits; + } else { + // leading zeros or ones must be preserved by ashr + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); + KnownZero = APInt::getHighBitsSet(BitWidth, KnownZero2.countLeadingOnes()); + KnownOne = APInt::getHighBitsSet(BitWidth, KnownOne2.countLeadingOnes()); } break; case Instruction::Sub: { @@ -1334,10 +1378,8 @@ KnownZero2 = APInt(BitWidth, 0); KnownOne2 = APInt(BitWidth, 0); - // Recurse, but cap the recursion to one level, because we don't - // want to waste time spinning around in loops. computeKnownBits(IncValue, KnownZero2, KnownOne2, DL, - MaxDepth - 1, Q); + Depth + 1, Q); KnownZero &= KnownZero2; KnownOne &= KnownOne2; // If all bits have been ruled out, there's no need to check @@ -1358,6 +1400,11 @@ if (IntrinsicInst *II = dyn_cast(I)) { switch (II->getIntrinsicID()) { default: break; + case Intrinsic::bswap: + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); + KnownZero |= KnownZero2.byteSwap(); + KnownOne |= KnownOne2.byteSwap(); + break; case Intrinsic::ctlz: case Intrinsic::cttz: { unsigned LowBits = Log2_32(BitWidth)+1; @@ -1368,8 +1415,10 @@ break; } case Intrinsic::ctpop: { - unsigned LowBits = Log2_32(BitWidth)+1; - KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); + // bits known to be zero can't contribute to the population + unsigned LowBits = Log2_32(BitWidth - KnownZero2.countPopulation())+1; + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } case Intrinsic::x86_sse42_crc32_64_64: