LLVM Interpreter: This patch implements vector support for cast operations (zext, sext, uitofp, sitofp, trunc, fpext, fptosi, fptoui, fptrunc, bitcast) and shift operations (shl, ashr, lshr) for integer and floating-point data types.

Added tests. Done by Yuri Veselov (mailto:Yuri.Veselov@intel.com). llvm-svn: 187724
2025-01-31 20:51:52 +01:00 · 2013-08-05 12:17:06 +00:00 · 2013-08-05 12:17:06 +00:00 · c018c20682
commit c018c20682
parent 39f379d037
3 changed files with 592 additions and 77 deletions
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@ -1138,15 +1138,41 @@ void Interpreter::visitCallSite(CallSite CS) {
  callFunction((Function*)GVTOP(SRC), ArgVals);
 }

+// auxiliary function for shift operations
+static unsigned getShiftAmount(uint64_t orgShiftAmount,
+                               llvm::APInt valueToShift) {
+  unsigned valueWidth = valueToShift.getBitWidth();
+  if (orgShiftAmount < (uint64_t)valueWidth)
+    return orgShiftAmount;
+  // According to the LLVM documentation, if orgShiftAmount >= valueWidth,
+  // the result is undefined, but we shift by the amount given by this rule:
+  return (NextPowerOf2(valueWidth-1) - 1) & orgShiftAmount;
+}
+
+
 void Interpreter::visitShl(BinaryOperator &I) {
  ExecutionContext &SF = ECStack.back();
  GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
  GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
  GenericValue Dest;
-  if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
-    Dest.IntVal = Src1.IntVal.shl(Src2.IntVal.getZExtValue());
-  else
-    Dest.IntVal = Src1.IntVal;
+  const Type *Ty = I.getType();
+
+  if (Ty->isVectorTy()) {
+    uint32_t src1Size = uint32_t(Src1.AggregateVal.size());
+    assert(src1Size == Src2.AggregateVal.size());
+    for (unsigned i = 0; i < src1Size; i++) {
+      GenericValue Result;
+      uint64_t shiftAmount = Src2.AggregateVal[i].IntVal.getZExtValue();
+      llvm::APInt valueToShift = Src1.AggregateVal[i].IntVal;
+      Result.IntVal = valueToShift.shl(getShiftAmount(shiftAmount, valueToShift));
+      Dest.AggregateVal.push_back(Result);
+    }
+  } else {
+    // scalar
+    uint64_t shiftAmount = Src2.IntVal.getZExtValue();
+    llvm::APInt valueToShift = Src1.IntVal;
+    Dest.IntVal = valueToShift.shl(getShiftAmount(shiftAmount, valueToShift));
+  }

  SetValue(&I, Dest, SF);
 }
@ -1156,10 +1182,24 @@ void Interpreter::visitLShr(BinaryOperator &I) {
  GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
  GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
  GenericValue Dest;
-  if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
-    Dest.IntVal = Src1.IntVal.lshr(Src2.IntVal.getZExtValue());
-  else
-    Dest.IntVal = Src1.IntVal;
+  const Type *Ty = I.getType();
+
+  if (Ty->isVectorTy()) {
+    uint32_t src1Size = uint32_t(Src1.AggregateVal.size());
+    assert(src1Size == Src2.AggregateVal.size());
+    for (unsigned i = 0; i < src1Size; i++) {
+      GenericValue Result;
+      uint64_t shiftAmount = Src2.AggregateVal[i].IntVal.getZExtValue();
+      llvm::APInt valueToShift = Src1.AggregateVal[i].IntVal;
+      Result.IntVal = valueToShift.lshr(getShiftAmount(shiftAmount, valueToShift));
+      Dest.AggregateVal.push_back(Result);
+    }
+  } else {
+    // scalar
+    uint64_t shiftAmount = Src2.IntVal.getZExtValue();
+    llvm::APInt valueToShift = Src1.IntVal;
+    Dest.IntVal = valueToShift.lshr(getShiftAmount(shiftAmount, valueToShift));
+  }

  SetValue(&I, Dest, SF);
 }
@ -1169,10 +1209,24 @@ void Interpreter::visitAShr(BinaryOperator &I) {
  GenericValue Src1 = getOperandValue(I.getOperand(0), SF);
  GenericValue Src2 = getOperandValue(I.getOperand(1), SF);
  GenericValue Dest;
-  if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth())
-    Dest.IntVal = Src1.IntVal.ashr(Src2.IntVal.getZExtValue());
-  else
-    Dest.IntVal = Src1.IntVal;
+  const Type *Ty = I.getType();
+
+  if (Ty->isVectorTy()) {
+    size_t src1Size = Src1.AggregateVal.size();
+    assert(src1Size == Src2.AggregateVal.size());
+    for (unsigned i = 0; i < src1Size; i++) {
+      GenericValue Result;
+      uint64_t shiftAmount = Src2.AggregateVal[i].IntVal.getZExtValue();
+      llvm::APInt valueToShift = Src1.AggregateVal[i].IntVal;
+      Result.IntVal = valueToShift.ashr(getShiftAmount(shiftAmount, valueToShift));
+      Dest.AggregateVal.push_back(Result);
+    }
+  } else {
+    // scalar
+    uint64_t shiftAmount = Src2.IntVal.getZExtValue();
+    llvm::APInt valueToShift = Src1.IntVal;
+    Dest.IntVal = valueToShift.ashr(getShiftAmount(shiftAmount, valueToShift));
+  }

  SetValue(&I, Dest, SF);
 }
@ -1180,99 +1234,248 @@ void Interpreter::visitAShr(BinaryOperator &I) {
 GenericValue Interpreter::executeTruncInst(Value *SrcVal, Type *DstTy,
                                           ExecutionContext &SF) {
  GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+  Type *SrcTy = SrcVal->getType();
+  if (SrcTy->isVectorTy()) {
+    Type *DstVecTy = DstTy->getScalarType();
+    unsigned DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth();
+    unsigned NumElts = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal
+    Dest.AggregateVal.resize(NumElts);
+    for (unsigned i = 0; i < NumElts; i++)
+      Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.trunc(DBitWidth);
+  } else {
    IntegerType *DITy = cast<IntegerType>(DstTy);
    unsigned DBitWidth = DITy->getBitWidth();
    Dest.IntVal = Src.IntVal.trunc(DBitWidth);
+  }
  return Dest;
 }

 GenericValue Interpreter::executeSExtInst(Value *SrcVal, Type *DstTy,
                                          ExecutionContext &SF) {
+  const Type *SrcTy = SrcVal->getType();
  GenericValue Dest, Src = getOperandValue(SrcVal, SF);
-  IntegerType *DITy = cast<IntegerType>(DstTy);
+  if (SrcTy->isVectorTy()) {
+    const Type *DstVecTy = DstTy->getScalarType();
+    unsigned DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth();
+    unsigned size = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal.
+    Dest.AggregateVal.resize(size);
+    for (unsigned i = 0; i < size; i++)
+      Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.sext(DBitWidth);
+  } else {
+    const IntegerType *DITy = cast<IntegerType>(DstTy);
    unsigned DBitWidth = DITy->getBitWidth();
    Dest.IntVal = Src.IntVal.sext(DBitWidth);
+  }
  return Dest;
 }

 GenericValue Interpreter::executeZExtInst(Value *SrcVal, Type *DstTy,
                                          ExecutionContext &SF) {
+  const Type *SrcTy = SrcVal->getType();
  GenericValue Dest, Src = getOperandValue(SrcVal, SF);
-  IntegerType *DITy = cast<IntegerType>(DstTy);
+  if (SrcTy->isVectorTy()) {
+    const Type *DstVecTy = DstTy->getScalarType();
+    unsigned DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth();
+
+    unsigned size = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal.
+    Dest.AggregateVal.resize(size);
+    for (unsigned i = 0; i < size; i++)
+      Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.zext(DBitWidth);
+  } else {
+    const IntegerType *DITy = cast<IntegerType>(DstTy);
    unsigned DBitWidth = DITy->getBitWidth();
    Dest.IntVal = Src.IntVal.zext(DBitWidth);
+  }
  return Dest;
 }

 GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, Type *DstTy,
                                             ExecutionContext &SF) {
  GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+
+  if (SrcVal->getType()->getTypeID() == Type::VectorTyID) {
+    assert(SrcVal->getType()->getScalarType()->isDoubleTy() &&
+           DstTy->getScalarType()->isFloatTy() &&
+           "Invalid FPTrunc instruction");
+
+    unsigned size = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal.
+    Dest.AggregateVal.resize(size);
+    for (unsigned i = 0; i < size; i++)
+      Dest.AggregateVal[i].FloatVal = (float)Src.AggregateVal[i].DoubleVal;
+  } else {
    assert(SrcVal->getType()->isDoubleTy() && DstTy->isFloatTy() &&
           "Invalid FPTrunc instruction");
    Dest.FloatVal = (float)Src.DoubleVal;
+  }
+
  return Dest;
 }

 GenericValue Interpreter::executeFPExtInst(Value *SrcVal, Type *DstTy,
                                           ExecutionContext &SF) {
  GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+
+  if (SrcVal->getType()->getTypeID() == Type::VectorTyID) {
+    assert(SrcVal->getType()->getScalarType()->isFloatTy() &&
+           DstTy->getScalarType()->isDoubleTy() && "Invalid FPExt instruction");
+
+    unsigned size = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal.
+    Dest.AggregateVal.resize(size);
+    for (unsigned i = 0; i < size; i++)
+      Dest.AggregateVal[i].DoubleVal = (double)Src.AggregateVal[i].FloatVal;
+  } else {
    assert(SrcVal->getType()->isFloatTy() && DstTy->isDoubleTy() &&
-         "Invalid FPTrunc instruction");
+           "Invalid FPExt instruction");
    Dest.DoubleVal = (double)Src.FloatVal;
+  }
+
  return Dest;
 }

 GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, Type *DstTy,
                                            ExecutionContext &SF) {
  Type *SrcTy = SrcVal->getType();
-  uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
  GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+
+  if (SrcTy->getTypeID() == Type::VectorTyID) {
+    const Type *DstVecTy = DstTy->getScalarType();
+    const Type *SrcVecTy = SrcTy->getScalarType();
+    uint32_t DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth();
+    unsigned size = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal.
+    Dest.AggregateVal.resize(size);
+
+    if (SrcVecTy->getTypeID() == Type::FloatTyID) {
+      assert(SrcVecTy->isFloatingPointTy() && "Invalid FPToUI instruction");
+      for (unsigned i = 0; i < size; i++)
+        Dest.AggregateVal[i].IntVal = APIntOps::RoundFloatToAPInt(
+            Src.AggregateVal[i].FloatVal, DBitWidth);
+    } else {
+      for (unsigned i = 0; i < size; i++)
+        Dest.AggregateVal[i].IntVal = APIntOps::RoundDoubleToAPInt(
+            Src.AggregateVal[i].DoubleVal, DBitWidth);
+    }
+  } else {
+    // scalar
+    uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
    assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction");

    if (SrcTy->getTypeID() == Type::FloatTyID)
      Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth);
-  else
+    else {
      Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth);
+    }
+  }
+
  return Dest;
 }

 GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, Type *DstTy,
                                            ExecutionContext &SF) {
  Type *SrcTy = SrcVal->getType();
-  uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
  GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+
+  if (SrcTy->getTypeID() == Type::VectorTyID) {
+    const Type *DstVecTy = DstTy->getScalarType();
+    const Type *SrcVecTy = SrcTy->getScalarType();
+    uint32_t DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth();
+    unsigned size = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal
+    Dest.AggregateVal.resize(size);
+
+    if (SrcVecTy->getTypeID() == Type::FloatTyID) {
+      assert(SrcVecTy->isFloatingPointTy() && "Invalid FPToSI instruction");
+      for (unsigned i = 0; i < size; i++)
+        Dest.AggregateVal[i].IntVal = APIntOps::RoundFloatToAPInt(
+            Src.AggregateVal[i].FloatVal, DBitWidth);
+    } else {
+      for (unsigned i = 0; i < size; i++)
+        Dest.AggregateVal[i].IntVal = APIntOps::RoundDoubleToAPInt(
+            Src.AggregateVal[i].DoubleVal, DBitWidth);
+    }
+  } else {
+    // scalar
+    unsigned DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
    assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction");

    if (SrcTy->getTypeID() == Type::FloatTyID)
      Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth);
-  else
+    else {
      Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth);
+    }
+  }
  return Dest;
 }

 GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, Type *DstTy,
                                            ExecutionContext &SF) {
  GenericValue Dest, Src = getOperandValue(SrcVal, SF);
-  assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction");

+  if (SrcVal->getType()->getTypeID() == Type::VectorTyID) {
+    const Type *DstVecTy = DstTy->getScalarType();
+    unsigned size = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal
+    Dest.AggregateVal.resize(size);
+
+    if (DstVecTy->getTypeID() == Type::FloatTyID) {
+      assert(DstVecTy->isFloatingPointTy() && "Invalid UIToFP instruction");
+      for (unsigned i = 0; i < size; i++)
+        Dest.AggregateVal[i].FloatVal =
+            APIntOps::RoundAPIntToFloat(Src.AggregateVal[i].IntVal);
+    } else {
+      for (unsigned i = 0; i < size; i++)
+        Dest.AggregateVal[i].DoubleVal =
+            APIntOps::RoundAPIntToDouble(Src.AggregateVal[i].IntVal);
+    }
+  } else {
+    // scalar
+    assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction");
    if (DstTy->getTypeID() == Type::FloatTyID)
      Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal);
-  else
+    else {
      Dest.DoubleVal = APIntOps::RoundAPIntToDouble(Src.IntVal);
+    }
+  }
  return Dest;
 }

 GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, Type *DstTy,
                                            ExecutionContext &SF) {
  GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+
+  if (SrcVal->getType()->getTypeID() == Type::VectorTyID) {
+    const Type *DstVecTy = DstTy->getScalarType();
+    unsigned size = Src.AggregateVal.size();
+    // the sizes of src and dst vectors must be equal
+    Dest.AggregateVal.resize(size);
+
+    if (DstVecTy->getTypeID() == Type::FloatTyID) {
+      assert(DstVecTy->isFloatingPointTy() && "Invalid SIToFP instruction");
+      for (unsigned i = 0; i < size; i++)
+        Dest.AggregateVal[i].FloatVal =
+            APIntOps::RoundSignedAPIntToFloat(Src.AggregateVal[i].IntVal);
+    } else {
+      for (unsigned i = 0; i < size; i++)
+        Dest.AggregateVal[i].DoubleVal =
+            APIntOps::RoundSignedAPIntToDouble(Src.AggregateVal[i].IntVal);
+    }
+  } else {
+    // scalar
    assert(DstTy->isFloatingPointTy() && "Invalid SIToFP instruction");

    if (DstTy->getTypeID() == Type::FloatTyID)
      Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal);
-  else
+    else {
      Dest.DoubleVal = APIntOps::RoundSignedAPIntToDouble(Src.IntVal);
-  return Dest;
+    }
+  }

+  return Dest;
 }

 GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, Type *DstTy,
@ -1301,32 +1504,166 @@ GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, Type *DstTy,
 GenericValue Interpreter::executeBitCastInst(Value *SrcVal, Type *DstTy,
                                             ExecutionContext &SF) {

+  // This instruction supports bitwise conversion of vectors to integers and
+  // to vectors of other types (as long as they have the same size)
  Type *SrcTy = SrcVal->getType();
  GenericValue Dest, Src = getOperandValue(SrcVal, SF);
+
+  if ((SrcTy->getTypeID() == Type::VectorTyID) ||
+      (DstTy->getTypeID() == Type::VectorTyID)) {
+    // vector src bitcast to vector dst or vector src bitcast to scalar dst or
+    // scalar src bitcast to vector dst
+    bool isLittleEndian = TD.isLittleEndian();
+    GenericValue TempDst, TempSrc, SrcVec;
+    const Type *SrcElemTy;
+    const Type *DstElemTy;
+    unsigned SrcBitSize;
+    unsigned DstBitSize;
+    unsigned SrcNum;
+    unsigned DstNum;
+
+    if (SrcTy->getTypeID() == Type::VectorTyID) {
+      SrcElemTy = SrcTy->getScalarType();
+      SrcBitSize = SrcTy->getScalarSizeInBits();
+      SrcNum = Src.AggregateVal.size();
+      SrcVec = Src;
+    } else {
+      // if src is scalar value, make it vector <1 x type>
+      SrcElemTy = SrcTy;
+      SrcBitSize = SrcTy->getPrimitiveSizeInBits();
+      SrcNum = 1;
+      SrcVec.AggregateVal.push_back(Src);
+    }
+
+    if (DstTy->getTypeID() == Type::VectorTyID) {
+      DstElemTy = DstTy->getScalarType();
+      DstBitSize = DstTy->getScalarSizeInBits();
+      DstNum = (SrcNum * SrcBitSize) / DstBitSize;
+    } else {
+      DstElemTy = DstTy;
+      DstBitSize = DstTy->getPrimitiveSizeInBits();
+      DstNum = 1;
+    }
+
+    if (SrcNum * SrcBitSize != DstNum * DstBitSize)
+      llvm_unreachable("Invalid BitCast");
+
+    // If src is floating point, cast to integer first.
+    TempSrc.AggregateVal.resize(SrcNum);
+    if (SrcElemTy->isFloatTy()) {
+      for (unsigned i = 0; i < SrcNum; i++)
+        TempSrc.AggregateVal[i].IntVal =
+            APInt::floatToBits(SrcVec.AggregateVal[i].FloatVal);
+
+    } else if (SrcElemTy->isDoubleTy()) {
+      for (unsigned i = 0; i < SrcNum; i++)
+        TempSrc.AggregateVal[i].IntVal =
+            APInt::doubleToBits(SrcVec.AggregateVal[i].DoubleVal);
+    } else if (SrcElemTy->isIntegerTy()) {
+      for (unsigned i = 0; i < SrcNum; i++)
+        TempSrc.AggregateVal[i].IntVal = SrcVec.AggregateVal[i].IntVal;
+    } else {
+      // Pointers are not allowed as the element type of vector.
+      llvm_unreachable("Invalid Bitcast");
+    }
+
+    // now TempSrc is integer type vector
+    if (DstNum < SrcNum) {
+      // Example: bitcast <4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64>
+      unsigned Ratio = SrcNum / DstNum;
+      unsigned SrcElt = 0;
+      for (unsigned i = 0; i < DstNum; i++) {
+        GenericValue Elt;
+        Elt.IntVal = 0;
+        Elt.IntVal = Elt.IntVal.zext(DstBitSize);
+        unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize * (Ratio - 1);
+        for (unsigned j = 0; j < Ratio; j++) {
+          APInt Tmp;
+          Tmp = Tmp.zext(SrcBitSize);
+          Tmp = TempSrc.AggregateVal[SrcElt++].IntVal;
+          Tmp = Tmp.zext(DstBitSize);
+          Tmp = Tmp.shl(ShiftAmt);
+          ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize;
+          Elt.IntVal |= Tmp;
+        }
+        TempDst.AggregateVal.push_back(Elt);
+      }
+    } else {
+      // Example: bitcast <2 x i64> <i64 0, i64 1> to <4 x i32>
+      unsigned Ratio = DstNum / SrcNum;
+      for (unsigned i = 0; i < SrcNum; i++) {
+        unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize * (Ratio - 1);
+        for (unsigned j = 0; j < Ratio; j++) {
+          GenericValue Elt;
+          Elt.IntVal = Elt.IntVal.zext(SrcBitSize);
+          Elt.IntVal = TempSrc.AggregateVal[i].IntVal;
+          Elt.IntVal = Elt.IntVal.lshr(ShiftAmt);
+          // it could be DstBitSize == SrcBitSize, so check it
+          if (DstBitSize < SrcBitSize)
+            Elt.IntVal = Elt.IntVal.trunc(DstBitSize);
+          ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize;
+          TempDst.AggregateVal.push_back(Elt);
+        }
+      }
+    }
+
+    // convert result from integer to specified type
+    if (DstTy->getTypeID() == Type::VectorTyID) {
+      if (DstElemTy->isDoubleTy()) {
+        Dest.AggregateVal.resize(DstNum);
+        for (unsigned i = 0; i < DstNum; i++)
+          Dest.AggregateVal[i].DoubleVal =
+              TempDst.AggregateVal[i].IntVal.bitsToDouble();
+      } else if (DstElemTy->isFloatTy()) {
+        Dest.AggregateVal.resize(DstNum);
+        for (unsigned i = 0; i < DstNum; i++)
+          Dest.AggregateVal[i].FloatVal =
+              TempDst.AggregateVal[i].IntVal.bitsToFloat();
+      } else {
+        Dest = TempDst;
+      }
+    } else {
+      if (DstElemTy->isDoubleTy())
+        Dest.DoubleVal = TempDst.AggregateVal[0].IntVal.bitsToDouble();
+      else if (DstElemTy->isFloatTy()) {
+        Dest.FloatVal = TempDst.AggregateVal[0].IntVal.bitsToFloat();
+      } else {
+        Dest.IntVal = TempDst.AggregateVal[0].IntVal;
+      }
+    }
+  } else { //  if ((SrcTy->getTypeID() == Type::VectorTyID) ||
+           //     (DstTy->getTypeID() == Type::VectorTyID))
+
+    // scalar src bitcast to scalar dst
    if (DstTy->isPointerTy()) {
      assert(SrcTy->isPointerTy() && "Invalid BitCast");
      Dest.PointerVal = Src.PointerVal;
    } else if (DstTy->isIntegerTy()) {
-    if (SrcTy->isFloatTy()) {
+      if (SrcTy->isFloatTy())
        Dest.IntVal = APInt::floatToBits(Src.FloatVal);
-    } else if (SrcTy->isDoubleTy()) {
+      else if (SrcTy->isDoubleTy()) {
        Dest.IntVal = APInt::doubleToBits(Src.DoubleVal);
      } else if (SrcTy->isIntegerTy()) {
        Dest.IntVal = Src.IntVal;
-    } else 
+      } else {
        llvm_unreachable("Invalid BitCast");
+      }
    } else if (DstTy->isFloatTy()) {
      if (SrcTy->isIntegerTy())
        Dest.FloatVal = Src.IntVal.bitsToFloat();
-    else
+      else {
        Dest.FloatVal = Src.FloatVal;
+      }
    } else if (DstTy->isDoubleTy()) {
      if (SrcTy->isIntegerTy())
        Dest.DoubleVal = Src.IntVal.bitsToDouble();
-    else
+      else {
        Dest.DoubleVal = Src.DoubleVal;
-  } else
+      }
+    } else {
      llvm_unreachable("Invalid Bitcast");
+    }
+  }

  return Dest;
 }
--- a/test/ExecutionEngine/test-interp-vec-cast.ll
+++ b/test/ExecutionEngine/test-interp-vec-cast.ll
@ -0,0 +1,146 @@
+; RUN: %lli -force-interpreter=true %s > /dev/null
+
+define i32 @main() {
+    zext <2 x i1> <i1 true,i1 true> to <2 x i8>
+    zext <3 x i1> <i1 true,i1 true,i1 true> to <3 x i8>
+    zext <2 x i1> <i1 true,i1 true> to <2 x i16>
+    zext <3 x i1> <i1 true,i1 true,i1 true> to <3 x i16>
+    zext <2 x i1> <i1 true,i1 true> to <2 x i32>
+    zext <3 x i1> <i1 true,i1 true,i1 true> to <3 x i32>
+    zext <2 x i1> <i1 true,i1 true> to <2 x i64>
+    zext <3 x i1> <i1 true,i1 true,i1 true> to <3 x i64>
+    zext <3 x i8> <i8 4, i8 4, i8 4> to <3 x i16>
+    zext <2 x i8> <i8 -4, i8 -4> to <2 x i16>
+    zext <3 x i8> <i8 4, i8 4, i8 4> to <3 x i32>
+    zext <2 x i8> <i8 -4, i8 -4> to <2 x i32>
+    zext <3 x i8> <i8 4, i8 4, i8 4> to <3 x i64>
+    zext <2 x i8> <i8 -4, i8 -4> to <2 x i64>
+    zext <3 x i16> <i16 4, i16 4, i16 4> to <3 x i32>
+    zext <2 x i16> <i16 -4, i16 -4> to <2 x i32>
+    zext <3 x i16> <i16 4, i16 4, i16 4> to <3 x i64>
+    zext <2 x i16> <i16 -4, i16 -4> to <2 x i64>
+    zext <3 x i32> <i32 4, i32 4, i32 4> to <3 x i64>
+    zext <2 x i32> <i32 -4, i32 -4> to <2 x i64>
+
+
+    sext <2 x i1> <i1 true,i1 true> to <2 x i8>
+    sext <3 x i1> <i1 true,i1 false,i1 true> to <3 x i8>
+    sext <2 x i1> <i1 true,i1 true> to <2 x i16>
+    sext <3 x i1> <i1 true,i1 false,i1 true> to <3 x i16>
+    sext <2 x i1> <i1 true,i1 true> to <2 x i32>
+    sext <3 x i1> <i1 true,i1 false,i1 true> to <3 x i32>
+    sext <2 x i1> <i1 true,i1 true> to <2 x i64>
+    sext <3 x i1> <i1 true,i1 false,i1 true> to <3 x i64>
+    sext <3 x i8> <i8 -4, i8 0, i8 4> to <3 x i16>
+    sext <2 x i8> <i8 -4, i8 4> to <2 x i16>
+    sext <3 x i8> <i8 -4, i8 0, i8 4> to <3 x i32>
+    sext <2 x i8> <i8 -4, i8 4> to <2 x i32>
+    sext <3 x i8> <i8 -4, i8 0, i8 4> to <3 x i64>
+    sext <2 x i8> <i8 -4, i8 4> to <2 x i64>
+    sext <3 x i16> <i16 -4, i16 0, i16 4> to <3 x i32>
+    sext <2 x i16> <i16 -4, i16 4> to <2 x i32>
+    sext <3 x i16> <i16 -4, i16 0, i16 4> to <3 x i64>
+    sext <2 x i16> <i16 -4, i16 4> to <2 x i64>
+    sext <3 x i32> <i32 -4, i32 0, i32 4> to <3 x i64>
+    sext <2 x i32> <i32 -4, i32 4> to <2 x i64>
+
+
+    uitofp <3 x i1> <i1 true,i1 false,i1 true> to <3 x float>
+    uitofp <2 x i1> <i1 true,i1 true> to <2 x double>
+    uitofp <3 x i8> <i8 -4,i8 0,i8 4> to <3 x float>
+    uitofp <2 x i8> <i8 -4,i8 4> to <2 x double>
+    uitofp <3 x i16> <i16 -4,i16 0,i16 4> to <3 x float>
+    uitofp <2 x i16> <i16 -4,i16 4> to <2 x double>
+    uitofp <3 x i32> <i32 -4,i32 0,i32 4> to <3 x float>
+    uitofp <2 x i32> <i32 -4,i32 4> to <2 x double>
+    uitofp <3 x i64> <i64 -4,i64 0,i64 4> to <3 x float>
+    uitofp <2 x i64> <i64 -4,i64 4> to <2 x double>
+
+
+    sitofp <3 x i1> <i1 true,i1 false,i1 true> to <3 x float>
+    sitofp <2 x i1> <i1 true,i1 true> to <2 x double>
+    sitofp <3 x i8> <i8 -4,i8 0,i8 4> to <3 x float>
+    sitofp <2 x i8> <i8 -4,i8 4> to <2 x double>
+    sitofp <3 x i16> <i16 -4,i16 0,i16 4> to <3 x float>
+    sitofp <2 x i16> <i16 -4,i16 4> to <2 x double>
+    sitofp <3 x i32> <i32 -4,i32 0,i32 4> to <3 x float>
+    sitofp <2 x i32> <i32 -4,i32 4> to <2 x double>
+    sitofp <3 x i64> <i64 -4,i64 0,i64 4> to <3 x float>
+    sitofp <2 x i64> <i64 -4,i64 4> to <2 x double>
+
+    trunc <2 x i16> <i16 -6, i16 6> to <2 x i8>
+    trunc <3 x i16> <i16 -6, i16 6, i16 0> to <3 x i8>
+    trunc <2 x i32> <i32 -6, i32 6> to <2 x i8>
+    trunc <3 x i32> <i32 -6, i32 6, i32 0> to <3 x i8>
+    trunc <2 x i32> <i32 -6, i32 6> to <2 x i16>
+    trunc <3 x i32> <i32 -6, i32 6, i32 0> to <3 x i16>
+    trunc <2 x i64> <i64 -6, i64 6> to <2 x i8>
+    trunc <3 x i64> <i64 -6, i64 6, i64 0> to <3 x i8>
+    trunc <2 x i64> <i64 -6, i64 6> to <2 x i16>
+    trunc <3 x i64> <i64 -6, i64 6, i64 0> to <3 x i16>
+    trunc <2 x i64> <i64 -6, i64 6> to <2 x i32>
+    trunc <3 x i64> <i64 -6, i64 6, i64 0> to <3 x i32>
+
+
+    fpext <2 x float>  < float 0.000000e+00, float 1.0> to <2 x double>
+    fpext <3 x float>  < float 0.000000e+00, float -1.0, float 1.0> to <3 x double>
+
+    fptosi <2 x double> < double 0.000000e+00, double 1.0> to <2 x i8>
+    fptosi <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i8>
+    fptosi <2 x double> < double 0.000000e+00, double 1.0> to <2 x i16>
+    fptosi <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i16>
+    fptosi <2 x double> < double 0.000000e+00, double 1.0> to <2 x i32>
+    fptosi <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i32>
+    fptosi <2 x double> < double 0.000000e+00, double 1.0> to <2 x i64>
+    fptosi <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i64>
+
+    fptoui <2 x double> < double 0.000000e+00, double 1.0> to <2 x i8>
+    fptoui <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i8>
+    fptoui <2 x double> < double 0.000000e+00, double 1.0> to <2 x i16>
+    fptoui <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i16>
+    fptoui <2 x double> < double 0.000000e+00, double 1.0> to <2 x i32>
+    fptoui <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i32>
+    fptoui <2 x double> < double 0.000000e+00, double 1.0> to <2 x i64>
+    fptoui <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x i64>
+
+    fptrunc <2 x double> < double 0.000000e+00, double 1.0> to <2 x float>
+    fptrunc <3 x double> < double 0.000000e+00, double 1.0, double -1.0> to <3 x float>
+
+    bitcast <8 x i8> <i8 0, i8 -1, i8 2, i8 -3, i8 4, i8 -5, i8 6, i8 -7> to <4 x i16>
+    bitcast <8 x i8> <i8 0, i8 -1, i8 2, i8 -3, i8 4, i8 -5, i8 6, i8 -7> to <2 x i32>
+    bitcast <8 x i8> <i8 0, i8 -1, i8 2, i8 -3, i8 4, i8 -5, i8 6, i8 -7> to i64
+    bitcast <8 x i8> <i8 0, i8 -1, i8 2, i8 -3, i8 4, i8 -5, i8 6, i8 -7> to <2 x float>
+    bitcast <8 x i8> <i8 0, i8 -1, i8 2, i8 -3, i8 4, i8 -5, i8 6, i8 -7> to double
+
+    bitcast <4 x i16> <i16 0, i16 -1, i16 2, i16 -3> to <8 x i8>
+    bitcast <4 x i16> <i16 0, i16 -1, i16 2, i16 -3> to <2 x i32>
+    bitcast <4 x i16> <i16 0, i16 -1, i16 2, i16 -3> to i64
+    bitcast <4 x i16> <i16 0, i16 -1, i16 2, i16 -3> to <2 x float>
+    bitcast <4 x i16> <i16 0, i16 -1, i16 2, i16 -3> to double
+
+    bitcast <2 x i32> <i32 1, i32 -1> to <8 x i8>
+    bitcast <2 x i32> <i32 1, i32 -1> to <4 x i16>
+    bitcast <2 x i32> <i32 1, i32 -1> to i64
+    bitcast <2 x i32> <i32 1, i32 -1> to <2 x float>
+    bitcast <2 x i32> <i32 1, i32 -1> to double
+
+    bitcast i64 1 to <8 x i8>
+    bitcast i64 1 to <4 x i16>
+    bitcast i64 1 to <2 x i32>
+    bitcast i64 1 to <2 x float>
+    bitcast i64 1 to double
+
+    bitcast <2 x float> <float 1.0, float -1.0> to <8 x i8>
+    bitcast <2 x float> <float 1.0, float -1.0> to <4 x i16>
+    bitcast <2 x float> <float 1.0, float -1.0> to i64
+    bitcast <2 x float> <float 1.0, float -1.0> to <2 x i32>
+    bitcast <2 x float> <float 1.0, float -1.0> to double
+
+    bitcast double 1.0 to <8 x i8>
+    bitcast double 1.0 to <4 x i16>
+    bitcast double 1.0 to <2 x i32>
+    bitcast double 1.0 to <2 x float>
+    bitcast double 1.0 to i64
+
+    ret i32 0
+}
--- a/test/ExecutionEngine/test-interp-vec-shift.ll
+++ b/test/ExecutionEngine/test-interp-vec-shift.ll
@ -0,0 +1,32 @@
+; RUN: %lli -force-interpreter=true %s > /dev/null
+
+define i32 @main() {
+    %shamt = add <2 x i8> <i8 0, i8 0>, <i8 1, i8 2>
+    %shift.upgrd.1 = zext <2 x i8> %shamt to <2 x i32>
+    %t1.s = shl <2 x i32> <i32 1, i32 2>, %shift.upgrd.1
+    %t2.s = shl <2 x i32> <i32 1, i32 2>, <i32 3, i32 4>
+    %shift.upgrd.2 = zext <2 x i8> %shamt to <2 x i32>
+    %t1 = shl <2 x i32> <i32 1, i32 2>, %shift.upgrd.2
+    %t2 = shl <2 x i32> <i32 1, i32 0>, <i32 5, i32 6>
+    %t2.s.upgrd.3 = shl <2 x i64> <i64 1, i64 2>, <i64 3, i64 4>
+    %t2.upgrd.4 = shl <2 x i64> <i64 1, i64 2>, <i64 6, i64 7>
+    %shift.upgrd.5 = zext <2 x i8> %shamt to <2 x i32>
+    %tr1.s = ashr <2 x i32> <i32 1, i32 2>, %shift.upgrd.5
+    %tr2.s = ashr <2 x i32> <i32 1, i32 2>, <i32 4, i32 5>
+    %shift.upgrd.6 = zext <2 x i8> %shamt to <2 x i32>
+    %tr1 = lshr <2 x i32> <i32 1, i32 2>, %shift.upgrd.6
+    %tr2 = lshr <2 x i32> <i32 1, i32 2>, <i32 5, i32 6>
+    %tr1.l = ashr <2 x i64> <i64 1, i64 2>, <i64 4, i64 5>
+    %shift.upgrd.7 = zext <2 x i8> %shamt to <2 x i64>
+    %tr2.l = ashr <2 x i64> <i64 1, i64 2>, %shift.upgrd.7
+    %tr3.l = shl <2 x i64> <i64 1, i64 2>, <i64 4, i64 5>
+    %shift.upgrd.8 = zext <2 x i8> %shamt to <2 x i64>
+    %tr4.l = shl <2 x i64> <i64 1, i64 2>, %shift.upgrd.8
+    %tr1.u = lshr <2 x i64> <i64 1, i64 2>, <i64 5, i64 6>
+    %shift.upgrd.9 = zext <2 x i8> %shamt to <2 x i64>
+    %tr2.u = lshr <2 x i64> <i64 1, i64 2>, %shift.upgrd.9
+    %tr3.u = shl <2 x i64> <i64 1, i64 2>, <i64 5, i64 6>
+    %shift.upgrd.10 = zext <2 x i8> %shamt to <2 x i64>
+    %tr4.u = shl <2 x i64> <i64 1, i64 2>, %shift.upgrd.10
+    ret i32 0
+}