diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 9a5be79e816..3f4d3296bdd 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -666,3 +666,25 @@ def : Pat<(int_ppc_altivec_vnmsubfp VRRC:$A, VRRC:$B, VRRC:$C),
 
 def : Pat<(PPCvperm (v16i8 VRRC:$vA), VRRC:$vB, VRRC:$vC),
           (VPERM VRRC:$vA, VRRC:$vB, VRRC:$vC)>;
+
+// Vector shifts
+def : Pat<(v16i8 (shl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
+          (v16i8 (VSLB VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v8i16 (shl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
+          (v8i16 (VSLH VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v4i32 (shl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
+          (v4i32 (VSLW VRRC:$vA, VRRC:$vB))>;
+
+def : Pat<(v16i8 (srl (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
+          (v16i8 (VSRB VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v8i16 (srl (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
+          (v8i16 (VSRH VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v4i32 (srl (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
+          (v4i32 (VSRW VRRC:$vA, VRRC:$vB))>;
+
+def : Pat<(v16i8 (sra (v16i8 VRRC:$vA), (v16i8 VRRC:$vB))),
+          (v16i8 (VSRAB VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v8i16 (sra (v8i16 VRRC:$vA), (v8i16 VRRC:$vB))),
+          (v8i16 (VSRAH VRRC:$vA, VRRC:$vB))>;
+def : Pat<(v4i32 (sra (v4i32 VRRC:$vA), (v4i32 VRRC:$vB))),
+          (v4i32 (VSRAW VRRC:$vA, VRRC:$vB))>;
diff --git a/test/CodeGen/PowerPC/vec_shift.ll b/test/CodeGen/PowerPC/vec_shift.ll
new file mode 100644
index 00000000000..0cc699cee42
--- /dev/null
+++ b/test/CodeGen/PowerPC/vec_shift.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5
+; PR3628
+
+define void @update(<4 x i32> %val, <4 x i32>* %dst) nounwind {
+entry:
+	%shl = shl <4 x i32> %val, < i32 4, i32 3, i32 2, i32 1 >
+	%shr = ashr <4 x i32> %shl, < i32 1, i32 2, i32 3, i32 4 >
+	store <4 x i32> %shr, <4 x i32>* %dst
+	ret void
+}
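
The included test only exercises shl and ashr on <4 x i32>. A companion test for the logical-shift (srl) patterns could follow the same shape; the sketch below is not part of the patch, and the function name @update_lshr is made up for illustration:

; Sketch (not in the patch): should select VSRW via the new srl pattern.
; RUN: llvm-as < %s | llc -march=ppc32 -mcpu=g5

define void @update_lshr(<4 x i32> %val, <4 x i32>* %dst) nounwind {
entry:
	%shr = lshr <4 x i32> %val, < i32 1, i32 2, i32 3, i32 4 >
	store <4 x i32> %shr, <4 x i32>* %dst
	ret void
}

Like vec_shift.ll, this only checks that instruction selection succeeds on the g5 target; the RUN line verifies no output.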