[flang] still apply vectorization cost model with IVDEP (#180760)

The current implementation of `!DIR$ IVDEP` leads flang to bypass the
LLVM cost model and always vectorize the loop carrying `!DIR$ IVDEP`.

IVDEP is an extension and its documentation varies. While it usually
leads to vectorization, because it is added on loops where vectorizing
is usually profitable, its documentation only states that it tells the
compiler that there are no loop-carried dependencies and that the loop
is safe to vectorize.

In some applications, such a directive may have been added to help the
compiler prove that it is safe to vectorize, but vectorizing is not
always the best choice for all architectures. The cost model should
still be applied. This is at least how classic flang behaves.

When users want vectorization to happen, they should use `!DIR$ VECTOR
ALWAYS`.

This patch updates flang to not emit `llvm.loop.vectorize.enable` just
because IVDEP was seen. Instead, IVDEP now only controls the emission
of the access groups that convey the independence of the accesses, and
leaves the vectorization decision up to the cost model. `!DIR$ VECTOR
ALWAYS` can be used in combination with IVDEP to force vectorization (it
causes the emission of `llvm.loop.vectorize.enable`).
diff --git a/flang/docs/Directives.md b/flang/docs/Directives.md
index 704bb76..7080e5c 100644
--- a/flang/docs/Directives.md
+++ b/flang/docs/Directives.md
@@ -51,6 +51,9 @@
   be passed in registers, so it's not clear how lowering should handle this
   case. (Passing scalar actual argument to `ignore_tkr(R)` dummy argument
   that is a scalar with `VALUE` attribute is allowed.)
+* `!dir$ ivdep` asserts that there are no vector dependencies in the following loop,
+  allowing the compiler to vectorize or parallelize the loop if it chooses to do so
+  based on its cost model. It does not force vectorization.
 * `!dir$ assume_aligned desginator:alignment`, where designator is a variable,
   maybe with array indices, and alignment is what the compiler should assume the
   alignment to be. E.g A:64 or B(1,1,1):128. The alignment should be a power of 2,
diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp
index a82e266..6eedb08 100644
--- a/flang/lib/Lower/Bridge.cpp
+++ b/flang/lib/Lower/Bridge.cpp
@@ -2797,8 +2797,6 @@
                 has_attrs = true;
               },
               [&](const Fortran::parser::CompilerDirective::IVDep &iv) {
-                disableVecAttr =
-                    mlir::BoolAttr::get(builder->getContext(), false);
                 aga.push_back(
                     mlir::LLVM::AccessGroupAttr::get(builder->getContext()));
                 has_attrs = true;
diff --git a/flang/test/Integration/ivdep.f90 b/flang/test/Integration/ivdep.f90
index b122f58..0be86ff 100644
--- a/flang/test/Integration/ivdep.f90
+++ b/flang/test/Integration/ivdep.f90
@@ -103,14 +103,28 @@
     end subroutine
 end subroutine ivdep_test3
 
+! CHECK-LABEL: ivdep_test4
+subroutine ivdep_test4
+  integer :: a(10)
+  !dir$ ivdep
+  !dir$ vector always
+  ! CHECK:   br i1 {{.*}}, label {{.*}}, label {{.*}}
+  do i=1,10
+     a(i)=i
+     !CHECK: br label {{.*}}, !llvm.loop ![[ANNOTATION3:.*]]
+  end do
+end subroutine ivdep_test4
+
 ! CHECK: [[DISTRINCT]] = distinct !{}
-! CHECK: ![[ANNOTATION]] = distinct !{![[ANNOTATION]], ![[VECTORIZE:.*]], ![[PARALLEL_ACCESSES:.*]]}
-! CHECK: ![[VECTORIZE]] = !{!"llvm.loop.vectorize.enable", i1 true}
+! CHECK: ![[ANNOTATION]] = distinct !{![[ANNOTATION]], ![[PARALLEL_ACCESSES:.*]]}
 ! CHECK: ![[PARALLEL_ACCESSES]] = !{!"llvm.loop.parallel_accesses", [[DISTRINCT]]}
 ! CHECK: [[DISTRINCT1]] = distinct !{}
-! CHECK: ![[ANNOTATION1]] = distinct !{![[ANNOTATION1]], ![[VECTORIZE:.*]], ![[PARALLEL_ACCESSES1:.*]]}
+! CHECK: ![[ANNOTATION1]] = distinct !{![[ANNOTATION1]], ![[PARALLEL_ACCESSES1:.*]]}
 ! CHECK: ![[PARALLEL_ACCESSES1]] = !{!"llvm.loop.parallel_accesses", [[DISTRINCT1]]}
 ! CHECK: [[DISTRINCT2]] = distinct !{}
-! CHECK: ![[ANNOTATION2]] = distinct !{![[ANNOTATION2]], ![[VECTORIZE:.*]], ![[PARALLEL_ACCESSES2:.*]]}
+! CHECK: ![[ANNOTATION2]] = distinct !{![[ANNOTATION2]], ![[PARALLEL_ACCESSES2:.*]]}
 ! CHECK: ![[PARALLEL_ACCESSES2]] = !{!"llvm.loop.parallel_accesses", [[DISTRINCT2]]}
-
+! CHECK: [[DISTRINCT3:.*]] = distinct !{}
+! CHECK: ![[ANNOTATION3]] = distinct !{![[ANNOTATION3]], ![[VECTORIZE:.*]], ![[PARALLEL_ACCESSES3:.*]]}
+! CHECK: ![[VECTORIZE]] = !{!"llvm.loop.vectorize.enable", i1 true}
+! CHECK: ![[PARALLEL_ACCESSES3]] = !{!"llvm.loop.parallel_accesses", [[DISTRINCT3]]}
diff --git a/flang/test/Lower/HLFIR/ivdep-elemental.f90 b/flang/test/Lower/HLFIR/ivdep-elemental.f90
index ac5b274..07f02eb 100644
--- a/flang/test/Lower/HLFIR/ivdep-elemental.f90
+++ b/flang/test/Lower/HLFIR/ivdep-elemental.f90
@@ -1,6 +1,6 @@
 ! RUN: %flang_fc1 -emit-fir -O2 %s -o - | FileCheck %s
 
-! CHECK: #[[ANNOTATION:.*]] = #llvm.loop_annotation<vectorize = #{{.*}}, parallelAccesses = #[[GROUP:.*]]>
+! CHECK: #[[ANNOTATION:.*]] = #llvm.loop_annotation<parallelAccesses = #[[GROUP:.*]]>
 subroutine elemental_assignment_in_loop(a, b)
   real :: a(100,100), b(100,100)
   !dir$ ivdep
diff --git a/flang/test/Lower/HLFIR/ivdep-where.f90 b/flang/test/Lower/HLFIR/ivdep-where.f90
index cddf28c..f995bf9 100644
--- a/flang/test/Lower/HLFIR/ivdep-where.f90
+++ b/flang/test/Lower/HLFIR/ivdep-where.f90
@@ -1,6 +1,6 @@
 ! RUN: %flang_fc1 -emit-fir -O2 %s -o - | FileCheck %s
 
-! CHECK: #[[ANNOTATION:.*]] = #llvm.loop_annotation<vectorize = #{{.*}}, parallelAccesses = #[[GROUP:.*]]>
+! CHECK: #[[ANNOTATION:.*]] = #llvm.loop_annotation<parallelAccesses = #[[GROUP:.*]]>
 subroutine test_where(a, l)
   real :: a(100,100)
   logical :: l(100, 100)
diff --git a/flang/test/Lower/ivdep-array.f90 b/flang/test/Lower/ivdep-array.f90
index d018fba..d31a08b 100644
--- a/flang/test/Lower/ivdep-array.f90
+++ b/flang/test/Lower/ivdep-array.f90
@@ -1,6 +1,6 @@
 ! RUN: %flang_fc1 -emit-fir -O2 %s -o - | FileCheck %s
 
-! CHECK: #[[ANNOTATION:.*]] = #llvm.loop_annotation<vectorize = #{{.*}}, parallelAccesses = #[[GROUP:.*]]>
+! CHECK: #[[ANNOTATION:.*]] = #llvm.loop_annotation<parallelAccesses = #[[GROUP:.*]]>
 subroutine array_assignment_in_loop(a, b)
   real :: a(100,100), b(100,100)
   !dir$ ivdep
diff --git a/flang/test/Lower/ivdep.f90 b/flang/test/Lower/ivdep.f90
index 93bcdae..69f54cd 100644
--- a/flang/test/Lower/ivdep.f90
+++ b/flang/test/Lower/ivdep.f90
@@ -3,10 +3,12 @@
 ! CHECK: #access_group = #llvm.access_group<id = distinct[0]<>>
 ! CHECK: #access_group1 = #llvm.access_group<id = distinct[1]<>>
 ! CHECK: #access_group2 = #llvm.access_group<id = distinct[2]<>>
+! CHECK: #access_group3 = #llvm.access_group<id = distinct[3]<>>
 ! CHECK: #loop_vectorize = #llvm.loop_vectorize<disable = false>
-! CHECK: #loop_annotation = #llvm.loop_annotation<vectorize = #loop_vectorize, parallelAccesses = #access_group>
-! CHECK: #loop_annotation1 = #llvm.loop_annotation<vectorize = #loop_vectorize, parallelAccesses = #access_group1>
-! CHECK: #loop_annotation2 = #llvm.loop_annotation<vectorize = #loop_vectorize, parallelAccesses = #access_group2>
+! CHECK: #loop_annotation = #llvm.loop_annotation<parallelAccesses = #access_group>
+! CHECK: #loop_annotation1 = #llvm.loop_annotation<parallelAccesses = #access_group1>
+! CHECK: #loop_annotation2 = #llvm.loop_annotation<parallelAccesses = #access_group2>
+! CHECK: #loop_annotation3 = #llvm.loop_annotation<vectorize = #loop_vectorize, parallelAccesses = #access_group3>
 
 ! CHECK-LABEL: ivdep_test1
 subroutine ivdep_test1 
@@ -92,3 +94,13 @@
     end subroutine
 end subroutine ivdep_test3
 
+! CHECK-LABEL: ivdep_test4
+subroutine ivdep_test4
+  integer :: a(10)
+  !dir$ ivdep
+  !dir$ vector always
+  !CHECK: fir.do_loop {{.*}} attributes {loopAnnotation = #loop_annotation3}
+  do i=1,10
+     a(i)=i
+  end do
+end subroutine ivdep_test4