]> git.unchartedbackwaters.co.uk Git - francis/psl_presentation_20130225.git/commitdiff
Add generated code from the Scala OFC implementation.
authorFrancis Russell <francis@unchartedbackwaters.co.uk>
Fri, 15 Feb 2013 17:51:54 +0000 (17:51 +0000)
committerFrancis Russell <francis@unchartedbackwaters.co.uk>
Fri, 15 Feb 2013 17:58:56 +0000 (17:58 +0000)
code/integrals_kinetic_part_1.F90 [new file with mode: 0644]
code/integrals_kinetic_part_2.F90 [new file with mode: 0644]
presentation.tex

diff --git a/code/integrals_kinetic_part_1.F90 b/code/integrals_kinetic_part_1.F90
new file mode 100644 (file)
index 0000000..b1c1a97
--- /dev/null
@@ -0,0 +1,88 @@
+subroutine integrals_kinetic(kinet_1, bras_on_grid_1, bra_basis_1, kets_on_grid_1, ket_basis_1)
+  use basis, only: basis_copy_function_to_box, basis_ket_start_wrt_fftbox
+  use comms, only: pub_my_node_id
+  use fourier, only: fourier_apply_box_pair
+  use function_basis, only: FUNC_BASIS
+  use parallel_strategy, only: pub_first_atom_on_node
+  use simulation_cell, only: pub_fftbox, pub_cell
+  use sparse, only: sparse_atom_of_elem, sparse_index_length, SPAM3, sparse_put_element, sparse_generate_index, sparse_fi&
+  &rst_elem_on_node
+  
+  implicit none
+  complex(kind=DP), dimension(:,:,:), allocatable :: reciprocal_box_1
+  Complex(kind=DP), dimension(:,:,:), allocatable :: transformed_1
+  integer :: col_1
+  integer :: col_atom_1
+  integer :: fftbox_offset1_1
+  integer :: fftbox_offset1_2
+  integer :: fftbox_offset2_1
+  integer :: fftbox_offset2_2
+  integer :: fftbox_offset3_1
+  integer :: fftbox_offset3_2
+  integer :: freq_1_1
+  integer :: freq_2_1
+  integer :: freq_3_1
+  integer :: i1_1
+  integer :: i1_2
+  integer :: i1_3
+  integer :: i2_1
+  integer :: i2_2
+  integer :: i2_3
+  integer :: i3_1
+  integer :: i3_2
+  integer :: i3_3
+  integer :: local_col_atom_1
+  integer :: row_1
+  integer :: row_atom_1
+  integer :: row_idx_1
+  integer :: ternary_1
+  integer :: ternary_2
+  integer :: ternary_3
+  integer :: tightbox_origin1_1
+  integer :: tightbox_origin1_2
+  integer :: tightbox_origin2_1
+  integer :: tightbox_origin2_2
+  integer :: tightbox_origin3_1
+  integer :: tightbox_origin3_2
+  integer, dimension(:), allocatable :: sparse_idx_1
+  real(kind=DP) :: inner_product_result_1
+  real(kind=DP) :: reciprocal_vector1_1
+  real(kind=DP) :: reciprocal_vector2_1
+  real(kind=DP) :: reciprocal_vector3_1
+  real(kind=DP), dimension(:) :: bras_on_grid_1
+  real(kind=DP), dimension(:) :: kets_on_grid_1
+  real(kind=DP), dimension(:,:,:), allocatable :: dummybox_1
+  real(kind=DP), dimension(:,:,:), allocatable :: fftbox_1
+  real(kind=DP), dimension(:,:,:), allocatable :: fftbox_2
+  real(kind=DP), dimension(:,:,:), allocatable :: fftbox_3
+  real(kind=DP), dimension(:,:,:), allocatable :: scaled_1
+  type(FUNC_BASIS) :: bra_basis_1
+  type(FUNC_BASIS) :: ket_basis_1
+  type(SPAM3) :: kinet_1
+  
+  allocate(sparse_idx_1(sparse_index_length(kinet_1)))
+  call sparse_generate_index(sparse_idx_1, kinet_1)
+  do row_1 = sparse_first_elem_on_node(pub_my_node_id, kinet_1, 'R'), sparse_first_elem_on_node(pub_my_node_id + 1, kinet&
+  &_1, 'R') - 1
+    row_atom_1 = sparse_atom_of_elem(row_1, kinet_1, 'R')
+    do col_1 = sparse_first_elem_on_node(pub_my_node_id, kinet_1, 'C'), sparse_first_elem_on_node(pub_my_node_id + 1, kin&
+    &et_1, 'C') - 1
+      col_atom_1 = sparse_atom_of_elem(col_1, kinet_1, 'C')
+      local_col_atom_1 = col_atom_1 - pub_first_atom_on_node(pub_my_node_id) + 1
+      do row_idx_1 = sparse_idx_1(local_col_atom_1), sparse_idx_1(local_col_atom_1 + 1) - 1
+        if (sparse_idx_1(row_idx_1) .eq. row_atom_1) then
+          allocate(fftbox_3(pub_fftbox%total_pt1,pub_fftbox%total_pt2,pub_fftbox%total_pt3))
+          call basis_ket_start_wrt_fftbox(fftbox_offset1_2, fftbox_offset2_2, fftbox_offset3_2, pub_fftbox%total_pt1, pub&
+          &_fftbox%total_pt2, pub_fftbox%total_pt3)
+          call basis_copy_function_to_box(fftbox_3, pub_fftbox%total_pt1, pub_fftbox%total_pt2, pub_fftbox%total_pt3, fft&
+          &box_offset1_2, fftbox_offset2_2, fftbox_offset3_2, bra_basis_1%tight_boxes(row_1), bras_on_grid_1, bra_basis_1&
+          &%spheres(row_1))
+          tightbox_origin1_1 = (bra_basis_1%tight_boxes(row_1)%start_ppds1 - 1) * pub_cell%n_pt1 + bra_basis_1%tight_boxe&
+          &s(row_1)%start_pts1
+          tightbox_origin2_2 = (bra_basis_1%tight_boxes(row_1)%start_ppds2 - 1) * pub_cell%n_pt2 + bra_basis_1%tight_boxe&
+          &s(row_1)%start_pts2
+          tightbox_origin3_1 = (bra_basis_1%tight_boxes(row_1)%start_ppds3 - 1) * pub_cell%n_pt3 + bra_basis_1%tight_boxe&
+          &s(row_1)%start_pts3
+          allocate(fftbox_1(pub_fftbox%total_pt1,pub_fftbox%total_pt2,pub_fftbox%total_pt3))
+          call basis_ket_start_wrt_fftbox(fftbox_offset1_1, fftbox_offset2_1, fftbox_offset3_1, pub_fftbox%total_pt1, pub&
+          &_fftbox%total_pt2, pub_fftbox%total_pt3)
diff --git a/code/integrals_kinetic_part_2.F90 b/code/integrals_kinetic_part_2.F90
new file mode 100644 (file)
index 0000000..ec23681
--- /dev/null
@@ -0,0 +1,87 @@
+          call basis_copy_function_to_box(fftbox_1, pub_fftbox%total_pt1, pub_fftbox%total_pt2, pub_fftbox%total_pt3, fft&
+          &box_offset1_1, fftbox_offset2_1, fftbox_offset3_1, ket_basis_1%tight_boxes(col_1), kets_on_grid_1, ket_basis_1&
+          &%spheres(col_1))
+          tightbox_origin1_2 = (ket_basis_1%tight_boxes(col_1)%start_ppds1 - 1) * pub_cell%n_pt1 + ket_basis_1%tight_boxe&
+          &s(col_1)%start_pts1
+          tightbox_origin2_1 = (ket_basis_1%tight_boxes(col_1)%start_ppds2 - 1) * pub_cell%n_pt2 + ket_basis_1%tight_boxe&
+          &s(col_1)%start_pts2
+          tightbox_origin3_2 = (ket_basis_1%tight_boxes(col_1)%start_ppds3 - 1) * pub_cell%n_pt3 + ket_basis_1%tight_boxe&
+          &s(col_1)%start_pts3
+          allocate(reciprocal_box_1(pub_fftbox%total_pt1,pub_fftbox%total_pt2,pub_fftbox%total_pt3))
+          call fourier_apply_box_pair('C', 'F', fftbox_1, fftbox_1, reciprocal_box_1)
+          deallocate(fftbox_1)
+          allocate(transformed_1(pub_fftbox%total_pt1,pub_fftbox%total_pt2,pub_fftbox%total_pt3))
+          do i3_1 = 1, pub_fftbox%total_pt3
+            if (i3_1 .gt. pub_fftbox%total_pt3 / 2 + 1) then
+              ternary_1 = i3_1 - pub_fftbox%total_pt3 - 1
+            else
+              ternary_1 = i3_1 - 1
+            endif
+            freq_3_1 = ternary_1
+            do i2_3 = 1, pub_fftbox%total_pt2
+              if (i2_3 .gt. pub_fftbox%total_pt2 / 2 + 1) then
+                ternary_2 = i2_3 - pub_fftbox%total_pt2 - 1
+              else
+                ternary_2 = i2_3 - 1
+              endif
+              freq_2_1 = ternary_2
+              do i1_3 = 1, pub_fftbox%total_pt1
+                if (i1_3 .gt. pub_fftbox%total_pt1 / 2 + 1) then
+                  ternary_3 = i1_3 - pub_fftbox%total_pt1 - 1
+                else
+                  ternary_3 = i1_3 - 1
+                endif
+                freq_1_1 = ternary_3
+                reciprocal_vector1_1 = 0.0 + pub_fftbox%b1%X * real(freq_1_1) + pub_fftbox%b2%X * real(freq_2_1) + pub_ff&
+                &tbox%b3%X * real(freq_3_1)
+                reciprocal_vector2_1 = 0.0 + pub_fftbox%b1%Y * real(freq_1_1) + pub_fftbox%b2%Y * real(freq_2_1) + pub_ff&
+                &tbox%b3%Y * real(freq_3_1)
+                reciprocal_vector3_1 = 0.0 + pub_fftbox%b1%Z * real(freq_1_1) + pub_fftbox%b2%Z * real(freq_2_1) + pub_ff&
+                &tbox%b3%Z * real(freq_3_1)
+                transformed_1(i1_3, i2_3, i3_1) = reciprocal_box_1(i1_3, i2_3, i3_1) * cmplx((0.0 + reciprocal_vector1_1 &
+                &* reciprocal_vector1_1 + reciprocal_vector2_1 * reciprocal_vector2_1 + reciprocal_vector3_1 * reciprocal&
+                &_vector3_1) * (-1.0))
+              end do
+            end do
+          end do
+          deallocate(reciprocal_box_1)
+          allocate(fftbox_2(pub_fftbox%total_pt1,pub_fftbox%total_pt2,pub_fftbox%total_pt3))
+          allocate(dummybox_1(pub_fftbox%total_pt1,pub_fftbox%total_pt2,pub_fftbox%total_pt3))
+          call fourier_apply_box_pair('C', 'B', fftbox_2, dummybox_1, transformed_1)
+          deallocate(transformed_1)
+          allocate(scaled_1(pub_fftbox%total_pt1,pub_fftbox%total_pt2,pub_fftbox%total_pt3))
+          do i3_3 = 1, pub_fftbox%total_pt3
+            do i2_2 = 1, pub_fftbox%total_pt2
+              do i1_2 = 1, pub_fftbox%total_pt1
+                scaled_1(i1_2, i2_2, i3_3) = fftbox_2(i1_2, i2_2, i3_3) * (-0.5)
+              end do
+            end do
+          end do
+          deallocate(fftbox_2)
+          deallocate(dummybox_1)
+          inner_product_result_1 = 0.0
+          do i3_2 = max(tightbox_origin3_1 - fftbox_offset3_2, tightbox_origin3_2 - fftbox_offset3_1), min(tightbox_origi&
+          &n3_1 - fftbox_offset3_2 + pub_fftbox%total_pt3, tightbox_origin3_2 - fftbox_offset3_1 + pub_fftbox%total_pt3) &
+          &- 1
+            do i2_1 = max(tightbox_origin2_2 - fftbox_offset2_2, tightbox_origin2_1 - fftbox_offset2_1), min(tightbox_ori&
+            &gin2_2 - fftbox_offset2_2 + pub_fftbox%total_pt2, tightbox_origin2_1 - fftbox_offset2_1 + pub_fftbox%total_p&
+            &t2) - 1
+              do i1_1 = max(tightbox_origin1_1 - fftbox_offset1_2, tightbox_origin1_2 - fftbox_offset1_1), min(tightbox_o&
+              &rigin1_1 - fftbox_offset1_2 + pub_fftbox%total_pt1, tightbox_origin1_2 - fftbox_offset1_1 + pub_fftbox%tot&
+              &al_pt1) - 1
+                inner_product_result_1 = inner_product_result_1 + fftbox_3(i1_1 - (tightbox_origin1_1 - fftbox_offset1_2)&
+                & + 1, i2_1 - (tightbox_origin2_2 - fftbox_offset2_2) + 1, i3_2 - (tightbox_origin3_1 - fftbox_offset3_2)&
+                & + 1) * scaled_1(i1_1 - (tightbox_origin1_2 - fftbox_offset1_1) + 1, i2_1 - (tightbox_origin2_1 - fftbox&
+                &_offset2_1) + 1, i3_2 - (tightbox_origin3_2 - fftbox_offset3_1) + 1) * pub_cell%weight
+              end do
+            end do
+          end do
+          deallocate(fftbox_3)
+          deallocate(scaled_1)
+          call sparse_put_element(inner_product_result_1, kinet_1, row_1, col_1)
+        endif
+      end do
+    end do
+  end do
+  deallocate(sparse_idx_1)
+end subroutine
index 17d3492a33f69fbd2e6b7e0e10d7dcd9b36fcd1a..ddd14814f6c2cd1d40c2b7bf3495456b97a77df2 100644 (file)
@@ -257,7 +257,7 @@ Permutation & $T_{perm} = (I \succ I') \cup (I' \succ I)$ \\
 \frametitle{Our initial implementation}
 
 We developed an code generator in Scala that attempted to reason
-abstractly about the indices involved in the computation.
+about the indices involved in the computation.
 
 \vspace{0.5em}
 \centering
@@ -269,6 +269,54 @@ abstractly about the indices involved in the computation.
 
 }
 
+\frame{
+
+\frametitle{Our initial implementation}
+
+The system was designed to reason about the relationships between indices, and
+generate code subject to the various constraints.
+
+\begin{itemize}
+
+\item Nodes that could be applied point-wise did not affect any of their indices.
+
+\item Nodes that constructed entirely new fields (e.g. FFTs) removed indices of
+  the operands they used and exposed new indices.
+
+\item Nodes could be transparent to indices they did not act on, so an FFT node
+  acting on a set of functions would represent the FFT of all of them.
+
+\item Nodes could require random access to certain indices, forcing the system
+  to introduce temporary storage.
+
+\item The system attempted to synthesise a loop nesting that respected all
+  dependencies whilst minimizing the amout of intermediate storage required to
+  pass data between each node in the graph.
+
+\end{itemize}
+
+}
+
+\frame{
+
+\frametitle{Generated code for ONETEP's kinetic energy integral}
+
+\resizebox{0.48\textwidth}{!}{
+\begin{minipage}{2.5\textwidth}
+\begin{alltt}
+\input{code/integrals_kinetic_part_1.F90}
+\end{alltt}
+\end{minipage}
+}
+\resizebox{0.48\textwidth}{!}{
+\begin{minipage}{2.5\textwidth}
+\begin{alltt}
+\input{code/integrals_kinetic_part_2.F90}
+\end{alltt}
+\end{minipage}
+}
+
+}
 \end{document}