--- /dev/null
+subroutine integrals_kinetic(kinet_1, bras_on_grid_1, bra_basis_1, kets_on_grid_1, ket_basis_1)
+ use basis, only: basis_copy_function_to_box, basis_ket_start_wrt_fftbox
+ use comms, only: pub_my_node_id
+ use fourier, only: fourier_apply_box_pair
+ use function_basis, only: FUNC_BASIS
+ use parallel_strategy, only: pub_first_atom_on_node
+ use simulation_cell, only: pub_fftbox, pub_cell
+ use sparse, only: sparse_atom_of_elem, sparse_index_length, SPAM3, sparse_put_element, sparse_generate_index, sparse_fi&
+ &rst_elem_on_node
+
+ implicit none
+ complex(kind=DP), dimension(:,:,:), allocatable :: reciprocal_box_1
+ Complex(kind=DP), dimension(:,:,:), allocatable :: transformed_1
+ integer :: col_1
+ integer :: col_atom_1
+ integer :: fftbox_offset1_1
+ integer :: fftbox_offset1_2
+ integer :: fftbox_offset2_1
+ integer :: fftbox_offset2_2
+ integer :: fftbox_offset3_1
+ integer :: fftbox_offset3_2
+ integer :: freq_1_1
+ integer :: freq_2_1
+ integer :: freq_3_1
+ integer :: i1_1
+ integer :: i1_2
+ integer :: i1_3
+ integer :: i2_1
+ integer :: i2_2
+ integer :: i2_3
+ integer :: i3_1
+ integer :: i3_2
+ integer :: i3_3
+ integer :: local_col_atom_1
+ integer :: row_1
+ integer :: row_atom_1
+ integer :: row_idx_1
+ integer :: ternary_1
+ integer :: ternary_2
+ integer :: ternary_3
+ integer :: tightbox_origin1_1
+ integer :: tightbox_origin1_2
+ integer :: tightbox_origin2_1
+ integer :: tightbox_origin2_2
+ integer :: tightbox_origin3_1
+ integer :: tightbox_origin3_2
+ integer, dimension(:), allocatable :: sparse_idx_1
+ real(kind=DP) :: inner_product_result_1
+ real(kind=DP) :: reciprocal_vector1_1
+ real(kind=DP) :: reciprocal_vector2_1
+ real(kind=DP) :: reciprocal_vector3_1
+ real(kind=DP), dimension(:) :: bras_on_grid_1
+ real(kind=DP), dimension(:) :: kets_on_grid_1
+ real(kind=DP), dimension(:,:,:), allocatable :: dummybox_1
+ real(kind=DP), dimension(:,:,:), allocatable :: fftbox_1
+ real(kind=DP), dimension(:,:,:), allocatable :: fftbox_2
+ real(kind=DP), dimension(:,:,:), allocatable :: fftbox_3
+ real(kind=DP), dimension(:,:,:), allocatable :: scaled_1
+ type(FUNC_BASIS) :: bra_basis_1
+ type(FUNC_BASIS) :: ket_basis_1
+ type(SPAM3) :: kinet_1
+
+ allocate(sparse_idx_1(sparse_index_length(kinet_1)))
+ call sparse_generate_index(sparse_idx_1, kinet_1)
+ do row_1 = sparse_first_elem_on_node(pub_my_node_id, kinet_1, 'R'), sparse_first_elem_on_node(pub_my_node_id + 1, kinet&
+ &_1, 'R') - 1
+ row_atom_1 = sparse_atom_of_elem(row_1, kinet_1, 'R')
+ do col_1 = sparse_first_elem_on_node(pub_my_node_id, kinet_1, 'C'), sparse_first_elem_on_node(pub_my_node_id + 1, kin&
+ &et_1, 'C') - 1
+ col_atom_1 = sparse_atom_of_elem(col_1, kinet_1, 'C')
+ local_col_atom_1 = col_atom_1 - pub_first_atom_on_node(pub_my_node_id) + 1
+ do row_idx_1 = sparse_idx_1(local_col_atom_1), sparse_idx_1(local_col_atom_1 + 1) - 1
+ if (sparse_idx_1(row_idx_1) .eq. row_atom_1) then
+ allocate(fftbox_3(pub_fftbox%total_pt1,pub_fftbox%total_pt2,pub_fftbox%total_pt3))
+ call basis_ket_start_wrt_fftbox(fftbox_offset1_2, fftbox_offset2_2, fftbox_offset3_2, pub_fftbox%total_pt1, pub&
+ &_fftbox%total_pt2, pub_fftbox%total_pt3)
+ call basis_copy_function_to_box(fftbox_3, pub_fftbox%total_pt1, pub_fftbox%total_pt2, pub_fftbox%total_pt3, fft&
+ &box_offset1_2, fftbox_offset2_2, fftbox_offset3_2, bra_basis_1%tight_boxes(row_1), bras_on_grid_1, bra_basis_1&
+ &%spheres(row_1))
+ tightbox_origin1_1 = (bra_basis_1%tight_boxes(row_1)%start_ppds1 - 1) * pub_cell%n_pt1 + bra_basis_1%tight_boxe&
+ &s(row_1)%start_pts1
+ tightbox_origin2_2 = (bra_basis_1%tight_boxes(row_1)%start_ppds2 - 1) * pub_cell%n_pt2 + bra_basis_1%tight_boxe&
+ &s(row_1)%start_pts2
+ tightbox_origin3_1 = (bra_basis_1%tight_boxes(row_1)%start_ppds3 - 1) * pub_cell%n_pt3 + bra_basis_1%tight_boxe&
+ &s(row_1)%start_pts3
+ allocate(fftbox_1(pub_fftbox%total_pt1,pub_fftbox%total_pt2,pub_fftbox%total_pt3))
+ call basis_ket_start_wrt_fftbox(fftbox_offset1_1, fftbox_offset2_1, fftbox_offset3_1, pub_fftbox%total_pt1, pub&
+ &_fftbox%total_pt2, pub_fftbox%total_pt3)
--- /dev/null
+ call basis_copy_function_to_box(fftbox_1, pub_fftbox%total_pt1, pub_fftbox%total_pt2, pub_fftbox%total_pt3, fft&
+ &box_offset1_1, fftbox_offset2_1, fftbox_offset3_1, ket_basis_1%tight_boxes(col_1), kets_on_grid_1, ket_basis_1&
+ &%spheres(col_1))
+ tightbox_origin1_2 = (ket_basis_1%tight_boxes(col_1)%start_ppds1 - 1) * pub_cell%n_pt1 + ket_basis_1%tight_boxe&
+ &s(col_1)%start_pts1
+ tightbox_origin2_1 = (ket_basis_1%tight_boxes(col_1)%start_ppds2 - 1) * pub_cell%n_pt2 + ket_basis_1%tight_boxe&
+ &s(col_1)%start_pts2
+ tightbox_origin3_2 = (ket_basis_1%tight_boxes(col_1)%start_ppds3 - 1) * pub_cell%n_pt3 + ket_basis_1%tight_boxe&
+ &s(col_1)%start_pts3
+ allocate(reciprocal_box_1(pub_fftbox%total_pt1,pub_fftbox%total_pt2,pub_fftbox%total_pt3))
+ call fourier_apply_box_pair('C', 'F', fftbox_1, fftbox_1, reciprocal_box_1)
+ deallocate(fftbox_1)
+ allocate(transformed_1(pub_fftbox%total_pt1,pub_fftbox%total_pt2,pub_fftbox%total_pt3))
+ do i3_1 = 1, pub_fftbox%total_pt3
+ if (i3_1 .gt. pub_fftbox%total_pt3 / 2 + 1) then
+ ternary_1 = i3_1 - pub_fftbox%total_pt3 - 1
+ else
+ ternary_1 = i3_1 - 1
+ endif
+ freq_3_1 = ternary_1
+ do i2_3 = 1, pub_fftbox%total_pt2
+ if (i2_3 .gt. pub_fftbox%total_pt2 / 2 + 1) then
+ ternary_2 = i2_3 - pub_fftbox%total_pt2 - 1
+ else
+ ternary_2 = i2_3 - 1
+ endif
+ freq_2_1 = ternary_2
+ do i1_3 = 1, pub_fftbox%total_pt1
+ if (i1_3 .gt. pub_fftbox%total_pt1 / 2 + 1) then
+ ternary_3 = i1_3 - pub_fftbox%total_pt1 - 1
+ else
+ ternary_3 = i1_3 - 1
+ endif
+ freq_1_1 = ternary_3
+ reciprocal_vector1_1 = 0.0 + pub_fftbox%b1%X * real(freq_1_1) + pub_fftbox%b2%X * real(freq_2_1) + pub_ff&
+ &tbox%b3%X * real(freq_3_1)
+ reciprocal_vector2_1 = 0.0 + pub_fftbox%b1%Y * real(freq_1_1) + pub_fftbox%b2%Y * real(freq_2_1) + pub_ff&
+ &tbox%b3%Y * real(freq_3_1)
+ reciprocal_vector3_1 = 0.0 + pub_fftbox%b1%Z * real(freq_1_1) + pub_fftbox%b2%Z * real(freq_2_1) + pub_ff&
+ &tbox%b3%Z * real(freq_3_1)
+ transformed_1(i1_3, i2_3, i3_1) = reciprocal_box_1(i1_3, i2_3, i3_1) * cmplx((0.0 + reciprocal_vector1_1 &
+ &* reciprocal_vector1_1 + reciprocal_vector2_1 * reciprocal_vector2_1 + reciprocal_vector3_1 * reciprocal&
+ &_vector3_1) * (-1.0))
+ end do
+ end do
+ end do
+ deallocate(reciprocal_box_1)
+ allocate(fftbox_2(pub_fftbox%total_pt1,pub_fftbox%total_pt2,pub_fftbox%total_pt3))
+ allocate(dummybox_1(pub_fftbox%total_pt1,pub_fftbox%total_pt2,pub_fftbox%total_pt3))
+ call fourier_apply_box_pair('C', 'B', fftbox_2, dummybox_1, transformed_1)
+ deallocate(transformed_1)
+ allocate(scaled_1(pub_fftbox%total_pt1,pub_fftbox%total_pt2,pub_fftbox%total_pt3))
+ do i3_3 = 1, pub_fftbox%total_pt3
+ do i2_2 = 1, pub_fftbox%total_pt2
+ do i1_2 = 1, pub_fftbox%total_pt1
+ scaled_1(i1_2, i2_2, i3_3) = fftbox_2(i1_2, i2_2, i3_3) * (-0.5)
+ end do
+ end do
+ end do
+ deallocate(fftbox_2)
+ deallocate(dummybox_1)
+ inner_product_result_1 = 0.0
+ do i3_2 = max(tightbox_origin3_1 - fftbox_offset3_2, tightbox_origin3_2 - fftbox_offset3_1), min(tightbox_origi&
+ &n3_1 - fftbox_offset3_2 + pub_fftbox%total_pt3, tightbox_origin3_2 - fftbox_offset3_1 + pub_fftbox%total_pt3) &
+ &- 1
+ do i2_1 = max(tightbox_origin2_2 - fftbox_offset2_2, tightbox_origin2_1 - fftbox_offset2_1), min(tightbox_ori&
+ &gin2_2 - fftbox_offset2_2 + pub_fftbox%total_pt2, tightbox_origin2_1 - fftbox_offset2_1 + pub_fftbox%total_p&
+ &t2) - 1
+ do i1_1 = max(tightbox_origin1_1 - fftbox_offset1_2, tightbox_origin1_2 - fftbox_offset1_1), min(tightbox_o&
+ &rigin1_1 - fftbox_offset1_2 + pub_fftbox%total_pt1, tightbox_origin1_2 - fftbox_offset1_1 + pub_fftbox%tot&
+ &al_pt1) - 1
+ inner_product_result_1 = inner_product_result_1 + fftbox_3(i1_1 - (tightbox_origin1_1 - fftbox_offset1_2)&
+ & + 1, i2_1 - (tightbox_origin2_2 - fftbox_offset2_2) + 1, i3_2 - (tightbox_origin3_1 - fftbox_offset3_2)&
+ & + 1) * scaled_1(i1_1 - (tightbox_origin1_2 - fftbox_offset1_1) + 1, i2_1 - (tightbox_origin2_1 - fftbox&
+ &_offset2_1) + 1, i3_2 - (tightbox_origin3_2 - fftbox_offset3_1) + 1) * pub_cell%weight
+ end do
+ end do
+ end do
+ deallocate(fftbox_3)
+ deallocate(scaled_1)
+ call sparse_put_element(inner_product_result_1, kinet_1, row_1, col_1)
+ endif
+ end do
+ end do
+ end do
+ deallocate(sparse_idx_1)
+end subroutine
\frametitle{Our initial implementation}
We developed an code generator in Scala that attempted to reason
-abstractly about the indices involved in the computation.
+about the indices involved in the computation.
\vspace{0.5em}
\centering
}
+\frame{
+
+\frametitle{Our initial implementation}
+
+The system was designed to reason about the relationships between indices, and
+generate code subject to the various constraints.
+
+\begin{itemize}
+
+\item Nodes that could be applied point-wise did not affect any of their indices.
+
+\item Nodes that constructed entirely new fields (e.g. FFTs) removed indices of
+ the operands they used and exposed new indices.
+
+\item Nodes could be transparent to indices they did not act on, so an FFT node
+ acting on a set of functions would represent the FFT of all of them.
+
+\item Nodes could require random access to certain indices, forcing the system
+ to introduce temporary storage.
+
+\item The system attempted to synthesise a loop nesting that respected all
+ dependencies whilst minimizing the amout of intermediate storage required to
+ pass data between each node in the graph.
+
+\end{itemize}
+
+}
+
+\frame{
+
+\frametitle{Generated code for ONETEP's kinetic energy integral}
+
+\resizebox{0.48\textwidth}{!}{
+\begin{minipage}{2.5\textwidth}
+\begin{alltt}
+\input{code/integrals_kinetic_part_1.F90}
+\end{alltt}
+\end{minipage}
+}
+\resizebox{0.48\textwidth}{!}{
+\begin{minipage}{2.5\textwidth}
+\begin{alltt}
+\input{code/integrals_kinetic_part_2.F90}
+\end{alltt}
+\end{minipage}
+}
+
+}
\end{document}