From ff39ef8cd8fd42ce1ba54d95875d819bc3636182 Mon Sep 17 00:00:00 2001 From: Francis Russell Date: Tue, 11 Jun 2013 08:59:59 +0100 Subject: [PATCH] More work on slides. --- .gitignore | 3 + Makefile | 21 +- images-gnuplot/midpoints.gpi | 11 + .../resampling-standalone-fftw-filtered.dat | 23 + .../resampling-standalone-fftw-filtered.gpi | 17 + images-gnuplot/resampling-standalone-fftw.dat | 49 ++ images-gnuplot/resampling-standalone-fftw.gpi | 14 + images-svg/complex-formats.svg | 251 ++++++ images-svg/interpolation-onetep.svg | 213 ++--- images-svg/interpolation-phase-shift-1d.svg | 699 ++++++++++++++++ images-svg/per-dimension-interpolation.svg | 757 ++++++++++++++++++ onetep-phase-shift-fftw.tex | 76 ++ presentation.tex | 171 +++- 13 files changed, 2197 insertions(+), 108 deletions(-) create mode 100644 images-gnuplot/midpoints.gpi create mode 100644 images-gnuplot/resampling-standalone-fftw-filtered.dat create mode 100644 images-gnuplot/resampling-standalone-fftw-filtered.gpi create mode 100644 images-gnuplot/resampling-standalone-fftw.dat create mode 100644 images-gnuplot/resampling-standalone-fftw.gpi create mode 100644 images-svg/complex-formats.svg create mode 100644 images-svg/interpolation-phase-shift-1d.svg create mode 100644 images-svg/per-dimension-interpolation.svg create mode 100644 onetep-phase-shift-fftw.tex diff --git a/.gitignore b/.gitignore index 8138dbc..032d982 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,6 @@ /code/*.tex /images-dot/*.tex /images-svg/*.pdf +/images-gnuplot/*.pdf +/images-gnuplot/*.tex +/images-gnuplot/build_stamp diff --git a/Makefile b/Makefile index 4fbd70a..958b0aa 100644 --- a/Makefile +++ b/Makefile @@ -13,9 +13,17 @@ CODE_TEX_FILES=$(CODE_F90_TEX_FILES) $(CODE_UFL_TEX_FILES) DOT_FOLDER=images-dot DOT_TEX_FILES=${patsubst %.dot,%.tex,$(wildcard $(DOT_FOLDER)/*.dot)} +GNUPLOT_FOLDER=images-gnuplot/ +GNUPLOT_FOLDER_GENERATOR_FILES=$(wildcard $(GNUPLOT_FOLDER)/*.gpi) $(wildcard $(GNUPLOT_FOLDER)/*.dat) 
+GNUPLOT_FOLDER_BUILD_STAMP=$(GNUPLOT_FOLDER)/build_stamp PDFLATEX_OUTPUT_LOG=pdflatex_output.log +REWRITE_GNUPLOT_EPSLATEX=\ +for epsfile in *.eps; do epstopdf $${epsfile} && rm $${epsfile}; done &&\ +ESCAPED_BUILD_DIR=$$(echo $(1) | sed -r 's/(\/)/\\\1/g') &&\ +for texfile in *.tex; do sed -i -r "s/includegraphics\{([^\/]*)\}/includegraphics\{$${ESCAPED_BUILD_DIR}\/\1\}/g" $${texfile}; done + all: presentation.pdf display: presentation.pdf @@ -29,7 +37,7 @@ display-4up: presentation-4up.pdf presentation-4up.pdf: presentation.pdf pdfnup --nup 2x2 --a4paper --scale 0.95 --frame true presentation.pdf -o presentation-4up.pdf -presentation.pdf: $(IMAGE_FILES) $(SVG_OUTPUTS) $(CODE_TEX_FILES) $(CODE_RAW_FILES) $(DOT_TEX_FILES) $(wildcard *.tex *.sty) pygments.sty +presentation.pdf: $(IMAGE_FILES) $(SVG_OUTPUTS) $(CODE_TEX_FILES) $(CODE_RAW_FILES) $(DOT_TEX_FILES) $(GNUPLOT_FOLDER_BUILD_STAMP) $(wildcard *.tex *.sty) pygments.sty pdflatex -draftmode presentation &&\ while(pdflatex presentation | tee $(PDFLATEX_OUTPUT_LOG) && grep "Rerun to get cross-references right" $(PDFLATEX_OUTPUT_LOG)); do true; done &&\ rm -f $(PDFLATEX_OUTPUT_LOG) @@ -41,10 +49,13 @@ clean: $(BASIS_FUNCTIONS_BUILD_STAMP) \ $(CODE_TEX_FILES) \ $(DOT_FOLDER)/*.tex \ + $(GNUPLOT_FOLDER_BUILD_STAMP) \ + $(GNUPLOT_FOLDER)/*.pdf \ + $(GNUPLOT_FOLDER)/*.tex \ pygments.sty upload: presentation.pdf - rsync -C --progress presentation.pdf shell3.doc.ic.ac.uk:~/public_html/psl_presentation_2013.pdf + rsync -C --progress presentation.pdf shell3.doc.ic.ac.uk:~/public_html/psl_presentation_201306.pdf %.pdf: %.svg inkscape -D -A $@ $< @@ -61,4 +72,10 @@ pygments.sty: %.tex: %.dot dot2tex --codeonly --usepdflatex -f tikz $< -o $@ +$(GNUPLOT_FOLDER_BUILD_STAMP): $(GNUPLOT_FOLDER_GENERATOR_FILES) + cd $(GNUPLOT_FOLDER) &&\ + for gpifile in *.gpi; do gnuplot $${gpifile}; done &&\ + ${call REWRITE_GNUPLOT_EPSLATEX, $(GNUPLOT_FOLDER)} + touch $@ + .PHONY: all display clean upload 4up display-4up diff --git 
a/images-gnuplot/midpoints.gpi b/images-gnuplot/midpoints.gpi new file mode 100644 index 0000000..0643caa --- /dev/null +++ b/images-gnuplot/midpoints.gpi @@ -0,0 +1,11 @@ +set terminal epslatex color 10 +set output "midpoints.eps" +set style line 1 lc rgb '#0060ad' lw 2 pt 7 pi -1 ps 1.5 +set xzeroaxis +unset xtics +unset ytics +set nokey +set xrange [0:2*pi] +set multiplot +plot sin(2*x) + sin(4*x) + cos(3*x) with linespoints lc rgb '#ff0000' pi 8 linewidth 2 pointtype 7 pointsize 2.0 pi 4 +plot sin(2*x) + sin(4*x) + cos(3*x) with linespoints lc rgb '#0060ad' pi 8 linewidth 2 pointtype 7 pointsize 2.0 pi 8 diff --git a/images-gnuplot/resampling-standalone-fftw-filtered.dat b/images-gnuplot/resampling-standalone-fftw-filtered.dat new file mode 100644 index 0000000..003a5a9 --- /dev/null +++ b/images-gnuplot/resampling-standalone-fftw-filtered.dat @@ -0,0 +1,23 @@ +#size naive padding-aware phase-shift +5 0.000019 0.000015 0.000016 +7 0.000025 0.000019 0.000018 +9 0.000098 0.000067 0.000037 +11 0.000199 0.000134 0.000074 +13 0.000315 0.000220 0.000123 +15 0.000445 0.000300 0.000163 +21 0.001396 0.000982 0.000669 +25 0.002426 0.001687 0.001142 +27 0.003370 0.002590 0.001565 +33 0.007219 0.005465 0.003311 +35 0.008007 0.006102 0.003976 +39 0.013129 0.009861 0.005793 +45 0.020602 0.015612 0.008428 +49 0.027783 0.021392 0.012545 +55 0.039221 0.030916 0.017545 +63 0.062115 0.048851 0.028067 +65 0.066311 0.052534 0.031252 +75 0.098265 0.078223 0.048038 +77 0.130923 0.100775 0.059835 +81 0.151019 0.114575 0.069888 +91 0.233345 0.166318 0.098290 +99 0.318596 0.221242 0.132848 diff --git a/images-gnuplot/resampling-standalone-fftw-filtered.gpi b/images-gnuplot/resampling-standalone-fftw-filtered.gpi new file mode 100644 index 0000000..ec05755 --- /dev/null +++ b/images-gnuplot/resampling-standalone-fftw-filtered.gpi @@ -0,0 +1,17 @@ +set terminal epslatex color + +set xlabel "Problem Size" +set ylabel "Time (seconds)" +set key left top box +set grid layerdefault linetype -1 
linecolor rgb "gray" linewidth 0.2 +set ytics nomirror +set xtics nomirror +set xrange [0:*] +set yrange [0:*] +set output "resampling-standalone-fftw-filtered.eps" + +f(x) = int(x) % 10 == 0 + +plot "resampling-standalone-fftw-filtered.dat" using 1:2 ti "naïve" with linespoints,\ + "resampling-standalone-fftw-filtered.dat" using 1:3 ti "padding-aware (ONETEP)" with linespoints,\ + "resampling-standalone-fftw-filtered.dat" using 1:4 ti "phase-shift" with linespoints diff --git a/images-gnuplot/resampling-standalone-fftw.dat b/images-gnuplot/resampling-standalone-fftw.dat new file mode 100644 index 0000000..f9b910d --- /dev/null +++ b/images-gnuplot/resampling-standalone-fftw.dat @@ -0,0 +1,49 @@ +#size naive padding-aware phase-shift +5 0.000019 0.000015 0.000016 +7 0.000025 0.000019 0.000018 +9 0.000098 0.000067 0.000037 +11 0.000199 0.000134 0.000074 +13 0.000315 0.000220 0.000123 +15 0.000445 0.000300 0.000163 +17 0.001370 0.000924 0.000612 +19 0.002010 0.001349 0.000908 +21 0.001396 0.000982 0.000669 +23 0.004080 0.002738 0.001901 +25 0.002426 0.001687 0.001142 +27 0.003370 0.002590 0.001565 +29 0.009758 0.006611 0.004529 +31 0.012635 0.008584 0.005890 +33 0.007219 0.005465 0.003311 +35 0.008007 0.006102 0.003976 +37 0.024749 0.017108 0.011843 +39 0.013129 0.009861 0.005793 +41 0.037512 0.026032 0.017544 +43 0.045216 0.031266 0.021066 +45 0.020602 0.015612 0.008428 +47 0.062967 0.043421 0.029291 +49 0.027783 0.021392 0.012545 +51 0.050410 0.036424 0.023319 +53 0.099004 0.067826 0.046351 +55 0.039221 0.030916 0.017545 +57 0.073912 0.052673 0.034185 +59 0.147541 0.100467 0.070029 +61 0.166863 0.113820 0.079292 +63 0.062115 0.048851 0.028067 +65 0.066311 0.052534 0.031252 +67 0.241163 0.161867 0.116694 +69 0.145022 0.102993 0.072324 +71 0.285200 0.195218 0.145291 +73 0.332839 0.223679 0.170274 +75 0.098265 0.078223 0.048038 +77 0.130923 0.100775 0.059835 +79 0.396457 0.260195 0.189941 +81 0.151019 0.114575 0.069888 +83 0.507240 0.349279 0.253765 +85 0.242405 
0.173575 0.117382 +87 0.331778 0.231736 0.161169 +89 0.592494 0.400258 0.296694 +91 0.233345 0.166318 0.098290 +93 0.428193 0.291792 0.202346 +95 0.348777 0.257993 0.173070 +97 0.725529 0.488956 0.359322 +99 0.318596 0.221242 0.132848 diff --git a/images-gnuplot/resampling-standalone-fftw.gpi b/images-gnuplot/resampling-standalone-fftw.gpi new file mode 100644 index 0000000..e180f71 --- /dev/null +++ b/images-gnuplot/resampling-standalone-fftw.gpi @@ -0,0 +1,14 @@ +set terminal epslatex color + +set xlabel "Problem Size" +set ylabel "Time (seconds)" +set key left top box +set grid layerdefault linetype -1 linecolor rgb "gray" linewidth 0.2 +set ytics nomirror +set xtics nomirror +set xrange [0:*] +set yrange [0:0.8] +set output "resampling-standalone-fftw.eps" +plot "resampling-standalone-fftw.dat" using 1:2 ti "naïve" with linespoints,\ + "resampling-standalone-fftw.dat" using 1:3 ti "padding-aware (ONETEP)" with linespoints,\ + "resampling-standalone-fftw.dat" using 1:4 ti "phase-shift" with linespoints diff --git a/images-svg/complex-formats.svg b/images-svg/complex-formats.svg new file mode 100644 index 0000000..9fcde29 --- /dev/null +++ b/images-svg/complex-formats.svg @@ -0,0 +1,251 @@ + + + + + + + + + + + + image/svg+xml + + + + + + + + + imag0 + + real0 + + + + imag1 + + real1 + + + + real1 + + real0 + + + + imag1 + + imag0 + + Split + Interleaved + + diff --git a/images-svg/interpolation-onetep.svg b/images-svg/interpolation-onetep.svg index cd5c3c4..a9e8d67 100644 --- a/images-svg/interpolation-onetep.svg +++ b/images-svg/interpolation-onetep.svg @@ -466,8 +466,15 @@ inkscape:vp_x="-179.45222 : 100.46105 : 0" inkscape:vp_y="0 : 500 : 0" inkscape:vp_z="178.10571 : 102.82938 : 0" - inkscape:persp3d-origin="109.3618 : 277.98199 : 1" + inkscape:persp3d-origin="-20.493673 : 297.16038 : 1" id="perspective3904-4-6-4-4-9" /> + @@ -505,7 +512,7 @@ image/svg+xml - + @@ -513,6 +520,65 @@ inkscape:label="Layer 1" inkscape:groupmode="layer" id="layer1"> + + + + + + + 
+ + + + @@ -1406,119 +1472,56 @@ inkscape:connector-curvature="0" /> + + + + + + id="g3102-8-5" + transform="translate(-273.5,208.00001)"> - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + Input Data + FrequencyRepresentation + + + + + + + + + + + PrecomputedRotation Values + + 1D FFT + Element-wise Multiplication + + + x + + + + + + + + + + + + + Shifted Frequency Representation + + + + + + + + + + + Midpoint Values + + Interleave + + + + + + + + + + + + + + + + + + + + + + Interpolated Data + + 1D Inverse-FFT + + diff --git a/images-svg/per-dimension-interpolation.svg b/images-svg/per-dimension-interpolation.svg new file mode 100644 index 0000000..b7eba34 --- /dev/null +++ b/images-svg/per-dimension-interpolation.svg @@ -0,0 +1,757 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Interpolatein Z + + Interpolatein Y + + Interpolatein Y + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Interpolatein X + + + Interpolatein X + + Interpolatein X + + Interpolatein X + InputData + + diff --git a/onetep-phase-shift-fftw.tex b/onetep-phase-shift-fftw.tex new file mode 100644 index 0000000..782cf42 --- /dev/null +++ b/onetep-phase-shift-fftw.tex @@ -0,0 +1,76 @@ +\newcommand{\oneteproutinesphaseshiftfftw}{ +\begin{tabularx}{\textwidth}{cXXX} +\hline +\bf{Test} & \bf{Original (s)} & \bf{Modified (s)} & \bf{Reduction} \\ +\hline +test01& 49.47& 35.27& 28.70\%\\ +test02& 61.51& 44.56& 27.56\%\\ +test03& 40.67& 29.66& 27.07\%\\ +test04& 90.99& 61.51& 32.40\%\\ +test05& 47.78& 34.46& 27.88\%\\ +test06& 3.73& 2.73& 26.81\%\\ +test07& 8.72& 6.46& 25.92\%\\ +test08& 93.33& 66.83& 28.39\%\\ +test09& 3.47& 2.35& 32.28\%\\ +test10& 361.81& 265.67& 26.57\%\\ +test11& 33.27& 23.04& 30.75\%\\ +test12& 57.65& 40.19& 30.29\%\\ +test13& 17.37& 
12.30& 29.19\%\\ +test14& 40.55& 29.74& 26.66\%\\ +test15& 26.05& 18.55& 28.79\%\\ +test16& 33.93& 25.67& 24.34\%\\ +test18& 107.61& 77.48& 28.00\%\\ +test19& 73.72& 52.60& 28.65\%\\ +test20& 95.72& 62.37& 34.84\%\\ +test21& 2.68& 2.13& 20.52\%\\ +test22& 53.47& 39.34& 26.43\%\\ +test23& 22.55& 15.40& 31.71\%\\ +test24& 22.81& 15.42& 32.40\%\\ +%test26& 3.49& 2.46& 29.51\%\\ +%test27& 74.33& 52.47& 29.41\%\\ +%test28& 23.34& 15.77& 32.43\%\\ +%test29& 29.75& 21.55& 27.56\%\\ +%test30& 14.52& 10.73& 26.10\%\\ +%test31& 33.06& 20.72& 37.33\%\\ +\hline +\end{tabularx} +} + +\newcommand{\onetepoverallphaseshiftfftw}{ +\begin{tabularx}{\textwidth}{cXXX} +\hline +\bf{Test} & \bf{Original (s)} & \bf{Modified (s)} & \bf{Reduction} \\ +\hline +test01& 131.43& 111.06& 15.49\%\\ +test02& 136.79& 118.22& 13.58\%\\ +test03& 619.04& 606.73& 1.99\%\\ +test04& 169.56& 139.72& 17.60\%\\ +test05& 107.47& 93.61& 12.90\%\\ +test06& 27.02& 26.35& 2.49\%\\ +test07& 49.99& 45.57& 8.85\%\\ +test08& 296.68& 247.57& 16.55\%\\ +test09& 15.82& 14.49& 8.44\%\\ +test10& 1126.06& 955.09& 15.18\%\\ +test11& 90.85& 77.61& 14.57\%\\ +test12& 154.00& 131.92& 14.34\%\\ +test13& 231.26& 223.75& 3.25\%\\ +test14& 272.05& 255.62& 6.04\%\\ +test15& 97.64& 83.10& 14.90\%\\ +test16& 98.63& 87.28& 11.51\%\\ +%test17& 55.70& 54.84& 1.55\%\\ +test18& 343.04& 294.95& 14.02\%\\ +test19& 180.72& 156.56& 13.37\%\\ +test20& 260.15& 221.94& 14.69\%\\ +test21& 11.45& 10.95& 4.35\%\\ +test22& 125.30& 109.90& 12.29\%\\ +test23& 46.18& 38.92& 15.72\%\\ +test24& 46.46& 38.85& 16.39\%\\ +%test26& 409.71& 407.21& 0.61\%\\ +%test27& 1700.92& 1662.08& 2.28\%\\ +%test28& 893.35& 886.74& 0.74\%\\ +%test29& 69.91& 61.32& 12.30\%\\ +%test30& 67.33& 62.03& 7.87\%\\ +%test31& 91.14& 73.04& 19.86\%\\ +\hline +\end{tabularx} +} diff --git a/presentation.tex b/presentation.tex index 34ca1ad..ee1e939 100644 --- a/presentation.tex +++ b/presentation.tex @@ -9,8 +9,10 @@ \usepackage{ucs} \usepackage{alltt} \usepackage[utf8x]{inputenc} 
+\usepackage{tabularx} \input{pygments.sty} +\input{onetep-phase-shift-fftw} \title[PSL Meeting]{PSL Meeting Presentation} \subtitle{Exploring Performance Optimisation Opportunities in ONETEP} @@ -86,7 +88,7 @@ dimension. \frame{ -\frametitle{Fourier Interpolation of FFT-boxes} +\frametitle{ONETEP's Approach to Interpolation} \small ONETEP performs transforms in each dimension so each FFT only operates on 50\% @@ -101,6 +103,173 @@ zeroes instead of 87.5\% of the naïve strategy. } +\frame{ + +\frametitle{Phase-Shift Approach to Interpolation} + +We have a signal discretely sampled at the blue points, but we want to know the +value at both the blue and red points. We can find these if we shift the signal by +half a sample. + +\centering +\resizebox{0.80\textwidth}{!}{ +\input{images-gnuplot/midpoints} +} + +} + +\frame{ + +\frametitle{Phase-Shift Interpolation in 1D} + +\begin{itemize} + +\small +\item We use a 1D FFT to compute the frequency representation, apply a phase shift, +then transform back to compute the values of the midpoints. + +\item We interleave the original data and midpoints to produce the interpolated + values. + +\item We never operate on zeros (except those in the original input). + +\end{itemize} + +\centering +\resizebox{0.60\textwidth}{!}{ +\includegraphics{images-svg/interpolation-phase-shift-1d} +} + +} + +\frame{ + +\frametitle{Phase-Shift Interpolation in 3D} + +\begin{itemize} + +\item Interpolation is done by dimension, with the most cache inefficient done + first. + +\item We interleave the original data and the 7 interpolated blocks as a final + step. + +\end{itemize} + +\centering +\resizebox{0.60\textwidth}{!}{ +\includegraphics{images-svg/per-dimension-interpolation} +} + +} + +\frame{ + +\frametitle{Performance Results with FFTW\footnote{Core i7-2600, 3.4GHz, 8MB L2 +cache, FFTW 3.3}} + +\footnotesize +Results are difficult to interpret because of FFTW's performance at different +problem sizes due to the factorisations it chooses. 
+ +\centering +\resizebox{0.85\textwidth}{!}{ +\input{images-gnuplot/resampling-standalone-fftw} +} + +} + +\frame{ + +\frametitle{Performance Results with FFTW\footnote{Core i7-2600, 3.4GHz, 8MB L2 +cache, FFTW 3.3}} + +\footnotesize +We can filter the results to those which FFTW likes best (products of small +primes). Specifically, sizes of the form $2^a3^b5^c7^d11^e13^f$ where $e+f<2$. + +\centering +\resizebox{0.85\textwidth}{!}{ +\input{images-gnuplot/resampling-standalone-fftw-filtered} +} + +} + +\frame +{ +\frametitle{Performance within ONETEP} + +\begin{itemize} + +\item In our standalone benchmarks, our results give around a 35\% reduction in + execution time over ONETEP's approach for FFTW's preferred sizes. + +\item In practice, we found the actual reduction to be a lot less and overall +reduction in execution time to usually be less than 5\%. + +\item When doing Fourier interpolation, ONETEP spends a lot of time + modifying data layouts before calling FFTW. + +\item Specifically, converting from a split to interleaved representation of + complex numbers. Sometimes this is due to two sets of real operands being + merged so interpolation can be done on both simultaneously. + +\vspace{1em} +\centering +\resizebox{0.8\textwidth}{!}{ +\includegraphics{images-svg/complex-formats} +} + + + +\end{itemize} + +} + +\frame +{ +\frametitle{Making the new interpolation work better with ONETEP} + +\begin{itemize} + +\item I discovered that FFTW provides a ``guru'' interface that makes it +possible to pass in data in the split format. + +\item I wrote a new implementation of the interpolation routines that could + accept and return data in this format without remarshalling. + +\item Also wrote a routine that returned the product of the real and complex + parts as part of the re-interleave step, avoiding the need for the calling + routine in ONETEP to have to iterate over the data again. 
+ +\end{itemize} +} + +\frame{ + +\frametitle{ONETEP Interpolation Routine Timings\footnote{Core i7-2600, 3.4GHz, +8MB L2 cache, FFTW 3.3}} + +\centering +\resizebox{0.65\textwidth}{!}{ +\oneteproutinesphaseshiftfftw +} + +} + +\frame{ + +\frametitle{ONETEP Total Execution Time Timings\footnote{Core i7-2600, 3.4GHz, +8MB L2 cache, FFTW 3.3}} + +\centering +\resizebox{0.65\textwidth}{!}{ +\onetepoverallphaseshiftfftw +} + +} + + \end{document} -- 2.47.3