diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ba67bd1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +trackingPaper.pdf +*.aux +*.log +*.bbl +*.blg +*.synctex.gz + diff --git a/sigproc.bib b/sigproc.bib index 6c9f504..fbbd6b8 100644 --- a/sigproc.bib +++ b/sigproc.bib @@ -485,6 +485,16 @@ year={2016} url="https://github.com/dataseries/DataSeries", } +@techreport{DataSeries, + Author = {Eric Anderson and Martin Arlitt and Morrey III, Charles B. and Alistair Veitch}, + Institution = {Hewlett-Packard}, + Month = "Sept.", + Number = {HPL-2009-323}, + Title = {DataSeries: An Efficient, Flexible Data Format for Structured Serial Data}, + Type = {Technical Report}, + Year = 2009} + + @misc{pandasPythonWebsite, title="pandas: Python Data Analysis Library", url="http://pandas.pydata.org/", @@ -623,10 +633,10 @@ year={2016} % publisher={USENIX Association} %} -@inproceedings{baker1991measurements, +@article{baker1991measurements, title={Measurements of a distributed file system}, author={Baker, Mary G and Hartman, John H and Kupfer, Michael D and Shirriff, Ken W and Ousterhout, John K}, - booktitle={ACM SIGOPS Operating Systems Review}, + journal={ACM SIGOPS Operating Systems Review}, volume={25}, number={5}, pages={198--212}, @@ -652,11 +662,13 @@ year={2016} year={1996} } -@book{ousterhout1985trace, +@article{ousterhout1985trace, title={A trace-driven analysis of the UNIX 4.2 BSD file system}, author={Ousterhout, John K and Da Costa, Herve and Harrison, David and Kunze, John A and Kupfer, Mike and Thompson, James G}, + journal={ACM SIGOPS Operating Systems Review}, volume={19}, number={5}, + month="Dec.", year={1985}, publisher={ACM} } diff --git a/trackingPaper.aux b/trackingPaper.aux deleted file mode 100644 index 41caab8..0000000 --- a/trackingPaper.aux +++ /dev/null @@ -1,175 +0,0 @@ -\relax -\citation{leung2008measurement} -\citation{PFRINGMan} -\citation{ousterhout1985trace} -\citation{ramakrishnan1992analysis} 
-\citation{baker1991measurements} -\citation{gribble1996self} -\citation{douceur1999large} -\citation{vogels1999file} -\citation{zhou1999analysis} -\citation{roselli2000comparison} -\citation{malkani2003passive} -\citation{agrawal2007five} -\citation{leung2008measurement} -\citation{vrable2009cumulus} -\citation{benson2010network} -\citation{chen2012interactive} -\citation{vogels1999file} -\citation{malkani2003passive} -\citation{seltzer2003nfs} -\citation{anderson2004buttress} -\citation{Orosz2013} -\citation{dabir2007bottleneck} -\citation{skopko2012loss} -\citation{traeger2008nine} -\citation{ruemmler1992unix} -\citation{roselli2000comparison} -\citation{traeger2008nine} -\citation{ruemmler1992unix} -\citation{douceur1999large} -\citation{malkani2003passive} -\citation{seltzer2003nfs} -\citation{leung2008measurement} -\citation{roselli2000comparison} -\citation{seltzer2003nfs} -\citation{leung2008measurement} -\citation{leung2008measurement} -\citation{vogels1999file} -\citation{roselli2000comparison} -\citation{seltzer2003nfs} -\citation{anderson2004buttress} -\@writefile{toc}{\contentsline {section}{\numberline {I}Introduction}{1}\protected@file@percent } -\citation{leung2008measurement} -\citation{leung2008measurement} -\citation{anderson2004buttress} -\citation{Orosz2013} -\citation{dabir2007bottleneck} -\citation{skopko2012loss} -\citation{leung2008measurement} -\citation{roselli2000comparison} -\citation{seltzer2003nfs} -\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {I-A}}Related Work}{2}\protected@file@percent } -\newlabel{Previous Advances Due to Testing}{{\unhbox \voidb@x \hbox {I-A}}{2}} -\@writefile{toc}{\contentsline {section}{\numberline {II}Background}{2}\protected@file@percent } -\citation{Orosz2013} -\citation{dabir2007bottleneck} -\citation{skopko2012loss} -\citation{anderson2004buttress} -\citation{PFRINGMan} -\citation{Orosz2013} -\@writefile{lot}{\contentsline {table}{\numberline {I}{\ignorespaces Summary of 
major file system studies over the past decades. For each study the tables shows the dates of the trace data, the file system or protocol studied, whether it involved network file systems, the trace methodology used, and the workloads studied. Dynamic trace studies are those that involve traces of live requests. Snapshot studies involve snapshots of file system contents.}}{3}\protected@file@percent } -\newlabel{tbl:studySummary}{{I}{3}} -\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Visualization of SMB Packet}}{3}\protected@file@percent } -\newlabel{fig:smbPacket}{{1}{3}} -\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {II-A}}Issues with Tracing}{3}\protected@file@percent } -\newlabel{Issues with Tracing}{{\unhbox \voidb@x \hbox {II-A}}{3}} -\@writefile{toc}{\contentsline {section}{\numberline {III}Packet Capturing System}{3}\protected@file@percent } -\citation{ntopWebsite} -\citation{pfringWebsite} -\citation{dataseriesGit} -\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {III-A}}UITS System Overview}{4}\protected@file@percent } -\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {III-B}}High-speed Packet Capture}{4}\protected@file@percent } -\newlabel{Capture}{{\unhbox \voidb@x \hbox {III-B}}{4}} -\newlabel{tbl:TraceSummaryTotal}{{IV}{4}} -\@writefile{lot}{\contentsline {table}{\numberline {II}{\ignorespaces Summary of Trace I/O Statistics for the time of April 30th, 2019 to May 20th, 2019}}{4}\protected@file@percent } -\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {III-C}}DataSeries Analysis}{4}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{\numberline {IV}Data Analysis}{4}\protected@file@percent } -\newlabel{sec:data-analysis}{{IV}{4}} -\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces \leavevmode {\color {red}Visualization of Packet Capturing 
System}}}{5}\protected@file@percent } -\newlabel{fig:captureTopology}{{2}{5}} -\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-A}}I/O Data Request Sizes}{5}\protected@file@percent } -\@writefile{lot}{\contentsline {table}{\numberline {III}{\ignorespaces Percentage of SMB and SMB2 Protocol Commands from April 30th, 2019 to May 20th, 2019. Breakdown of General Operations for SMB2}}{5}\protected@file@percent } -\newlabel{tbl:SMBCommands}{{III}{5}} -\@writefile{lot}{\contentsline {table}{\numberline {IV}{\ignorespaces \leavevmode {\color {red}Top 10 File Extensions Seen Over Three Week Period}}}{6}\protected@file@percent } -\newlabel{tab:top10SMB2FileExts}{{IV}{6}} -\@writefile{lot}{\contentsline {table}{\numberline {V}{\ignorespaces \leavevmode {\color {red}Common File Extensions Seen Over Three Week Period}}}{6}\protected@file@percent } -\newlabel{tab:commonSMB2FileExts}{{V}{6}} -\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces PDF of Bytes Transferred for Read I/O}}{6}\protected@file@percent } -\newlabel{fig:PDF-Bytes-Read}{{3}{6}} -\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces CDF of Bytes Transferred for Read I/O}}{6}\protected@file@percent } -\newlabel{fig:CDF-Bytes-Read}{{4}{6}} -\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces PDF of Bytes Transferred for Write I/O}}{6}\protected@file@percent } -\newlabel{fig:PDF-Bytes-Write}{{5}{6}} -\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces CDF of Bytes Transferred for Write I/O}}{6}\protected@file@percent } -\newlabel{fig:CDF-Bytes-Write}{{6}{6}} -\@writefile{lot}{\contentsline {table}{\numberline {VI}{\ignorespaces Percentage of transfer sizes for reads and writes}}{6}\protected@file@percent } -\newlabel{fig:transferSizes}{{VI}{6}} -\@writefile{lot}{\contentsline {table}{\numberline {VII}{\ignorespaces Summary of Trace Statistics: Average Response Time (RT) and Inter Arrival Time 
(IAT)}}{7}\protected@file@percent } -\newlabel{tbl:PercentageTraceSummary}{{VII}{7}} -\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-B}}I/O Response Times}{7}\protected@file@percent } -\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces CDF of Inter Arrival Time for General I/O}}{7}\protected@file@percent } -\newlabel{fig:CDF-IAT-General}{{7}{7}} -\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-C}}File Extensions}{7}\protected@file@percent } -\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-D}}Distribution Models}{7}\protected@file@percent } -\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces PDF of Inter Arrival Time for General I/O}}{8}\protected@file@percent } -\newlabel{fig:PDF-IAT-General}{{8}{8}} -\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces CDF of Inter Arrival Time for Read I/O}}{8}\protected@file@percent } -\newlabel{fig:CDF-IAT-Read}{{9}{8}} -\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces PDF of Inter Arrival Time for Read I/O}}{8}\protected@file@percent } -\newlabel{fig:PDF-IAT-Read}{{10}{8}} -\@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces CDF of Inter Arrival Time for Write I/O}}{8}\protected@file@percent } -\newlabel{fig:CDF-IAT-Write}{{11}{8}} -\@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces PDF of Inter Arrival Time for Write I/O}}{8}\protected@file@percent } -\newlabel{fig:PDF-IAT-Write}{{12}{8}} -\@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces CDF of Inter Arrival Time for Create I/O}}{8}\protected@file@percent } -\newlabel{fig:CDF-IAT-Create}{{13}{8}} -\@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces PDF of Inter Arrival Time for Create I/O}}{9}\protected@file@percent } -\newlabel{fig:PDF-IAT-Create}{{14}{9}} -\@writefile{lof}{\contentsline {figure}{\numberline 
{15}{\ignorespaces CDF of Response Time for General I/O}}{9}\protected@file@percent } -\newlabel{fig:CDF-RT-General}{{15}{9}} -\@writefile{lof}{\contentsline {figure}{\numberline {16}{\ignorespaces PDF of Response Time for General I/O}}{9}\protected@file@percent } -\newlabel{fig:PDF-RT-General}{{16}{9}} -\@writefile{lof}{\contentsline {figure}{\numberline {17}{\ignorespaces CDF of Response Time for Read I/O}}{9}\protected@file@percent } -\newlabel{fig:CDF-RT-Read}{{17}{9}} -\@writefile{lof}{\contentsline {figure}{\numberline {18}{\ignorespaces PDF of Response Time for Read I/O}}{9}\protected@file@percent } -\newlabel{fig:PDF-RT-Read}{{18}{9}} -\@writefile{lof}{\contentsline {figure}{\numberline {19}{\ignorespaces CDF of Return Time for Write IO}}{9}\protected@file@percent } -\newlabel{fig:CDF-RT-Write}{{19}{9}} -\@writefile{lof}{\contentsline {figure}{\numberline {20}{\ignorespaces PDF of Return Time for Write IO}}{9}\protected@file@percent } -\newlabel{fig:PDF-RT-Write}{{20}{9}} -\citation{Orosz2013} -\citation{dabir2007bottleneck} -\citation{skopko2012loss} -\citation{Orosz2013} -\citation{seltzer2003nfs} -\citation{anderson2004buttress} -\@writefile{lof}{\contentsline {figure}{\numberline {21}{\ignorespaces CDF of Response Time for Create I/O}}{10}\protected@file@percent } -\newlabel{fig:CDF-RT-Create}{{21}{10}} -\@writefile{lof}{\contentsline {figure}{\numberline {22}{\ignorespaces PDF of Response Time for Create I/O}}{10}\protected@file@percent } -\newlabel{fig:PDF-RT-Create}{{22}{10}} -\@writefile{lot}{\contentsline {table}{\numberline {VIII}{\ignorespaces Comparison of $\mu $, $\sigma $, $k$, and $\lambda $ Values for Curve Fitting Equations on CDF Graphs}}{10}\protected@file@percent } -\newlabel{tbl:curveFitting}{{VIII}{10}} -\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-E}}System Limitations and Challenges}{10}\protected@file@percent } -\newlabel{System Limitations and Challenges}{{\unhbox \voidb@x \hbox {IV-E}}{10}} 
-\bibstyle{IEEEtran} -\bibdata{sigproc} -\bibcite{leung2008measurement}{1} -\bibcite{PFRINGMan}{2} -\bibcite{ousterhout1985trace}{3} -\bibcite{ramakrishnan1992analysis}{4} -\bibcite{baker1991measurements}{5} -\bibcite{gribble1996self}{6} -\bibcite{douceur1999large}{7} -\bibcite{vogels1999file}{8} -\bibcite{zhou1999analysis}{9} -\bibcite{roselli2000comparison}{10} -\bibcite{malkani2003passive}{11} -\bibcite{agrawal2007five}{12} -\bibcite{vrable2009cumulus}{13} -\bibcite{benson2010network}{14} -\bibcite{chen2012interactive}{15} -\bibcite{seltzer2003nfs}{16} -\bibcite{anderson2004buttress}{17} -\bibcite{Orosz2013}{18} -\bibcite{dabir2007bottleneck}{19} -\bibcite{skopko2012loss}{20} -\bibcite{traeger2008nine}{21} -\bibcite{ruemmler1992unix}{22} -\bibcite{ntopWebsite}{23} -\@writefile{toc}{\contentsline {section}{\numberline {V}Conclusions and Future Work}{11}\protected@file@percent } -\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {V-A}}Future Work}{11}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{References}{11}\protected@file@percent } -\bibcite{pfringWebsite}{24} -\bibcite{dataseriesGit}{25} -\bibcite{pandasPythonWebsite}{26} diff --git a/trackingPaper.bbl b/trackingPaper.bbl deleted file mode 100644 index 8abb6ef..0000000 --- a/trackingPaper.bbl +++ /dev/null @@ -1,161 +0,0 @@ -% Generated by IEEEtran.bst, version: 1.14 (2015/08/26) -\begin{thebibliography}{10} -\providecommand{\url}[1]{#1} -\csname url@samestyle\endcsname -\providecommand{\newblock}{\relax} -\providecommand{\bibinfo}[2]{#2} -\providecommand{\BIBentrySTDinterwordspacing}{\spaceskip=0pt\relax} -\providecommand{\BIBentryALTinterwordstretchfactor}{4} -\providecommand{\BIBentryALTinterwordspacing}{\spaceskip=\fontdimen2\font plus -\BIBentryALTinterwordstretchfactor\fontdimen3\font minus - \fontdimen4\font\relax} -\providecommand{\BIBforeignlanguage}[2]{{% -\expandafter\ifx\csname l@#1\endcsname\relax -\typeout{** WARNING: IEEEtran.bst: No 
hyphenation pattern has been}% -\typeout{** loaded for the language `#1'. Using the pattern for}% -\typeout{** the default language instead.}% -\else -\language=\csname l@#1\endcsname -\fi -#2}} -\providecommand{\BIBdecl}{\relax} -\BIBdecl - -\bibitem{leung2008measurement} -A.~W. Leung, S.~Pasupathy, G.~R. Goodson, and E.~L. Miller, ``Measurement and - analysis of large-scale network file system workloads,'' in \emph{USENIX - Annual Technical Conference}, 2008, pp. 213--226. - -\bibitem{PFRINGMan} -\BIBentryALTinterwordspacing -``{PF\_RING} user guide.'' [Online]. Available: - \url{https://svn.ntop.org/svn/ntop/trunk/PF_RING/doc/UsersGuide.pdf} -\BIBentrySTDinterwordspacing - -\bibitem{ousterhout1985trace} -J.~K. Ousterhout, H.~Da~Costa, D.~Harrison, J.~A. Kunze, M.~Kupfer, and J.~G. - Thompson, \emph{A trace-driven analysis of the UNIX 4.2 BSD file - system}.\hskip 1em plus 0.5em minus 0.4em\relax ACM, 1985, vol.~19, no.~5. - -\bibitem{ramakrishnan1992analysis} -K.~Ramakrishnan, P.~Biswas, and R.~Karedla, ``Analysis of file i/o traces in - commercial computing environments,'' in \emph{ACM SIGMETRICS Performance - Evaluation Review}, vol.~20, no.~1.\hskip 1em plus 0.5em minus 0.4em\relax - ACM, 1992, pp. 78--90. - -\bibitem{baker1991measurements} -M.~G. Baker, J.~H. Hartman, M.~D. Kupfer, K.~W. Shirriff, and J.~K. Ousterhout, - ``Measurements of a distributed file system,'' in \emph{ACM SIGOPS Operating - Systems Review}, vol.~25, no.~5.\hskip 1em plus 0.5em minus 0.4em\relax ACM, - 1991, pp. 198--212. - -\bibitem{gribble1996self} -S.~D. Gribble, G.~S. Manku, and E.~A. Brewer, ``Self-similarity in - file-systems: Measurements and applications,'' \emph{Unpublished Paper, - Department of Computer Science, University of California, Berkeley}, 1996. - -\bibitem{douceur1999large} -J.~R. Douceur and W.~J. Bolosky, ``A large-scale study of file-system - contents,'' \emph{ACM SIGMETRICS Performance Evaluation Review}, vol.~27, - no.~1, pp. 59--70, 1999. 
- -\bibitem{vogels1999file} -W.~Vogels, ``File system usage in {Windows NT 4.0},'' in \emph{ACM SIGOPS - Operating Systems Review}, vol.~33, no.~5.\hskip 1em plus 0.5em minus - 0.4em\relax ACM, 1999, pp. 93--109. - -\bibitem{zhou1999analysis} -M.~Zhou and A.~J. Smith, ``Analysis of personal computer workloads,'' in - \emph{Modeling, Analysis and Simulation of Computer and Telecommunication - Systems, 1999. Proceedings. 7th International Symposium on}.\hskip 1em plus - 0.5em minus 0.4em\relax IEEE, 1999, pp. 208--217. - -\bibitem{roselli2000comparison} -D.~S. Roselli, J.~R. Lorch, and T.~E. Anderson, ``A comparison of file system - workloads.'' in \emph{USENIX Annual Technical Conference}, 2000, pp. 41--54. - -\bibitem{malkani2003passive} -P.~Malkani, D.~Ellard, J.~Ledlie, and M.~Seltzer, ``Passive {NFS} tracing of - email and research workloads,'' 2003. - -\bibitem{agrawal2007five} -N.~Agrawal, W.~J. Bolosky, J.~R. Douceur, and J.~R. Lorch, ``A five-year study - of file-system metadata,'' \emph{ACM Transactions on Storage (TOS)}, vol.~3, - no.~3, p.~9, 2007. - -\bibitem{vrable2009cumulus} -M.~Vrable, S.~Savage, and G.~M. Voelker, ``Cumulus: Filesystem backup to the - cloud,'' \emph{ACM Transactions on Storage (TOS)}, vol.~5, no.~4, p.~14, - 2009. - -\bibitem{benson2010network} -T.~Benson, A.~Akella, and D.~A. Maltz, ``Network traffic characteristics of - data centers in the wild,'' in \emph{Proceedings of the 10th ACM SIGCOMM - conference on Internet measurement}.\hskip 1em plus 0.5em minus 0.4em\relax - ACM, 2010, pp. 267--280. - -\bibitem{chen2012interactive} -Y.~Chen, S.~Alspaugh, and R.~Katz, ``Interactive analytical processing in big - data systems: A cross-industry study of mapreduce workloads,'' - \emph{Proceedings of the VLDB Endowment}, vol.~5, no.~12, pp. 1802--1813, - 2012. - -\bibitem{seltzer2003nfs} -M.~Seltzer and D.~Ellard, ``{NFS} tricks and benchmarking traps,'' 2003. 
- -\bibitem{anderson2004buttress} -E.~Anderson, M.~Kallahalla, M.~Uysal, and R.~Swaminathan, ``Buttress: A toolkit - for flexible and high fidelity {I/O} benchmarking,'' in \emph{Proceedings of - the 3rd USENIX Conference on File and Storage Technologies}.\hskip 1em plus - 0.5em minus 0.4em\relax USENIX Association, 2004, pp. 4--4. - -\bibitem{Orosz2013} -P.~Orosz and T.~Skopko, ``Multi-threaded packet timestamping for end-to-end - {QoS} evaluation,'' in \emph{ICSNC 2013, The Eighth International Conference - on Systems and Networks Communications}, 2013. - -\bibitem{dabir2007bottleneck} -A.~Dabir and A.~Matrawy, ``Bottleneck analysis of traffic monitoring using - {Wireshark},'' in \emph{Innovations in Information Technology, 2007. IIT'07. - 4th International Conference on}.\hskip 1em plus 0.5em minus 0.4em\relax - IEEE, 2007, pp. 158--162. - -\bibitem{skopko2012loss} -T.~Skopk{\'o}, ``Loss analysis of the software-based packet capturing,'' - \emph{Carpathian Journal of Electronic and Computer Engineering}, vol.~5, p. - 107, 2012. - -\bibitem{traeger2008nine} -A.~Traeger, E.~Zadok, N.~Joukov, and C.~P. Wright, ``A nine year study of file - system and storage benchmarking,'' \emph{ACM Transactions on Storage (TOS)}, - vol.~4, no.~2, p.~5, 2008. - -\bibitem{ruemmler1992unix} -C.~Ruemmler and J.~Wilkes, \emph{UNIX disk access patterns}.\hskip 1em plus - 0.5em minus 0.4em\relax Hewlett-Packard Laboratories, 1992. - -\bibitem{ntopWebsite} -\BIBentryALTinterwordspacing -``ntop - high performance network monitoring solutions.'' [Online]. Available: - \url{http://www.ntop.org/} -\BIBentrySTDinterwordspacing - -\bibitem{pfringWebsite} -\BIBentryALTinterwordspacing -``{PF\_RING} - high-speed packet capture, filtering and analysis.'' [Online]. - Available: \url{http://www.ntop.org/products/packet-capture/pf_ring/} -\BIBentrySTDinterwordspacing - -\bibitem{dataseriesGit} -\BIBentryALTinterwordspacing -``Dataseries: structured serial data library.'' [Online]. 
Available: - \url{https://github.com/dataseries/DataSeries} -\BIBentrySTDinterwordspacing - -\bibitem{pandasPythonWebsite} -\BIBentryALTinterwordspacing -``pandas: Python data analysis library.'' [Online]. Available: - \url{http://pandas.pydata.org/} -\BIBentrySTDinterwordspacing - -\end{thebibliography} diff --git a/trackingPaper.blg b/trackingPaper.blg deleted file mode 100644 index f115676..0000000 --- a/trackingPaper.blg +++ /dev/null @@ -1,75 +0,0 @@ -This is BibTeX, Version 0.99d (TeX Live 2017/Arch Linux) -Capacity: max_strings=100000, hash_size=100000, hash_prime=85009 -The top-level auxiliary file: trackingPaper.aux -The style file: IEEEtran.bst -Reallocated singl_function (elt_size=8) to 100 items from 50. -Reallocated singl_function (elt_size=8) to 100 items from 50. -Reallocated singl_function (elt_size=8) to 100 items from 50. -Reallocated wiz_functions (elt_size=8) to 6000 items from 3000. -Reallocated singl_function (elt_size=8) to 100 items from 50. -Database file #1: sigproc.bib -Repeated entry---line 619 of file sigproc.bib - : %@article{malkani2003passive - : , -I'm skipping whatever remains of this entry -Repeated entry---line 637 of file sigproc.bib - : %@article{douceur1999large - : , -I'm skipping whatever remains of this entry -Repeated entry---line 675 of file sigproc.bib - : %@inproceedings{roselli2000comparison - : , -I'm skipping whatever remains of this entry -Repeated entry---line 683 of file sigproc.bib - : %@inproceedings{vogels1999file - : , -I'm skipping whatever remains of this entry --- IEEEtran.bst version 1.14 (2015/08/26) by Michael Shell. --- http://www.michaelshell.org/tex/ieeetran/bibtex/ --- See the "IEEEtran_bst_HOWTO.pdf" manual for usage information. -Warning--empty journal in malkani2003passive -Warning--empty journal in seltzer2003nfs - -Done. 
-You've used 26 entries, - 4087 wiz_defined-function locations, - 956 strings with 11483 characters, -and the built_in function-call counts, 16473 in all, are: -= -- 1276 -> -- 355 -< -- 113 -+ -- 188 -- -- 63 -* -- 773 -:= -- 2495 -add.period$ -- 59 -call.type$ -- 26 -change.case$ -- 24 -chr.to.int$ -- 237 -cite$ -- 28 -duplicate$ -- 1248 -empty$ -- 1467 -format.name$ -- 82 -if$ -- 3843 -int.to.chr$ -- 0 -int.to.str$ -- 26 -missing$ -- 231 -newline$ -- 111 -num.names$ -- 21 -pop$ -- 593 -preamble$ -- 1 -purify$ -- 0 -quote$ -- 2 -skip$ -- 1278 -stack$ -- 0 -substring$ -- 590 -swap$ -- 919 -text.length$ -- 34 -text.prefix$ -- 0 -top$ -- 5 -type$ -- 26 -warning$ -- 2 -while$ -- 69 -width$ -- 28 -write$ -- 260 -(There were 4 error messages) diff --git a/trackingPaper.log b/trackingPaper.log deleted file mode 100644 index 245ae61..0000000 --- a/trackingPaper.log +++ /dev/null @@ -1,752 +0,0 @@ -This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Arch Linux) (preloaded format=pdflatex 2019.8.27) 21 DEC 2019 10:10 -entering extended mode - restricted \write18 enabled. - %&-line parsing enabled. -**trackingPaper.tex -(./trackingPaper.tex -LaTeX2e <2018-12-01> -(/usr/share/texmf-dist/tex/latex/IEEEtran/IEEEtran.cls -Document Class: IEEEtran 2015/08/26 V1.8b by Michael Shell --- See the "IEEEtran_HOWTO" manual for usage information. --- http://www.michaelshell.org/tex/ieeetran/ -\@IEEEtrantmpdimenA=\dimen102 -\@IEEEtrantmpdimenB=\dimen103 -\@IEEEtrantmpdimenC=\dimen104 -\@IEEEtrantmpcountA=\count80 -\@IEEEtrantmpcountB=\count81 -\@IEEEtrantmpcountC=\count82 -\@IEEEtrantmptoksA=\toks14 -LaTeX Font Info: Try loading font information for OT1+ptm on input line 503. - -(/usr/share/texmf-dist/tex/latex/psnfss/ot1ptm.fd -File: ot1ptm.fd 2001/06/04 font definitions for OT1/ptm. -) --- Using 8.5in x 11in (letter) paper. --- Using PDF output. -\@IEEEnormalsizeunitybaselineskip=\dimen105 --- This is a 10 point document. 
-\CLASSINFOnormalsizebaselineskip=\dimen106 -\CLASSINFOnormalsizeunitybaselineskip=\dimen107 -\IEEEnormaljot=\dimen108 -LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <5> not available -(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. -LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <5> not available -(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. - -LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <7> not available -(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. -LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <7> not available -(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. - -LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <8> not available -(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. -LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <8> not available -(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. - -LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <9> not available -(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. -LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <9> not available -(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. - -LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <10> not available -(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. -LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <10> not available -(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. - -LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <11> not available -(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. -LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <11> not available -(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. - -LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <12> not available -(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. 
-LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <12> not available -(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. - -LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <17> not available -(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. -LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <17> not available -(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. - -LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <20> not available -(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. -LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <20> not available -(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. - -LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <24> not available -(Font) Font shape `OT1/ptm/b/n' tried instead on input line 1090. -LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <24> not available -(Font) Font shape `OT1/ptm/b/it' tried instead on input line 1090. - -\IEEEquantizedlength=\dimen109 -\IEEEquantizedlengthdiff=\dimen110 -\IEEEquantizedtextheightdiff=\dimen111 -\IEEEilabelindentA=\dimen112 -\IEEEilabelindentB=\dimen113 -\IEEEilabelindent=\dimen114 -\IEEEelabelindent=\dimen115 -\IEEEdlabelindent=\dimen116 -\IEEElabelindent=\dimen117 -\IEEEiednormlabelsep=\dimen118 -\IEEEiedmathlabelsep=\dimen119 -\IEEEiedtopsep=\skip41 -\c@section=\count83 -\c@subsection=\count84 -\c@subsubsection=\count85 -\c@paragraph=\count86 -\c@IEEEsubequation=\count87 -\abovecaptionskip=\skip42 -\belowcaptionskip=\skip43 -\c@figure=\count88 -\c@table=\count89 -\@IEEEeqnnumcols=\count90 -\@IEEEeqncolcnt=\count91 -\@IEEEsubeqnnumrollback=\count92 -\@IEEEquantizeheightA=\dimen120 -\@IEEEquantizeheightB=\dimen121 -\@IEEEquantizeheightC=\dimen122 -\@IEEEquantizeprevdepth=\dimen123 -\@IEEEquantizemultiple=\count93 -\@IEEEquantizeboxA=\box27 -\@IEEEtmpitemindent=\dimen124 -\IEEEPARstartletwidth=\dimen125 -\c@IEEEbiography=\count94 -\@IEEEtranrubishbin=\box28 -) 
(/usr/share/texmf-dist/tex/latex/listings/listings.sty -(/usr/share/texmf-dist/tex/latex/graphics/keyval.sty -Package: keyval 2014/10/28 v1.15 key=value parser (DPC) -\KV@toks@=\toks15 -) -\lst@mode=\count95 -\lst@gtempboxa=\box29 -\lst@token=\toks16 -\lst@length=\count96 -\lst@currlwidth=\dimen126 -\lst@column=\count97 -\lst@pos=\count98 -\lst@lostspace=\dimen127 -\lst@width=\dimen128 -\lst@newlines=\count99 -\lst@lineno=\count100 -\lst@maxwidth=\dimen129 - -(/usr/share/texmf-dist/tex/latex/listings/lstmisc.sty -File: lstmisc.sty 2019/02/27 1.8b (Carsten Heinz) -\c@lstnumber=\count101 -\lst@skipnumbers=\count102 -\lst@framebox=\box30 -) -(/usr/share/texmf-dist/tex/latex/listings/listings.cfg -File: listings.cfg 2019/02/27 1.8b listings configuration -)) -Package: listings 2019/02/27 1.8b (Carsten Heinz) - -(/usr/share/texmf-dist/tex/latex/graphics/color.sty -Package: color 2016/07/10 v1.1e Standard LaTeX Color (DPC) - -(/usr/share/texmf-dist/tex/latex/graphics-cfg/color.cfg -File: color.cfg 2016/01/02 v1.6 sample color configuration -) -Package color Info: Driver file: pdftex.def on input line 147. - -(/usr/share/texmf-dist/tex/latex/graphics-def/pdftex.def -File: pdftex.def 2018/01/08 v1.0l Graphics/color driver for pdftex -)) -(/usr/share/texmf-dist/tex/latex/preprint/balance.sty -Package: balance 1999/02/23 4.3 (PWD) -\oldvsize=\dimen130 -) -(/usr/share/texmf-dist/tex/latex/graphics/graphicx.sty -Package: graphicx 2017/06/01 v1.1a Enhanced LaTeX Graphics (DPC,SPQR) - -(/usr/share/texmf-dist/tex/latex/graphics/graphics.sty -Package: graphics 2017/06/25 v1.2c Standard LaTeX Graphics (DPC,SPQR) - -(/usr/share/texmf-dist/tex/latex/graphics/trig.sty -Package: trig 2016/01/03 v1.10 sin cos tan (DPC) -) -(/usr/share/texmf-dist/tex/latex/graphics-cfg/graphics.cfg -File: graphics.cfg 2016/06/04 v1.11 sample graphics configuration -) -Package graphics Info: Driver file: pdftex.def on input line 99. 
-) -\Gin@req@height=\dimen131 -\Gin@req@width=\dimen132 -) -(/usr/share/texmf-dist/tex/latex/url/url.sty -\Urlmuskip=\muskip10 -Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc. -) -(/usr/share/texmf-dist/tex/latex/tools/tabularx.sty -Package: tabularx 2016/02/03 v2.11b `tabularx' package (DPC) - -(/usr/share/texmf-dist/tex/latex/tools/array.sty -Package: array 2018/12/30 v2.4k Tabular extension package (FMi) -\col@sep=\dimen133 -\ar@mcellbox=\box31 -\extrarowheight=\dimen134 -\NC@list=\toks17 -\extratabsurround=\skip44 -\backup@length=\skip45 -\ar@cellbox=\box32 -) -\TX@col@width=\dimen135 -\TX@old@table=\dimen136 -\TX@old@col=\dimen137 -\TX@target=\dimen138 -\TX@delta=\dimen139 -\TX@cols=\count103 -\TX@ftn=\toks18 -) -(/usr/share/texmf-dist/tex/latex/booktabs/booktabs.sty -Package: booktabs 2016/04/27 v1.618033 publication quality tables -\heavyrulewidth=\dimen140 -\lightrulewidth=\dimen141 -\cmidrulewidth=\dimen142 -\belowrulesep=\dimen143 -\belowbottomsep=\dimen144 -\aboverulesep=\dimen145 -\abovetopsep=\dimen146 -\cmidrulesep=\dimen147 -\cmidrulekern=\dimen148 -\defaultaddspace=\dimen149 -\@cmidla=\count104 -\@cmidlb=\count105 -\@aboverulesep=\dimen150 -\@belowrulesep=\dimen151 -\@thisruleclass=\count106 -\@lastruleclass=\count107 -\@thisrulewidth=\dimen152 -) -(/usr/share/texmf-dist/tex/latex/multirow/multirow.sty -Package: multirow 2019/01/01 v2.4 Span multiple rows of a table -\multirow@colwidth=\skip46 -\multirow@cntb=\count108 -\multirow@dima=\skip47 -\bigstrutjot=\dimen153 -) -(/usr/share/texmf-dist/tex/generic/ulem/ulem.sty -\UL@box=\box33 -\UL@hyphenbox=\box34 -\UL@skip=\skip48 -\UL@hook=\toks19 -\UL@height=\dimen154 -\UL@pe=\count109 -\UL@pixel=\dimen155 -\ULC@box=\box35 -Package: ulem 2012/05/18 -\ULdepth=\dimen156 -) (./trackingPaper.aux) -\openout1 = `trackingPaper.aux'. - -LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 112. -LaTeX Font Info: ... okay on input line 112. 
-LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 112. -LaTeX Font Info: ... okay on input line 112. -LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 112. -LaTeX Font Info: ... okay on input line 112. -LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 112. -LaTeX Font Info: ... okay on input line 112. -LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 112. -LaTeX Font Info: ... okay on input line 112. -LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 112. -LaTeX Font Info: ... okay on input line 112. - --- Lines per column: 56 (exact). -\c@lstlisting=\count110 -(/usr/share/texmf-dist/tex/context/base/mkii/supp-pdf.mkii -[Loading MPS to PDF converter (version 2006.09.02).] -\scratchcounter=\count111 -\scratchdimen=\dimen157 -\scratchbox=\box36 -\nofMPsegments=\count112 -\nofMParguments=\count113 -\everyMPshowfont=\toks20 -\MPscratchCnt=\count114 -\MPscratchDim=\dimen158 -\MPnumerator=\count115 -\makeMPintoPDFobject=\count116 -\everyMPtoPDFconversion=\toks21 -) (/usr/share/texmf-dist/tex/latex/oberdiek/epstopdf-base.sty -Package: epstopdf-base 2016/05/15 v2.6 Base part for package epstopdf - -(/usr/share/texmf-dist/tex/generic/oberdiek/infwarerr.sty -Package: infwarerr 2016/05/16 v1.4 Providing info/warning/error messages (HO) -) -(/usr/share/texmf-dist/tex/latex/oberdiek/grfext.sty -Package: grfext 2016/05/16 v1.2 Manage graphics extensions (HO) - -(/usr/share/texmf-dist/tex/generic/oberdiek/kvdefinekeys.sty -Package: kvdefinekeys 2016/05/16 v1.4 Define keys (HO) - -(/usr/share/texmf-dist/tex/generic/oberdiek/ltxcmds.sty -Package: ltxcmds 2016/05/16 v1.23 LaTeX kernel commands for general use (HO) -))) -(/usr/share/texmf-dist/tex/latex/oberdiek/kvoptions.sty -Package: kvoptions 2016/05/16 v3.12 Key value format for package options (HO) - -(/usr/share/texmf-dist/tex/generic/oberdiek/kvsetkeys.sty -Package: kvsetkeys 2016/05/16 v1.17 Key value parser (HO) - 
-(/usr/share/texmf-dist/tex/generic/oberdiek/etexcmds.sty -Package: etexcmds 2016/05/16 v1.6 Avoid name clashes with e-TeX commands (HO) - -(/usr/share/texmf-dist/tex/generic/oberdiek/ifluatex.sty -Package: ifluatex 2016/05/16 v1.4 Provides the ifluatex switch (HO) -Package ifluatex Info: LuaTeX not detected. -)))) -(/usr/share/texmf-dist/tex/generic/oberdiek/pdftexcmds.sty -Package: pdftexcmds 2018/09/10 v0.29 Utility functions of pdfTeX for LuaTeX (HO -) - -(/usr/share/texmf-dist/tex/generic/oberdiek/ifpdf.sty -Package: ifpdf 2018/09/07 v3.3 Provides the ifpdf switch -) -Package pdftexcmds Info: LuaTeX not detected. -Package pdftexcmds Info: \pdf@primitive is available. -Package pdftexcmds Info: \pdf@ifprimitive is available. -Package pdftexcmds Info: \pdfdraftmode found. -) -Package epstopdf-base Info: Redefining graphics rule for `.eps' on input line 4 -38. -Package grfext Info: Graphics extension search list: -(grfext) [.pdf,.png,.jpg,.mps,.jpeg,.jbig2,.jb2,.PDF,.PNG,.JPG,.JPE -G,.JBIG2,.JB2,.eps] -(grfext) \AppendGraphicsExtensions on input line 456. - -(/usr/share/texmf-dist/tex/latex/latexconfig/epstopdf-sys.cfg -File: epstopdf-sys.cfg 2010/07/13 v1.3 Configuration of (r)epstopdf for TeX Liv -e -)) - -LaTeX Warning: No \author given. - - -LaTeX Warning: No \author given. - - -LaTeX Warning: No \author given. - - -LaTeX Warning: No positions in optional float specifier. - Default added (so using `tbp') on input line 194. - -LaTeX Font Info: External font `cmex10' loaded for size -(Font) <8> on input line 196. -LaTeX Font Info: External font `cmex10' loaded for size -(Font) <6> on input line 196. -LaTeX Font Info: External font `cmex10' loaded for size -(Font) <5> on input line 196. 
- -Underfull \hbox (badness 10000) in paragraph at lines 193--224 - - [] - - -Underfull \vbox (badness 10000) has occurred while \output is active [] - - [1{/home/duncan/.texlive/texmf-var/fonts/map/pdftex/updmap/pdftex.map} - - -] -Underfull \hbox (badness 1603) in paragraph at lines 241--243 -\OT1/ptm/m/n/10 il-lu-mi-na-tion of these un-der-ly-ing ac-tions (e.g. read-wri -te - [] - - -Underfull \hbox (badness 10000) in paragraph at lines 241--243 - - [] - -[2] -LaTeX Font Info: External font `cmex10' loaded for size -(Font) <7> on input line 257. -<./images/smbPacket.jpg, id=13, 684.5575pt x 337.26pt> -File: ./images/smbPacket.jpg Graphic file (type jpg) - -Package pdftex.def Info: ./images/smbPacket.jpg used on input line 263. -(pdftex.def) Requested size: 258.0pt x 127.1103pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 263--264 -[][] - [] - -[3 <./images/smbPacket.jpg>] -LaTeX Font Info: Try loading font information for OT1+pcr on input line 299. - - (/usr/share/texmf-dist/tex/latex/psnfss/ot1pcr.fd -File: ot1pcr.fd 2001/06/04 font definitions for OT1/pcr. -) -<./images/packetcapturetopology.png, id=18, 3769.08125pt x 1547.7825pt> -File: ./images/packetcapturetopology.png Graphic file (type png) - -Package pdftex.def Info: ./images/packetcapturetopology.png used on input line - 308. -(pdftex.def) Requested size: 516.0pt x 211.89377pt. - -LaTeX Warning: No positions in optional float specifier. - Default added (so using `tbp') on input line 327. - -[4] -Overfull \hbox (22.75874pt too wide) in paragraph at lines 375--413 - [][] - [] - - -LaTeX Warning: No positions in optional float specifier. - Default added (so using `tbp') on input line 417. - -LaTeX Font Info: Try loading font information for OML+ptm on input line 427. - -(/usr/share/texmf-dist/tex/latex/psnfss/omlptm.fd -File: omlptm.fd -) -LaTeX Font Info: Font shape `OML/ptm/m/n' in size <8> not available -(Font) Font shape `OML/cmm/m/it' tried instead on input line 427. 
- - -LaTeX Warning: No positions in optional float specifier. - Default added (so using `tbp') on input line 437. - -<./images/smb_read_bytes_pdf.pdf, id=26, 361.35pt x 216.81pt> -File: ./images/smb_read_bytes_pdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_read_bytes_pdf.pdf used on input line 48 -7. -(pdftex.def) Requested size: 258.0pt x 154.79962pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 487--488 -[][] - [] - -<./images/smb_read_bytes_cdf.pdf, id=27, 361.35pt x 216.81pt> -File: ./images/smb_read_bytes_cdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_read_bytes_cdf.pdf used on input line 49 -3. -(pdftex.def) Requested size: 258.0pt x 154.79962pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 493--494 -[][] - [] - -<./images/smb_write_bytes_pdf.pdf, id=28, 361.35pt x 216.81pt> -File: ./images/smb_write_bytes_pdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_write_bytes_pdf.pdf used on input line 4 -99. -(pdftex.def) Requested size: 258.0pt x 154.79962pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 499--500 -[][] - [] - -<./images/smb_write_bytes_cdf.pdf, id=29, 361.35pt x 216.81pt> -File: ./images/smb_write_bytes_cdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_write_bytes_cdf.pdf used on input line 5 -05. -(pdftex.def) Requested size: 258.0pt x 154.79962pt. 
- -Overfull \hbox (6.0pt too wide) in paragraph at lines 505--506 -[][] - [] - -[5 <./images/packetcapturetopology.png>] [6 <./images/smb_read_bytes_pdf.pdf> < -./images/smb_read_bytes_cdf.pdf - -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_read_bytes_cdf.pdf): PDF i -nclusion: multiple pdfs with page group included in a single page -> <./images/smb_write_bytes_pdf.pdf - -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_write_bytes_pdf.pdf): PDF -inclusion: multiple pdfs with page group included in a single page -> <./images/smb_write_bytes_cdf.pdf - -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_write_bytes_cdf.pdf): PDF -inclusion: multiple pdfs with page group included in a single page ->] - -LaTeX Warning: No positions in optional float specifier. - Default added (so using `tbp') on input line 572. - - -Overfull \hbox (31.17738pt too wide) in paragraph at lines 574--584 - [][] - [] - -<./images/smb_general_iats_cdf.pdf, id=68, 361.35pt x 216.81pt> -File: ./images/smb_general_iats_cdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_general_iats_cdf.pdf used on input line -696. -(pdftex.def) Requested size: 258.0pt x 154.79962pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 696--697 -[][] - [] - -<./images/smb_general_iats_pdf.pdf, id=69, 361.35pt x 216.81pt> -File: ./images/smb_general_iats_pdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_general_iats_pdf.pdf used on input line -702. -(pdftex.def) Requested size: 258.0pt x 154.79962pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 702--703 -[][] - [] - -<./images/smb_read_iats_cdf.pdf, id=70, 361.35pt x 216.81pt> -File: ./images/smb_read_iats_cdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_read_iats_cdf.pdf used on input line 708 -. -(pdftex.def) Requested size: 258.0pt x 154.79962pt. 
- -Overfull \hbox (6.0pt too wide) in paragraph at lines 708--709 -[][] - [] - -<./images/smb_read_iats_pdf.pdf, id=71, 361.35pt x 216.81pt> -File: ./images/smb_read_iats_pdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_read_iats_pdf.pdf used on input line 714 -. -(pdftex.def) Requested size: 258.0pt x 154.79962pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 714--715 -[][] - [] - -<./images/smb_write_iats_cdf.pdf, id=72, 361.35pt x 216.81pt> -File: ./images/smb_write_iats_cdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_write_iats_cdf.pdf used on input line 72 -0. -(pdftex.def) Requested size: 258.0pt x 154.79962pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 720--721 -[][] - [] - -<./images/smb_write_iats_pdf.pdf, id=73, 361.35pt x 216.81pt> -File: ./images/smb_write_iats_pdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_write_iats_pdf.pdf used on input line 72 -6. -(pdftex.def) Requested size: 258.0pt x 154.79962pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 726--727 -[][] - [] - -<./images/smb_create_iats_cdf.pdf, id=74, 361.35pt x 216.81pt> -File: ./images/smb_create_iats_cdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_create_iats_cdf.pdf used on input line 7 -32. -(pdftex.def) Requested size: 258.0pt x 154.79962pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 732--733 -[][] - [] - -<./images/smb_create_iats_pdf.pdf, id=75, 361.35pt x 216.81pt> -File: ./images/smb_create_iats_pdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_create_iats_pdf.pdf used on input line 7 -38. -(pdftex.def) Requested size: 258.0pt x 154.79962pt. 
- -Overfull \hbox (6.0pt too wide) in paragraph at lines 738--739 -[][] - [] - -<./images/smb_general_rts_cdf.pdf, id=76, 361.35pt x 216.81pt> -File: ./images/smb_general_rts_cdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_general_rts_cdf.pdf used on input line 7 -46. -(pdftex.def) Requested size: 258.0pt x 154.79962pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 746--747 -[][] - [] - -<./images/smb_general_rts_pdf.pdf, id=77, 361.35pt x 216.81pt> -File: ./images/smb_general_rts_pdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_general_rts_pdf.pdf used on input line 7 -53. -(pdftex.def) Requested size: 258.0pt x 154.79962pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 753--754 -[][] - [] - -<./images/smb_read_rts_cdf.pdf, id=78, 361.35pt x 216.81pt> -File: ./images/smb_read_rts_cdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_read_rts_cdf.pdf used on input line 760. - -(pdftex.def) Requested size: 258.0pt x 154.79962pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 760--761 -[][] - [] - -<./images/smb_read_rts_pdf.pdf, id=79, 361.35pt x 216.81pt> -File: ./images/smb_read_rts_pdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_read_rts_pdf.pdf used on input line 767. - -(pdftex.def) Requested size: 258.0pt x 154.79962pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 767--768 -[][] - [] - -<./images/smb_write_rts_cdf.pdf, id=80, 361.35pt x 216.81pt> -File: ./images/smb_write_rts_cdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_write_rts_cdf.pdf used on input line 774 -. -(pdftex.def) Requested size: 258.0pt x 154.79962pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 774--775 -[][] - [] - -<./images/smb_write_rts_pdf.pdf, id=81, 361.35pt x 216.81pt> -File: ./images/smb_write_rts_pdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_write_rts_pdf.pdf used on input line 781 -. 
-(pdftex.def) Requested size: 258.0pt x 154.79962pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 781--782 -[][] - [] - -<./images/smb_create_rts_cdf.pdf, id=82, 361.35pt x 216.81pt> -File: ./images/smb_create_rts_cdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_create_rts_cdf.pdf used on input line 78 -8. -(pdftex.def) Requested size: 258.0pt x 154.79962pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 788--789 -[][] - [] - -<./images/smb_create_rts_pdf.pdf, id=83, 361.35pt x 216.81pt> -File: ./images/smb_create_rts_pdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_create_rts_pdf.pdf used on input line 79 -5. -(pdftex.def) Requested size: 258.0pt x 154.79962pt. - -Overfull \hbox (6.0pt too wide) in paragraph at lines 795--796 -[][] - [] - -[7 <./images/smb_general_iats_cdf.pdf>] [8 <./images/smb_general_iats_pdf.pdf> -<./images/smb_read_iats_cdf.pdf - -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_read_iats_cdf.pdf): PDF in -clusion: multiple pdfs with page group included in a single page -> <./images/smb_read_iats_pdf.pdf - -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_read_iats_pdf.pdf): PDF in -clusion: multiple pdfs with page group included in a single page -> <./images/smb_write_iats_cdf.pdf - -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_write_iats_cdf.pdf): PDF i -nclusion: multiple pdfs with page group included in a single page -> <./images/smb_write_iats_pdf.pdf - -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_write_iats_pdf.pdf): PDF i -nclusion: multiple pdfs with page group included in a single page -> <./images/smb_create_iats_cdf.pdf - -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_create_iats_cdf.pdf): PDF -inclusion: multiple pdfs with page group included in a single page ->] [9 <./images/smb_create_iats_pdf.pdf> <./images/smb_general_rts_cdf.pdf - -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_general_rts_cdf.pdf): PDF -inclusion: 
multiple pdfs with page group included in a single page -> <./images/smb_general_rts_pdf.pdf - -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_general_rts_pdf.pdf): PDF -inclusion: multiple pdfs with page group included in a single page -> <./images/smb_read_rts_cdf.pdf - -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_read_rts_cdf.pdf): PDF inc -lusion: multiple pdfs with page group included in a single page -> <./images/smb_read_rts_pdf.pdf - -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_read_rts_pdf.pdf): PDF inc -lusion: multiple pdfs with page group included in a single page -> <./images/smb_write_rts_cdf.pdf - -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_write_rts_cdf.pdf): PDF in -clusion: multiple pdfs with page group included in a single page -> <./images/smb_write_rts_pdf.pdf - -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_write_rts_pdf.pdf): PDF in -clusion: multiple pdfs with page group included in a single page ->] [10 <./images/smb_create_rts_cdf.pdf> <./images/smb_create_rts_pdf.pdf - -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_create_rts_pdf.pdf): PDF i -nclusion: multiple pdfs with page group included in a single page ->] (./trackingPaper.bbl - -Package balance Warning: You have called \balance in second column -(balance) Columns might not be balanced. - -[11]) - -** Conference Paper ** -Before submitting the final camera ready copy, remember to: - - 1. Manually equalize the lengths of two columns on the last page - of your paper; - - 2. Ensure that any PostScript and/or PDF output post-processing - uses only Type 1 fonts and that every step in the generation - process uses the appropriate paper size. 
- -[12] (./trackingPaper.aux) ) -Here is how much of TeX's memory you used: - 4270 strings out of 492623 - 67663 string characters out of 6135669 - 149935 words of memory out of 5000000 - 8104 multiletter control sequences out of 15000+600000 - 38340 words of font info for 74 fonts, out of 8000000 for 9000 - 1141 hyphenation exceptions out of 8191 - 41i,11n,32p,1341b,307s stack positions out of 5000i,500n,10000p,200000b,80000s -{/usr/share/texmf-dist/fonts/enc/dvips/base/8r.enc} - -Output written on trackingPaper.pdf (12 pages, 1044919 bytes). -PDF statistics: - 286 PDF objects out of 1000 (max. 8388607) - 189 compressed objects within 2 object streams - 0 named destinations out of 1000 (max. 500000) - 111 words of extra memory for PDF output out of 10000 (max. 10000000) - diff --git a/trackingPaper.pdf b/trackingPaper.pdf deleted file mode 100644 index 7947575..0000000 Binary files a/trackingPaper.pdf and /dev/null differ diff --git a/trackingPaper.synctex.gz b/trackingPaper.synctex.gz deleted file mode 100644 index 55ddd4c..0000000 Binary files a/trackingPaper.synctex.gz and /dev/null differ diff --git a/trackingPaper.tex b/trackingPaper.tex index fb3b529..ceb27dd 100644 --- a/trackingPaper.tex +++ b/trackingPaper.tex @@ -153,14 +153,16 @@ extensively in most non-research networks. In spite of the prevalence of SMB usage within most enterprise networks, there has been very little analysis of SMB workloads in prior academic research. The last major study -of SMB was nearly a decade ago~\cite{leung2008measurement}, and the nature of storage +of SMB was more than a decade ago~\cite{leung2008measurement}, and the nature of storage usage has changed dramatically over the last decade. -It is always important to revisit commonly used protocols to examine their use in comparison to the expected use case(s). 
This is doubly so for network communications because the nuances of networked data exchange can greatly influence the effectiveness and efficiency of a chosen protocol. Since an examination of SMB has not occurred in the past decade, we took a look at its current implementation and use in a large university network. +It is always important to revisit commonly used protocols to examine their use in comparison to the expected use case(s). This is doubly so for network communications because the nuances of networked data exchange can greatly influence the effectiveness and efficiency of a chosen protocol. +Since an SMB-based trace study has not been undertaken +recently, we took a look at its current implementation and use in a large university network. %Due to the sensitivity of the captured information, we ensure that all sensitive information is hashed and that the original network captures are not saved. Our study is based on network packet traces collected on the University of Connecticut's centralized storage facility over a period of three weeks in May 2019. This trace-driven analysis can help in the design of future storage products as well as providing data for future performance benchmarks. %Benchmarks are important for the purpose of developing technologies as well as taking accurate metrics. The reasoning behind this tracing capture work is to eventually better develop accurate benchmarks for network protocol evaluation. -Benchmarks allow for the stress testing of various aspects of a system (e.g. network, single system). Aggregate data analysis collected from traces can lead to the development of synthetic benchmarks. Traces can expose systems patterns that can also be reflected in synthetic benchmarks. Finally, the traces themselves can drive system simulations that can be used to evaluate prospective storage architectures. +Benchmarks allow for the stress testing of various aspects of a system (e.g. network, single system). 
Aggregate data analysis collected from traces can lead to the development of synthetic benchmarks. Traces can also expose system patterns that can be reflected in synthetic benchmarks. Finally, the traces themselves can drive system simulations that can be used to evaluate prospective storage architectures. %\begin{itemize} % \item \textbf{Why?:} Benchmarks allow for the stress testing of different/all aspects of a system (e.g. network, single system). @@ -178,16 +180,18 @@ Benchmarks allow for the stress testing of various aspects of a system (e.g. net % \end{enumerate} %\end{itemize} -Out of all the elements that make up the tracing system used for this research, there are a few key aspects that are worth covering due to customization of the system. These key components of the tracing system are the use of PF\_RING to mitigate timing and resource concerns, the use of proper hardware and software to handle incoming data, along with the tweaking of DataSeries code to create analysis tools for the captured data. +We created a new tracing system to collect data from the UConn storage network system. The tracing system was built around the high-speed PF\_RING packet capture system and required the use of proper hardware and software to handle incoming data. We also created a new trace capture format derived from the DataSeries structured data format developed by HP~\cite{DataSeries}. % PF\_RING section %The addition of PF\_RING lends to the tracing system by minimizing the copying of packets which, in turn, allows for more accurate timestamping of incoming traffic packets being captured ~\cite{Orosz2013,skopko2012loss,pfringWebsite,PFRINGMan}. PF\_RING acts as a kernel module that aids in minimizing packet loss/timestamping issues by not passing packets through the kernel data structures~\cite{PFRINGMan}.
%The other reason PF\_RING is instrumental is that it functions with the 10Gb/s hardware that was installed into the Trace1 server; allowing for full throughput from the network tap on the UITS system. \\ % DataSeries + Code section -The tweaks and code additions to the existing DataSeries work are filtering for specific SMB protocol fields along with the writing of analysis tools to parse and dissect the captured packets. Specific fields were chosen to be the interesting fields kept for analysis. It should be noted that this was done originally arbitrarily and changes/additions have been made as the value of certain fields were determined to be worth examining; e.g. multiple runs were required to refine the captured data for later analysis. The code written for analysis of the captured DataSeries format packets focuses on I/O events and ID tracking (TID/UID). The future vision for this information is to combine ID tracking with the OpLock information in order to track resource sharing of the different clients on the network. As well as using IP information to recreate communication in a larger network trace to establish a better benchmark. +DataSeries was modified to filter specific SMB protocol fields along with the writing of analysis tools to parse and dissect the captured packets. Specific fields were chosen to be the interesting fields kept for analysis. +%It should be noted that this was done originally arbitrarily and changes/additions have been made as the value of certain fields were determined to be worth examining; e.g. multiple runs were required to refine the captured data for later analysis. +The DataSeries data format allowed us to create data analysis code that focuses on I/O events and ID tracking (TID/UID). The future vision for this information is to combine ID tracking with the OpLock information in order to track resource sharing of the different clients on the network. 
The future vision also includes using IP information to recreate communication in a larger network trace to establish a better benchmark. %Focus should be aboiut analysis and new traces -The contributions of this work are the new traces of SMB traffic over a larger university network as well as new analysis of this traffic. Our new examination of the captured data reveals that despite the streamlining of the CIFS/SMB protocol to be less chatty, the majority of SMB communication in metadata based I/O. We found that read operations occur in greater numbers and cause a larger overall number of bytes to pass over the network. However, the average number of bytes transferred for each write I/O is greater than that of the average read operation. We also find that the current standard for modeling network I/O holds for the majority of operations, while a more representative model needs to be developed for reads. +The contributions of this work are the new traces of SMB traffic over a larger university network as well as new analysis of this traffic. Our new examination of the captured data reveals that despite the streamlining of the CIFS/SMB protocol to be less ``chatty'', the majority of SMB communication is still metadata-based I/O rather than actual data I/O. We found that read operations occur in greater numbers and cause a larger overall number of bytes to pass over the network. However, the average number of bytes transferred for each write I/O is greater than that of the average read operation. We also find that the current standard for modeling network I/O holds for the majority of operations, while a more representative model needs to be developed for reads. \subsection{Related Work} In this section we discuss previous studies examining traces and testing that has advanced benchmark development. We summarize major works in trace study in Table~\ref{tbl:studySummary}. In addition we examine issues that occur with traces and the assumptions in their study.
@@ -211,16 +215,16 @@ Leung, \textit{et al.}~\cite{leung2008measurement} & 2007 & CIF Vrable, \textit{et al.}~\cite{vrable2009cumulus} & 2009 & FUSE & x & Snapshots & Backup \\ \hline Benson, \textit{et al.}~\cite{benson2010network} & 2010 & AFS, MapReduce, NCP, SMB & x & Dynamic & Academic, Corporate \\ \hline Chen, \textit{et al.}~\cite{chen2012interactive} & 2012 & MapReduce & x & Dynamic & Corporate \\ \hline -This paper & 2017 & SMB & x & Dynamic & Academic, Engineering, Backup \\ \hline +This paper & 2020 & SMB & x & Dynamic & Academic, Engineering, Backup \\ \hline \end{tabular} \caption{Summary of major file system studies over the past decades. For each study the tables shows the dates of the trace data, the file system or protocol studied, whether it involved network file systems, the trace methodology used, and the workloads studied. Dynamic trace studies are those that involve traces of live requests. Snapshot studies involve snapshots of file system contents.} \label{tbl:studySummary} \vspace{-2em} -\end{table*}Since +\end{table*} \label{Previous Advances Due to Testing} Tracing collection and analysis has proved its worth in time from previous studies where one can see important lessons pulled from the research; change in behavior of read/write events, overhead concerns originating in system implementation, bottlenecks in communication, and other revelations found in the traces. \\ Previous tracing work has shown that one of the largest \& broadest hurdles to tackle is that traces (and benchmarks) must be tailored to the system being tested. There are always some generalizations taken into account but these generalizations can also be a major source of error~\cite{vogels1999file,malkani2003passive,seltzer2003nfs,anderson2004buttress,Orosz2013,dabir2007bottleneck,skopko2012loss,traeger2008nine,ruemmler1992unix}. 
To produce a benchmark with high fidelity one needs to understand not only the technology being used but how it is being implemented within the system~\cite{roselli2000comparison,traeger2008nine,ruemmler1992unix}. All of these aspects will lend to the behavior of the system; from timing \& resource elements to how the managing software governs actions~\cite{douceur1999large,malkani2003passive,seltzer2003nfs}. Furthermore, in pursuing this work one may find unexpected results and learn new things through examination~\cite{leung2008measurement,roselli2000comparison,seltzer2003nfs}. \\ -These studies are required in order to evaluate the development of technologies and methodologies along with furthering knowledge of different system aspects and capabilities. As has been pointed out by past work, the design of systems is usually guided by an understanding of the file system workloads and user behavior~\cite{leung2008measurement}. It is for that reason that new studies are constantly performed by the science community, from large scale studies to individual protocol studies~\cite{leung2008measurement,vogels1999file,roselli2000comparison,seltzer2003nfs,anderson2004buttress}. Even within these studies, the information gleaned is only as meaningful as the considerations of how the data is handled. \\ +These studies are required in order to evaluate the development of technologies and methodologies along with furthering knowledge of different system aspects and capabilities. As has been pointed out by past work, the design of systems is usually guided by an understanding of the file system workloads and user behavior~\cite{leung2008measurement}. It is for that reason that new studies are constantly performed by the science community, from large scale studies to individual protocol studies~\cite{leung2008measurement,vogels1999file,roselli2000comparison,seltzer2003nfs,anderson2004buttress}. 
Even within these studies, the information gleaned is only as meaningful as the considerations of how the data is handled. The work done by Leung et. al.~\cite{leung2008measurement} found observations related to the infrequency of files to be shared by more than one client. Over 67\% of files were never open by more than one client. Leung's \textit{et. al.} work led to a series of observations, from the fact that files are rarely re-opened to finding that read-write access patterns are more frequent ~\cite{leung2008measurement}. @@ -238,21 +242,23 @@ Narayan and Chandy examined the concerns of distributed I/O and the different mo Due to the striping of files across multiple nodes, this can cause any read or write to access all the nodes; which does not decrease the inter-arrival times (IATs) seen. As the number of I/O operations increase and the number of nodes increase, the IAT times decreased. Observations from Skopko in a 2012 paper~\cite{skopko2012loss} examined the nuance concerns of software based capture solutions. The main observation was software solutions relied heavily on OS packet processing mechanisms. Further more, depending on the mode of operation (e.g. interrupt or polling), the timestamping of packets would change. -As seen in previous trace work done~\cite{leung2008measurement,roselli2000comparison,seltzer2003nfs}, the general perceptions of how computer systems are being used versus their initial purpose have allowed for great strides in eliminating actual bottlenecks rather than spending unnecessary time working on imagined bottlenecks. Without illumination of these underlying actions (e.g. read-write ratios, file death rates, file access rates) these issues can not be readily tackled. 
+As seen in previous trace work~\cite{leung2008measurement,roselli2000comparison,seltzer2003nfs}, the general perceptions of how computer systems are being used versus their initial purpose have allowed for great strides in eliminating actual bottlenecks rather than spending unnecessary time working on imagined bottlenecks. Without illumination of these underlying actions (e.g. read-write ratios, file death rates, file access rates) these issues can not be readily tackled. \\ \section{Background} +\subsection{Server Message Block} The Server Message Block (SMB) is an application-layer network protocol mainly used for providing shared access to files, shared access to printers, shared access to serial ports, miscellaneous communications between nodes on the network, as well as providing an authenticated inter-process communication mechanism. %The majority of usage for the SMB protocol involves Microsfot Windows. Almost all implementations of SMB servers use NT Domain authentication to validate user-access to resources -The SMB 1.0 protocol has been found to have high/significant impact due to latency issues. Monitoring revealed a high degree of ``chattiness'' and disregard of network latency between hosts. Solutions to this problem were included in the updated SMB 2.0 protocol which decreases ``chattiness'' by reducing commands and sub-commands from over a hundred to nineteen. Additional changes, most significantly being increased security, were implemented in SMB 3.0 protocol (previously named SMB 2.2). +The SMB 1.0 protocol has been found to have high/significant impact on performance due to latency issues. Monitoring revealed a high degree of ``chattiness'' and disregard of network latency between hosts. Solutions to this problem were included in the updated SMB 2.0 protocol which decreases ``chattiness'' by reducing commands and sub-commands from over a hundred to nineteen. 
Additional changes, most significantly being increased security, were implemented in the SMB 3.0 protocol (previously named SMB 2.2). % XXX citations for SMB specs for different versions? %\textcolor{red}{\textbf{Add information about SMB 2.X/3?}} The rough order of communication for SMB session file interaction contains about five steps. First is a negotiation where a Microsoft SMB Protocol dialect is determined. Next a session is established to determine the share-level security. After this the Tree ID (TID) is determined for the share to be connected to as well as a file ID (FID) for a file requested by the client. From this establishment, I/O operations are performed using the FID given in the previous step. % Information relating to the capturing of SMB information -The only data that needs to be tracked from the SMB traces are the UID and TID for each session. The MID value is used for tracking individual packets in each established session. The PID tracks the process running the command or series of commands on a host. +The only values that need to be tracked from the SMB traces are the UID (User ID) and TID for each session. The SMB commands also include an MID (Multiplex ID) value that is used for tracking individual packets in each established session, and a PID (Process ID) that tracks the process running the command or series of commands on a host. +For the purposes of our tracing, we do not track the MID or PID information. -Some nuances of SMB protocol I/O are: +Some nuances of SMB protocol I/O to note are: \begin{itemize} \item SMB/SMB2 write request is the command that pushes bytes over the wire. \textbf{Note:} the response packet only confirms their arrival and use (e.g. writing). \item SMB/SMB2 read response is the command that pushes bytes over the wire. \textbf{Note:} The request packet only asks for the data.
@@ -269,23 +275,30 @@ Some nuances of SMB protocol I/O are: \label{Issues with Tracing} There are three general approaches to creating a benchmark based on a trade-off between experimental complexity and resemblance to the original application. (1) Connect the system to a production test environment, run the application, and measure the application metrics. (2) Collect traces from running the application and replay them (after possible modification) back on the test I/O system. (3) Generate a synthetic workload and measure the system performance. -The majority of benchmarks are attempts to represent a known system and structure on which some ``original'' design/system was tested. While this is all well and good, there are many issues with this sort of approach; temporal \& spatial scaling concerns, timestamping and buffer copying, as well as driver operation for capturing packets~\cite{Orosz2013,dabir2007bottleneck,skopko2012loss}. Each of these aspects contribute to the initial problems with dissection and analysis of the captured information. For example, inaccuracies in scheduling I/Os may result in as much as a factor of 3.5 differences in measured response time and factor of 26 in measured queue sizes; differences that are too large to ignore~\cite{anderson2004buttress}. +The majority of benchmarks attempt to represent a known system and structure on which some ``original'' design/system was tested. While this is all well and good, there are many issues with this sort of approach; temporal \& spatial scaling concerns, timestamping and buffer copying, as well as driver operation for capturing packets~\cite{Orosz2013,dabir2007bottleneck,skopko2012loss}. Each of these aspects contribute to the initial problems with dissection and analysis of the captured information. 
For example, inaccuracies in scheduling I/Os may result in as much as a factor of 3.5 differences in measured response time and factor of 26 in measured queue sizes; differences that are too large to ignore~\cite{anderson2004buttress}. Dealing with timing accuracy and high throughput involves three challenges. (1) Designing for dealing with peak performance requirements. (2) Coping with OS timing inaccuracies. (3) Working around unpredictable OS behavior; e.g. mechanisms to keep time and issue I/Os or performance effects due to interrupts. -Temporal scaling refers to the need to account for the nuances of timing with respect to the run time of commands; consisting of computation, communication \& service. A temporally scalable benchmarking system would take these subtleties into account when expanding its operation across multiple machines in a network. While these temporal issues have been tackled for a single processor (and even somewhat for cases of multi-processor), these same timing issues are not properly handled when dealing with inter-network communication. Inaccuracies in packet timestamping can be caused due to overhead in generic kernel-time based solutions, as well as use of the kernel data structures ~\cite{PFRINGMan,Orosz2013}. \\ -Spatial scaling refers to the need to account for the nuances of expanding a benchmark to incorporate a number of (\textbf{n}) machines over a network. A system that properly incorporates spatial scaling is one that would be able to incorporate communication (even in varying intensities) between all the machines on a system, thus stress testing all communicative actions and aspects (e.g. resource locks, queueing) on the network. +Temporal scaling refers to the need to account for the nuances of timing with respect to the run time of commands; consisting of computation, communication \& service. 
A temporally scalable benchmarking system would take these subtleties into account when expanding its operation across multiple machines in a network. While these temporal issues have been tackled for a single processor (and even somewhat for cases of multi-processor), these same timing issues are not properly handled when dealing with inter-network communication. Inaccuracies in packet timestamping can be caused due to overhead in generic kernel-time based solutions, as well as use of the kernel data structures ~\cite{PFRINGMan,Orosz2013}. + +Spatial scaling refers to the need to account for the nuances of expanding a benchmark to incorporate a number of machines over a network. A system that properly incorporates spatial scaling is one that would be able to incorporate communication (even in varying intensities) between all the machines on a system, thus stress testing all communicative actions and aspects (e.g. resource locks, queueing) on the network. \section{Packet Capturing System} -In this section, we examine the packet capturing system as well as decisions made that influence its capabilities. We illustrate the existing university network filesystem as well as our methods for ensuring high-speed packet capture. Then, we discuss the analysis code we developed for examining the captured data. +In this section, we describe the packet capturing system as well as decisions made that influence its capabilities. We illustrate the existing university network filesystem as well as our methods for ensuring high-speed packet capture. Then, we discuss the analysis code we developed for examining the captured data. % and on the python dissection code we wrote for performing traffic analysis. 
+ +\begin{figure*} + \includegraphics[width=\textwidth]{./images/packetcapturetopology.png} + \caption{\textcolor{red}{Visualization of Packet Capturing System}} + \label{fig:captureTopology} +\end{figure*} + \subsection{UITS System Overview} -We collected traces from the University of Connecticut University Information Technology Services (UITS) centralized storage server. The UITS system consists of five Microsoft file server cluster nodes. These blade servers are used to host SMB file shares for various departments at UConn as well as personal drive share space for faculty, staff and students, along with at least one small group of users. Each server is capable of handling 1~Gb/s of traffic in each direction (e.g. outbound and inbound traffic). All together the five blade server system can in theory handle 10~Gb/s of receiving and transmitting data. +We collected traces from the University of Connecticut University Information Technology Services (UITS) centralized storage server. The UITS system consists of five Microsoft file server cluster nodes. These blade servers are used to host SMB file shares for various departments at UConn as well as personal drive share space for faculty, staff and students, along with at least one small group of users. Each server is capable of handling 1~Gb/s of traffic in each direction (e.g. outbound and inbound traffic). Altogether, the five-blade server system can in theory handle 5~Gb/s of data traffic in each direction. %Some of these blade servers have local storage but the majority do not have any. -The blade servers serve SMB but the actual storage is served by SAN storage nodes that sit behind them. This system does not currently implement load balancing. Instead, the servers are set up to spread the traffic load among four of the active cluster nodes while the fifth node is passive and purposed to take over in the case that any of the other nodes go down (e.g. become inoperable or crash). 
- -The topology for the packet duplicating element is shown in Figure~\ref{fig:captureTopology}. For our tracing, we installed a 10~Gb network tap on the file server switch, allowing our storage server to obtain a copy of all network traffic going to the 5 file servers. The reason for using 10Gb hardware is to help ensure that the system is able to capture and all information on the network at peak theoretical throughput. +The blade servers serve as SMB heads, but the actual storage is served by SAN storage nodes that sit behind them. This system does not currently implement load balancing. Instead, the servers are set up to spread the traffic load with a static distribution among four of the active cluster nodes while the fifth node is passive and purposed to take over in the case that any of the other nodes go down (e.g. become inoperable or crash). +The actual tracing was performed with a tracing server connected to a switch outfitted with a packet duplicating element as shown in the topology diagram in Figure~\ref{fig:captureTopology}. A 10~Gbps network tap was installed in the file server switch, allowing our storage server to obtain a copy of all network traffic going to the 5 file servers. The reason for using 10~Gbps hardware is to help ensure that the system is able to capture all information on the network at peak theoretical throughput. \subsection{High-speed Packet Capture} \label{Capture} @@ -294,27 +307,25 @@ The topology for the packet duplicating element is shown in Figure~\ref{fig:capt %The broad strokes are that incoming SMB/CIFS information comes from the university's network. All packet and transaction information is passed through a duplicating switch that then allows for the tracing system to capture these packet transactions over a 10 Gb port. These packets are %passed along to the \textit{tshark} packet collection program which records these packets into a cyclical capturing ring. 
A watchdog program (\textit{inotify}) watches the directory where all of these packet-capture (pcap) files are being stored. As a new pcap file is completed \textit{inotify} passes the file to \textit{pcap2ds} along with what protocol is being examined (i.e. SMB). The \textit{pcap2ds} program reads through the given pcap files, -In order to maximize our faithful capture of the constant rate of traffic, we implement an ntop~\cite{ntopWebsite} solution called PF\_RING~\cite{pfringWebsite} to dramatically improve the storage server's packet capture speed. +In order to maximize our faithful capture of the constant rate of traffic, we implement on the tracing server an ntop~\cite{ntopWebsite} solution called PF\_RING~\cite{pfringWebsite} to dramatically improve the storage server's packet capture speed. %A license was obtained for scholastic use of PF\_RING. PF\_RING implements a ring buffer to provide fast and efficient packet capturing. Having implemented PF\_RING, the next step was to -We had to tune an implementation of \texttt{tshark} (wireshark's terminal pcap implementation) to maximize the packet capture and dissection into the DataSeries format~\cite{dataseriesGit}. +We had to tune an implementation of \texttt{tshark} (wireshark's terminal pcap implementation) to maximize the packet capture rate. +%and dissection into the DataSeries format~\cite{dataseriesGit}. %The assumption being made is that PF\_RING tackles and takes care of the concerns of packets loss due to buffer size, storage, and writing. \textit{tshark} need only read in those packets and generate the necessary DataSeries (ds) files. -To optimize this step a capture ring buffer flag is used to minimize the amount of space used to write pcap files, while optimizing the amount of time to +\texttt{tshark} outputs \texttt{.pcap} files, which capture all of the data present in packets on the network. We configure \texttt{tshark} so that it only captures SMB packets. 
Furthermore, to optimize this step, a capture ring buffer flag is used to minimize the amount of space used to write \texttt{.pcap} files, while optimizing the amount of time to %\textit{pcap2ds} can -filter data from the pcap files. +filter data from the \texttt{.pcap} files. The filesize used was in a ring buffer where each file captured was 64000 kB. % This causes tshark to switch to the next file after it reaches a determined size. %To simplify this aspect of the capturing process, the entirety of the capturing, dissection, and permanent storage was all automated through watch-dog scripts. -\begin{figure*} - \includegraphics[width=\textwidth]{./images/packetcapturetopology.png} - \caption{\textcolor{red}{Visualization of Packet Capturing System}} - \label{fig:captureTopology} -\end{figure*} + +The \texttt{.pcap} files from \texttt{tshark} do not lend themselves to easy data analysis, so we translate these files into the DataSeries~\cite{DataSeries} format. HP developed DataSeries, an XML-based structured data format, that was designed to be self-descriptive, storage and access efficient, and highly flexible. The system for taking captured \texttt{.pcap} files and writing them into the DataSeries format (i.e. \texttt{.ds}) does so by first creating a structure (based on a pre-written determination of the data desired to capture). Once the code builds this structure, it then reads through the capture traffic packets while dissecting and filling in the prepared structure with the desired information and format. -Due to the fundamental nature of this work, there is no need to track every piece of information that is exchanged, only that information which illuminates the behavior of the clients \& servers that function over the network (e.g. I/O transactions). It should also be noted that all sensitive information being captured by the tracing system is hashed to protect the users whose information is examined by the tracing system. 
Further more, we now only receive the SMB header information since that contains the I/O information we seek, while the body of the SMB traffic is not passed through to better ensure security of the university's network communications. It is worth noting that in the case of larger SMB headers, some information is lost, but this is a trade-off by the university to provide, on average, the correct sized SMB header but does lead to scenarios where some information may be captured incompletely. +Due to the fundamental nature of this work, there is no need to track every piece of information that is exchanged, only that information which illuminates the behavior of the clients and servers that interact over the network (i.e. I/O transactions). It should also be noted that all sensitive information being captured by the tracing system is hashed to protect the users whose information is examined by the tracing system. Furthermore, the DataSeries file retains only the first XXX bytes of the SMB packet - enough to capture the SMB header information that contains the I/O information we seek, while the body of the SMB traffic is not retained in order to better ensure security of the university's network communications. It is worth noting that in the case of larger SMB headers, some information is lost, but this is a trade-off by the university to provide, on average, the correct sized SMB header but does lead to scenarios where some information may be captured incompletely. \subsection{DataSeries Analysis} -Building upon existing code for the interpretation and dissection of the captured \texttt{.ds} files, we developed C/C++ code for examining the captured traffic information. From this analysis, a larger text file is created that contains read, write, create and general I/O information at both a global scale and individual tracking ID (UID/TID) level. In addition, read and write buffer size information is tracked, as well as the inter-arrival and response times. 
Also included in this data is oplock information and IP addresses. The main contribution of this step is to aggregate seen information for later interpretation of the results. +Building upon existing code for the interpretation and dissection of the captured \texttt{.ds} files, we developed C/C++ code for examining the captured traffic information. From this analysis, we are able to capture read, write, create and general I/O information at both a global scale and individual tracking ID (UID/TID) level. In addition, read and write buffer size information is tracked, as well as the inter-arrival and response times. Also included in this data is oplock information and IP addresses. The main contribution of this step is to aggregate seen information for later interpretation of the results. This step also creates an easily digestible output that can be used to re-create all tuple information for SMB/SMB2 sessions that are witnessed over the entire time period. Sessions are any communication where a valid UID and TID is used. @@ -359,12 +370,13 @@ Average Write Size (B) & 63 \\ \hline % NOTE: Not sure but this reference keeps referencing the WRONG table Table~\ref{tbl:TraceSummaryTotal} -shows a summary of the I/O operations, response times, and inter arrival times observed for the network filesystem. This table illustrates that the majority of I/O operations are general; showing that $74.87$\% of the network file system I/O are metadata operations. +shows a summary of the I/O operations, response times, and inter arrival times observed for the network filesystem. This table illustrates that the majority of I/O operations are general (74.87\%). As shown in the bottom part of Table~\ref{tbl:TraceSummaryTotal} general includes metadata commands such as \texttt{connect}, close, query info, etc. -Our examination of the collected network filesystem data revealed interesting patterns for the current use of CIFS/SMB in a large engineering academic setting. 
The first is that there is a major shift away from read and write operations towards more metadata-based ones. This matches the last CIFS observations made by Leung et.~al.~that files were being generated and accessed infrequently. The change in operations are due to a movement of use activity from reading and writing data to simply checking file and directory metadata. However, since the earlier study, SMB has transitioned to the SMB2 protocol which was supposed to be less "chatty" and thus we would expect fewer general SMB operations. Table~\ref{tbl:SMBCommands} shows a breakdown of SMB and SMB2 usage over the time period of May. From this table, one can see that despite the fact that the SMB2 protocol makes up $99.14$\% of total network operations compared to just $0.86$\% for SMB, indicating that most clients have upgraded to SMB2. However, $74.66$\% of SMB2 I/O are still general operations. Contrary to purpose of implementing the SMB2 protocol, there is still a large amount of general I/O. +Our examination of the collected network filesystem data revealed interesting patterns for the current use of CIFS/SMB in a large engineering academic setting. The first is that there is a major shift away from read and write operations towards more metadata-based ones. This matches the last CIFS observations made by Leung et.~al.~that files were being generated and accessed infrequently. The change in operations are due to a movement of use activity from reading and writing data to simply checking file and directory metadata. However, since the earlier study, SMB has transitioned to the SMB2 protocol which was supposed to be less "chatty" and thus we would expect fewer general SMB operations. Table~\ref{tbl:SMBCommands} shows a breakdown of SMB and SMB2 usage over the time period of May. From this table, one can see that the SMB2 protocol makes up $99.14$\% of total network operations compared to just $0.86$\% for SMB, indicating that most clients have upgraded to SMB2. 
However, $74.66$\% of SMB2 I/O are still general operations. Contrary to the purpose of implementing the SMB2 protocol, there is still a large amount of general I/O. %While CIFS/SMB protocol has less metadata operations, this is due to a depreciation of the SMB protocol commands, therefore we would expect to see less total operations (e.g. $0.04$\% of total operations). %The infrequency of file activity is further strengthened by our finding that within a week long window of time there are no Read or Write inter arrival times that can be calculated. %\textcolor{red}{XXX we are going to get questioned on this. its not likely that there are no IATs for reads and writes} + General operations happen at very high frequency with inter arrival times that were found to be relatively short (1317$\mu$s on average). Taking a deeper look at the SMB2 operations, shown in the bottom half of Table~\ref{tbl:SMBCommands}, we see that $9.06$\% of the general operations are negotiate commands. These are commands sent by the client to notify the server which dialects of the SMB2 protocol the client can understand. The three most common commands are close, tree connect, and query info. 
@@ -414,46 +426,6 @@ Oplock Break & \multicolumn{2}{|c|}{22397} & 0.008\% \\ \hline \vspace{-2em} \end{table} -\begin{table}[] -\centering -\begin{tabular}{|l|l|l|} -\hline -SMB2 Filename Extension & Occurrences & Percentage of Total \\ \hline --Travel & 33396147 & 15.26 \\ -o & 28670784 & 13.1 \\ -e & 28606421 & 13.07 \\ -N & 27639457 & 12.63 \\ -one & 27615505 & 12.62 \\ -\textless{}No Extension\textgreater{} & 27613845 & 12.62 \\ -d & 2799799 & 1.28 \\ -l & 2321338 & 1.06 \\ -x & 2108279 & 0.96 \\ -h & 2019714 & 0.92 \\ \hline -\end{tabular} -\caption{\textcolor{red}{Top 10 File Extensions Seen Over Three Week Period}} -\label{tab:top10SMB2FileExts} -\end{table} - -\begin{table}[] -\centering -\begin{tabular}{|l|l|l|} -\hline -SMB2 Filename Extension & Occurrences & Percentage of Total \\ \hline -doc & 352958 & 0.16 \\ -docx & 291047 & 0.13 \\ -ppt & 46706 & 0.02 \\ -pptx & 38604 & 0.02 \\ -xls & 218031 & 0.1 \\ -xlsx & 180676 & 0.08 \\ -odt & 28 & 1.28e-05 \\ -pdf & 375601 & 0.17 \\ -xml & 1192840 & 0.54 \\ -txt & 167827 & 0.08 \\ \hline -\end{tabular} -\caption{\textcolor{red}{Common File Extensions Seen Over Three Week Period}} -\label{tab:commonSMB2FileExts} -\end{table} - \subsection{I/O Data Request Sizes} %\textcolor{red}{Figures~\ref{fig:IO-All} and~\ref{fig:IO-R+W} show the amount of I/O in 15-minute periods during the week of March 12-18, 2017. %The general I/O (GIO) value is representative of I/O that does not include read, write, or create actions. For the most part, these general I/O are mostly metadata operations. As one can see in Figure~\ref{fig:IO-All}, the general I/O dominates any of the read or write operations. Figure~\ref{fig:IO-R+W} is a magnification of the read and write I/O from Figure~\ref{fig:IO-All}. Here we see that the majority of I/O operations belong to reads. There are some spikes where more write I/O occur, but these events are in the minority. 
One should also notice that, as would be expected, the spikes of I/O activity occur around the center of the day (e.g. 8am to 8pm), and during the week (March 12 was a Sunday and March 18 was a Saturday).} @@ -468,7 +440,8 @@ txt & 167827 & 0.08 \\ \hline % \caption{Read and Write I/O} % \label{fig:IO-R+W} %\end{figure} -Figures~\ref{fig:PDF-Bytes-Read} \&~\ref{fig:PDF-Bytes-Write} show the probability density function (PDF) of the different sizes of bytes transferred for read and write I/O operations; respectively. The most noticeable aspect of these graphs are that the majority of bytes transferred for read and write operations is around 64 bytes. It is worth noting that write I/O also have a larger number of very small transfer amounts. This is unexpected in terms of the amount of data passed in a frame. Our belief is that this is due to a large number of long term calculations/scripts being run that only require small but frequent updates. This assumption was later validated in part when examining the files transferred, as some were related to running scripts creating a large volume of files. +Each SMB Read and Write command is associated with a data request size that indicates how many bytes are to be read or written as part of that command. +Figures~\ref{fig:PDF-Bytes-Read} and~\ref{fig:PDF-Bytes-Write} show the probability density function (PDF) of the different sizes of bytes transferred for read and write I/O operations, respectively. The most noticeable aspect of these graphs is that the majority of bytes transferred for read and write operations is around 64 bytes. It is worth noting that write I/O also has a larger number of very small transfer amounts. This is unexpected in terms of the amount of data passed in a frame. Our belief is that this is due to a large number of long-term calculations/scripts being run that only require small but frequent updates. 
This assumption was later validated in part when examining the files transferred, as some were related to running scripts creating a large volume of files. %This could also be attributed to simple reads relating to metadata\textcolor{red}{???} %\begin{figure} @@ -483,25 +456,25 @@ Figures~\ref{fig:PDF-Bytes-Read} \&~\ref{fig:PDF-Bytes-Write} show the probabili % \label{fig:bytesCompare} %\end{figure} -\begin{figure} +\begin{figure}[t] \includegraphics[width=0.5\textwidth]{./images/smb_read_bytes_pdf.pdf} \caption{PDF of Bytes Transferred for Read I/O} \label{fig:PDF-Bytes-Read} \end{figure} -\begin{figure} +\begin{figure}[t] \includegraphics[width=0.5\textwidth]{./images/smb_read_bytes_cdf.pdf} \caption{CDF of Bytes Transferred for Read I/O} \label{fig:CDF-Bytes-Read} \end{figure} -\begin{figure} +\begin{figure}[t] \includegraphics[width=0.5\textwidth]{./images/smb_write_bytes_pdf.pdf} \caption{PDF of Bytes Transferred for Write I/O} \label{fig:PDF-Bytes-Write} \end{figure} -\begin{figure} +\begin{figure}[t] \includegraphics[width=0.5\textwidth]{./images/smb_write_bytes_cdf.pdf} \caption{CDF of Bytes Transferred for Write I/O} \label{fig:CDF-Bytes-Write} @@ -512,12 +485,12 @@ Figures~\ref{fig:PDF-Bytes-Read} \&~\ref{fig:PDF-Bytes-Write} show the probabili % \caption{CDF of Bytes Transferred for Read+Write I/O} % \label{fig:CDF-Bytes-RW} %\end{figure} -Figures~\ref{fig:CDF-Bytes-Read} and~\ref{fig:CDF-Bytes-Write} show cumulative distribution functions (CDF) for bytes read and bytes written. As can be seen the bytes transferred via reads increases by over $50$\% starting at 32 bytes, while the writes have approximately $20$\% below 32 bytes. Table~\ref{fig:transferSizes} shows a tabular view of this data. For reads, $34.97$\% are between 64 and 512 bytes, with another $28.86$\% at 64 byte request sizes. There are a negligible percentage of read requests larger than 512. -This read data is similar to what was observed by Leung et al. 
Writes, on the other hand, are very different. Leung et al. showed that writes were $60$-$70$\% less than 4K and $90$\% less than 64K. In our data, however, we see that only $11.16$\% of writes are less than 4K, $52.41$\% are 64K requests, and only $43.63$\% of requests are less than 64K writes. +Figures~\ref{fig:CDF-Bytes-Read} and~\ref{fig:CDF-Bytes-Write} show cumulative distribution functions (CDF) for bytes read and bytes written. As can be seen, almost no read transfer sizes are less than 32 bytes, whereas approximately 20\% of writes are below 32 bytes. Table~\ref{fig:transferSizes} shows a tabular view of this data. For reads, $34.97$\% are between 64 and 512 bytes, with another $28.86$\% at 64 byte request sizes. There is a negligible percentage of read requests larger than 512 bytes. +This read data is similar to what was observed by Leung et al. Writes, on the other hand, are very different. Leung et al. showed that $60$-$70$\% of writes were less than 4K in size and $90$\% less than 64K in size. In our data, however, we see that only $11.16$\% of writes are less than 4K, $52.41$\% are 64K requests, and only $43.63$\% of writes are less than 64K. In the ten years since the last study, it is clear that writes have become significantly larger. This may be explained by the fact that large files, and multiple files, are being written as standardized blocks more fitting to the larger data-sets and disk space available. This could be as an effort to improve the fidelity of data across the network, allow for better realtime data consistency between client and backup locations, or could just be due to a large number of scripts being run that create and update a series of relatively smaller documents. %\textbf{Note: It seems like a change in the order of magnitude that is being passed per packet. What would this indicate?}\textcolor{red}{Answer the question. 
Shorter reads/writes = better?} -\begin{table} +\begin{table}[] \centering \begin{tabular}{|l|c|c|} \hline @@ -534,7 +507,10 @@ $= 1024$ & 1.22e-5\% & 3.81e-5\% \\ \hline \vspace{-2em} \end{table} -In comparison of the read, write, and create operations we found that the vast majority of these type of I/O belong to creates. Furthermore, read operations account for the largest aggregate of bytes transferred over the network. However, the amount of bytes transferred by write commands is not far behind, although, non-intuitively, including a larger number of standardized relatively smaller writes. The most unexpected finding of the data is that all the the read and writes are performed using much smaller buffers than expected; about an order of magnitude smaller (e.g. bytes instead of kilobytes). +In comparison of the read, write, and create operations we found that the vast majority +of these types of I/O belong to creates. Given that there are so many creates, it +seems apparent that many applications create new files rather than updating existing +files when files are modified. Furthermore, read operations account for the largest aggregate of bytes transferred over the network. However, the amount of bytes transferred by write commands is not far behind, although, non-intuitively, including a larger number of standardized relatively smaller writes. The most unexpected finding of the data is that all of the reads and writes are performed using much smaller buffers than expected; about an order of magnitude smaller (e.g. bytes instead of kilobytes). % XXX I think we should get rid of this figure - not sure it conveys anything important that is not better conveyed than the CDF %Figure~\ref{fig:Agg-AvgRT} shows the average response time (RT) for the different I/O operations. The revealing information is that write I/Os take the longest average time. This is expected since writes transfer more data on average. 
There is an odd spike for create I/O which can be due to a batch of files or nested directories being made. There are points where read I/O RT can be seen, but this only occurs in areas where large RT for write I/O occur. This is attributed to a need to verify the written data. @@ -569,14 +545,49 @@ In comparison of the read, write, and create operations we found that the vast m \subsection{I/O Response Times} +%~!~ Addition since Chandy writing ~!~% +Most previous tracing work has not reported I/O response times or command latency. Response time is generally proportional to data request size, but under load it also gives an indication of server load. In +Table~\ref{tbl:PercentageTraceSummary} we show a summary of the response times for read, write, create, and general commands. We note that most general (metadata) operations occur fairly frequently, run relatively slowly, and happen at high frequency. +Other observations of the data show that the number of writes is very close to the number of reads, although the response time for write operations is very small - most likely because the storage server caches the write without actually committing to disk. Reads on the other hand are in most cases probably not going to hit in the cache and require an actual read from the storage media. Although read operations are only a small percentage of the total operations they have the greatest average response time; more than general I/O. As noted above, creates happen more frequently, but have a slightly slower response time, because of the extra metadata operations required for a create as opposed to a simple write. + +% Note: RT + IAT time CDFs exist in data output + +% IAT information + +\begin{figure}[t!] + \includegraphics[width=0.5\textwidth]{./images/smb_general_iats_cdf.pdf} + \caption{CDF of Inter Arrival Time for General I/O} + \label{fig:CDF-IAT-General} +\end{figure} + +\begin{figure}[t!] 
+ \includegraphics[width=0.5\textwidth]{./images/smb_general_iats_pdf.pdf} + \caption{PDF of Inter Arrival Time for General I/O} + \label{fig:PDF-IAT-General} +\end{figure} + +\begin{figure}[t!] + \includegraphics[width=0.5\textwidth]{./images/smb_general_rts_cdf.pdf} + \caption{CDF of Response Time for General I/O} + \label{fig:CDF-RT-General} + \vspace{-2em} +\end{figure} + +\begin{figure}[t!] + \includegraphics[width=0.5\textwidth]{./images/smb_general_rts_pdf.pdf} + \caption{PDF of Response Time for General I/O} + \label{fig:PDF-RT-General} + \vspace{-2em} +\end{figure} + \begin{table}[] \centering -\begin{tabular}{|l|l|l|l|l|} +\begin{tabular}{|l|r|r|r|r|} \hline & Reads & Writes & Creates & General \\ \hline -I/O \% & 2.97 & \multicolumn{1}{l|}{2.80} & \multicolumn{1}{l|}{19.36} & \multicolumn{1}{l|}{74.87} \\ \hline -Avg RT ($\mu$s) & 59819.687407 & \multicolumn{1}{l|}{519.703834} & \multicolumn{1}{l|}{698.082457} & \multicolumn{1}{l|}{7013.37566} \\ \hline -Avg IAT ($\mu$s) & 33220.780444 & \multicolumn{1}{l|}{35260.421498} & \multicolumn{1}{l|}{5094.474400} & \multicolumn{1}{l|}{1317.374383} \\ \hline +I/O \% & 2.97 & \multicolumn{1}{r|}{2.80} & \multicolumn{1}{r|}{19.36} & \multicolumn{1}{r|}{74.87} \\ \hline +Avg RT ($\mu$s) & 59819.7 & \multicolumn{1}{r|}{519.7} & \multicolumn{1}{r|}{698.1} & \multicolumn{1}{r|}{7013.4} \\ \hline +Avg IAT ($\mu$s) & 33220.8 & \multicolumn{1}{r|}{35260.4} & \multicolumn{1}{r|}{5094.5} & \multicolumn{1}{r|}{1317.4} \\ \hline %\hline %Total RT (s) & 224248 & \multicolumn{1}{l|}{41100} & \multicolumn{1}{l|}{342251} & \multicolumn{1}{l|}{131495} \\ \hline %\% Total RT & 30.34\% & \multicolumn{1}{l|}{5.56\%} & \multicolumn{1}{l|}{46.3\%} & \multicolumn{1}{l|}{17.79\%} \\ \hline @@ -598,11 +609,6 @@ Avg IAT ($\mu$s) & 33220.780444 & \multicolumn{1}{l|}{35260.421498} & \mult %\label{tbl:PercentageRTSummary} %\end{table} -%~!~ Addition since Chandy writing ~!~% -Most previous tracing work has not provided data on I/O response 
times or command latency which serves as an approximation of server load. In -Table~\ref{tbl:PercentageTraceSummary} we show a summary of the response times for read, write, create, and general commands. We note that most general operations have the second longest average response times ($7013.38$ $\mu$s). This exemplifies that these general operations occur in great numbers, run relatively slowly, and happen at high frequency. -Other observations of the data show that the number of writes are very close to the number of reads, although the response time for their operations is the shortest. Creates happen more often, but have a slightly slower response time, because most of the create commands are actually opens. Although read operations are only a few percentage of the total operations they have a the greatest average response time; more than general I/O. - %\textcolor{red}{To get an indication of how much of an effect these general commands take on overall latency, we also calculated the total aggregate response time for read, write, create, and general operations. We see that even though general commands account for $74.87$\% of all commands, they only account for only $17.8$\% of the total response time. Thus, while the volume of general operations does not present an extraordinary burden on server load, reducing these operations can present a clear performance benefit. We also see that creates take the most amount of time ($46.3$\%) of the total response time for all operations. As seen in Table~\ref{tbl:SMBCommands}, the majority of general operations are negotiations while $28.71$\% are closes; which relate to create operations. 
%This shows that while creates are only $5.08$\% on March 15th (and $2.5$\% of the week's operations shown in Table~\ref{tbl:PercentageTraceSummary}) of the total operations performed, they are responsible for $46.3$\% of the time spent performing network I/O.} %\textbf{Do we need this above data piece?} @@ -675,123 +681,88 @@ Other observations of the data show that the number of writes are very close to %% \end{itemize} %%\end{enumerate} % -Figure~\ref{fig:CDF-IAT-General} shows the inter arrival times CDF for general I/O. As can be seen, SMB commands happen very frequently - $85$\% of commands are issued less than 1024~$\mu s$ apart. As was mentioned above, the SMB protocol is known to be very chatty, and it is clear that servers must spend a lot of time dealing with these commands. For the most part, most of these commands are also serviced fairly quickly as well as seen in Figure~\ref{fig:CDF-RT-General}. Interestingly, the response/return time (RT) for the general metadata operations follows a similar curve to the inter-arrival times. +Figure~\ref{fig:CDF-IAT-General} shows the inter-arrival time CDF for general I/O. As can be seen, SMB commands happen very frequently - $85$\% of commands are issued less than 1024~$\mu$s apart. As was mentioned above, the SMB protocol is known to be very chatty, and it is clear that servers must spend a lot of time dealing with these commands. Most of these commands are also serviced fairly quickly, as +seen in Figure~\ref{fig:CDF-RT-General}. Interestingly, the response/return time (RT) for the general metadata operations follows a similar curve to the inter-arrival times. Next we examine the response time (RT) of the read, write, and create I/O operations that occur over the SMB network filesystem. The response time for write operations (shown in Figure~\ref{fig:CDF-RT-Write}) does not follow the step function similar to the bytes written CDF in Figure~\ref{fig:CDF-Bytes-Write}.
This is understandable as the response time for a write would be expected to be a more standardized action and not necessarily proportional to the number of bytes written. However, the read response time (Figure~\ref{fig:CDF-RT-Read}) is smoother than the bytes read CDF (Figure~\ref{fig:CDF-Bytes-Write}). This is most likely due to the fact that some of the reads are satisfied by server caches, thus eliminating some long access times to persistent storage. However, one should notice that the response time on read operations grows at a rate similar to that of write operations. This, again, shows a form of standardization in the communication patterns although some read I/O take a far greater period of time; due to larger amounts of read data sent over several standardized size packets. %While the RT for Write operations are not included (due to their step function behavior) Figure~\ref{fig:CDF-RT-Read} and Figure~\ref{fig:CDF-RT-RW} show the response times for Read and Read+Write operations respectively. T %\textcolor{red}{The write I/O step function behavior is somewhat visible in the CDF of both reads and writes in Figures~\ref{fig:CDF-RT-Read}~and~\ref{fig:CDF-RT-Write}. Moreover, this shows that the majority ($80$\%) of read (and write) operations occur within 2~$ms$, the average access time for enterprise storage disks. As would be expected, this is still an order of magnitude greater than the general I/O.} -\subsection{File Extensions} -Tables~\ref{tab:top10SMB2FileExts} and~\ref{tab:commonSMB2FileExts} show a representation of the various file extensions that were seen within the three week capture period. The easier to understand is Table~\ref{tab:commonSMB2FileExts}, which illustrates the number of common file extensions (e.g. doc, ppt, xls, pdf) that were part of the data. -The greatest point of note is that the highest percentage is ``.xml'' with $0.54$\%, which is found to be surprising result. 
Originally we expected that these common file extensions would be a much larger total of traffic; more than $2$\% of total traffic. These concerns were further raised by the results of Table~\ref{tab:top10SMB2FileExts} which show the top ten file extensions present in the data; which make up approximately $84$\% of the total seen. -Furthermore the majority of extensions seem very strange if not nonsensical. Upon closer examination of the tracing system it was determined that these file extensions are in artifact of how Windows interprets file extensions. The Windows operating system merely guesses the file type based on the assumed extension (e.g. whatever characters follow after the final `.'). -There are a large number of files that do not meet this standard idea of having an extension, although we posit that an assortment of scenarios that would cause this issue. These range from linux-based library files, manual pages, odd naming schemes as part of scripts or back-up files, as well as date-times and IPs as file names. There are undoubtedly a larger number more, but exhaustive determination of all variations is seen as out of scope for this work. - -% Note: RT + IAT time CDFs exist in data output - -% IAT information - -\begin{figure} - \includegraphics[width=0.5\textwidth]{./images/smb_general_iats_cdf.pdf} - \caption{CDF of Inter Arrival Time for General I/O} - \label{fig:CDF-IAT-General} -\end{figure} - -\begin{figure} - \includegraphics[width=0.5\textwidth]{./images/smb_general_iats_pdf.pdf} - \caption{PDF of Inter Arrival Time for General I/O} - \label{fig:PDF-IAT-General} -\end{figure} - -\begin{figure} +\begin{figure}[tp!] \includegraphics[width=0.5\textwidth]{./images/smb_read_iats_cdf.pdf} \caption{CDF of Inter Arrival Time for Read I/O} \label{fig:CDF-IAT-Read} \end{figure} -\begin{figure} +\begin{figure}[tp!] 
\includegraphics[width=0.5\textwidth]{./images/smb_read_iats_pdf.pdf} \caption{PDF of Inter Arrival Time for Read I/O} \label{fig:PDF-IAT-Read} \end{figure} -\begin{figure} - \includegraphics[width=0.5\textwidth]{./images/smb_write_iats_cdf.pdf} - \caption{CDF of Inter Arrival Time for Write I/O} - \label{fig:CDF-IAT-Write} -\end{figure} - -\begin{figure} - \includegraphics[width=0.5\textwidth]{./images/smb_write_iats_pdf.pdf} - \caption{PDF of Inter Arrival Time for Write I/O} - \label{fig:PDF-IAT-Write} -\end{figure} - -\begin{figure} - \includegraphics[width=0.5\textwidth]{./images/smb_create_iats_cdf.pdf} - \caption{CDF of Inter Arrival Time for Create I/O} - \label{fig:CDF-IAT-Create} -\end{figure} - -\begin{figure} - \includegraphics[width=0.5\textwidth]{./images/smb_create_iats_pdf.pdf} - \caption{PDF of Inter Arrival Time for Create I/O} - \label{fig:PDF-IAT-Create} -\end{figure} - -% RTs information - -\begin{figure} - \includegraphics[width=0.5\textwidth]{./images/smb_general_rts_cdf.pdf} - \caption{CDF of Response Time for General I/O} - \label{fig:CDF-RT-General} - \vspace{-2em} -\end{figure} - -\begin{figure} - \includegraphics[width=0.5\textwidth]{./images/smb_general_rts_pdf.pdf} - \caption{PDF of Response Time for General I/O} - \label{fig:PDF-RT-General} - \vspace{-2em} -\end{figure} - -\begin{figure} +\begin{figure}[tp!] \includegraphics[width=0.5\textwidth]{./images/smb_read_rts_cdf.pdf} \caption{CDF of Response Time for Read I/O} \label{fig:CDF-RT-Read} \vspace{-2em} \end{figure} -\begin{figure} +\begin{figure}[tp!] \includegraphics[width=0.5\textwidth]{./images/smb_read_rts_pdf.pdf} \caption{PDF of Response Time for Read I/O} \label{fig:PDF-RT-Read} \vspace{-2em} \end{figure} -\begin{figure} +% RTs information + +\begin{figure}[t!] + \includegraphics[width=0.5\textwidth]{./images/smb_write_iats_cdf.pdf} + \caption{CDF of Inter Arrival Time for Write I/O} + \label{fig:CDF-IAT-Write} +\end{figure} + +\begin{figure}[t!] 
+ \includegraphics[width=0.5\textwidth]{./images/smb_write_iats_pdf.pdf} + \caption{PDF of Inter Arrival Time for Write I/O} + \label{fig:PDF-IAT-Write} +\end{figure} + +\begin{figure}[t!] \includegraphics[width=0.5\textwidth]{./images/smb_write_rts_cdf.pdf} \caption{CDF of Return Time for Write IO} \label{fig:CDF-RT-Write} \vspace{-2em} \end{figure} -\begin{figure} +\begin{figure}[t!] \includegraphics[width=0.5\textwidth]{./images/smb_write_rts_pdf.pdf} \caption{PDF of Return Time for Write IO} \label{fig:PDF-RT-Write} \vspace{-2em} \end{figure} -\begin{figure} +\begin{figure}[t!] + \includegraphics[width=0.5\textwidth]{./images/smb_create_iats_cdf.pdf} + \caption{CDF of Inter Arrival Time for Create I/O} + \label{fig:CDF-IAT-Create} +\end{figure} + +\begin{figure}[t!] + \includegraphics[width=0.5\textwidth]{./images/smb_create_iats_pdf.pdf} + \caption{PDF of Inter Arrival Time for Create I/O} + \label{fig:PDF-IAT-Create} +\end{figure} + +\begin{figure}[t!] \includegraphics[width=0.5\textwidth]{./images/smb_create_rts_cdf.pdf} \caption{CDF of Response Time for Create I/O} \label{fig:CDF-RT-Create} \vspace{-2em} \end{figure} -\begin{figure} +\begin{figure}[t!] \includegraphics[width=0.5\textwidth]{./images/smb_create_rts_pdf.pdf} \caption{PDF of Response Time for Create I/O} \label{fig:PDF-RT-Create} @@ -822,6 +793,55 @@ There are a large number of files that do not meet this standard idea of having % \label{fig:CDF-Bytes-RW} %\end{figure} +\subsection{File Extensions} +Tables~\ref{tab:top10SMB2FileExts} and~\ref{tab:commonSMB2FileExts} show a summary of the various file extensions that were seen within the three-week capture period. The easier to understand is Table~\ref{tab:commonSMB2FileExts}, which illustrates the number of common file extensions (e.g. doc, ppt, xls, pdf) that were part of the data. +%The greatest point of note is that the highest percentage is ``.xml'' with $0.54$\%, which is found to be surprising result. 
+Originally we expected that these common file extensions would be a much larger share of traffic. However, as seen in Table~\ref{tab:commonSMB2FileExts}, these common file extensions were less than $2$\% of total files seen. The top ten extensions that we saw (Table~\ref{tab:top10SMB2FileExts}) comprised approximately $84$\% of the total seen. +Furthermore, the majority of extensions are not readily identified. +Upon closer examination of the tracing system it was determined that +%these file extensions are an artifact of how Windows interprets file extensions. The Windows operating system merely guesses the file type based on the assumed extension (e.g. whatever characters follow after the final `.'). +many files simply do not have a valid extension. These range from Linux-based library files, manual pages, odd naming schemes as part of scripts or back-up files, as well as date-times and IPs as file names. There are undoubtedly many more such cases, but exhaustive determination of all variations is seen as out of scope for this work.
+ +\begin{table}[] +\centering +\begin{tabular}{|l|l|l|} +\hline +SMB2 Filename Extension & Occurrences & Percentage of Total \\ \hline +-Travel & 33396147 & 15.26 \\ +o & 28670784 & 13.1 \\ +e & 28606421 & 13.07 \\ +N & 27639457 & 12.63 \\ +one & 27615505 & 12.62 \\ +\textless{}No Extension\textgreater{} & 27613845 & 12.62 \\ +d & 2799799 & 1.28 \\ +l & 2321338 & 1.06 \\ +x & 2108279 & 0.96 \\ +h & 2019714 & 0.92 \\ \hline +\end{tabular} +\caption{\textcolor{red}{Top 10 File Extensions Seen Over Three Week Period}} +\label{tab:top10SMB2FileExts} +\end{table} + +\begin{table}[] +\centering +\begin{tabular}{|l|l|l|} +\hline +SMB2 Filename Extension & Occurrences & Percentage of Total \\ \hline +doc & 352958 & 0.16 \\ +docx & 291047 & 0.13 \\ +ppt & 46706 & 0.02 \\ +pptx & 38604 & 0.02 \\ +xls & 218031 & 0.1 \\ +xlsx & 180676 & 0.08 \\ +odt & 28 & 0.000013 \\ +pdf & 375601 & 0.17 \\ +xml & 1192840 & 0.54 \\ +txt & 167827 & 0.08 \\ \hline +\end{tabular} +\caption{\textcolor{red}{Common File Extensions Seen Over Three Week Period}} +\label{tab:commonSMB2FileExts} +\end{table} + %Points worth mentioning: %\begin{itemize} % \item Scale of time is only to the microsecond due to the original pcap file capturing process. \texttt{tshark} only captures to a microsecond scale in our implementation. @@ -830,7 +850,7 @@ There are a large number of files that do not meet this standard idea of having \subsection{Distribution Models} -For simulations and analytic modeling, it is often useful to have models that describe the behavior of storage systems I/O. In this section, we attempt to map traditional probabilistic distributions to the data that we have observed. +For simulations and analytic modeling, it is often useful to have models that describe storage systems I/O behavior. In this section, we attempt to map traditional probabilistic distributions to the data that we have observed. 
Specifically, taking the developed CDF graphs, we perform curve fitting to determine the applicability of Gaussian and Weibull distributions to the the network filesystem I/O behavior. Note that an exponential distribution, typically used to model interarrival times and response times, is a special case of a Weibull distribution where $k=1$. Table~\ref{tbl:curveFitting} shows best-fit parametrized distributions for the measured. % along with $R^2$ fitness values. @@ -956,12 +976,10 @@ Further analysis will be made in examining how the determined metrics change whe %ACKNOWLEDGMENTS are optional %\section{Acknowledgments} -%This section is optional; it is a location for you -%to acknowledge grants, funding, editing assistance and -%what have you. In the present case, for example, the -%authors would like to thank Gerald Murray of ACM for -%his help in codifying this \textit{Author's Guide} -%and the \textbf{.cls} and \textbf{.tex} files that it describes. +%This work was supported in part by a National Science Foundation grant (award number +%CNS-0855090). Any opinions, findings and conclusions or recommendations expressed in +%this material are those of the authors and do not necessarily reflect those of the +%National Science Foundation. % % The following two commands are all you need in the