diff --git a/images/netappBackend.pdf b/images/netappBackend.pdf new file mode 100644 index 0000000..9ffe8e4 Binary files /dev/null and b/images/netappBackend.pdf differ diff --git a/images/smb_create_iats_cdf.pdf b/images/smb_create_iats_cdf.pdf index 9803a9c..2505a29 100644 Binary files a/images/smb_create_iats_cdf.pdf and b/images/smb_create_iats_cdf.pdf differ diff --git a/images/smb_create_iats_pdf.pdf b/images/smb_create_iats_pdf.pdf index bc2e3d6..60d61b5 100644 Binary files a/images/smb_create_iats_pdf.pdf and b/images/smb_create_iats_pdf.pdf differ diff --git a/images/smb_create_rts_cdf.pdf b/images/smb_create_rts_cdf.pdf index 262d9f5..81b5240 100644 Binary files a/images/smb_create_rts_cdf.pdf and b/images/smb_create_rts_cdf.pdf differ diff --git a/images/smb_create_rts_pdf.pdf b/images/smb_create_rts_pdf.pdf index 028b0f5..a5eed93 100644 Binary files a/images/smb_create_rts_pdf.pdf and b/images/smb_create_rts_pdf.pdf differ diff --git a/images/smb_general_iats_cdf.pdf b/images/smb_general_iats_cdf.pdf index 9af911a..cad27aa 100644 Binary files a/images/smb_general_iats_cdf.pdf and b/images/smb_general_iats_cdf.pdf differ diff --git a/images/smb_general_iats_pdf.pdf b/images/smb_general_iats_pdf.pdf index 263a08c..eb99e1b 100644 Binary files a/images/smb_general_iats_pdf.pdf and b/images/smb_general_iats_pdf.pdf differ diff --git a/images/smb_general_rts_cdf.pdf b/images/smb_general_rts_cdf.pdf index 0164633..ebebb55 100644 Binary files a/images/smb_general_rts_cdf.pdf and b/images/smb_general_rts_cdf.pdf differ diff --git a/images/smb_general_rts_pdf.pdf b/images/smb_general_rts_pdf.pdf index 512e475..9dd95cc 100644 Binary files a/images/smb_general_rts_pdf.pdf and b/images/smb_general_rts_pdf.pdf differ diff --git a/images/smb_read_bytes_cdf.pdf b/images/smb_read_bytes_cdf.pdf index 5faafea..7cffd4c 100644 Binary files a/images/smb_read_bytes_cdf.pdf and b/images/smb_read_bytes_cdf.pdf differ diff --git a/images/smb_read_bytes_pdf.pdf 
b/images/smb_read_bytes_pdf.pdf new file mode 100644 index 0000000..30e9bea Binary files /dev/null and b/images/smb_read_bytes_pdf.pdf differ diff --git a/images/smb_read_iats_cdf.pdf b/images/smb_read_iats_cdf.pdf index dd2f88e..e635def 100644 Binary files a/images/smb_read_iats_cdf.pdf and b/images/smb_read_iats_cdf.pdf differ diff --git a/images/smb_read_iats_pdf.pdf b/images/smb_read_iats_pdf.pdf index e755b61..69dd8dc 100644 Binary files a/images/smb_read_iats_pdf.pdf and b/images/smb_read_iats_pdf.pdf differ diff --git a/images/smb_read_rts_cdf.pdf b/images/smb_read_rts_cdf.pdf index 0e0c164..2b1035c 100644 Binary files a/images/smb_read_rts_cdf.pdf and b/images/smb_read_rts_cdf.pdf differ diff --git a/images/smb_read_rts_pdf.pdf b/images/smb_read_rts_pdf.pdf index 2259513..c2adad2 100644 Binary files a/images/smb_read_rts_pdf.pdf and b/images/smb_read_rts_pdf.pdf differ diff --git a/images/smb_write_bytes_cdf.pdf b/images/smb_write_bytes_cdf.pdf index fc1b03b..bef3e8f 100644 Binary files a/images/smb_write_bytes_cdf.pdf and b/images/smb_write_bytes_cdf.pdf differ diff --git a/images/smb_write_bytes_pdf.pdf b/images/smb_write_bytes_pdf.pdf new file mode 100644 index 0000000..6aaaeea Binary files /dev/null and b/images/smb_write_bytes_pdf.pdf differ diff --git a/images/smb_write_iats_cdf.pdf b/images/smb_write_iats_cdf.pdf index 83605eb..381cec0 100644 Binary files a/images/smb_write_iats_cdf.pdf and b/images/smb_write_iats_cdf.pdf differ diff --git a/images/smb_write_iats_pdf.pdf b/images/smb_write_iats_pdf.pdf index a7e730c..8d41fb5 100644 Binary files a/images/smb_write_iats_pdf.pdf and b/images/smb_write_iats_pdf.pdf differ diff --git a/images/smb_write_rts_cdf.pdf b/images/smb_write_rts_cdf.pdf index 600016f..277ebcc 100644 Binary files a/images/smb_write_rts_cdf.pdf and b/images/smb_write_rts_cdf.pdf differ diff --git a/images/smb_write_rts_pdf.pdf b/images/smb_write_rts_pdf.pdf index ea387eb..6197f48 100644 Binary files a/images/smb_write_rts_pdf.pdf 
and b/images/smb_write_rts_pdf.pdf differ diff --git a/trackingPaper.aux b/trackingPaper.aux index d82ae14..41caab8 100644 --- a/trackingPaper.aux +++ b/trackingPaper.aux @@ -72,76 +72,82 @@ \@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {III-B}}High-speed Packet Capture}{4}\protected@file@percent } \newlabel{Capture}{{\unhbox \voidb@x \hbox {III-B}}{4}} \newlabel{tbl:TraceSummaryTotal}{{IV}{4}} -\@writefile{lot}{\contentsline {table}{\numberline {II}{\ignorespaces Summary of Trace I/O Statistics for the time of February 3rd, 2019 to March 15th, 2019}}{4}\protected@file@percent } +\@writefile{lot}{\contentsline {table}{\numberline {II}{\ignorespaces Summary of Trace I/O Statistics for the time of April 30th, 2019 to May 20th, 2019}}{4}\protected@file@percent } \@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {III-C}}DataSeries Analysis}{4}\protected@file@percent } \@writefile{toc}{\contentsline {section}{\numberline {IV}Data Analysis}{4}\protected@file@percent } \newlabel{sec:data-analysis}{{IV}{4}} -\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Visualization of Packet Capturing System}}{5}\protected@file@percent } +\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces \leavevmode {\color {red}Visualization of Packet Capturing System}}}{5}\protected@file@percent } \newlabel{fig:captureTopology}{{2}{5}} \@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-A}}I/O Data Request Sizes}{5}\protected@file@percent } -\@writefile{lot}{\contentsline {table}{\numberline {III}{\ignorespaces Percentage of SMB and SMB2 Protocol Commands on March 15th. Breakdown of General Operations for SMB2}}{5}\protected@file@percent } +\@writefile{lot}{\contentsline {table}{\numberline {III}{\ignorespaces Percentage of SMB and SMB2 Protocol Commands from April 30th, 2019 to May 20th, 2019. 
Breakdown of General Operations for SMB2}}{5}\protected@file@percent } \newlabel{tbl:SMBCommands}{{III}{5}} -\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces CDF of Bytes Transferred for Read I/O}}{6}\protected@file@percent } -\newlabel{fig:CDF-Bytes-Read}{{3}{6}} -\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces CDF of Bytes Transferred for Write I/O}}{6}\protected@file@percent } -\newlabel{fig:CDF-Bytes-Write}{{4}{6}} -\@writefile{lot}{\contentsline {table}{\numberline {IV}{\ignorespaces Percentage of transfer sizes for reads and writes}}{6}\protected@file@percent } -\newlabel{fig:transferSizes}{{IV}{6}} -\@writefile{lot}{\contentsline {table}{\numberline {V}{\ignorespaces Summary of Trace Statistics: Average Response Time (RT) and Inter Arrival Time (IAT)}}{6}\protected@file@percent } -\newlabel{tbl:PercentageTraceSummary}{{V}{6}} -\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-B}}I/O Response Times}{6}\protected@file@percent } -\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces CDF of Inter Arrival Time for General I/O}}{7}\protected@file@percent } -\newlabel{fig:CDF-IAT-General}{{5}{7}} -\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces PDF of Inter Arrival Time for General I/O}}{7}\protected@file@percent } -\newlabel{fig:PDF-IAT-General}{{6}{7}} -\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces CDF of Inter Arrival Time for Read I/O}}{7}\protected@file@percent } -\newlabel{fig:CDF-IAT-Read}{{7}{7}} -\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces PDF of Inter Arrival Time for Read I/O}}{7}\protected@file@percent } -\newlabel{fig:PDF-IAT-Read}{{8}{7}} -\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces CDF of Inter Arrival Time for Write I/O}}{7}\protected@file@percent } -\newlabel{fig:CDF-IAT-Write}{{9}{7}} -\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces PDF 
of Inter Arrival Time for Write I/O}}{8}\protected@file@percent } -\newlabel{fig:PDF-IAT-Write}{{10}{8}} -\@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces CDF of Inter Arrival Time for Create I/O}}{8}\protected@file@percent } -\newlabel{fig:CDF-IAT-Create}{{11}{8}} -\@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces PDF of Inter Arrival Time for Create I/O}}{8}\protected@file@percent } -\newlabel{fig:PDF-IAT-Create}{{12}{8}} -\@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces CDF of Response Time for General I/O}}{8}\protected@file@percent } -\newlabel{fig:CDF-RT-General}{{13}{8}} -\@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces PDF of Response Time for General I/O}}{8}\protected@file@percent } -\newlabel{fig:PDF-RT-General}{{14}{8}} -\@writefile{lof}{\contentsline {figure}{\numberline {15}{\ignorespaces CDF of Response Time for Read I/O}}{8}\protected@file@percent } -\newlabel{fig:CDF-RT-Read}{{15}{8}} -\@writefile{lof}{\contentsline {figure}{\numberline {16}{\ignorespaces PDF of Response Time for Read I/O}}{8}\protected@file@percent } -\newlabel{fig:PDF-RT-Read}{{16}{8}} +\@writefile{lot}{\contentsline {table}{\numberline {IV}{\ignorespaces \leavevmode {\color {red}Top 10 File Extensions Seen Over Three Week Period}}}{6}\protected@file@percent } +\newlabel{tab:top10SMB2FileExts}{{IV}{6}} +\@writefile{lot}{\contentsline {table}{\numberline {V}{\ignorespaces \leavevmode {\color {red}Common File Extensions Seen Over Three Week Period}}}{6}\protected@file@percent } +\newlabel{tab:commonSMB2FileExts}{{V}{6}} +\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces PDF of Bytes Transferred for Read I/O}}{6}\protected@file@percent } +\newlabel{fig:PDF-Bytes-Read}{{3}{6}} +\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces CDF of Bytes Transferred for Read I/O}}{6}\protected@file@percent } +\newlabel{fig:CDF-Bytes-Read}{{4}{6}} 
+\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces PDF of Bytes Transferred for Write I/O}}{6}\protected@file@percent } +\newlabel{fig:PDF-Bytes-Write}{{5}{6}} +\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces CDF of Bytes Transferred for Write I/O}}{6}\protected@file@percent } +\newlabel{fig:CDF-Bytes-Write}{{6}{6}} +\@writefile{lot}{\contentsline {table}{\numberline {VI}{\ignorespaces Percentage of transfer sizes for reads and writes}}{6}\protected@file@percent } +\newlabel{fig:transferSizes}{{VI}{6}} +\@writefile{lot}{\contentsline {table}{\numberline {VII}{\ignorespaces Summary of Trace Statistics: Average Response Time (RT) and Inter Arrival Time (IAT)}}{7}\protected@file@percent } +\newlabel{tbl:PercentageTraceSummary}{{VII}{7}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-B}}I/O Response Times}{7}\protected@file@percent } +\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces CDF of Inter Arrival Time for General I/O}}{7}\protected@file@percent } +\newlabel{fig:CDF-IAT-General}{{7}{7}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-C}}File Extensions}{7}\protected@file@percent } +\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-D}}Distribution Models}{7}\protected@file@percent } +\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces PDF of Inter Arrival Time for General I/O}}{8}\protected@file@percent } +\newlabel{fig:PDF-IAT-General}{{8}{8}} +\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces CDF of Inter Arrival Time for Read I/O}}{8}\protected@file@percent } +\newlabel{fig:CDF-IAT-Read}{{9}{8}} +\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces PDF of Inter Arrival Time for Read I/O}}{8}\protected@file@percent } +\newlabel{fig:PDF-IAT-Read}{{10}{8}} +\@writefile{lof}{\contentsline {figure}{\numberline {11}{\ignorespaces CDF of Inter 
Arrival Time for Write I/O}}{8}\protected@file@percent } +\newlabel{fig:CDF-IAT-Write}{{11}{8}} +\@writefile{lof}{\contentsline {figure}{\numberline {12}{\ignorespaces PDF of Inter Arrival Time for Write I/O}}{8}\protected@file@percent } +\newlabel{fig:PDF-IAT-Write}{{12}{8}} +\@writefile{lof}{\contentsline {figure}{\numberline {13}{\ignorespaces CDF of Inter Arrival Time for Create I/O}}{8}\protected@file@percent } +\newlabel{fig:CDF-IAT-Create}{{13}{8}} +\@writefile{lof}{\contentsline {figure}{\numberline {14}{\ignorespaces PDF of Inter Arrival Time for Create I/O}}{9}\protected@file@percent } +\newlabel{fig:PDF-IAT-Create}{{14}{9}} +\@writefile{lof}{\contentsline {figure}{\numberline {15}{\ignorespaces CDF of Response Time for General I/O}}{9}\protected@file@percent } +\newlabel{fig:CDF-RT-General}{{15}{9}} +\@writefile{lof}{\contentsline {figure}{\numberline {16}{\ignorespaces PDF of Response Time for General I/O}}{9}\protected@file@percent } +\newlabel{fig:PDF-RT-General}{{16}{9}} +\@writefile{lof}{\contentsline {figure}{\numberline {17}{\ignorespaces CDF of Response Time for Read I/O}}{9}\protected@file@percent } +\newlabel{fig:CDF-RT-Read}{{17}{9}} +\@writefile{lof}{\contentsline {figure}{\numberline {18}{\ignorespaces PDF of Response Time for Read I/O}}{9}\protected@file@percent } +\newlabel{fig:PDF-RT-Read}{{18}{9}} +\@writefile{lof}{\contentsline {figure}{\numberline {19}{\ignorespaces CDF of Return Time for Write IO}}{9}\protected@file@percent } +\newlabel{fig:CDF-RT-Write}{{19}{9}} +\@writefile{lof}{\contentsline {figure}{\numberline {20}{\ignorespaces PDF of Return Time for Write IO}}{9}\protected@file@percent } +\newlabel{fig:PDF-RT-Write}{{20}{9}} \citation{Orosz2013} \citation{dabir2007bottleneck} \citation{skopko2012loss} \citation{Orosz2013} \citation{seltzer2003nfs} \citation{anderson2004buttress} -\@writefile{lof}{\contentsline {figure}{\numberline {17}{\ignorespaces CDF of Return Time for Write IO}}{9}\protected@file@percent } 
-\newlabel{fig:CDF-RT-Write}{{17}{9}} -\@writefile{lof}{\contentsline {figure}{\numberline {18}{\ignorespaces PDF of Return Time for Write IO}}{9}\protected@file@percent } -\newlabel{fig:PDF-RT-Write}{{18}{9}} -\@writefile{lof}{\contentsline {figure}{\numberline {19}{\ignorespaces CDF of Response Time for Create I/O}}{9}\protected@file@percent } -\newlabel{fig:CDF-RT-Create}{{19}{9}} -\@writefile{lof}{\contentsline {figure}{\numberline {20}{\ignorespaces PDF of Response Time for Create I/O}}{9}\protected@file@percent } -\newlabel{fig:PDF-RT-Create}{{20}{9}} -\@writefile{lot}{\contentsline {table}{\numberline {VI}{\ignorespaces Comparison of $\mu $, $\sigma $, $k$, and $\lambda $ Values for Curve Fitting Equations on CDF Graphs}}{9}\protected@file@percent } -\newlabel{tbl:curveFitting}{{VI}{9}} -\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-C}}Distribution Models}{9}\protected@file@percent } -\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-D}}System Limitations and Challenges}{9}\protected@file@percent } -\newlabel{System Limitations and Challenges}{{\unhbox \voidb@x \hbox {IV-D}}{9}} +\@writefile{lof}{\contentsline {figure}{\numberline {21}{\ignorespaces CDF of Response Time for Create I/O}}{10}\protected@file@percent } +\newlabel{fig:CDF-RT-Create}{{21}{10}} +\@writefile{lof}{\contentsline {figure}{\numberline {22}{\ignorespaces PDF of Response Time for Create I/O}}{10}\protected@file@percent } +\newlabel{fig:PDF-RT-Create}{{22}{10}} +\@writefile{lot}{\contentsline {table}{\numberline {VIII}{\ignorespaces Comparison of $\mu $, $\sigma $, $k$, and $\lambda $ Values for Curve Fitting Equations on CDF Graphs}}{10}\protected@file@percent } +\newlabel{tbl:curveFitting}{{VIII}{10}} +\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {IV-E}}System Limitations and Challenges}{10}\protected@file@percent } +\newlabel{System Limitations and Challenges}{{\unhbox \voidb@x 
\hbox {IV-E}}{10}} \bibstyle{IEEEtran} \bibdata{sigproc} \bibcite{leung2008measurement}{1} \bibcite{PFRINGMan}{2} \bibcite{ousterhout1985trace}{3} \bibcite{ramakrishnan1992analysis}{4} -\@writefile{toc}{\contentsline {section}{\numberline {V}Conclusions and Future Work}{10}\protected@file@percent } -\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {V-A}}Future Work}{10}\protected@file@percent } -\@writefile{toc}{\contentsline {section}{References}{10}\protected@file@percent } \bibcite{baker1991measurements}{5} \bibcite{gribble1996self}{6} \bibcite{douceur1999large}{7} @@ -161,6 +167,9 @@ \bibcite{traeger2008nine}{21} \bibcite{ruemmler1992unix}{22} \bibcite{ntopWebsite}{23} +\@writefile{toc}{\contentsline {section}{\numberline {V}Conclusions and Future Work}{11}\protected@file@percent } +\@writefile{toc}{\contentsline {subsection}{\numberline {\unhbox \voidb@x \hbox {V-A}}Future Work}{11}\protected@file@percent } +\@writefile{toc}{\contentsline {section}{References}{11}\protected@file@percent } \bibcite{pfringWebsite}{24} \bibcite{dataseriesGit}{25} \bibcite{pandasPythonWebsite}{26} diff --git a/trackingPaper.log b/trackingPaper.log index ab28821..245ae61 100644 --- a/trackingPaper.log +++ b/trackingPaper.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.14159265-2.6-1.40.19 (TeX Live 2018/Arch Linux) (preloaded format=pdflatex 2019.4.3) 16 APR 2019 17:54 +This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Arch Linux) (preloaded format=pdflatex 2019.8.27) 21 DEC 2019 10:10 entering extended mode restricted \write18 enabled. %&-line parsing enabled. 
@@ -131,15 +131,15 @@ Package: keyval 2014/10/28 v1.15 key=value parser (DPC) \lst@maxwidth=\dimen129 (/usr/share/texmf-dist/tex/latex/listings/lstmisc.sty -File: lstmisc.sty 2018/09/02 1.7 (Carsten Heinz) +File: lstmisc.sty 2019/02/27 1.8b (Carsten Heinz) \c@lstnumber=\count101 \lst@skipnumbers=\count102 \lst@framebox=\box30 ) (/usr/share/texmf-dist/tex/latex/listings/listings.cfg -File: listings.cfg 2018/09/02 1.7 listings configuration +File: listings.cfg 2019/02/27 1.8b listings configuration )) -Package: listings 2018/09/02 1.7 (Carsten Heinz) +Package: listings 2019/02/27 1.8b (Carsten Heinz) (/usr/share/texmf-dist/tex/latex/graphics/color.sty Package: color 2016/07/10 v1.1e Standard LaTeX Color (DPC) @@ -294,12 +294,7 @@ Package: etexcmds 2016/05/16 v1.6 Avoid name clashes with e-TeX commands (HO) (/usr/share/texmf-dist/tex/generic/oberdiek/ifluatex.sty Package: ifluatex 2016/05/16 v1.4 Provides the ifluatex switch (HO) Package ifluatex Info: LuaTeX not detected. -) -Package etexcmds Info: Could not find \expanded. -(etexcmds) That can mean that you are not using pdfTeX 1.50 or -(etexcmds) that some package has redefined \expanded. -(etexcmds) In the latter case, load this package earlier. -))) +)))) (/usr/share/texmf-dist/tex/generic/oberdiek/pdftexcmds.sty Package: pdftexcmds 2018/09/10 v0.29 Utility functions of pdfTeX for LuaTeX (HO ) @@ -394,270 +389,271 @@ LaTeX Warning: No positions in optional float specifier. Default added (so using `tbp') on input line 327. [4] -Overfull \hbox (34.75874pt too wide) in paragraph at lines 376--414 +Overfull \hbox (22.75874pt too wide) in paragraph at lines 375--413 [][] [] -LaTeX Warning: Reference `fig:IO-All' on page 5 undefined on input line 420. - - -LaTeX Warning: Reference `fig:IO-R+W' on page 5 undefined on input line 420. - - -LaTeX Warning: Reference `fig:IO-All' on page 5 undefined on input line 420. +LaTeX Warning: No positions in optional float specifier. 
+ Default added (so using `tbp') on input line 417. +LaTeX Font Info: Try loading font information for OML+ptm on input line 427. -LaTeX Warning: Reference `fig:IO-R+W' on page 5 undefined on input line 420. +(/usr/share/texmf-dist/tex/latex/psnfss/omlptm.fd +File: omlptm.fd +) +LaTeX Font Info: Font shape `OML/ptm/m/n' in size <8> not available +(Font) Font shape `OML/cmm/m/it' tried instead on input line 427. -LaTeX Warning: Reference `fig:IO-All' on page 5 undefined on input line 420. +LaTeX Warning: No positions in optional float specifier. + Default added (so using `tbp') on input line 437. +<./images/smb_read_bytes_pdf.pdf, id=26, 361.35pt x 216.81pt> +File: ./images/smb_read_bytes_pdf.pdf Graphic file (type pdf) + +Package pdftex.def Info: ./images/smb_read_bytes_pdf.pdf used on input line 48 +7. +(pdftex.def) Requested size: 258.0pt x 154.79962pt. -LaTeX Warning: Reference `fig:Agg-AvgBytes' on page 5 undefined on input line 4 -33. +Overfull \hbox (6.0pt too wide) in paragraph at lines 487--488 +[][] + [] -<./images/smb_read_bytes_cdf.pdf, id=26, 361.35pt x 216.81pt> +<./images/smb_read_bytes_cdf.pdf, id=27, 361.35pt x 216.81pt> File: ./images/smb_read_bytes_cdf.pdf Graphic file (type pdf) -Package pdftex.def Info: ./images/smb_read_bytes_cdf.pdf used on input line 44 -9. +Package pdftex.def Info: ./images/smb_read_bytes_cdf.pdf used on input line 49 +3. (pdftex.def) Requested size: 258.0pt x 154.79962pt. -Overfull \hbox (6.0pt too wide) in paragraph at lines 449--450 +Overfull \hbox (6.0pt too wide) in paragraph at lines 493--494 [][] [] -[5 <./images/packetcapturetopology.png>] -<./images/smb_write_bytes_cdf.pdf, id=31, 361.35pt x 216.81pt> -File: ./images/smb_write_bytes_cdf.pdf Graphic file (type pdf) - -Package pdftex.def Info: ./images/smb_write_bytes_cdf.pdf used on input line 4 -55. 
+<./images/smb_write_bytes_pdf.pdf, id=28, 361.35pt x 216.81pt> +File: ./images/smb_write_bytes_pdf.pdf Graphic file (type pdf) + +Package pdftex.def Info: ./images/smb_write_bytes_pdf.pdf used on input line 4 +99. (pdftex.def) Requested size: 258.0pt x 154.79962pt. -Overfull \hbox (6.0pt too wide) in paragraph at lines 455--456 +Overfull \hbox (6.0pt too wide) in paragraph at lines 499--500 [][] [] +<./images/smb_write_bytes_cdf.pdf, id=29, 361.35pt x 216.81pt> +File: ./images/smb_write_bytes_cdf.pdf Graphic file (type pdf) + +Package pdftex.def Info: ./images/smb_write_bytes_cdf.pdf used on input line 5 +05. +(pdftex.def) Requested size: 258.0pt x 154.79962pt. -LaTeX Warning: Reference `fig:bytesCompare' on page 6 undefined on input line 4 -67. - - -LaTeX Warning: Reference `fig:Agg-AvgBytes' on page 6 undefined on input line 4 -67. - +Overfull \hbox (6.0pt too wide) in paragraph at lines 505--506 +[][] + [] -LaTeX Warning: Reference `fig:bytesCompare' on page 6 undefined on input line 4 -67. +[5 <./images/packetcapturetopology.png>] [6 <./images/smb_read_bytes_pdf.pdf> < +./images/smb_read_bytes_cdf.pdf +pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_read_bytes_cdf.pdf): PDF i +nclusion: multiple pdfs with page group included in a single page +> <./images/smb_write_bytes_pdf.pdf -LaTeX Warning: Reference `fig:CDF-Bytes-RW' on page 6 undefined on input line 4 -67. +pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_write_bytes_pdf.pdf): PDF +inclusion: multiple pdfs with page group included in a single page +> <./images/smb_write_bytes_cdf.pdf +pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_write_bytes_cdf.pdf): PDF +inclusion: multiple pdfs with page group included in a single page +>] LaTeX Warning: No positions in optional float specifier. - Default added (so using `tbp') on input line 521. + Default added (so using `tbp') on input line 572. 
-Overfull \hbox (36.28783pt too wide) in paragraph at lines 523--533 +Overfull \hbox (31.17738pt too wide) in paragraph at lines 574--584 [][] [] -[6 <./images/smb_read_bytes_cdf.pdf> <./images/smb_write_bytes_cdf.pdf - -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_write_bytes_cdf.pdf): PDF -inclusion: multiple pdfs with page group included in a single page ->] - -LaTeX Warning: Reference `fig:CDF-RT-RW' on page 7 undefined on input line 629. - - -<./images/smb_general_iats_cdf.pdf, id=52, 361.35pt x 216.81pt> +<./images/smb_general_iats_cdf.pdf, id=68, 361.35pt x 216.81pt> File: ./images/smb_general_iats_cdf.pdf Graphic file (type pdf) Package pdftex.def Info: ./images/smb_general_iats_cdf.pdf used on input line -636. +696. (pdftex.def) Requested size: 258.0pt x 154.79962pt. -Overfull \hbox (6.0pt too wide) in paragraph at lines 636--637 +Overfull \hbox (6.0pt too wide) in paragraph at lines 696--697 [][] [] -<./images/smb_general_iats_pdf.pdf, id=53, 361.35pt x 216.81pt> +<./images/smb_general_iats_pdf.pdf, id=69, 361.35pt x 216.81pt> File: ./images/smb_general_iats_pdf.pdf Graphic file (type pdf) Package pdftex.def Info: ./images/smb_general_iats_pdf.pdf used on input line -642. +702. (pdftex.def) Requested size: 258.0pt x 154.79962pt. -Overfull \hbox (6.0pt too wide) in paragraph at lines 642--643 +Overfull \hbox (6.0pt too wide) in paragraph at lines 702--703 [][] [] -<./images/smb_read_iats_cdf.pdf, id=54, 361.35pt x 216.81pt> +<./images/smb_read_iats_cdf.pdf, id=70, 361.35pt x 216.81pt> File: ./images/smb_read_iats_cdf.pdf Graphic file (type pdf) -Package pdftex.def Info: ./images/smb_read_iats_cdf.pdf used on input line 648 +Package pdftex.def Info: ./images/smb_read_iats_cdf.pdf used on input line 708 . (pdftex.def) Requested size: 258.0pt x 154.79962pt. 
-Overfull \hbox (6.0pt too wide) in paragraph at lines 648--649 +Overfull \hbox (6.0pt too wide) in paragraph at lines 708--709 [][] [] -<./images/smb_read_iats_pdf.pdf, id=55, 361.35pt x 216.81pt> +<./images/smb_read_iats_pdf.pdf, id=71, 361.35pt x 216.81pt> File: ./images/smb_read_iats_pdf.pdf Graphic file (type pdf) -Package pdftex.def Info: ./images/smb_read_iats_pdf.pdf used on input line 654 +Package pdftex.def Info: ./images/smb_read_iats_pdf.pdf used on input line 714 . (pdftex.def) Requested size: 258.0pt x 154.79962pt. -Overfull \hbox (6.0pt too wide) in paragraph at lines 654--655 +Overfull \hbox (6.0pt too wide) in paragraph at lines 714--715 [][] [] -<./images/smb_write_iats_cdf.pdf, id=56, 361.35pt x 216.81pt> +<./images/smb_write_iats_cdf.pdf, id=72, 361.35pt x 216.81pt> File: ./images/smb_write_iats_cdf.pdf Graphic file (type pdf) -Package pdftex.def Info: ./images/smb_write_iats_cdf.pdf used on input line 66 +Package pdftex.def Info: ./images/smb_write_iats_cdf.pdf used on input line 72 0. (pdftex.def) Requested size: 258.0pt x 154.79962pt. -Overfull \hbox (6.0pt too wide) in paragraph at lines 660--661 +Overfull \hbox (6.0pt too wide) in paragraph at lines 720--721 [][] [] -<./images/smb_write_iats_pdf.pdf, id=57, 361.35pt x 216.81pt> +<./images/smb_write_iats_pdf.pdf, id=73, 361.35pt x 216.81pt> File: ./images/smb_write_iats_pdf.pdf Graphic file (type pdf) -Package pdftex.def Info: ./images/smb_write_iats_pdf.pdf used on input line 66 +Package pdftex.def Info: ./images/smb_write_iats_pdf.pdf used on input line 72 6. (pdftex.def) Requested size: 258.0pt x 154.79962pt. 
-Overfull \hbox (6.0pt too wide) in paragraph at lines 666--667 +Overfull \hbox (6.0pt too wide) in paragraph at lines 726--727 [][] [] -<./images/smb_create_iats_cdf.pdf, id=58, 361.35pt x 216.81pt> +<./images/smb_create_iats_cdf.pdf, id=74, 361.35pt x 216.81pt> File: ./images/smb_create_iats_cdf.pdf Graphic file (type pdf) -Package pdftex.def Info: ./images/smb_create_iats_cdf.pdf used on input line 6 -72. +Package pdftex.def Info: ./images/smb_create_iats_cdf.pdf used on input line 7 +32. (pdftex.def) Requested size: 258.0pt x 154.79962pt. -Overfull \hbox (6.0pt too wide) in paragraph at lines 672--673 +Overfull \hbox (6.0pt too wide) in paragraph at lines 732--733 [][] [] -<./images/smb_create_iats_pdf.pdf, id=59, 361.35pt x 216.81pt> +<./images/smb_create_iats_pdf.pdf, id=75, 361.35pt x 216.81pt> File: ./images/smb_create_iats_pdf.pdf Graphic file (type pdf) -Package pdftex.def Info: ./images/smb_create_iats_pdf.pdf used on input line 6 -78. +Package pdftex.def Info: ./images/smb_create_iats_pdf.pdf used on input line 7 +38. (pdftex.def) Requested size: 258.0pt x 154.79962pt. -Overfull \hbox (6.0pt too wide) in paragraph at lines 678--679 +Overfull \hbox (6.0pt too wide) in paragraph at lines 738--739 [][] [] -<./images/smb_general_rts_cdf.pdf, id=60, 361.35pt x 216.81pt> +<./images/smb_general_rts_cdf.pdf, id=76, 361.35pt x 216.81pt> File: ./images/smb_general_rts_cdf.pdf Graphic file (type pdf) -Package pdftex.def Info: ./images/smb_general_rts_cdf.pdf used on input line 6 -86. +Package pdftex.def Info: ./images/smb_general_rts_cdf.pdf used on input line 7 +46. (pdftex.def) Requested size: 258.0pt x 154.79962pt. 
-Overfull \hbox (6.0pt too wide) in paragraph at lines 686--687 +Overfull \hbox (6.0pt too wide) in paragraph at lines 746--747 [][] [] -<./images/smb_general_rts_pdf.pdf, id=61, 361.35pt x 216.81pt> +<./images/smb_general_rts_pdf.pdf, id=77, 361.35pt x 216.81pt> File: ./images/smb_general_rts_pdf.pdf Graphic file (type pdf) -Package pdftex.def Info: ./images/smb_general_rts_pdf.pdf used on input line 6 -93. +Package pdftex.def Info: ./images/smb_general_rts_pdf.pdf used on input line 7 +53. (pdftex.def) Requested size: 258.0pt x 154.79962pt. -Overfull \hbox (6.0pt too wide) in paragraph at lines 693--694 +Overfull \hbox (6.0pt too wide) in paragraph at lines 753--754 [][] [] -<./images/smb_read_rts_cdf.pdf, id=62, 361.35pt x 216.81pt> +<./images/smb_read_rts_cdf.pdf, id=78, 361.35pt x 216.81pt> File: ./images/smb_read_rts_cdf.pdf Graphic file (type pdf) -Package pdftex.def Info: ./images/smb_read_rts_cdf.pdf used on input line 700. +Package pdftex.def Info: ./images/smb_read_rts_cdf.pdf used on input line 760. (pdftex.def) Requested size: 258.0pt x 154.79962pt. -Overfull \hbox (6.0pt too wide) in paragraph at lines 700--701 +Overfull \hbox (6.0pt too wide) in paragraph at lines 760--761 [][] [] -<./images/smb_read_rts_pdf.pdf, id=63, 361.35pt x 216.81pt> +<./images/smb_read_rts_pdf.pdf, id=79, 361.35pt x 216.81pt> File: ./images/smb_read_rts_pdf.pdf Graphic file (type pdf) -Package pdftex.def Info: ./images/smb_read_rts_pdf.pdf used on input line 707. +Package pdftex.def Info: ./images/smb_read_rts_pdf.pdf used on input line 767. (pdftex.def) Requested size: 258.0pt x 154.79962pt. 
-Overfull \hbox (6.0pt too wide) in paragraph at lines 707--708 +Overfull \hbox (6.0pt too wide) in paragraph at lines 767--768 [][] [] -<./images/smb_write_rts_cdf.pdf, id=64, 361.35pt x 216.81pt> +<./images/smb_write_rts_cdf.pdf, id=80, 361.35pt x 216.81pt> File: ./images/smb_write_rts_cdf.pdf Graphic file (type pdf) -Package pdftex.def Info: ./images/smb_write_rts_cdf.pdf used on input line 714 +Package pdftex.def Info: ./images/smb_write_rts_cdf.pdf used on input line 774 . (pdftex.def) Requested size: 258.0pt x 154.79962pt. -Overfull \hbox (6.0pt too wide) in paragraph at lines 714--715 +Overfull \hbox (6.0pt too wide) in paragraph at lines 774--775 [][] [] -<./images/smb_write_rts_pdf.pdf, id=65, 361.35pt x 216.81pt> +<./images/smb_write_rts_pdf.pdf, id=81, 361.35pt x 216.81pt> File: ./images/smb_write_rts_pdf.pdf Graphic file (type pdf) -Package pdftex.def Info: ./images/smb_write_rts_pdf.pdf used on input line 721 +Package pdftex.def Info: ./images/smb_write_rts_pdf.pdf used on input line 781 . (pdftex.def) Requested size: 258.0pt x 154.79962pt. -Overfull \hbox (6.0pt too wide) in paragraph at lines 721--722 +Overfull \hbox (6.0pt too wide) in paragraph at lines 781--782 [][] [] -<./images/smb_create_rts_cdf.pdf, id=66, 361.35pt x 216.81pt> +<./images/smb_create_rts_cdf.pdf, id=82, 361.35pt x 216.81pt> File: ./images/smb_create_rts_cdf.pdf Graphic file (type pdf) -Package pdftex.def Info: ./images/smb_create_rts_cdf.pdf used on input line 72 +Package pdftex.def Info: ./images/smb_create_rts_cdf.pdf used on input line 78 8. (pdftex.def) Requested size: 258.0pt x 154.79962pt. 
-Overfull \hbox (6.0pt too wide) in paragraph at lines 728--729 +Overfull \hbox (6.0pt too wide) in paragraph at lines 788--789 [][] [] -<./images/smb_create_rts_pdf.pdf, id=67, 361.35pt x 216.81pt> +<./images/smb_create_rts_pdf.pdf, id=83, 361.35pt x 216.81pt> File: ./images/smb_create_rts_pdf.pdf Graphic file (type pdf) -Package pdftex.def Info: ./images/smb_create_rts_pdf.pdf used on input line 73 +Package pdftex.def Info: ./images/smb_create_rts_pdf.pdf used on input line 79 5. (pdftex.def) Requested size: 258.0pt x 154.79962pt. -Overfull \hbox (6.0pt too wide) in paragraph at lines 735--736 +Overfull \hbox (6.0pt too wide) in paragraph at lines 795--796 [][] [] - -Underfull \vbox (badness 6510) has occurred while \output is active [] - - [7 <./images/smb_general_iats_cdf.pdf> <./images/smb_general_iats_pdf.pdf - -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_general_iats_pdf.pdf): PDF - inclusion: multiple pdfs with page group included in a single page -> <./images/smb_read_iats_cdf.pdf +[7 <./images/smb_general_iats_cdf.pdf>] [8 <./images/smb_general_iats_pdf.pdf> +<./images/smb_read_iats_cdf.pdf pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_read_iats_cdf.pdf): PDF in clusion: multiple pdfs with page group included in a single page @@ -669,15 +665,15 @@ clusion: multiple pdfs with page group included in a single page pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_write_iats_cdf.pdf): PDF i nclusion: multiple pdfs with page group included in a single page ->] [8 <./images/smb_write_iats_pdf.pdf> <./images/smb_create_iats_cdf.pdf +> <./images/smb_write_iats_pdf.pdf -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_create_iats_cdf.pdf): PDF -inclusion: multiple pdfs with page group included in a single page -> <./images/smb_create_iats_pdf.pdf +pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_write_iats_pdf.pdf): PDF i +nclusion: multiple pdfs with page group included in a single page +> <./images/smb_create_iats_cdf.pdf -pdfTeX 
warning: /usr/bin/pdflatex (file ./images/smb_create_iats_pdf.pdf): PDF +pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_create_iats_cdf.pdf): PDF inclusion: multiple pdfs with page group included in a single page -> <./images/smb_general_rts_cdf.pdf +>] [9 <./images/smb_create_iats_pdf.pdf> <./images/smb_general_rts_cdf.pdf pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_general_rts_cdf.pdf): PDF inclusion: multiple pdfs with page group included in a single page @@ -693,15 +689,15 @@ lusion: multiple pdfs with page group included in a single page pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_read_rts_pdf.pdf): PDF inc lusion: multiple pdfs with page group included in a single page ->] [9 <./images/smb_write_rts_cdf.pdf> <./images/smb_write_rts_pdf.pdf +> <./images/smb_write_rts_cdf.pdf -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_write_rts_pdf.pdf): PDF in +pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_write_rts_cdf.pdf): PDF in clusion: multiple pdfs with page group included in a single page -> <./images/smb_create_rts_cdf.pdf +> <./images/smb_write_rts_pdf.pdf -pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_create_rts_cdf.pdf): PDF i -nclusion: multiple pdfs with page group included in a single page -> <./images/smb_create_rts_pdf.pdf +pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_write_rts_pdf.pdf): PDF in +clusion: multiple pdfs with page group included in a single page +>] [10 <./images/smb_create_rts_cdf.pdf> <./images/smb_create_rts_pdf.pdf pdfTeX warning: /usr/bin/pdflatex (file ./images/smb_create_rts_pdf.pdf): PDF i nclusion: multiple pdfs with page group included in a single page @@ -710,7 +706,7 @@ nclusion: multiple pdfs with page group included in a single page Package balance Warning: You have called \balance in second column (balance) Columns might not be balanced. 
-[10]) +[11]) ** Conference Paper ** Before submitting the final camera ready copy, remember to: @@ -722,38 +718,35 @@ Before submitting the final camera ready copy, remember to: uses only Type 1 fonts and that every step in the generation process uses the appropriate paper size. -[11] (./trackingPaper.aux) - -LaTeX Warning: There were undefined references. - - ) +[12] (./trackingPaper.aux) ) Here is how much of TeX's memory you used: - 4248 strings out of 492616 - 67045 string characters out of 6135178 - 149869 words of memory out of 5000000 - 8086 multiletter control sequences out of 15000+600000 + 4270 strings out of 492623 + 67663 string characters out of 6135669 + 149935 words of memory out of 5000000 + 8104 multiletter control sequences out of 15000+600000 38340 words of font info for 74 fonts, out of 8000000 for 9000 1141 hyphenation exceptions out of 8191 41i,11n,32p,1341b,307s stack positions out of 5000i,500n,10000p,200000b,80000s -{/usr/share/texmf-dist/fonts/enc/dvips/base/8r.enc} -Output written on trackingPaper.pdf (11 pages, 1030548 bytes). +{/usr/share/texmf-dist/fonts/enc/dvips/base/8r.enc} + +Output written on trackingPaper.pdf (12 pages, 1044919 bytes). PDF statistics: - 266 PDF objects out of 1000 (max. 8388607) - 176 compressed objects within 2 object streams + 286 PDF objects out of 1000 (max. 8388607) + 189 compressed objects within 2 object streams 0 named destinations out of 1000 (max. 500000) - 101 words of extra memory for PDF output out of 10000 (max. 10000000) + 111 words of extra memory for PDF output out of 10000 (max. 
10000000) diff --git a/trackingPaper.pdf b/trackingPaper.pdf index 1fddea8..7947575 100644 Binary files a/trackingPaper.pdf and b/trackingPaper.pdf differ diff --git a/trackingPaper.synctex.gz b/trackingPaper.synctex.gz index f82e7ac..55ddd4c 100644 Binary files a/trackingPaper.synctex.gz and b/trackingPaper.synctex.gz differ diff --git a/trackingPaper.tex b/trackingPaper.tex index 5e7d430..fb3b529 100644 --- a/trackingPaper.tex +++ b/trackingPaper.tex @@ -131,7 +131,7 @@ While file system traces have been well-studied in earlier work, it has been som The purpose of this work is to continue previous SMB studies to better understand the use of the protocol in a real-world production system in use at the University of Connecticut. The main contribution of our work is the exploration of I/O behavior in modern file system workloads as well as new examinations of the inter-arrival times and run times for I/O events. We further investigate if the recent standard models for traffic remain accurate. -\textcolor{red}{Our findings reveal interesting data relating to the number of read and write events. We notice that while the number of read events far exceeds writes, the average of bytes transferred over the wire is greater for writes. Furthermore we find an increase in the use of metadata for overall network communication that can be taken advantage of through the use of smart storage devices.} +Our findings reveal interesting data relating to the number of read and write events. We notice that the number of read events far exceeds writes and that the average number of bytes transferred over the wire is greater for reads as well. Furthermore, we find an increase in the use of metadata for overall network communication that can be taken advantage of through the use of smart storage devices.
\end{abstract} \section{Introduction} @@ -155,12 +155,12 @@ In spite of the prevalence of SMB usage within most enterprise networks, there h been very little analysis of SMB workloads in prior academic research. The last major study of SMB was nearly a decade ago~\cite{leung2008measurement}, and the nature of storage usage has changed dramatically over the last decade. -It is always important to revisit commonly used protocols to examine their use in comparison to the expected use case(s). This is doubly so for network communications because the nuances of networked data exchange can greatly influence the effectiveness and efficiency of a chosen protocol. \textcolor{red}{Since an examination of SMB has not occurred in the past decade, we took a look at its current implementation and use in a large university network.} +It is always important to revisit commonly used protocols to examine their use in comparison to the expected use case(s). This is doubly so for network communications because the nuances of networked data exchange can greatly influence the effectiveness and efficiency of a chosen protocol. Since an examination of SMB has not occurred in the past decade, we took a look at its current implementation and use in a large university network. %Due to the sensitivity of the captured information, we ensure that all sensitive information is hashed and that the original network captures are not saved. -Our study is based on network packet traces collected on the University of Connecticut's centralized storage facility over \textcolor{red}{a period of a week of in March 2017.} This trace-driven analysis can help in the design of future storage products as well as providing data for future performance benchmarks. +Our study is based on network packet traces collected on the University of Connecticut's centralized storage facility over a period of three weeks in May 2019. 
This trace-driven analysis can help in the design of future storage products as well as providing data for future performance benchmarks. %Benchmarks are important for the purpose of developing technologies as well as taking accurate metrics. The reasoning behind this tracing capture work is to eventually better develop accurate benchmarks for network protocol evaluation. -Benchmarks allow for the stress testing of various aspects of a system (e.g. network, single system). Aggregate data analysis collected from traces can lead to the development of synthetic benchmarks. Traces can expose systems patterns that can also be reflected in synthetic benchmark. Finally, the traces themselves can drive system simulations that can be used to evaluate prospective storage architectures. +Benchmarks allow for the stress testing of various aspects of a system (e.g. network, single system). Aggregate data analysis collected from traces can lead to the development of synthetic benchmarks. Traces can expose systems patterns that can also be reflected in synthetic benchmarks. Finally, the traces themselves can drive system simulations that can be used to evaluate prospective storage architectures. %\begin{itemize} % \item \textbf{Why?:} Benchmarks allow for the stress testing of different/all aspects of a system (e.g. network, single system). @@ -187,7 +187,7 @@ PF\_RING acts as a kernel module that aids in minimizing packet loss/timestampin The tweaks and code additions to the existing DataSeries work are filtering for specific SMB protocol fields along with the writing of analysis tools to parse and dissect the captured packets. Specific fields were chosen to be the interesting fields kept for analysis. It should be noted that this was done originally arbitrarily and changes/additions have been made as the value of certain fields were determined to be worth examining; e.g. multiple runs were required to refine the captured data for later analysis. 
The code written for analysis of the captured DataSeries format packets focuses on I/O events and ID tracking (TID/UID). The future vision for this information is to combine ID tracking with the OpLock information in order to track resource sharing of the different clients on the network. As well as using IP information to recreate communication in a larger network trace to establish a better benchmark. %Focus should be aboiut analysis and new traces -The contributions of this work are the new traces of SMB traffic over a larger university network as well as new analysis of this traffic. Our new examination of the captured data reveals that \textcolor{red}{despite the streamlining of the CIFS/SMB protocol to be less chatty, the majority of SMB communication in metadata based I/O. We found that while read operations occur in greater numbers and cause a larger overall number of bytes to pass over the network. However, the average number of bytes transferred for each write I/O is an order of magnitude greater than that of the average read operation. We also find that the current standard for modeling network I/O holds for the majority of operations, while a more representative model needs to be developed for writes.} +The contributions of this work are the new traces of SMB traffic over a larger university network as well as new analysis of this traffic. Our new examination of the captured data reveals that despite the streamlining of the CIFS/SMB protocol to be less chatty, the majority of SMB communication is metadata-based I/O. We found that read operations occur in greater numbers and cause a larger overall number of bytes to pass over the network. However, the average number of bytes transferred for each write I/O is greater than that of the average read operation. We also find that the current standard for modeling network I/O holds for the majority of operations, while a more representative model needs to be developed for reads.
\subsection{Related Work} In this section we discuss previous studies examining traces and testing that has advanced benchmark development. We summarize major works in trace study in Table~\ref{tbl:studySummary}. In addition we examine issues that occur with traces and the assumptions in their study. @@ -235,7 +235,7 @@ Orosz and Skopko examined the effect of the kernel on packet loss in their 2013 Narayan and Chandy examined the concerns of distributed I/O and the different models of parallel application I/O. %There are five major models of parallel application I/O. (1) Single output file shared by multiple nodes. (2) Large sequential reads by a single node at the beginning of computation and large sequential writes by a single node at the end of computation. (3) Checkpointing of states. (4) Metadata and read intensive (e.g. small data I/O and frequent directory lookups for reads). -Due to the striping of files across multiple nodes, this can cause any read or write to access all the nodes; which does not decrease the IATs seen. As the number of I/O operations increase and the number of nodes increase, the IAT times decreased. +Due to the striping of files across multiple nodes, this can cause any read or write to access all the nodes; which does not decrease the inter-arrival times (IATs) seen. As the number of I/O operations increase and the number of nodes increase, the IAT times decreased. Observations from Skopko in a 2012 paper~\cite{skopko2012loss} examined the nuance concerns of software based capture solutions. The main observation was software solutions relied heavily on OS packet processing mechanisms. Further more, depending on the mode of operation (e.g. interrupt or polling), the timestamping of packets would change. 
As seen in previous trace work done~\cite{leung2008measurement,roselli2000comparison,seltzer2003nfs}, the general perceptions of how computer systems are being used versus their initial purpose have allowed for great strides in eliminating actual bottlenecks rather than spending unnecessary time working on imagined bottlenecks. Without illumination of these underlying actions (e.g. read-write ratios, file death rates, file access rates) these issues can not be readily tackled. @@ -306,11 +306,11 @@ The filesize used was in a ring buffer where each file captured was 64000 kB. %To simplify this aspect of the capturing process, the entirety of the capturing, dissection, and permanent storage was all automated through watch-dog scripts. \begin{figure*} \includegraphics[width=\textwidth]{./images/packetcapturetopology.png} - \caption{Visualization of Packet Capturing System} + \caption{\textcolor{red}{Visualization of Packet Capturing System}} \label{fig:captureTopology} \end{figure*} The system for taking captured \texttt{.pcap} files and writing them into the DataSeries format (i.e. \texttt{.ds}) does so by first creating a structure (based on a pre-written determination of the data desired to capture). Once the code builds this structure, it then reads through the capture traffic packets while dissecting and filling in the prepared structure with the desired information and format. -Due to the fundamental nature of this work, there is no need to track every piece of information that is exchanged, only that information which illuminates the behavior of the clients \& servers that function over the network (e.g. I/O transactions). It should also be noted that all sensitive information being captured by the tracing system is hashed to protect the users whose information is examined by the tracing system. 
Further more, we now only receive the SMB header information since that contains the I/O information we seek, while the body of the SMB traffic is not passed through to better ensure security of the university's network communications. +Due to the fundamental nature of this work, there is no need to track every piece of information that is exchanged, only that information which illuminates the behavior of the clients \& servers that function over the network (e.g. I/O transactions). It should also be noted that all sensitive information being captured by the tracing system is hashed to protect the users whose information is examined by the tracing system. Furthermore, we now only receive the SMB header information since that contains the I/O information we seek, while the body of the SMB traffic is not passed through to better ensure security of the university's network communications. It is worth noting that in the case of larger SMB headers, some information is lost; this is a trade-off made by the university to provide, on average, the correctly sized SMB header, but it does lead to scenarios where some information may be captured incompletely. \subsection{DataSeries Analysis} @@ -333,18 +333,18 @@ Sessions are any communication where a valid UID and TID is used.
%Total Tuples Seen & 2721 \\ \hline %\textcolor{red}{Maximum Sessions in 15-min Window} & 35 \\ %\hline %Maximum Non-Session in 15-min Window & 2 \\ \hline -Total Days & 41 \\ %\hline -Total Sessions & 2270 \\ %\hline -%Total Non-Sessions & 451 \\ \hline -Number of SMB Operations & 2596081187 \\ %\hline -Number of Read I/Os & 42948043 +Total Days & 21 \\ %\hline +Total Sessions & 2413589 \\ %\hline +%Total Non-Sessions & 279006484 \\ \hline +Number of SMB Operations & 281419686 \\ %\hline +Number of Read I/Os & 8355557 \\ %\hline -Number of Write I/Os & 75465684 \\ %\hline -R:W I/O Ratio & 0.569 \\ %\hline -Number of Creates & 386934029 \\ %\hline -Number of General SMB Operations & 2090733431 \\ \hline -Total Data Read (GB) & 5.858 \\ %\hline -Total Data Written (GB) & 4.747 \\ %\hline +Number of Write I/Os & 7872219 \\ %\hline +R:W I/O Ratio & 1.06 \\ %\hline +Number of Creates & 54486043 \\ %\hline +Number of General SMB Operations & 210705867 \\ \hline +Total Data Read (GB) & 0.97 \\ %\hline +Total Data Written (GB) & 0.6 \\ %\hline Average Read Size (B) & 144 \\ %\hline Average Write Size (B) & 63 \\ \hline %Percentage of Read Bytes of Total Data & 99.4\% \\ %\hline @@ -353,39 +353,38 @@ Average Write Size (B) & 63 \\ \hline %Average R:W Byte Ratio & 0.253996031053668 \\ \hline \end{tabular} \label{tbl:TraceSummaryTotal} -\caption{Summary of Trace I/O Statistics for the time of February 3rd, 2019 to March 15th, 2019} +\caption{Summary of Trace I/O Statistics for the time of April 30th, 2019 to May 20th, 2019} \vspace{-2em} \end{table} % NOTE: Not sure but this reference keeps referencing the WRONG table Table~\ref{tbl:TraceSummaryTotal} -shows a summary of the I/O operations, response times, and inter arrival times observed for the network filesystem. 
This table illustrates that the majority of I/O operations are \textcolor{red}{general; showing that $95.39$\% of the network file system I/O are metadata operations.} +shows a summary of the I/O operations, response times, and inter arrival times observed for the network filesystem. This table illustrates that the majority of I/O operations are general; showing that $74.87$\% of the network file system I/O are metadata operations. -Our examination of the collected network filesystem data revealed interesting patterns for the current use of CIFS/SMB in a large engineering academic setting. \textcolor{red}{The first is that there is a major shift away from read and write operations towards more metadata-based ones. This matches the last CIFS observations made by Leung et.~al.~that files were being generated and accessed infrequently. The change in operations are due to a movement of use activity from reading and writing data to simply checking file and directory metadata. -However, since the earlier study, SMB has transitioned to the SMB2 protocol which was supposed to be less "chatty" and thus we would expect fewer general SMB operations. -Table~\ref{tbl:SMBCommands} shows a breakdown of SMB and SMB2 usage on just March 15th. From this table, one can see that despite the fact that the SMB2 protocol makes up $99.96$\% of total network operations compared to just 0.04\% for SMB, indicating that most clients have upgraded to SMB2. However, $92.79$\% of SMB2 I/O are still general operations. Contrary to purpose of implementing the SMB2 protocol, there is still a large amount of general I/O. } +Our examination of the collected network filesystem data revealed interesting patterns for the current use of CIFS/SMB in a large engineering academic setting. The first is that there is a major shift away from read and write operations towards more metadata-based ones. 
This matches the last CIFS observations made by Leung et~al.\ that files were being generated and accessed infrequently. The change in operations is due to a movement of use activity from reading and writing data to simply checking file and directory metadata. However, since the earlier study, SMB has transitioned to the SMB2 protocol which was supposed to be less ``chatty'' and thus we would expect fewer general SMB operations. Table~\ref{tbl:SMBCommands} shows a breakdown of SMB and SMB2 usage over the time period of May. From this table, one can see that the SMB2 protocol makes up $99.14$\% of total network operations compared to just $0.86$\% for SMB, indicating that most clients have upgraded to SMB2. However, $74.66$\% of SMB2 I/O are still general operations. Contrary to the purpose of implementing the SMB2 protocol, there is still a large amount of general I/O. %While CIFS/SMB protocol has less metadata operations, this is due to a depreciation of the SMB protocol commands, therefore we would expect to see less total operations (e.g. $0.04$\% of total operations). %The infrequency of file activity is further strengthened by our finding that within a week long window of time there are no Read or Write inter arrival times that can be calculated. %\textcolor{red}{XXX we are going to get questioned on this. its not likely that there are no IATs for reads and writes} -\textcolor{red}{General operations happen at very high frequency with inter arrival times that were found to be relatively short (36$\mu$s on average). } +General operations happen at very high frequency with inter arrival times that were found to be relatively short (1317$\mu$s on average). -\textcolor{red}{Taking a deeper look at the SMB2 operations, shown in the bottom half of Table~\ref{tbl:SMBCommands}, we see that $85.41$\% of the general operations are negotiate commands.
These are commands sent by the client to notify the server which dialects of the SMB2 protocol the client can understand. The three next most common commands are close, query info, and query directory. The latter two relate to metadata information of shares and files accessed, however the close operation relates to the create operations relayed over the network. Note that the create command is also used as an open file. The first thing one will notice is that the number of closes is greater than the total number of create operations; by $16.44$\%. These extra close operations are most likely due to applications doing multiple closes that do not need to be done.} +Taking a deeper look at the SMB2 operations, shown in the bottom half of Table~\ref{tbl:SMBCommands}, we see that $9.06$\% of the general operations are negotiate commands. These are commands sent by the client to notify the server which dialects of the SMB2 protocol the client can understand. The three most common commands are close, tree connect, and query info. +The latter two relate to metadata information of shares and files accessed; however, the close operation relates to the create operations relayed over the network. Note that the create command is also used to open a file. The first thing one will notice is that the number of closes is greater than the total number of create operations by $9.35$\%. These extra close operations are most likely due to applications doing multiple closes that do not need to be done.
\begin{table} \centering \begin{tabular}{|l|c|c|c|} \hline I/O Operation & SMB & SMB2 & Both \\ \hline -Read Operations & 23416 & 42924627 & 42948043 \\ -Read \% & 0.016\% & 1.75\%& 1.65\%\\ -Write Operations & 4047 & 75461637 & 75465684 \\ -Write \% & 0.028\% & 3.01\% & 2.91\% \\ -Create Operations & 0 & 386934029 & 386934029 \\ -Create \% & 0.00\% & 15.78\% & 14.9\% \\ -General Operations & 144177220 & 1946556211 & 2090733431 \\ -General \% & 99.98\% & 79.39\% & 80.53\% \\ \hline -Combine Protocol Operations & 144204683 & 2451876504 & 2596081187 \\ -Combined Protocols \% & 5.55\% & 94.45\% & 100\% \\ \hline +Read Operations & 1931 & 8353626 & 8355557 \\ +Read \% & 0.08\% & 2.99\%& 2.97\%\\ +Write Operations & 303 & 7871916 & 7872219 \\ +Write \% & 0.01\% & 2.82\% & 2.80\% \\ +Create Operations & 0 & 54486043 & 54486043 \\ +Create \% & 0.00\% & 19.53\% & 19.36\% \\ +General Operations & 2418980 & 208286887 & 210705867 \\ +General \% & 99.91\% & 74.66\% & 74.87\% \\ \hline +Combine Protocol Operations & 2421214 & 278998472 & 281419686 \\ +Combined Protocols \% & 0.86\% & 99.14\% & 100\% \\ \hline %\end{tabular} %\caption{\label{tbl:SMBCommands}Percentage of SMB and SMB2 Protocol Commands on March 15th} %\end{table} @@ -394,30 +393,71 @@ Combined Protocols \% & 5.55\% & 94.45\% & 100\% \\ \hline %\begin{tabular}{|l|c|c|} \hline \hline SMB2 General Operation & \multicolumn{2}{|c|}{Occurrences} & Percentage of Total \\ \hline -Negotiate & \multicolumn{2}{|c|}{139513151} & 5.69\% \\ -Session Setup & \multicolumn{2}{|c|}{24028626} & 0.98\%\\ -Logoff & \multicolumn{2}{|c|}{1160528} & 0.05\% \\ -Tree Connect & \multicolumn{2}{|c|}{867601720} & 35.39\% \\ -Tree Disconnect & \multicolumn{2}{|c|}{13509426} & 0.55\% \\ -Close & \multicolumn{2}{|c|}{563355572} & 22.97\% \\ -Flush & \multicolumn{2}{|c|}{7224088} & 0.29\% \\ -Lock & \multicolumn{2}{|c|}{11712449} & 0.48\% \\ -IOCtl & \multicolumn{2}{|c|}{35813324} & 1.46\% \\ +Negotiate & \multicolumn{2}{|c|}{25276447} & 9.06\% 
\\ +Session Setup & \multicolumn{2}{|c|}{2041208} & 0.73\%\\ +Logoff & \multicolumn{2}{|c|}{143592} & 0.05\% \\ +Tree Connect & \multicolumn{2}{|c|}{48414491} & 17.35\% \\ +Tree Disconnect & \multicolumn{2}{|c|}{9773361} & 3.5\% \\ +Close & \multicolumn{2}{|c|}{80114256} & 28.71\% \\ +Flush & \multicolumn{2}{|c|}{972790} & 0.35\% \\ +Lock & \multicolumn{2}{|c|}{1389250} & 0.5\% \\ +IOCtl & \multicolumn{2}{|c|}{4475494} & 1.6\% \\ Cancel & \multicolumn{2}{|c|}{0} & 0.00\% \\ -Echo & \multicolumn{2}{|c|}{64369} & 0.003\% \\ -Query Directory & \multicolumn{2}{|c|}{25574851} & 1.04\% \\ -Change Notify & \multicolumn{2}{|c|}{4160183} & 0.17\% \\ -Query Info & \multicolumn{2}{|c|}{213781466} & 8.72\% \\ -Set Info & \multicolumn{2}{|c|}{38941015} & 1.59\% \\ -Oplock Break & \multicolumn{2}{|c|}{118120} & 0.005\% \\ \hline +Echo & \multicolumn{2}{|c|}{4715} & 0.002\% \\ +Query Directory & \multicolumn{2}{|c|}{3443491} & 1.23\% \\ +Change Notify & \multicolumn{2}{|c|}{612850} & 0.22\% \\ +Query Info & \multicolumn{2}{|c|}{27155528} & 9.73\% \\ +Set Info & \multicolumn{2}{|c|}{4447218} & 1.59\% \\ +Oplock Break & \multicolumn{2}{|c|}{22397} & 0.008\% \\ \hline \end{tabular} -\caption{\label{tbl:SMBCommands}Percentage of SMB and SMB2 Protocol Commands on March 15th. Breakdown of General Operations for SMB2} +\caption{\label{tbl:SMBCommands}Percentage of SMB and SMB2 Protocol Commands from April 30th, 2019 to May 20th, 2019. 
Breakdown of General Operations for SMB2} \vspace{-2em} \end{table} +\begin{table}[] +\centering +\begin{tabular}{|l|l|l|} +\hline +SMB2 Filename Extension & Occurrences & Percentage of Total \\ \hline +-Travel & 33396147 & 15.26 \\ +o & 28670784 & 13.1 \\ +e & 28606421 & 13.07 \\ +N & 27639457 & 12.63 \\ +one & 27615505 & 12.62 \\ +\textless{}No Extension\textgreater{} & 27613845 & 12.62 \\ +d & 2799799 & 1.28 \\ +l & 2321338 & 1.06 \\ +x & 2108279 & 0.96 \\ +h & 2019714 & 0.92 \\ \hline +\end{tabular} +\caption{\textcolor{red}{Top 10 File Extensions Seen Over Three Week Period}} +\label{tab:top10SMB2FileExts} +\end{table} + +\begin{table}[] +\centering +\begin{tabular}{|l|l|l|} +\hline +SMB2 Filename Extension & Occurrences & Percentage of Total \\ \hline +doc & 352958 & 0.16 \\ +docx & 291047 & 0.13 \\ +ppt & 46706 & 0.02 \\ +pptx & 38604 & 0.02 \\ +xls & 218031 & 0.1 \\ +xlsx & 180676 & 0.08 \\ +odt & 28 & 1.28e-05 \\ +pdf & 375601 & 0.17 \\ +xml & 1192840 & 0.54 \\ +txt & 167827 & 0.08 \\ \hline +\end{tabular} +\caption{\textcolor{red}{Common File Extensions Seen Over Three Week Period}} +\label{tab:commonSMB2FileExts} +\end{table} + \subsection{I/O Data Request Sizes} -\textcolor{red}{Figures~\ref{fig:IO-All} and~\ref{fig:IO-R+W} show the amount of I/O in 15-minute periods during the week of March 12-18, 2017. -The general I/O (GIO) value is representative of I/O that does not include read, write, or create actions. For the most part, these general I/O are mostly metadata operations. As one can see in Figure~\ref{fig:IO-All}, the general I/O dominates any of the read or write operations. Figure~\ref{fig:IO-R+W} is a magnification of the read and write I/O from Figure~\ref{fig:IO-All}. Here we see that the majority of I/O operations belong to reads. There are some spikes where more write I/O occur, but these events are in the minority. One should also notice that, as would be expected, the spikes of I/O activity occur around the center of the day (e.g. 
8am to 8pm), and during the week (March 12 was a Sunday and March 18 was a Saturday).} \textbf{EVERYTHING HERE FORWARD NEEDS TO BE RE-READ AND RE-WRITTEN} +%\textcolor{red}{Figures~\ref{fig:IO-All} and~\ref{fig:IO-R+W} show the amount of I/O in 15-minute periods during the week of March 12-18, 2017. +%The general I/O (GIO) value is representative of I/O that does not include read, write, or create actions. For the most part, these general I/O are mostly metadata operations. As one can see in Figure~\ref{fig:IO-All}, the general I/O dominates any of the read or write operations. Figure~\ref{fig:IO-R+W} is a magnification of the read and write I/O from Figure~\ref{fig:IO-All}. Here we see that the majority of I/O operations belong to reads. There are some spikes where more write I/O occur, but these events are in the minority. One should also notice that, as would be expected, the spikes of I/O activity occur around the center of the day (e.g. 8am to 8pm), and during the week (March 12 was a Sunday and March 18 was a Saturday).} + %\begin{figure} % \includegraphics[width=0.5\textwidth]{./images/AIO.pdf} % \caption{All I/O} @@ -428,9 +468,7 @@ The general I/O (GIO) value is representative of I/O that does not include read, % \caption{Read and Write I/O} % \label{fig:IO-R+W} %\end{figure} -\textcolor{red}{Figure~\ref{fig:Agg-AvgBytes} shows the average bytes transferred per read and write I/O operation. The most noticeable aspect of this graph is that the average number of bytes for write operations is much larger than that for reads. In other words, in spite of reads being more frequent than writes, each write transfer significantly more data. -%This is contrary to the number of I/O operations shown in Figure~\ref{fig:IO-R+W}, where one would expect that read operations cause the largest number of bytes. -Writes occur in the expected window of the center of each day, however there are reads that spike in-between these times. 
These large reads are due to backup processes reading from the fileservers for archiving to an offsite backup system. } +Figures~\ref{fig:PDF-Bytes-Read} and~\ref{fig:PDF-Bytes-Write} show the probability density functions (PDFs) of the different sizes of bytes transferred for read and write I/O operations, respectively. The most noticeable aspect of these graphs is that the majority of read and write operations transfer around 64 bytes. It is worth noting that write I/O also have a larger number of very small transfer amounts. This is unexpected in terms of the amount of data passed in a frame. Our belief is that this is due to a large number of long-term calculations/scripts being run that only require small but frequent updates. This assumption was later validated in part when examining the files transferred, as some were related to running scripts creating a large volume of files. %This could also be attributed to simple reads relating to metadata\textcolor{red}{???} %\begin{figure} @@ -445,12 +483,24 @@ Writes occur in the expected window of the center of each day, however there are % \label{fig:bytesCompare} %\end{figure} +\begin{figure} + \includegraphics[width=0.5\textwidth]{./images/smb_read_bytes_pdf.pdf} + \caption{PDF of Bytes Transferred for Read I/O} + \label{fig:PDF-Bytes-Read} +\end{figure} + \begin{figure} \includegraphics[width=0.5\textwidth]{./images/smb_read_bytes_cdf.pdf} \caption{CDF of Bytes Transferred for Read I/O} \label{fig:CDF-Bytes-Read} \end{figure} +\begin{figure} + \includegraphics[width=0.5\textwidth]{./images/smb_write_bytes_pdf.pdf} + \caption{PDF of Bytes Transferred for Write I/O} + \label{fig:PDF-Bytes-Write} +\end{figure} + \begin{figure} \includegraphics[width=0.5\textwidth]{./images/smb_write_bytes_cdf.pdf} \caption{CDF of Bytes Transferred for Write I/O} @@ -462,28 +512,29 @@ Writes occur in the expected window of the center of each day, however there are % \caption{CDF of Bytes Transferred for Read+Write I/O}
% \label{fig:CDF-Bytes-RW} %\end{figure} - -\textcolor{red}{Figure~\ref{fig:bytesCompare} shows that the overall total number of bytes is dominated by read I/O. We translate the difference between Figures~\ref{fig:Agg-AvgBytes} and~\ref{fig:bytesCompare} data to mean that while reads still dominate I/O over the network filesystem, the write I/O cause the largest generation of network traffic on average. -Figures~\ref{fig:CDF-Bytes-Read},~\ref{fig:CDF-Bytes-Write}, and~\ref{fig:CDF-Bytes-RW} show cumulative distribution functions (CDF) for bytes read, bytes written, and total bytes transferred respectively. As can be seen, the CDFs are step functions showing clearly that data transfers are on powers-of-2 boundaries. Table~\ref{fig:transferSizes} shows a tabular view of this data. For reads, 49\% are 4K or less, with another 23\% at 64K request sizes. There are no read requests larger than 64K. This data is similar to what was observed by Leung et al. Writes, on the other hand, are very different. Leung et al. showed that writes were 60-70\% less than 4K and 90\% less than 64K. In our data, however, we see that only 4\% of writes are less than 4K, 50\% are 64K requests, and 20\% of requests are very large 1M writes. In the 10 years since the last study, it is clear that writes have become significantly larger. This may be explained by the fact that files are much larger and being written as larger blocks.} +Figures~\ref{fig:CDF-Bytes-Read} and~\ref{fig:CDF-Bytes-Write} show cumulative distribution functions (CDF) for bytes read and bytes written. As can be seen the bytes transferred via reads increases by over $50$\% starting at 32 bytes, while the writes have approximately $20$\% below 32 bytes. Table~\ref{fig:transferSizes} shows a tabular view of this data. For reads, $34.97$\% are between 64 and 512 bytes, with another $28.86$\% at 64 byte request sizes. There are a negligible percentage of read requests larger than 512. 
+This read data is similar to what was observed by Leung et al. Writes, on the other hand, are very different. Leung et al. showed that writes were $60$-$70$\% less than 4K and $90$\% less than 64K. In our data, however, we see that only $11.16$\% of writes are less than 4K, $52.41$\% are 64K requests, and only $43.63$\% of requests are less than 64K writes. +In the ten years since the last study, it is clear that writes have become significantly larger. This may be explained by the fact that large files, and multiple files, are being written as standardized blocks more fitting to the larger data-sets and disk space available. This could be as an effort to improve the fidelity of data across the network, allow for better realtime data consistency between client and backup locations, or could just be due to a large number of scripts being run that create and update a series of relatively smaller documents. +%\textbf{Note: It seems like a change in the order of magnitude that is being passed per packet. What would this indicate?}\textcolor{red}{Answer the question. Shorter reads/writes = better?} \begin{table} \centering \begin{tabular}{|l|c|c|} \hline Transfer size & Reads & Writes \\ \hline -$< 4$ & 0.17\% & 14.14\% \\ -$= 4$ & 2.16\% & 4.36\% \\ -$>4, < 64$ & 19.8\% & 42.57\% \\ -$= 64$ & 35.12\% & 34.12\% \\ -$>64, < 512$ & 42.68\% & 4.8\% \\ -$= 512$ & 0.008\% & 1.06e-5\% \\ -$= 1024$ & 1.48e-5\% & 3.31e-5\% \\ \hline +$< 4$ & 0.098\% & 11.16\% \\ +$= 4$ & 1.16\% & 4.13\% \\ +$>4, < 64$ & 34.89\% & 28.14\% \\ +$= 64$ & 28.86\% & 52.41\% \\ +$>64, < 512$ & 34.97\% & 4.15\% \\ +$= 512$ & 0.002\% & 2.54e-5\% \\ +$= 1024$ & 1.22e-5\% & 3.81e-5\% \\ \hline \end{tabular} \caption{\label{fig:transferSizes}Percentage of transfer sizes for reads and writes} \vspace{-2em} \end{table} -\textcolor{red}{In comparison of the read, write, and create operations we founds that the vast majority of these type of I/O belong to reads. 
Furthermore, read operations account for the largest aggregate of bytes transferred over the network. However, non-intuitively, it is write operations that cause the largest average number of bytes to be transferred per operations; a magnitude more expensive. The observed byte data appears as a step function following in powers of 2 (e.g. 32K, 64K). } +In comparing the read, write, and create operations, we found that the vast majority of these types of I/O are creates. Furthermore, read operations account for the largest aggregate of bytes transferred over the network. However, the number of bytes transferred by write commands is not far behind, although, non-intuitively, it includes a larger number of standardized, relatively smaller writes. The most unexpected finding in the data is that all of the reads and writes are performed using much smaller buffers than expected: orders of magnitude smaller (e.g. bytes instead of kilobytes). % XXX I think we should get rid of this figure - not sure it conveys anything important that is not better conveyed than the CDF %Figure~\ref{fig:Agg-AvgRT} shows the average response time (RT) for the different I/O operations. The revealing information is that write I/Os take the longest average time. This is expected since writes transfer more data on average. There is an odd spike for create I/O which can be due to a batch of files or nested directories being made. There are points where read I/O RT can be seen, but this only occurs in areas where large RT for write I/O occur. This is attributed to a need to verify the written data. 
@@ -523,9 +574,9 @@ $= 1024$ & 1.48e-5\% & 3.31e-5\% \\ \hline \begin{tabular}{|l|l|l|l|l|} \hline & Reads & Writes & Creates & General \\ \hline -I/O \% & 1.65 & \multicolumn{1}{l|}{2.91} & \multicolumn{1}{l|}{14.9} & \multicolumn{1}{l|}{80.53} \\ \hline -Avg RT (ms) & 131773.717697 & \multicolumn{1}{l|}{495.248564} & \multicolumn{1}{l|}{730.298784} & \multicolumn{1}{l|}{9187.611725} \\ \hline -Avg IAT (ms) & 78299.580708 & \multicolumn{1}{l|}{44560.891349} & \multicolumn{1}{l|}{8690.900040} & \multicolumn{1}{l|}{1608.438123} \\ \hline +I/O \% & 2.97 & \multicolumn{1}{l|}{2.80} & \multicolumn{1}{l|}{19.36} & \multicolumn{1}{l|}{74.87} \\ \hline +Avg RT ($\mu$s) & 59819.687407 & \multicolumn{1}{l|}{519.703834} & \multicolumn{1}{l|}{698.082457} & \multicolumn{1}{l|}{7013.37566} \\ \hline +Avg IAT ($\mu$s) & 33220.780444 & \multicolumn{1}{l|}{35260.421498} & \multicolumn{1}{l|}{5094.474400} & \multicolumn{1}{l|}{1317.374383} \\ \hline %\hline %Total RT (s) & 224248 & \multicolumn{1}{l|}{41100} & \multicolumn{1}{l|}{342251} & \multicolumn{1}{l|}{131495} \\ \hline %\% Total RT & 30.34\% & \multicolumn{1}{l|}{5.56\%} & \multicolumn{1}{l|}{46.3\%} & \multicolumn{1}{l|}{17.79\%} \\ \hline @@ -549,10 +600,12 @@ Avg IAT (ms) & 78299.580708 & \multicolumn{1}{l|}{44560.891349} & \multicol %~!~ Addition since Chandy writing ~!~% Most previous tracing work has not provided data on I/O response times or command latency which serves as an approximation of server load. In -Table~\ref{tbl:PercentageTraceSummary} we show a summary of the response times for read, write, create, and general commands. We that most general operations have short average response times (24.17 $\mu$s). This exemplifies that these general operations occur in great numbers, run very quickly, and happen at high frequency. -Other observations of the data show that the number of writes are very few, although the response time for their operations is the longest. 
Creates happen more often, but have a quicker response time, because most of the create commands are actually opens. Although read operations are only a few percentage of the total operations they have a greater average response time than creates. +Table~\ref{tbl:PercentageTraceSummary} we show a summary of the response times for read, write, create, and general commands. We note that most general operations have the second longest average response times ($7013.38$ $\mu$s). This exemplifies that these general operations occur in great numbers, run relatively slowly, and happen at high frequency. +Other observations of the data show that the number of writes is very close to the number of reads, although the response time for their operations is the shortest. Creates happen more often, but have a slightly slower response time, because most of the create commands are actually opens. Although read operations are only a few percent of the total operations, they have the greatest average response time, exceeding even that of general I/O. -To get an indication of how much of an effect these general commands take on overall latency, we also calculated the total aggregate response time for read, write, create, and general operations. We see that even though general commands account for 95\% of all commands, they only account for only 17.8\% of the total response time. Thus, while the volume of general operations does not present an extraordinary burden on server load, reducing these operations can present a clear performance benefit. We also see that creates take the most amount of time ($46.3$\%) of the total response time for all operations. As seen in Table~\ref{tbl:SMBCommands}, the majority of general operations are negotiations while $6.38$\% are closes; which relate to create operations. 
This shows that while creates are only $5.08$\% on March 15th (and $2.5$\% of the week's operations shown in Table~\ref{tbl:PercentageTraceSummary}) of the total operations performed, they are responsible for $46.3$\% of the time spent performing network I/O. +%\textcolor{red}{To get an indication of how much of an effect these general commands take on overall latency, we also calculated the total aggregate response time for read, write, create, and general operations. We see that even though general commands account for $74.87$\% of all commands, they only account for only $17.8$\% of the total response time. Thus, while the volume of general operations does not present an extraordinary burden on server load, reducing these operations can present a clear performance benefit. We also see that creates take the most amount of time ($46.3$\%) of the total response time for all operations. As seen in Table~\ref{tbl:SMBCommands}, the majority of general operations are negotiations while $28.71$\% are closes; which relate to create operations. +%This shows that while creates are only $5.08$\% on March 15th (and $2.5$\% of the week's operations shown in Table~\ref{tbl:PercentageTraceSummary}) of the total operations performed, they are responsible for $46.3$\% of the time spent performing network I/O.} +%\textbf{Do we need this above data piece?} % %% Not Needed to Say Since we have no data %%One key observation is that there were no inter arrival time calculations for read, write, or create operations. We interpret this data to reflect the observations of Leung et.~al.~that noticed that files are interacted with only a few times and then not interacted with again. Extrapolating this concept, we interpret the data to illustrate that files may be read or written once, but then are not examined or interacted with again. 
@@ -622,11 +675,18 @@ To get an indication of how much of an effect these general commands take on ove %% \end{itemize} %%\end{enumerate} % -Figure~\ref{fig:CDF-IAT-General} shows the inter arrival times CDF for general I/O. As can be seen, SMB commands happen very frequently - 85\% of commands are issued less than 20~$\mu s$ apart. As was mentioned above, the SMB protocol is known to be very chatty, and it is clear that servers must spend a lot of time dealing with these commands. For the most part, most of these commands are also serviced fairly quickly as well as seen in Figure~\ref{fig:CDF-RT-General}. Interestingly, the response/return time (RT) for the general metadata operations follows a linear growth. +Figure~\ref{fig:CDF-IAT-General} shows the inter arrival times CDF for general I/O. As can be seen, SMB commands happen very frequently - $85$\% of commands are issued less than 1024~$\mu s$ apart. As was mentioned above, the SMB protocol is known to be very chatty, and it is clear that servers must spend a lot of time dealing with these commands. For the most part, most of these commands are also serviced fairly quickly as well as seen in Figure~\ref{fig:CDF-RT-General}. Interestingly, the response/return time (RT) for the general metadata operations follows a similar curve to the inter-arrival times. -Next we examine the response time (RT) of the read, write, and create I/O operations that occur over the SMB network filesystem. The response time for write operations (shown in Figure~\ref{fig:CDF-RT-Write}) follows a step function similar to the bytes written CDF in Figure~\ref{fig:CDF-Bytes-Write}. This is understandable as the response time for a write would be expected to be proportional to the number of bytes written. However, the read response time (Figure~\ref{fig:CDF-RT-Read}) is smoother than the bytes read CDF (Figure~\ref{fig:CDF-Bytes-Write}). 
This is most likely due to the fact that some of the reads are satisfied by server caches, thus eliminating some long access times to persistent storage. +Next we examine the response time (RT) of the read, write, and create I/O operations that occur over the SMB network filesystem. The response time for write operations (shown in Figure~\ref{fig:CDF-RT-Write}) does not follow a step function like that of the bytes written CDF in Figure~\ref{fig:CDF-Bytes-Write}. This is understandable as the response time for a write would be expected to be a more standardized action and not necessarily proportional to the number of bytes written. However, the read response time (Figure~\ref{fig:CDF-RT-Read}) is smoother than the bytes read CDF (Figure~\ref{fig:CDF-Bytes-Read}). This is most likely due to the fact that some of the reads are satisfied by server caches, thus eliminating some long access times to persistent storage. +However, one should notice that the response time on read operations grows at a rate similar to that of write operations. This, again, shows a form of standardization in the communication patterns, although some read I/O takes a far greater period of time due to larger amounts of read data being sent over several standardized-size packets. %While the RT for Write operations are not included (due to their step function behavior) Figure~\ref{fig:CDF-RT-Read} and Figure~\ref{fig:CDF-RT-RW} show the response times for Read and Read+Write operations respectively. T -The write I/O step function behavior is somewhat visible in the CDF of both reads and writes in Figure~\ref{fig:CDF-RT-RW}. Moreover, this shows that the majority (80\%) of read (and write) operations occur within 2~$ms$, the average access time for enterprise storage disks. As would be expected, this is still an order of magnitude greater than the general I/O. 
+%\textcolor{red}{The write I/O step function behavior is somewhat visible in the CDF of both reads and writes in Figures~\ref{fig:CDF-RT-Read}~and~\ref{fig:CDF-RT-Write}. Moreover, this shows that the majority ($80$\%) of read (and write) operations occur within 2~$ms$, the average access time for enterprise storage disks. As would be expected, this is still an order of magnitude greater than the general I/O.} + +\subsection{File Extensions} +Tables~\ref{tab:top10SMB2FileExts} and~\ref{tab:commonSMB2FileExts} show a representation of the various file extensions that were seen within the three-week capture period. The easier of the two to understand is Table~\ref{tab:commonSMB2FileExts}, which illustrates the number of common file extensions (e.g. doc, ppt, xls, pdf) that were part of the data. +The greatest point of note is that the highest percentage is ``.xml'' with $0.54$\%, which we found to be a surprising result. Originally we expected that these common file extensions would make up a much larger share of traffic: more than $2$\% of total traffic. These concerns were further raised by the results of Table~\ref{tab:top10SMB2FileExts}, which shows the top ten file extensions present in the data; these make up approximately $84$\% of the total seen. +Furthermore, the majority of extensions seem very strange if not nonsensical. Upon closer examination of the tracing system, it was determined that these file extensions are an artifact of how Windows interprets file extensions. The Windows operating system merely guesses the file type based on the assumed extension (i.e., whatever characters follow the final `.'). +There are a large number of files that do not meet this standard idea of having an extension, and we posit an assortment of scenarios that would cause this issue. These range from Linux-based library files, manual pages, and odd naming schemes as part of scripts or back-up files to date-times and IPs as file names. 
There are undoubtedly a larger number more, but exhaustive determination of all variations is seen as out of scope for this work. % Note: RT + IAT time CDFs exist in data output @@ -795,15 +855,18 @@ Model & \multicolumn{3}{|c|}{Gaussian} CDF & \multicolumn{3}{|c|}{$\frac{1}{\sqrt{2\pi}}\int_{-\infty}^{\frac{x-\mu}{\sigma}}e^{\frac{-t^2}{2}}dt$} & \multicolumn{3}{|c|}{$1 - e^{(-x/\lambda)^k}$} \\ \hline \hline I/O Operation & $\mu$ & \multicolumn{2}{|c|}{$\sigma$} & $k$ & \multicolumn{2}{|c|}{$\lambda$} \\ \hline -General IAT & 0.8508 & \multicolumn{2}{|c|}{0.2429} & 4.593 & \multicolumn{2}{|c|}{0.976} \\ -General RT & 0.5945 & \multicolumn{2}{|c|}{0.2694} & 2.2993 & \multicolumn{2}{|c|}{0.6665} \\ -Read RT & 0.8322 & \multicolumn{2}{|c|}{0.194} & 6.147 & \multicolumn{2}{|c|}{0.8963} \\ -Write RT & 0.6918 & \multicolumn{2}{|c|}{0.2898} & 2.455 & \multicolumn{2}{|c|}{0.7675} \\ -Create RT & 0.8539 & \multicolumn{2}{|c|}{0.1906} & 6.379 & \multicolumn{2}{|c|}{0.9103} \\ -R+W RT & 0.8045 & \multicolumn{2}{|c|}{0.2122} & 5.103 & \multicolumn{2}{|c|}{0.3937} \\ \hline -R+W Byte Transfer & 0.3744 & \multicolumn{2}{|c|}{0.2983} & 1.153 & \multicolumn{2}{|c|}{0.3937} \\ -Read Buff Transfer & 0.3737 & \multicolumn{2}{|c|}{0.2982} & 1.152 & \multicolumn{2}{|c|}{0.3928} \\ -Write Buff Transfer & 0.5742 & \multicolumn{2}{|c|}{0.2428} & 1.806 & \multicolumn{2}{|c|}{0.6172} \\ \hline +General IAT & 786.72 & \multicolumn{2}{|c|}{10329.6} & 0.9031 & \multicolumn{2}{|c|}{743.2075} \\ +General RT & 3606.66 & \multicolumn{2}{|c|}{2.74931e+06} & 0.5652 & \multicolumn{2}{|c|}{980.9721} \\ +Read RT & 44718.5 & \multicolumn{2}{|c|}{1.72776e+07} & 0.0004 & \multicolumn{2}{|c|}{1.5517} \\ +Read IAT & 24146 & \multicolumn{2}{|c|}{1.189e+07} & 0.0005 & \multicolumn{2}{|c|}{3.8134} \\ +Write RT & 379.823 & \multicolumn{2}{|c|}{4021.72} & 0.8569 & \multicolumn{2}{|c|}{325.2856} \\ +Write IAT & 25785.7 & \multicolumn{2}{|c|}{1.22491e+07} & 0.0004 & \multicolumn{2}{|c|}{3.1287} \\ +Create RT & 
502.084 & \multicolumn{2}{|c|}{21678.4} & 0.9840 & \multicolumn{2}{|c|}{496.9497} \\ +Create IAT & 3694.82 & \multicolumn{2}{|c|}{4.65553e+06} & 0.0008 & \multicolumn{2}{|c|}{2.3504} \\ \hline +%R+W RT & \textcolor{red}{0.8045} & \multicolumn{2}{|c|}{\textcolor{red}{0.2122}} & \textcolor{red}{5.103} & \multicolumn{2}{|c|}{\textcolor{red}{0.3937}} \\ \hline +%R+W Byte Transfer & \textcolor{red}{0.3744} & \multicolumn{2}{|c|}{\textcolor{red}{0.2983}} & \textcolor{red}{1.153} & \multicolumn{2}{|c|}{\textcolor{red}{0.3937}} \\ +Read Buff Transfer & 82.9179 & \multicolumn{2}{|c|}{1117.9} & 1.0548 & \multicolumn{2}{|c|}{85.2525} \\ +Write Buff Transfer & 46.2507 & \multicolumn{2}{|c|}{640.621} & 1.0325 & \multicolumn{2}{|c|}{46.8707} \\ \hline \end{tabular} \caption{\label{tbl:curveFitting}Comparison of %$R^2$ $\mu$, $\sigma$, $k$, and $\lambda$ Values for Curve Fitting Equations on CDF Graphs} @@ -832,10 +895,13 @@ $\mu$, $\sigma$, $k$, and $\lambda$ Values for Curve Fitting Equations on CDF Gr %Examination of the Response Time (RT) and Inter Arrival Times (IAT) revealed the speed and frequency with which metadata operations are performed, as well as the infrequency of individual users and sessions to interact with a given share. +%% NEED: Run the matlab curve fitting to complete this section of the writing Our comparison of the existing standard use of a exponential distribution to model network interarrival and response times is still valid. One should notice that the Gaussian distributions % had better $R^2$ result than the exponential equivalent for write operations. This is not surprising due to the step-function shape of the Figure~\ref{fig:CDF-RT-Write} CDF. Examining the $R^2$ results for the read + write I/O operations we find that the exponential distribution is far more accurate at modeling this combined behavior. -for read and create operations are similar, while those for write operations are not. 
Further more there is less similarity between the modeled behavior of general operation inter arrival times and their response times, showing the need for a more refined model for each aspects of the network filesystem interactions. One should also notice that the read + write operation model is more closely similar to that of the reads. -This makes sense since the influence of read operations are found to dominate the I/O behavior of the network filesystem, which improves the ability of a exponential distribution to model the combined behavior. +for write and create operations are similar, while those for read operations are not. Furthermore, there is less similarity between the modeled behavior of general operation inter arrival times and their response times, showing the need for a more refined model for each aspect of the network filesystem interactions. +One should also notice that the general operation model is more similar to that of the creates. +This makes sense since the influence of create operations is found to dominate the I/O behavior of the network filesystem, which aligns well with the number of existing close operations. +%improves the ability of a exponential distribution to model the combined behavior.} %Observations: %\begin{itemize} % \item Byte data appears in powers of 2 (e.g. 32K, 64K) @@ -874,16 +940,17 @@ A complication of this process is that the DataSeries code makes use of a push-p Normally, one could simply re-perform the conversion process to a DataSeries file, but due to the rate of the packets being captured and security concerns of the data being captured, we are unable to re-run any captured information. \section{Conclusions and Future Work} -Our analysis of this university network filesystem illustrated the current implementation and use of the CIFS/SMB protocol in a large academic setting. We notice the effect of caches on the ability of the filesystem to limit the number of accesses to persistant storage. 
The effect of enterprise storage disks access time can seen in the response time for read and write I/O. The majority of network communication is dominated by metadata operation, which is of less surprise since SMB is a known chatty protocol. We do notice that the CIFS/SMB protocol continues to be chatty with metadata I/O operations regardless of the version of SMB being implemented; $92.79$\% of I/O being metadata operations for SMB2. -We also find that while read operations happen in far greater number than write operations (at a ratio of 4.35), the size of the transfers are far less. In fact, the size of the average write operation is an order of magnitude greater than that of reads. Examination of the return times for these different I/O operations shows that exponential distribution curve fitting equation is most accurate at modeling the CDF of the various I/O operations. This shows that the current model is still effective for the majority of I/O, but that for write operations there needs to be further research in modeling the step behavior. +Our analysis of this university network filesystem illustrated the current implementation and use of the CIFS/SMB protocol in a large academic setting. We notice the effect of caches on the ability of the filesystem to limit the number of accesses to persistent storage. The effect of enterprise storage disk access time can be seen in the response time for read and write I/O. The majority of network communication is dominated by metadata operations, which is of little surprise since SMB is a known chatty protocol. We do notice that the CIFS/SMB protocol continues to be chatty with metadata I/O operations regardless of the version of SMB being implemented; $74.66$\% of I/O being metadata operations for SMB2. +We also find that read operations happen in greater number than write operations (at a ratio of 1.06) and the size of their transfers is also greater by a factor of about 2. 
+However, the average write operation includes a larger number of relatively smaller writes. Examination of the return times for these different I/O operations shows that exponential distribution curve fitting equation is most accurate at modeling the CDF of the various I/O operations. This shows that the current model is still effective for the majority of I/O, but that for read operations there needs to be further research in modeling their behavior. %Our work finds that a single term Gaussian distribution has an $R^2$ value of $0.7797$, but further work needs to be made in order to refine the model. -Our work finds that read and create response times can be modeled similarly, but that the write response times require the alteration of the general model. -However, a combination of read and write I/O can be modeled using the same standard; which has similar shape and scale to that of the read and create operations. +Our work finds that write and create response times can be modeled similarly, but that the read response times require the alteration of the general model. +However, the general I/O can be modeled using the same standard; which has similar shape and scale to that of the write and create operations. \subsection{Future Work} The analysis work will eventually incorporate oplocks and other aspects of resource sharing on the network to gain a more complete picture of the network's usage and bottlenecks. Network filesystem usage from an individual user scope has become simple and does not contain a greater deal of read, write, and create operations. -Further analysis will be made in examining how the determined metircs change when examined at the scope of a per share (i.e. TID) or per user (i.e. UID). At this level of examination we will be able to obtain a better idea of how each share is interacted with, as well as how files and directories are shared and access control is implemented. 
+Further analysis will be made in examining how the determined metrics change when examined at the scope of a per share (i.e. TID) or per user (i.e. UID). At this level of examination we will be able to obtain a better idea of how each share is interacted with, as well as how files and directories are shared and access control is implemented. %\end{document} % This is where a 'short' article might terminate