diff --git a/Thumbs.db b/Thumbs.db index 55dc797..420a7bf 100644 Binary files a/Thumbs.db and b/Thumbs.db differ diff --git a/TracingPaper.aux b/TracingPaper.aux index 63c559c..644f585 100644 --- a/TracingPaper.aux +++ b/TracingPaper.aux @@ -8,21 +8,26 @@ \citation{Anderson2004} \@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}} \newlabel{Introduction}{{1}{1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Purpose of Tracing}{1}} -\newlabel{Purpose of Tracing}{{1.1}{1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Issues with Tracing}{1}} -\newlabel{Issues with Tracing}{{1.2}{1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Issues with Tracing}{1}} +\newlabel{Issues with Tracing}{{1.1}{1}} \citation{Anderson2004} +\citation{Traeger2008} +\citation{Vogels1999} +\citation{Dabir2008} +\citation{Orosz2013} +\citation{Skopko2012} +\citation{Ellard2003} \citation{EllardLedlie2003} +\citation{Ruemmler1993} +\citation{Roselli2000} +\citation{Ruemmler1993} +\citation{Traeger2008} +\citation{Ellard2003} \citation{EllardLedlie2003} -\citation{EllardLedlie2003} +\citation{Douceur1999} +\citation{Leung2008} \citation{Ellard2003} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.3}Previous Advances Due to Testing}{2}} -\newlabel{Previous Advances Due to Testing}{{1.3}{2}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {1.3.1}Ellard Ledlie 2003}{2}} -\newlabel{Ellard Ledlie 2003}{{1.3.1}{2}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {1.3.2}Ellard 2003}{2}} -\newlabel{Ellard 2003}{{1.3.2}{2}} +\citation{Roselli2000} \citation{Leung2008} \citation{Leung2008} \citation{Ellard2003} @@ -36,37 +41,43 @@ \citation{PFRING} \citation{Ellard2003} \citation{Anderson2004} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Previous Advances Due to Testing}{2}} +\newlabel{Previous Advances Due to Testing}{{1.2}{2}} +\@writefile{toc}{\contentsline {subsection}{\numberline 
{1.3}The Need for a New Study}{2}} +\newlabel{The Need for a New Study}{{1.3}{2}} +\@writefile{toc}{\contentsline {section}{\numberline {2}Methodology}{2}} +\newlabel{Methodology}{{2}{2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}System Limitations}{2}} +\newlabel{System Limitations}{{2.1}{2}} \citation{Leung2008} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.4}The Need for a New Study}{3}} -\newlabel{The Need for a New Study}{{1.4}{3}} -\@writefile{toc}{\contentsline {section}{\numberline {2}Methodology}{3}} -\newlabel{Methodology}{{2}{3}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}System Limitations}{3}} -\newlabel{System Limitations}{{2.1}{3}} +\citation{Ellard2003} \@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Main Challenges}{3}} \newlabel{Main Challenges}{{2.2}{3}} \@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Interpretation of Data}{3}} \newlabel{Interpretation of Data}{{2.3}{3}} -\citation{Ellard2003} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.4}Scope of Interpretation}{4}} -\newlabel{Scope of Interpretation}{{2.4}{4}} -\@writefile{toc}{\contentsline {section}{\numberline {3}Tracing System}{4}} -\newlabel{Tracing System}{{3}{4}} -\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Stages of Trace}{4}} -\newlabel{Stages of Trace}{{3.1}{4}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.1}Capture}{4}} -\newlabel{Capture}{{3.1.1}{4}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.4}Scope of Interpretation}{3}} +\newlabel{Scope of Interpretation}{{2.4}{3}} +\@writefile{toc}{\contentsline {section}{\numberline {3}Tracing System}{3}} +\newlabel{Tracing System}{{3}{3}} +\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Stages of Trace}{3}} +\newlabel{Stages of Trace}{{3.1}{3}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.1}Capture}{3}} +\newlabel{Capture}{{3.1.1}{3}} +\citation{MS-CIFS} 
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.2}Collection}{4}} \newlabel{Collection}{{3.1.2}{4}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.3}Dissection/Analysis}{4}} \newlabel{Dissection/Analysis}{{3.1.3}{4}} -\citation{MS-CIFS} -\@writefile{toc}{\contentsline {section}{\numberline {4}Trace Analysis}{5}} -\newlabel{Trace Analysis}{{4}{5}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}SMB}{5}} -\newlabel{SMB}{{4.1}{5}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}ID Tracking}{5}} -\newlabel{ID Tracking}{{4.2}{5}} +\@writefile{toc}{\contentsline {section}{\numberline {4}Trace Analysis}{4}} +\newlabel{Trace Analysis}{{4}{4}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}SMB}{4}} +\newlabel{SMB}{{4.1}{4}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}ID Tracking}{4}} +\newlabel{ID Tracking}{{4.2}{4}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}System Information and Predictions}{5}} +\newlabel{System Information and Predictions}{{4.3}{5}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Run Patterns}{5}} +\newlabel{Run Patterns}{{4.4}{5}} \bibcite{Leung2008}{1} \bibcite{Ellard2003}{2} \bibcite{EllardLedlie2003}{3} @@ -77,22 +88,20 @@ \bibcite{Skopko2012}{8} \bibcite{MS-CIFS}{9} \bibcite{MS-SMB}{10} -\bibcite{MS-SMB2}{11} -\bibcite{Roselli2000}{12} -\bibcite{Vogels1999}{13} -\bibcite{Meyer2012}{14} -\bibcite{PFRING}{15} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Run Patterns}{6}} -\newlabel{Run Patterns}{{4.3}{6}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Locating Performance Bottlenecks}{6}} -\newlabel{Locating Performance Bottlenecks}{{4.4}{6}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.5}Run Patterns}{6}} -\newlabel{Run Patterns}{{4.5}{6}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.6}Locating Performance Bottlenecks}{6}} -\newlabel{Locating Performance 
Bottlenecks}{{4.6}{6}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.5}Locating Performance Bottlenecks}{6}} +\newlabel{Locating Performance Bottlenecks}{{4.5}{6}} \@writefile{toc}{\contentsline {section}{\numberline {5}Intuition Confirm/Change}{6}} \newlabel{Intuition Confirm/Change}{{5}{6}} \@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Characterizations of Different Packet Types}{6}} \newlabel{Characterizations of Different Packet Types}{{5.1}{6}} \@writefile{toc}{\contentsline {section}{\numberline {6}Conclusion}{6}} \newlabel{Conclusion}{{6}{6}} +\bibcite{MS-SMB2}{11} +\bibcite{Roselli2000}{12} +\bibcite{Vogels1999}{13} +\bibcite{Meyer2012}{14} +\bibcite{PFRING}{15} +\bibcite{Traeger2008}{16} +\bibcite{Kavalanekar2009}{17} +\bibcite{Douceur1999}{18} +\bibcite{Ruemmler1993}{19} diff --git a/TracingPaper.log b/TracingPaper.log deleted file mode 100644 index f636bf3..0000000 --- a/TracingPaper.log +++ /dev/null @@ -1,223 +0,0 @@ -This is pdfTeX, Version 3.1415926-2.5-1.40.14 (MiKTeX 2.9 64-bit) (preloaded format=pdflatex 2014.12.22) 22 DEC 2014 14:56 -entering extended mode -**C:/Users/Wortman/Documents/UConn/TracingPaper/TracingPaper.tex -(C:/Users/Wortman/Documents/UConn/TracingPaper/TracingPaper.tex -LaTeX2e <2014/05/01> -Babel <3.9l> and hyphenation patterns for 68 languages loaded. 
-(C:\Users\Wortman\Documents\UConn\TracingPaper\usetex-v1.cls -Document Class: usetex-v1 2002/10/31 v1.2 usetex Usenix article class -("C:\Program Files\MiKTeX 2.9\tex\latex\base\article.cls" -Document Class: article 2014/09/29 v1.4h Standard LaTeX document class -("C:\Program Files\MiKTeX 2.9\tex\latex\base\size10.clo" -File: size10.clo 2014/09/29 v1.4h Standard LaTeX file (size option) -) -\c@part=\count79 -\c@section=\count80 -\c@subsection=\count81 -\c@subsubsection=\count82 -\c@paragraph=\count83 -\c@subparagraph=\count84 -\c@figure=\count85 -\c@table=\count86 -\abovecaptionskip=\skip41 -\belowcaptionskip=\skip42 -\bibindent=\dimen102 -) -("C:\Program Files\MiKTeX 2.9\tex\latex\endnotes\endnotes.sty" -\c@endnote=\count87 -\endnotesep=\dimen103 -\@enotes=\write3 -) -\@discard=\box26 - -("C:\Program Files\MiKTeX 2.9\tex\latex\psnfss\times.sty" -Package: times 2005/04/12 PSNFSS-v9.2a (SPQR) -) -Warning: endnotes support is deprecated (see documentation for details) -\@sectionaboveskip=\skip43 -\@sectionbelowskip=\skip44 -\@subsectionaboveskip=\skip45 -) ("C:\Program Files\MiKTeX 2.9\tex\latex\graphics\epsfig.sty" -Package: epsfig 1999/02/16 v1.7a (e)psfig emulation (SPQR) - -("C:\Program Files\MiKTeX 2.9\tex\latex\graphics\graphicx.sty" -Package: graphicx 2014/10/28 v1.0g Enhanced LaTeX Graphics (DPC,SPQR) - -("C:\Program Files\MiKTeX 2.9\tex\latex\graphics\keyval.sty" -Package: keyval 2014/10/28 v1.15 key=value parser (DPC) -\KV@toks@=\toks14 -) -("C:\Program Files\MiKTeX 2.9\tex\latex\graphics\graphics.sty" -Package: graphics 2014/10/28 v1.0p Standard LaTeX Graphics (DPC,SPQR) - -("C:\Program Files\MiKTeX 2.9\tex\latex\graphics\trig.sty" -Package: trig 1999/03/16 v1.09 sin cos tan (DPC) -) -("C:\Program Files\MiKTeX 2.9\tex\latex\00miktex\graphics.cfg" -File: graphics.cfg 2007/01/18 v1.5 graphics configuration of teTeX/TeXLive -) -Package graphics Info: Driver file: pdftex.def on input line 91. 
- -("C:\Program Files\MiKTeX 2.9\tex\latex\pdftex-def\pdftex.def" -File: pdftex.def 2011/05/27 v0.06d Graphics/color for pdfTeX - -("C:\Program Files\MiKTeX 2.9\tex\generic\oberdiek\infwarerr.sty" -Package: infwarerr 2010/04/08 v1.3 Providing info/warning/error messages (HO) -) -("C:\Program Files\MiKTeX 2.9\tex\generic\oberdiek\ltxcmds.sty" -Package: ltxcmds 2011/11/09 v1.22 LaTeX kernel commands for general use (HO) -) -\Gread@gobject=\count88 -)) -\Gin@req@height=\dimen104 -\Gin@req@width=\dimen105 -) -\epsfxsize=\dimen106 -\epsfysize=\dimen107 -) -("C:\Program Files\MiKTeX 2.9\tex\latex\url\url.sty" -\Urlmuskip=\muskip10 -Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc. -) - -LaTeX Warning: Unused global option(s): - [XXX]. - -(C:\Users\Wortman\Documents\UConn\TracingPaper\TracingPaper.aux - -LaTeX Warning: Label `Run Patterns' multiply defined. - - -LaTeX Warning: Label `Locating Performance Bottlenecks' multiply defined. - -) -LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 54. -LaTeX Font Info: ... okay on input line 54. -LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 54. -LaTeX Font Info: ... okay on input line 54. -LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 54. -LaTeX Font Info: ... okay on input line 54. -LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 54. -LaTeX Font Info: ... okay on input line 54. -LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 54. -LaTeX Font Info: ... okay on input line 54. -LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 54. -LaTeX Font Info: ... okay on input line 54. -LaTeX Font Info: Try loading font information for OT1+ptm on input line 54. - ("C:\Program Files\MiKTeX 2.9\tex\latex\psnfss\ot1ptm.fd" -File: ot1ptm.fd 2001/06/04 font definitions for OT1/ptm. -) -("C:\Program Files\MiKTeX 2.9\tex\context\base\supp-pdf.mkii" -[Loading MPS to PDF converter (version 2006.09.02).] 
-\scratchcounter=\count89 -\scratchdimen=\dimen108 -\scratchbox=\box27 -\nofMPsegments=\count90 -\nofMParguments=\count91 -\everyMPshowfont=\toks15 -\MPscratchCnt=\count92 -\MPscratchDim=\dimen109 -\MPnumerator=\count93 -\makeMPintoPDFobject=\count94 -\everyMPtoPDFconversion=\toks16 -) -LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <14.4> not available -(Font) Font shape `OT1/ptm/b/n' tried instead on input line 76. -LaTeX Font Info: External font `cmex10' loaded for size -(Font) <12> on input line 76. -LaTeX Font Info: External font `cmex10' loaded for size -(Font) <8> on input line 76. -LaTeX Font Info: External font `cmex10' loaded for size -(Font) <6> on input line 76. -LaTeX Font Info: Try loading font information for OT1+pcr on input line 76. - ("C:\Program Files\MiKTeX 2.9\tex\latex\psnfss\ot1pcr.fd" -File: ot1pcr.fd 2001/06/04 font definitions for OT1/pcr. -) -LaTeX Font Info: External font `cmex10' loaded for size -(Font) <7> on input line 76. -LaTeX Font Info: External font `cmex10' loaded for size -(Font) <5> on input line 76. -LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <12> not available -(Font) Font shape `OT1/ptm/b/n' tried instead on input line 78. -Missing character: There is no â in font ptmr7t! -Missing character: There is no € in font ptmr7t! -Missing character: There is no ś in font ptmr7t! -Missing character: There is no â in font ptmr7t! -Missing character: There is no € in font ptmr7t! -Missing character: There is no ť in font ptmr7t! -LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <10> not available -(Font) Font shape `OT1/ptm/b/n' tried instead on input line 98. 
- [1{C:/ProgramData/MiKTeX/2.9/pdftex/config/pdftex.map} - - -] -Underfull \hbox (badness 10000) in paragraph at lines 100--101 -[]\OT1/ptm/m/n/10 [CLOSING SEN-TENCES?]While per-form-ing a - [] - - -Underfull \hbox (badness 1596) in paragraph at lines 100--101 -\OT1/ptm/m/n/10 ma-chines com-mu-ni-cat-ing with each other, and the - [] - -LaTeX Font Info: Try loading font information for OMS+ptm on input line 108. - -("C:\Program Files\MiKTeX 2.9\tex\latex\psnfss\omsptm.fd" -File: omsptm.fd -) -LaTeX Font Info: Font shape `OMS/ptm/m/n' in size <10> not available -(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 108. - [2] [3] -Underfull \vbox (badness 10000) has occurred while \output is active [] - - [4] -LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <10> not available -(Font) Font shape `OT1/ptm/b/it' tried instead on input line 185. - -Underfull \vbox (badness 10000) has occurred while \output is active [] - - [5] -Underfull \hbox (badness 10000) in paragraph at lines 408--409 -[]\OT1/ptm/m/it/10 Common In-ter-net File Sys-tem (CIFS) Pro- - [] - - -Underfull \hbox (badness 10000) in paragraph at lines 408--409 -\OT1/ptm/m/it/10 to-col\OT1/ptm/m/n/10 , urlhttp://msdn.microsoft.com/en- - [] - - -Underfull \hbox (badness 10000) in paragraph at lines 410--411 -[]\OT1/ptm/m/it/10 Server Mes-sage Block (SMB) Pro-to- - [] - - -Underfull \hbox (badness 10000) in paragraph at lines 410--411 -\OT1/ptm/m/it/10 col\OT1/ptm/m/n/10 , urlhttp://msdn.microsoft.com/en- - [] - -[6] (C:\Users\Wortman\Documents\UConn\TracingPaper\TracingPaper.aux) - -LaTeX Warning: There were multiply-defined labels. 
- - ) -Here is how much of TeX's memory you used: - 1470 strings out of 493705 - 19738 string characters out of 3144563 - 80869 words of memory out of 3000000 - 4797 multiletter control sequences out of 15000+200000 - 20443 words of font info for 42 fonts, out of 3000000 for 9000 - 1025 hyphenation exceptions out of 8191 - 34i,8n,21p,1930b,437s stack positions out of 5000i,500n,10000p,200000b,50000s -{C:/Program Files/MiKTeX 2.9/fonts/enc/dvips/fontname/8r.enc} -Output written on TracingPaper.pdf (6 pages, 104879 bytes). -PDF statistics: - 46 PDF objects out of 1000 (max. 8388607) - 0 named destinations out of 1000 (max. 500000) - 1 words of extra memory for PDF output out of 10000 (max. 10000000) - diff --git a/TracingPaper.pdf b/TracingPaper.pdf deleted file mode 100644 index b52b3ac..0000000 Binary files a/TracingPaper.pdf and /dev/null differ diff --git a/TracingPaper.synctex.gz b/TracingPaper.synctex.gz deleted file mode 100644 index b67b033..0000000 Binary files a/TracingPaper.synctex.gz and /dev/null differ diff --git a/TracingPaper.tex b/TracingPaper.tex index acd4cb1..84dc486 100644 --- a/TracingPaper.tex +++ b/TracingPaper.tex @@ -87,10 +87,6 @@ Benchmarks are important for the purpose of developing and taking accurate metri The purpose of my work is to tackle this gap and hopefully bring insight to the complexity of network communication. I/O benchmarking, the process of comparing I/O systems by subjecting them to known workloads, is a widespread pratice in the storage industry and serves as the basis for purchasing decisions, performance tuning studies, and marketing campaigns ~\cite{Anderson2004}. -\subsection{Purpose of Tracing} -\label{Purpose of Tracing} -Performing these sorts of investigations and traces is important because without these attempts to better understand the intricacies of computer systems it is much more difficult for humankind to progress its technologies and make optimal use of its resources. 
Without a better understanding of materials one is not able to improve computer hardware, without a greater understanding of memory one is unable to make effective (and efficient) use of memory resources, and without further investigation one can not hope to strengthen the human understanding of network communication between devices and how the aspects of this communication may be directly effecting the performance of these systems. - \subsection{Issues with Tracing} \label{Issues with Tracing} The majority of benchmarks are attempts to represent a known system and structure on which some “original” design/system was tested. While this is all well and good, there are many issues with this sort of approach; temporal \& spatial scaling concerns, timestamping and buffer copying, as well as driver operation for capturing packets~\cite{Orosz2013,Dabir2008,Skopko2012}. Each of these aspects contribute to the inital problems with dissection and analysis of the captured information. Inaccuracies in scheduling I/Os may result in as much as a factor of 3.5 differences in measured response time and factor of 26 in measured queue sizes; differences that are too large to ignore~\cite{Anderson2004}. [MENTION EXAMPLE ISSUES BROUGHT FROM THIS - TWO GOOD EXAMPLES]. @@ -101,33 +97,9 @@ With the matter of temporal scaling, the main concern is that current day benchm \subsection{Previous Advances Due to Testing} \label{Previous Advances Due to Testing} -[ADD TO ISSUES SECTION ABOVE?]Most studies assume issue accuracy using standard system calls adequate. Measures indicate this is not the case and errors in issuing I/O can lead to substantial errors in I/O statistic measurements (e.g. mean latency and number of outstanding I/Os)~\cite{Anderson2004}. - -Timing accuracy and high through-put involves three challenges -\begin{itemize} - \item 1. Designing for peak performance requirements - \item 2. Coping with OS timing inaccuracy - \item 3. 
Working around unpredictable OS behavior - \begin{itemize} - \item 1. Standard OS mechanisms to keep time and issue I/Os; accuracy determined by scheduling granularity of underlying OS - \item 2. Accuracy of I/O scheduling contingent upon thread being scheduled at right time by OS scheduling boundaries \textit{or} flatten bursts - \end{itemize} - \item 4. Unpredictable performance effects due to interrupts; locking, resource contention, kernel scheduling intracacies - \item 5. Examples of performance effects - \begin{itemize} - \item 1. \textit{gettimeofday}() function (SMP) from multiple threads may cause locking to preserve clock invarience - \item 2. Thread moving from one CPU to another difficulty keeping track of wall clock time - \end{itemize} - \item 6. In higher load case the kernel gets more opportunities to schedule threads and hence more I/O issuing threads get scheduled at right time -\end{itemize} - -\subsubsection{Ellard Ledlie 2003} -\label{Ellard Ledlie 2003} -The work done by Ellard \textit{et. al.} examined two workloads (research and email) to see if they resemble previously studied workloads, as well as perform several new analyses on the NFS protocol. Trace-based analyses have guided and motivated contemporary file system design for the past two decades; where the original analysis of the 4.2BSD file system motivated many of the design decisions of the log-structured file system (LFS)~\cite{EllardLedlie2003}. This paper also takes the stance that since the use of technology has expanded and evolved, this fundamental change in workloads needs to be traced to observe and understand the behavior. "We believe that as the community of computer users has expanded and evolved there has been a fundamental change in the workloads seen by file servers, and that the research community must find ways to observe and measure these new workloads."~\cite{EllardLedlie2003} Leung \textit{et al.} (as well as Ellard \textit{et. 
al.}) also observed that much of the variance of load characterization statistics over time can be explained by high-level changes in the workload over time; despite, this correlation having been observed in many trace studies, its effects are usually ignored~\cite{EllardLedlie2003}. The most noticeable change in their traces was the difference between peak and off-peak hours of operation. This finding conveyed that time is a strong predictor of operation counts, amount of data transferred, and the read-write ratios for their CAMPUS (e.g. email) workload. +Previous tracing work has shown that one of the largest \& broadest hurdles to tackle is that benchmarks must be tailored (to every extent) to the system being tested. There are always some generalizations taken into account but these generalizations can also be a major source of error~\cite{Anderson2004,Traeger2008,Vogels1999,Dabir2008,Orosz2013,Skopko2012,Ellard2003,EllardLedlie2003,Ruemmler1993}. To produce a benchmark with high fidelity one needs to understand not only the technology being used but how it is being implemented within the system to benchmark~\cite{Roselli2000,Ruemmler1993,Traeger2008}. All of these aspects will lend to the behavior of the system; from timing \& resource elements to how the managing software governs~\cite{Ellard2003,EllardLedlie2003,Douceur1999}. Furthermore, in pursuing this work one may find unexpected results and learn new things through examination~\cite{Leung2008,Ellard2003,Roselli2000}. -\subsubsection{Ellard 2003} -\label{Ellard 2003} -This paper shows that the technology being actively researched gains improvement faster and that the technology that is not improved will end up being the bottleneck of the system. Ellard and Seltzer give the example of how file system performance is steadily losing ground relative to CPU, memory, and even network performance.
Even though Ellard and Seltzer began their efforts to accurately measure the impact of changes to their system, they also discovered several other phenomena that interacted with the performance of the disk and file system in ways that had far more impact on the overall performance of the system than their improvements~\cite{Ellard2003}. This paper loosely groups all benchmarks into two categories: micro benchmarks and macro/workload benchmarks, the difference between these two being that micro benchmarks measure specific low-level aspects of system performance while workload benchmarks estimate the performance of the system running a particular workload. +[PERHAPS USE THIS PART?]Understanding that no paper can really see the whole scope of tracing/benchmarks, this paper attempts to tackle an aspect of trying to bridge macro and micro benchmarks by building a system that incorporates a micro benchmark's low level replication fidelity with proper scaling to allow for macro level and a ``full spectrum scope'' analysis of everything in-between using traces of data input and synthetic trace generation. Due to the magnitude of this goal, this paper will further limit its focus towards the often forgotten [CITE NEEDED?] networking aspect of multi-system scalable benchmarking and tracing. \subsection{The Need for a New Study} \label{The Need for a New Study} @@ -190,12 +162,6 @@ All comands sent over the network are coupled to an identifying MID/PID/TID/UID \\Following these process IDs is as a way to check for intercommunication between two or more processes. In particular, we examine the compute time \& I/O (input/output) time (i.e. time spent in communication; between information arrivals). This is done by examining the inter-arrival times (IAT) between the server \& the client. This is interesting because this information will give us a realistic sense of the data transit time of the network connections being used (e.g. ethernet, firewire, fibre, etc.).
Other pertinent information would be how often the client makes requests \& how often this event occurs per client process ID, identifiable by their PID/MID tuple. One could also track the amount of sharing that is occurring between users. The PID is the process identifier and the MID is the multiplex identifier, which is set by the client and is to be used for identifying groups of commands belonging to the same logical thread of operation on the client node. \\The per client process ID can be used to map the activity of given programs, thus allowing for finer granularity in the produced benchmark (e.g. control down to process types ran by individual client levels). Other features of interest are the time between an open \& close, or how many opens/closes occurred in a window (e.g. a period of time). This information could be used as a gauge of current day trends in filesystem usage \& its consequent taxation on the surrounding network. It would also allow for greater insight on the r/w habits of users on a network along with a rough comparison between other registered events that occur on the network. Lastly, though no less important, it would allow us to look at how many occurrences there are of shared files between different users, though one must note that there is some issue (though hopefully rare) of resource locking (e.g. shared files) that needs to be taken into account. This is initially addressed by monitoring any oplock flags that are sent for read \& writes. This information also helps provide a preliminary mapping of how the network is used and what sort of traffic populates the communication. -\subsection{Run Patterns} -\label{Run Patterns} - -\subsection{Locating Performance Bottlenecks} -\label{Locating Performance Bottlenecks} - %\subsection{Other (e.g. HTML)} %\label{Other (e.g. 
HTML)} % @@ -221,8 +187,25 @@ All comands sent over the network are coupled to an identifying MID/PID/TID/UID %\label{event_data Structure Tracking} %The purpose of the event\_data structure is to maintain a list of the interesting information associated with each PID/MID/TID/UID tuple seen on the network. It is through this structure that the read \& write times, IATs, and even number of occurances are tracked, along with the request/response IAT pairings. In this manner each tuple has the following information tracked, and both the packet processing is performed and the meaningful data is output from the AnalysisModule code. \textit{\textbf{ADD LIST OF event\_data INFORMATION HERE}}. Although there is a large number of aspects that can be examined when dealing with all of this network information, the current focus of this paper is to examine the possible read/write commands that can occur in via SMB protcols and the IAT times of the request and response packets for these commands. \textit{\textbf{Note:}} Eventually the addition of resource locks WILL be included because it is through this information that we can gain any sort of idea as to the interaction between users/other programs with the resources on the network. +\subsection{System Information and Predictions} +\label{System Information and Predictions} +The following is an explanation of the UITS system from which trace1 pulls its packet information along with predictions of how the data will look. + +The UITS system consists of five Microsoft file server cluster nodes. These blade servers are used to host home directories for all UConn users within the list of 88 departments. These home directories are used to provide personal drive share space to faculty, staff and students, along with at least one small group of users. Each server is capable of handling 1Gb of traffic in each direction (e.g. outbound and inbound traffic).
Altogether the five blade server system can in theory handle 10Gb of receiving and transmitting data. Some of these blade servers have local storage but the majority do not have any. To the understanding of this paper, the blade servers are purposed purely for dealing with incoming traffic to the SAN storage that sits behind them. This system does not currently implement load balancing; instead the servers are set up to spread the traffic load among four of the active cluster nodes while the fifth node is passive and purposed to take over in the case that any of the other nodes goes down. \\ + +The following are my predictions about what the data will tell me about the system. First are the predictions based on what was learned from talking to people within UITS, after that are my general predictions. + +From this paper's understanding of the file server system there are spikes of traffic that tend to happen during the night time. The assumption is that the majority of this traffic will occur between 2am and 6am because this is when backups occur to the SAN system. The point of note, however, is that it is not expected that we would see any of this traffic as the protocol used is not the SMB/CIFS protocol that is being analyzed by this paper. The reasoning for this is that this traffic would be encrypted, therefore this traffic would appear as some other protocol. Furthermore, any traffic that does occur during the duration of ``day time hours'' (i.e. 9am to 5pm) would be solely due to the actions taken by the users of this system (e.g.\ faculty, staff, students). \\ +Assumptions: +\begin{itemize} +\item Some backup traffic will be seen because traffic will be generated as the data being stored using this ``online storage'' is backed up to the SAN system. Note: Any traffic past moving the data to the SAN system will \textbf{not} be seen. +\item All backup will be performed late night/early morning (e.g.
11pm-5am) +\item One general assumption is that these blade servers are ``rock solid'' and therefore should \textbf{not} ever go down. If this is the case then the expectation is that we should see at most a transfer rate of 8Gb since the fifth server will not be in operation. If we do find that there is a greater rate of transfer of data, then this means that the fifth server is actually helping with the traffic, not just acting as a backup in the case that any other blade server crashes or ``goes down''. +\end{itemize} + \subsection{Run Patterns} \label{Run Patterns} +Interpreting the data collected is done by producing three separate graphs showing three important areas of interest for being able to understand the traffic being examined. These areas are: \textbf{1)} the total number of read/write IO events, \textbf{2)} the occurrences of different file sizes being read/written, and \textbf{3)} the total number of bytes (combined read \& write) that are communicated over the traffic captured by this paper's tracing system. The reason for needing these three areas of information is that combined together one is able to better interpret all the collected and dissected information. By comparing the byte traffic to the IO information we are able to tell not only when the times of greatest traffic are but also which types of IO interactions dominate these periods. It should be noted that unfortunately the analysis program does not include a granularity to allow knowledge on whether the read, or write, events are responsible for the most data transfer (via communication) but that is one of the many future additions to this work. The file size information allows for interpretation of the size of the information that is being passed between the UITS servers and clients.
Although the granularity for this information is far coarser (24 hours versus the 15-minute time window) it still shows which variations of file length were most encountered over the period of the given day. This information coupled with the byte and IO information reflects a preliminary portrait of how the UITS file server system is used, which can be compared to the internal network information that the UITS department keeps for their own maintenance and troubleshooting purposes. \subsection{Locating Performance Bottlenecks} \label{Locating Performance Bottlenecks} @@ -374,6 +357,15 @@ All comands sent over the network are coupled to an identifying MID/PID/TID/UID \section{Conclusion} \label{Conclusion} \textit{Do the results show a continuation in the trend of traditional computer science workloads?} +At the outset of this work it was believed that the data collected and analyzed would follow similar behavior patterns seen in previous papers \textit{Cite?}. Our initial results were confusing and most definitely did not meet expectations. One of these oddities was that during the day one would see a greater increase in writes instead of reads. The first assumption was that this is due to the system and how users interact with everything. +I believe that the greater number of writes comes from students doing intro work for different classes in which students are constantly saving their work while reading instructions from a single source. One must also recall that this data itself has limited interpretation because only a small three-week window of information is being examined. A better, and far more complete, image can be constructed using data captured from the following months, or more ideally, from an entire year's worth of data. Another limitation of the results is that the scope of the analysis is curbed and does not yet fully dissect all of the fields being passed in network communication. +The future work of this project would be to +\begin{itemize} + \item 1.
Complete the dissection analysis to include all captured fields from the originating pcap files. + \item 2. All DataSeries files (which are purposed for distribution) would be a single file per day's worth of communication; this may be possible with new additions to the DataSeries code but pcap limitations do not currently allow for this. + \item 3. Modularization of the capturing software would not only pull out information pertinent to the SMB/CIFS protocol, but would be able to pull multiple protocols which a user would be able to define prior to run-time. + \item 4. Better automation of the capturing system would remove the potential of human error causing loss of data. Use of new DataSeries tools may allow for recovery of previously corrupted DataSeries files. +\end{itemize} %references section %\bibliographystyle{plain} @@ -422,6 +414,18 @@ A Study of Practical Deduplication}, ACM Transactions on Storage (January 2012) \bibitem{PFRING} \emph{PF\_RING High-speed packet capture, filtering and analysis}, url{http://www.ntop.org/products/pf\_ring/} +\bibitem{Traeger2008} Avishay Traeger and Erez Zadok and Nikolai Joukov and Charles P.~Wright, \emph{ +A Nine Year Study of File System and Storage Benchmarking}, ACM Transactions on Storage (May 2008) + +\bibitem{Kavalanekar2009} Swaroop Kavalanekar and Dushyanth Narayanan and Sriram Sankar and Eno Thereska and Kushagra Vaid and Bruce Worthington, \emph{ +Measuring Database Performance in Online Services: A Trace-Based Approach}, Performance Evaluation and Benchmarking (2009) + +\bibitem{Douceur1999} John R.~Douceur and William J.~Bolosky, \emph{ +A Large-Scale Study of File-System Contents}, Proceedings of the 1999 ACM SIGMETRICS international conference on Measurement and modeling of computer systems (June 1999) + +\bibitem{Ruemmler1993} Chris Ruemmler and John Wilkes, \emph{ +UNIX disk access patterns}, Winter USENIX 1993 (January 1993) + \end{thebibliography} \end{document}