diff --git a/TracingPaper.aux b/TracingPaper.aux index 63c559c..35b281b 100644 --- a/TracingPaper.aux +++ b/TracingPaper.aux @@ -8,21 +8,26 @@ \citation{Anderson2004} \@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{1}} \newlabel{Introduction}{{1}{1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Purpose of Tracing}{1}} -\newlabel{Purpose of Tracing}{{1.1}{1}} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Issues with Tracing}{1}} -\newlabel{Issues with Tracing}{{1.2}{1}} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.1}Issues with Tracing}{1}} +\newlabel{Issues with Tracing}{{1.1}{1}} \citation{Anderson2004} +\citation{Traeger2008} +\citation{Vogels1999} +\citation{Dabir2008} +\citation{Orosz2013} +\citation{Skopko2012} +\citation{Ellard2003} \citation{EllardLedlie2003} +\citation{Ruemmler1993} +\citation{Roselli2000} +\citation{Ruemmler1993} +\citation{Traeger2008} +\citation{Ellard2003} \citation{EllardLedlie2003} -\citation{EllardLedlie2003} +\citation{Douceur1999} +\citation{Leung2008} \citation{Ellard2003} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.3}Previous Advances Due to Testing}{2}} -\newlabel{Previous Advances Due to Testing}{{1.3}{2}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {1.3.1}Ellard Ledlie 2003}{2}} -\newlabel{Ellard Ledlie 2003}{{1.3.1}{2}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {1.3.2}Ellard 2003}{2}} -\newlabel{Ellard 2003}{{1.3.2}{2}} +\citation{Roselli2000} \citation{Leung2008} \citation{Leung2008} \citation{Ellard2003} @@ -36,37 +41,39 @@ \citation{PFRING} \citation{Ellard2003} \citation{Anderson2004} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.2}Previous Advances Due to Testing}{2}} +\newlabel{Previous Advances Due to Testing}{{1.2}{2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {1.3}The Need for a New Study}{2}} +\newlabel{The Need for a New Study}{{1.3}{2}} +\@writefile{toc}{\contentsline 
{section}{\numberline {2}Methodology}{2}} +\newlabel{Methodology}{{2}{2}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}System Limitations}{2}} +\newlabel{System Limitations}{{2.1}{2}} \citation{Leung2008} -\@writefile{toc}{\contentsline {subsection}{\numberline {1.4}The Need for a New Study}{3}} -\newlabel{The Need for a New Study}{{1.4}{3}} -\@writefile{toc}{\contentsline {section}{\numberline {2}Methodology}{3}} -\newlabel{Methodology}{{2}{3}} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}System Limitations}{3}} -\newlabel{System Limitations}{{2.1}{3}} +\citation{Ellard2003} \@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Main Challenges}{3}} \newlabel{Main Challenges}{{2.2}{3}} \@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Interpretation of Data}{3}} \newlabel{Interpretation of Data}{{2.3}{3}} -\citation{Ellard2003} -\@writefile{toc}{\contentsline {subsection}{\numberline {2.4}Scope of Interpretation}{4}} -\newlabel{Scope of Interpretation}{{2.4}{4}} -\@writefile{toc}{\contentsline {section}{\numberline {3}Tracing System}{4}} -\newlabel{Tracing System}{{3}{4}} -\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Stages of Trace}{4}} -\newlabel{Stages of Trace}{{3.1}{4}} -\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.1}Capture}{4}} -\newlabel{Capture}{{3.1.1}{4}} +\@writefile{toc}{\contentsline {subsection}{\numberline {2.4}Scope of Interpretation}{3}} +\newlabel{Scope of Interpretation}{{2.4}{3}} +\@writefile{toc}{\contentsline {section}{\numberline {3}Tracing System}{3}} +\newlabel{Tracing System}{{3}{3}} +\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Stages of Trace}{3}} +\newlabel{Stages of Trace}{{3.1}{3}} +\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.1}Capture}{3}} +\newlabel{Capture}{{3.1.1}{3}} +\citation{MS-CIFS} \@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.2}Collection}{4}} \newlabel{Collection}{{3.1.2}{4}} 
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.1.3}Dissection/Analysis}{4}} \newlabel{Dissection/Analysis}{{3.1.3}{4}} -\citation{MS-CIFS} -\@writefile{toc}{\contentsline {section}{\numberline {4}Trace Analysis}{5}} -\newlabel{Trace Analysis}{{4}{5}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}SMB}{5}} -\newlabel{SMB}{{4.1}{5}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}ID Tracking}{5}} -\newlabel{ID Tracking}{{4.2}{5}} +\@writefile{toc}{\contentsline {section}{\numberline {4}Trace Analysis}{4}} +\newlabel{Trace Analysis}{{4}{4}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}SMB}{4}} +\newlabel{SMB}{{4.1}{4}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}ID Tracking}{4}} +\newlabel{ID Tracking}{{4.2}{4}} \bibcite{Leung2008}{1} \bibcite{Ellard2003}{2} \bibcite{EllardLedlie2003}{3} @@ -82,17 +89,21 @@ \bibcite{Vogels1999}{13} \bibcite{Meyer2012}{14} \bibcite{PFRING}{15} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Run Patterns}{6}} -\newlabel{Run Patterns}{{4.3}{6}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Locating Performance Bottlenecks}{6}} -\newlabel{Locating Performance Bottlenecks}{{4.4}{6}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.5}Run Patterns}{6}} -\newlabel{Run Patterns}{{4.5}{6}} -\@writefile{toc}{\contentsline {subsection}{\numberline {4.6}Locating Performance Bottlenecks}{6}} -\newlabel{Locating Performance Bottlenecks}{{4.6}{6}} -\@writefile{toc}{\contentsline {section}{\numberline {5}Intuition Confirm/Change}{6}} -\newlabel{Intuition Confirm/Change}{{5}{6}} -\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Characterizations of Different Packet Types}{6}} -\newlabel{Characterizations of Different Packet Types}{{5.1}{6}} -\@writefile{toc}{\contentsline {section}{\numberline {6}Conclusion}{6}} -\newlabel{Conclusion}{{6}{6}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Run Patterns}{5}} 
+\newlabel{Run Patterns}{{4.3}{5}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Locating Performance Bottlenecks}{5}} +\newlabel{Locating Performance Bottlenecks}{{4.4}{5}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.5}Run Patterns}{5}} +\newlabel{Run Patterns}{{4.5}{5}} +\@writefile{toc}{\contentsline {subsection}{\numberline {4.6}Locating Performance Bottlenecks}{5}} +\newlabel{Locating Performance Bottlenecks}{{4.6}{5}} +\@writefile{toc}{\contentsline {section}{\numberline {5}Intuition Confirm/Change}{5}} +\newlabel{Intuition Confirm/Change}{{5}{5}} +\@writefile{toc}{\contentsline {subsection}{\numberline {5.1}Characterizations of Different Packet Types}{5}} +\newlabel{Characterizations of Different Packet Types}{{5.1}{5}} +\@writefile{toc}{\contentsline {section}{\numberline {6}Conclusion}{5}} +\newlabel{Conclusion}{{6}{5}} +\bibcite{Traeger2008}{16} +\bibcite{Kavalanekar2009}{17} +\bibcite{Douceur1999}{18} +\bibcite{Ruemmler1993}{19} diff --git a/TracingPaper.log b/TracingPaper.log index 0744e0e..2fdfb0f 100644 --- a/TracingPaper.log +++ b/TracingPaper.log @@ -1,4 +1,4 @@ -This is pdfTeX, Version 3.1415926-2.5-1.40.14 (MiKTeX 2.9 64-bit) (preloaded format=pdflatex 2014.12.20) 21 DEC 2014 21:35 +This is pdfTeX, Version 3.1415926-2.5-1.40.14 (MiKTeX 2.9 64-bit) (preloaded format=pdflatex 2014.12.20) 4 JAN 2015 15:17 entering extended mode **TracingPaper.tex (C:\UConn\TracingPaper\TracingPaper.tex @@ -90,12 +90,6 @@ LaTeX Warning: Label `Run Patterns' multiply defined. LaTeX Warning: Label `Locating Performance Bottlenecks' multiply defined. - -LaTeX Warning: Label `Ellard Ledlie 2003' multiply defined. - - -LaTeX Warning: Label `Ellard 2003' multiply defined. - ) LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 54. LaTeX Font Info: ... okay on input line 54. @@ -152,78 +146,71 @@ Missing character: There is no Missing character: There is no € in font ptmr7t! Missing character: There is no ť in font ptmr7t! 
LaTeX Font Info: Font shape `OT1/ptm/bx/n' in size <10> not available -(Font) Font shape `OT1/ptm/b/n' tried instead on input line 98. +(Font) Font shape `OT1/ptm/b/n' tried instead on input line 94. [1{F:/ProgramData/MiKTeX/2.9/pdftex/config/pdftex.map} ] -Underfull \hbox (badness 10000) in paragraph at lines 100--101 +Underfull \hbox (badness 10000) in paragraph at lines 96--97 []\OT1/ptm/m/n/10 [CLOSING SEN-TENCES?]While per-form-ing a [] -Underfull \hbox (badness 1596) in paragraph at lines 100--101 +Underfull \hbox (badness 1596) in paragraph at lines 96--97 \OT1/ptm/m/n/10 ma-chines com-mu-ni-cat-ing with each other, and the [] -LaTeX Font Info: Try loading font information for OMS+ptm on input line 108. -("C:\Program Files\MiKTeX 2.9\tex\latex\psnfss\omsptm.fd" -File: omsptm.fd -) -LaTeX Font Info: Font shape `OMS/ptm/m/n' in size <10> not available -(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 108. - [2] [3] -Underfull \vbox (badness 10000) has occurred while \output is active [] +Underfull \hbox (badness 2269) in paragraph at lines 102--103 +\OT1/ptm/m/n/10 no pa-per an re-ally see the whole scope of trac- + [] - [4] +[2] [3] LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <10> not available -(Font) Font shape `OT1/ptm/b/it' tried instead on input line 185. - -Underfull \vbox (badness 10000) has occurred while \output is active [] - - [5] -Underfull \hbox (badness 10000) in paragraph at lines 408--409 +(Font) Font shape `OT1/ptm/b/it' tried instead on input line 157. 
+ [4] +Underfull \hbox (badness 10000) in paragraph at lines 380--381 []\OT1/ptm/m/it/10 Common In-ter-net File Sys-tem (CIFS) Pro- [] -Underfull \hbox (badness 10000) in paragraph at lines 408--409 +Underfull \hbox (badness 10000) in paragraph at lines 380--381 \OT1/ptm/m/it/10 to-col\OT1/ptm/m/n/10 , urlhttp://msdn.microsoft.com/en- [] -Underfull \hbox (badness 10000) in paragraph at lines 410--411 +Underfull \hbox (badness 10000) in paragraph at lines 382--383 []\OT1/ptm/m/it/10 Server Mes-sage Block (SMB) Pro-to- [] -Underfull \hbox (badness 10000) in paragraph at lines 410--411 +Underfull \hbox (badness 10000) in paragraph at lines 382--383 \OT1/ptm/m/it/10 col\OT1/ptm/m/n/10 , urlhttp://msdn.microsoft.com/en- [] -[6] (C:\UConn\TracingPaper\TracingPaper.aux) +[5] [6 + +] (C:\UConn\TracingPaper\TracingPaper.aux) LaTeX Warning: There were multiply-defined labels. ) Here is how much of TeX's memory you used: - 1481 strings out of 493705 - 19740 string characters out of 3144575 - 80973 words of memory out of 3000000 - 4808 multiletter control sequences out of 15000+200000 + 1454 strings out of 493705 + 19238 string characters out of 3144575 + 82851 words of memory out of 3000000 + 4784 multiletter control sequences out of 15000+200000 20443 words of font info for 42 fonts, out of 3000000 for 9000 1025 hyphenation exceptions out of 8191 34i,8n,21p,1884b,437s stack positions out of 5000i,500n,10000p,200000b,50000s {C:/Program Files/MiKTeX 2.9/fonts/enc/dvips/fontname/8r.enc} -Output written on TracingPaper.pdf (6 pages, 104879 bytes). +s/MiKTeX 2.9/fonts/type1/urw/courier/ucrr8a.pfb> +Output written on TracingPaper.pdf (6 pages, 95725 bytes). PDF statistics: - 46 PDF objects out of 1000 (max. 8388607) + 42 PDF objects out of 1000 (max. 8388607) 0 named destinations out of 1000 (max. 500000) 1 words of extra memory for PDF output out of 10000 (max. 
10000000) diff --git a/TracingPaper.pdf b/TracingPaper.pdf index eb4d764..d566632 100644 Binary files a/TracingPaper.pdf and b/TracingPaper.pdf differ diff --git a/TracingPaper.synctex.gz b/TracingPaper.synctex.gz index 109377b..0123b34 100644 Binary files a/TracingPaper.synctex.gz and b/TracingPaper.synctex.gz differ diff --git a/TracingPaper.tex b/TracingPaper.tex index acd4cb1..46369a6 100644 --- a/TracingPaper.tex +++ b/TracingPaper.tex @@ -87,10 +87,6 @@ Benchmarks are important for the purpose of developing and taking accurate metri The purpose of my work is to tackle this gap and hopefully bring insight to the complexity of network communication. I/O benchmarking, the process of comparing I/O systems by subjecting them to known workloads, is a widespread pratice in the storage industry and serves as the basis for purchasing decisions, performance tuning studies, and marketing campaigns ~\cite{Anderson2004}. -\subsection{Purpose of Tracing} -\label{Purpose of Tracing} -Performing these sorts of investigations and traces is important because without these attempts to better understand the intricacies of computer systems it is much more difficult for humankind to progress its technologies and make optimal use of its resources. Without a better understanding of materials one is not able to improve computer hardware, without a greater understanding of memory one is unable to make effective (and efficient) use of memory resources, and without further investigation one can not hope to strengthen the human understanding of network communication between devices and how the aspects of this communication may be directly effecting the performance of these systems. - \subsection{Issues with Tracing} \label{Issues with Tracing} The majority of benchmarks are attempts to represent a known system and structure on which some “original” design/system was tested. 
While this is all well and good, there are many issues with this sort of approach; temporal \& spatial scaling concerns, timestamping and buffer copying, as well as driver operation for capturing packets~\cite{Orosz2013,Dabir2008,Skopko2012}. Each of these aspects contribute to the inital problems with dissection and analysis of the captured information. Inaccuracies in scheduling I/Os may result in as much as a factor of 3.5 differences in measured response time and factor of 26 in measured queue sizes; differences that are too large to ignore~\cite{Anderson2004}. [MENTION EXAMPLE ISSUES BROUGHT FROM THIS - TWO GOOD EXAMPLES]. @@ -101,33 +97,9 @@ With the matter of temporal scaling, the main concern is that current day benchm \subsection{Previous Advances Due to Testing} \label{Previous Advances Due to Testing} -[ADD TO ISSUES SECTION ABOVE?]Most studies assume issue accuracy using standard system calls adequate. Measures indicate this is not the case and errors in issuing I/O can lead to substantial errors in I/O statistic measurements (e.g. mean latency and number of outstanding I/Os)~\cite{Anderson2004}. - -Timing accuracy and high through-put involves three challenges -\begin{itemize} - \item 1. Designing for peak performance requirements - \item 2. Coping with OS timing inaccuracy - \item 3. Working around unpredictable OS behavior - \begin{itemize} - \item 1. Standard OS mechanisms to keep time and issue I/Os; accuracy determined by scheduling granularity of underlying OS - \item 2. Accuracy of I/O scheduling contingent upon thread being scheduled at right time by OS scheduling boundaries \textit{or} flatten bursts - \end{itemize} - \item 4. Unpredictable performance effects due to interrupts; locking, resource contention, kernel scheduling intracacies - \item 5. Examples of performance effects - \begin{itemize} - \item 1. \textit{gettimeofday}() function (SMP) from multiple threads may cause locking to preserve clock invarience - \item 2. 
Thread moving from one CPU to another difficulty keeping track of wall clock time - \end{itemize} - \item 6. In higher load case the kernel gets more opportunities to schedule threads and hence more I/O issuing threads get scheduled at right time -\end{itemize} - -\subsubsection{Ellard Ledlie 2003} -\label{Ellard Ledlie 2003} -The work done by Ellard \textit{et. al.} examined two workloads (research and email) to see if they resemble previously studied workloads, as well as perform several new analyses on the NFS protocol. Trace-based analyses have guided and motivated contemporary file system design for the past two decades; where the original analysis of the 4.2BSD file system motivated many of the design decisions of the log-structured file system (LFS)~\cite{EllardLedlie2003}. This paper also takes the stance that since the use of technology has expanded and evolved, this fundamental change in workloads needs to be traced to observe and understand the behavior. "We believe that as the community of computer users has expanded and evolved there has been a fundamental change in the workloads seen by file servers, and that the research community must find ways to observe and measure these new workloads."~\cite{EllardLedlie2003} Leung \textit{et al.} (as well as Ellard \textit{et. al.}) also observed that much of the variance of load characterization statistics over time can be explained by high-level changes in the workload over time; despite, this correlation having been observed in many trace studies, its effects are usually ignored~\cite{EllardLedlie2003}. The most noticeable change in their traces was the difference between peak and off-peak hours of operation. This finding conveyed that time is a strong predictor of operation counts, amount of data transferred, and the read-write ratios for their CAMPUS (e.g. email) workload. 
- -\subsubsection{Ellard 2003} -\label{Ellard 2003} -This paper shows that the technology being actively researched gains improvement faster and that the technology that is not improved will end up being the bottleneck of the system. Ellard and Seltzer give the example of how file system performance is steadily losing ground relative to CPU, memory, and even network performance. Even though Ellard and Seltzer began their efforts to accurately measure the impact of changes to their system, they also discovered several other phenomena that interacted with the performance of the disk and file system in ways that had far more impact on the overall performance of the system than their improvements~\cite{Ellard2003}. This paper loosely groups all benchmarks into two categories: micro benchmarks and macro/workload benchmarks, the difference between these two being that micro benchmarks measure specific low-level aspects of system performance while workload benchmarks estimate the performance of the system running a particular workload. +Previous tracing work has shown that one of the largest \& broadest hurdles to tackle is that benchmarks must be tailored (to every extent) to the system being tested. There are always some generalizations taken into account but these generalizations can also be a major source of error~\cite{Anderson2004,Traeger2008,Vogels1999,Dabir2008,Orosz2013,Skopko2012,Ellard2003,EllardLedlie2003,Ruemmler1993}. To produce a benchmark with high fidelity one needs to understand not only the technology being used but how it is being implemented within the system to benchmark~\cite{Roselli2000,Ruemmler1993,Traeger2008}. All of these aspects will lend to the behavior of the system; from timing \& resource elements to how the managing software governs~\cite{Ellard2003,EllardLedlie2003,Douceur1999}. Furthermore, in pursuing this work one may find unexpected results and learn new things through examination~\cite{Leung2008,Ellard2003,Roselli2000}.
+ +[PERHAPS USE THIS PART?]Understanding that no paper can really see the whole scope of tracing/benchmarks, this paper attempts to tackle an aspect of trying to bridge macro and micro benchmarks by building a system that incorporates a micro benchmark's low level replication fidelity with proper scaling to allow for macro level and a ``full spectrum scope'' analysis of everything in-between using traces of data input and synthetic trace generation. Due to the magnitude of this goal, this paper will further limit its focus towards the often forgotten [CITE NEEDED?] networking aspect of multi-system scalable benchmarking and tracing. \subsection{The Need for a New Study} \label{The Need for a New Study} @@ -422,6 +394,18 @@ A Study of Practical Deduplication}, ACM Transactions on Storage (January 2012) \bibitem{PFRING} \emph{PF\_RING High-speed packet capture, filtering and analysis}, url{http://www.ntop.org/products/pf\_ring/} +\bibitem{Traeger2008} Avishay Traeger and Erez Zadok and Nikolai Joukov and Charles P.~Wright, \emph{ +A Nine Year Study of File System and Storage Benchmarking}, ACM Transactions on Storage (May 2008) + +\bibitem{Kavalanekar2009} Swaroop Kavalanekar and Dushyanth Narayanan and Sriram Sankar and Eno Thereska and Kushagra Vaid and Bruce Worthington, \emph{ +Measuring Database Performance in Online Services: A Trace-Based Approach}, Performance Evaluation and Benchmarking (2009) + +\bibitem{Douceur1999} John R.~Douceur and William J.~Bolosky, \emph{ +A Large-Scale Study of File-System Contents}, Proceedings of the 1999 ACM SIGMETRICS international conference on Measurement and modeling of computer systems (June 1999) + +\bibitem{Ruemmler1993} Chris Ruemmler and John Wilkes, \emph{ +UNIX disk access patterns}, Winter USENIX 1993 (January 1993) + \end{thebibliography} \end{document}