diff --git a/TracingPaper.aux b/TracingPaper.aux index 6854238..9b8ebad 100644 --- a/TracingPaper.aux +++ b/TracingPaper.aux @@ -52,6 +52,12 @@ \@writefile{toc}{\contentsline {subsubsection}{\numberline {2.1.1}Capture}{2}} \newlabel{Capture}{{2.1.1}{2}} \citation{MS-CIFS} +\citation{Orosz2013} +\citation{Dabir2008} +\citation{Skopko2012} +\citation{Orosz2013} +\citation{Ellard2003} +\citation{Anderson2004} \@writefile{toc}{\contentsline {subsubsection}{\numberline {2.1.2}Collection}{3}} \newlabel{Collection}{{2.1.2}{3}} \@writefile{toc}{\contentsline {subsubsection}{\numberline {2.1.3}Dissection/Analysis}{3}} @@ -60,14 +66,10 @@ \newlabel{ID Tracking}{{2.2}{3}} \@writefile{toc}{\contentsline {subsection}{\numberline {2.3}SMB}{3}} \newlabel{SMB}{{2.3}{3}} -\citation{Orosz2013} -\citation{Dabir2008} -\citation{Skopko2012} -\citation{Orosz2013} \citation{Ellard2003} -\citation{Anderson2004} \citation{Leung2008} -\citation{Ellard2003} +\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces SMB Header Specification.}}{4}} +\newlabel{fig:SMBSpec}{{1}{4}} \@writefile{toc}{\contentsline {subsection}{\numberline {2.4}System Limitations and Challenges}{4}} \newlabel{System Limitations and Challenges}{{2.4}{4}} \citation{Traeger2008} @@ -87,14 +89,16 @@ \newlabel{Intuition Confirm/Change}{{4}{6}} \@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Characterizations of Different Behaviors}{6}} \newlabel{Characterizations of Different Behaviors}{{4.1}{6}} -\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces R/W IO over three weeks.}}{7}} -\newlabel{fig:IOWeek}{{1}{7}} -\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Btyes throughput over three weeks.}}{7}} -\newlabel{fig:BytesWeek}{{2}{7}} -\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Zoom in on IOs seen.}}{7}} -\newlabel{fig:IOWeek}{{3}{7}} -\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Zoom in on bytes seen.}}{7}} -\newlabel{fig:BytesWeekZoom}{{4}{7}} +\@writefile{toc}{\contentsline {section}{\numberline {5}Conclusion}{6}} +\newlabel{Conclusion}{{5}{6}} +\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces R/W IO over three weeks.}}{7}} +\newlabel{fig:IOWeek}{{2}{7}} +\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Btyes throughput over three weeks.}}{7}} +\newlabel{fig:BytesWeek}{{3}{7}} +\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Zoom in on IOs seen.}}{7}} +\newlabel{fig:IOWeek}{{4}{7}} +\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Zoom in on bytes seen.}}{7}} +\newlabel{fig:BytesWeekZoom}{{5}{7}} \bibcite{Leung2008}{1} \bibcite{Ellard2003}{2} \bibcite{EllardLedlie2003}{3} @@ -107,8 +111,6 @@ \bibcite{MS-SMB}{10} \bibcite{MS-SMB2}{11} \bibcite{Roselli2000}{12} -\@writefile{toc}{\contentsline {section}{\numberline {5}Conclusion}{8}} -\newlabel{Conclusion}{{5}{8}} \bibcite{Vogels1999}{13} \bibcite{Meyer2012}{14} \bibcite{PFRING}{15} diff --git a/TracingPaper.log b/TracingPaper.log index 7bae1c1..d499f41 100644 --- a/TracingPaper.log +++ b/TracingPaper.log @@ -1,15 +1,15 @@ -This is pdfTeX, Version 3.1415926-2.5-1.40.14 (MiKTeX 2.9 64-bit) (preloaded format=pdflatex 2014.12.22) 9 APR 2015 09:41 +This is pdfTeX, Version 3.1415926-2.5-1.40.14 (MiKTeX 2.9 64-bit) (preloaded format=pdflatex 2014.12.20) 20 APR 2015 20:41 entering extended mode -**C:/Users/Wortman/Documents/UConn/TracingPaper/TracingPaper.tex 
-(C:/Users/Wortman/Documents/UConn/TracingPaper/TracingPaper.tex +**TracingPaper.tex +(C:\UConn\TracingPaper\TracingPaper.tex LaTeX2e <2014/05/01> Babel <3.9l> and hyphenation patterns for 68 languages loaded. -(C:\Users\Wortman\Documents\UConn\TracingPaper\usetex-v1.cls +(C:\UConn\TracingPaper\usetex-v1.cls Document Class: usetex-v1 2002/10/31 v1.2 usetex Usenix article class ("C:\Program Files\MiKTeX 2.9\tex\latex\base\article.cls" -Document Class: article 2014/09/29 v1.4h Standard LaTeX document class +Document Class: article 2007/10/19 v1.4h Standard LaTeX document class ("C:\Program Files\MiKTeX 2.9\tex\latex\base\size10.clo" -File: size10.clo 2014/09/29 v1.4h Standard LaTeX file (size option) +File: size10.clo 2007/10/19 v1.4h Standard LaTeX file (size option) ) \c@part=\count79 \c@section=\count80 @@ -23,7 +23,7 @@ File: size10.clo 2014/09/29 v1.4h Standard LaTeX file (size option) \belowcaptionskip=\skip42 \bibindent=\dimen102 ) -("C:\Program Files\MiKTeX 2.9\tex\latex\endnotes\endnotes.sty" +(F:\Users\PAW\AppData\Roaming\MiKTeX\2.9\tex\latex\endnotes\endnotes.sty \c@endnote=\count87 \endnotesep=\dimen103 \@enotes=\write3 @@ -41,14 +41,14 @@ Warning: endnotes support is deprecated (see documentation for details) Package: epsfig 1999/02/16 v1.7a (e)psfig emulation (SPQR) ("C:\Program Files\MiKTeX 2.9\tex\latex\graphics\graphicx.sty" -Package: graphicx 2014/10/28 v1.0g Enhanced LaTeX Graphics (DPC,SPQR) +Package: graphicx 1999/02/16 v1.0f Enhanced LaTeX Graphics (DPC,SPQR) ("C:\Program Files\MiKTeX 2.9\tex\latex\graphics\keyval.sty" -Package: keyval 2014/10/28 v1.15 key=value parser (DPC) +Package: keyval 1999/03/16 v1.13 key=value parser (DPC) \KV@toks@=\toks14 ) ("C:\Program Files\MiKTeX 2.9\tex\latex\graphics\graphics.sty" -Package: graphics 2014/10/28 v1.0p Standard LaTeX Graphics (DPC,SPQR) +Package: graphics 2009/02/05 v1.0o Standard LaTeX Graphics (DPC,SPQR) ("C:\Program Files\MiKTeX 2.9\tex\latex\graphics\trig.sty" Package: trig 1999/03/16 v1.09 sin cos tan (DPC) @@ -75,7 +75,7 @@ Package: ltxcmds 2011/11/09 v1.22 LaTeX kernel commands for general use (HO) \epsfxsize=\dimen106 \epsfysize=\dimen107 ) -("C:\Program Files\MiKTeX 2.9\tex\latex\url\url.sty" +(F:\Users\PAW\AppData\Roaming\MiKTeX\2.9\tex\latex\url\url.sty \Urlmuskip=\muskip10 Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc. ) @@ -83,7 +83,7 @@ Package: url 2013/09/16 ver 3.4 Verb mode for urls, etc. LaTeX Warning: Unused global option(s): [XXX]. -(C:\Users\Wortman\Documents\UConn\TracingPaper\TracingPaper.aux +(C:\UConn\TracingPaper\TracingPaper.aux LaTeX Warning: Label `fig:IOWeek' multiply defined. @@ -104,7 +104,7 @@ LaTeX Font Info: Try loading font information for OT1+ptm on input line 56. ("C:\Program Files\MiKTeX 2.9\tex\latex\psnfss\ot1ptm.fd" File: ot1ptm.fd 2001/06/04 font definitions for OT1/ptm. ) -("C:\Program Files\MiKTeX 2.9\tex\context\base\supp-pdf.mkii" +(F:\Users\PAW\AppData\Roaming\MiKTeX\2.9\tex\context\base\supp-pdf.mkii [Loading MPS to PDF converter (version 2006.09.02).] \scratchcounter=\count89 \scratchdimen=\dimen108 @@ -159,51 +159,62 @@ Underfull \hbox (badness 10000) in paragraph at lines 107--110 [] -[1{C:/ProgramData/MiKTeX/2.9/pdftex/config/pdftex.map} +[1{F:/Users/PAW/AppData/Local/MiKTeX/2.9/pdftex/config/pdftex.map} ] [2] LaTeX Font Info: Font shape `OT1/ptm/bx/it' in size <10> not available (Font) Font shape `OT1/ptm/b/it' tried instead on input line 147. 
- [3] -Underfull \hbox (badness 2042) in paragraph at lines 160--162 + +<./images/SMBHeader.jpg, id=12, 401.5pt x 197.73875pt> +File: ./images/SMBHeader.jpg Graphic file (type jpg) + + +Package pdftex.def Info: ./images/SMBHeader.jpg used on input line 153. +(pdftex.def) Requested size: 466.1753pt x 113.81053pt. + +Underfull \hbox (badness 2042) in paragraph at lines 165--167 \OT1/ptm/m/n/10 been fil-tered for spe-cific pro-to-col in-for-ma-tion and [] -Underfull \hbox (badness 1552) in paragraph at lines 160--162 +Underfull \hbox (badness 1552) in paragraph at lines 165--167 \OT1/ptm/m/n/10 to how the packet cap-tur-ing drivers and pro-grams [] -Underfull \hbox (badness 10000) in paragraph at lines 160--162 +Underfull \hbox (badness 10000) in paragraph at lines 165--167 [] -[4] -Underfull \hbox (badness 10000) in paragraph at lines 208--209 + +Underfull \vbox (badness 5578) has occurred while \output is active [] + + [3] +Underfull \vbox (badness 10000) has occurred while \output is active [] + + [4 ] +Underfull \hbox (badness 10000) in paragraph at lines 185--186 [] -LaTeX Font Info: Try loading font information for OMS+ptm on input line 215. -("C:\Program Files\MiKTeX 2.9\tex\latex\psnfss\omsptm.fd" -File: omsptm.fd -) -LaTeX Font Info: Font shape `OMS/ptm/m/n' in size <10> not available -(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 215. - [5] -Underfull \hbox (badness 1867) in paragraph at lines 230--231 +Underfull \hbox (badness 1028) in paragraph at lines 189--192 +\OT1/ptm/m/n/10 One gen-eral as-sump-tion is that these blade servers + [] + +[5] +Underfull \hbox (badness 1867) in paragraph at lines 207--208 \OT1/ptm/m/n/10 In or-der ot in-ter-pret the data be-ing an-a-lyzed and [] -Underfull \hbox (badness 1303) in paragraph at lines 230--231 +Underfull \hbox (badness 1303) in paragraph at lines 207--208 \OT1/ptm/m/n/10 dis-sected, the first step was to un-der-stand how to [] -Underfull \hbox (badness 10000) in paragraph at lines 230--231 +Underfull \hbox (badness 10000) in paragraph at lines 207--208 [] @@ -211,100 +222,102 @@ Underfull \hbox (badness 10000) in paragraph at lines 230--231 File: ./images/IOWeek.png Graphic file (type png) -Package pdftex.def Info: ./images/IOWeek.png used on input line 235. +Package pdftex.def Info: ./images/IOWeek.png used on input line 210. (pdftex.def) Requested size: 466.13518pt x 113.81151pt. <./images/BytesWeek.png, id=24, 686.565pt x 499.11469pt> File: ./images/BytesWeek.png Graphic file (type png) -Package pdftex.def Info: ./images/BytesWeek.png used on input line 241. +Package pdftex.def Info: ./images/BytesWeek.png used on input line 216. (pdftex.def) Requested size: 466.13518pt x 113.81151pt. <./images/IOWeekZoom.png, id=25, 686.565pt x 499.11469pt> File: ./images/IOWeekZoom.png Graphic file (type png) -Package pdftex.def Info: ./images/IOWeekZoom.png used on input line 247. +Package pdftex.def Info: ./images/IOWeekZoom.png used on input line 222. (pdftex.def) Requested size: 466.13518pt x 113.81151pt. <./images/BytesWeekZoom.png, id=26, 686.565pt x 499.11469pt> File: ./images/BytesWeekZoom.png Graphic file (type png) -Package pdftex.def Info: ./images/BytesWeekZoom.png used on input line 253. +Package pdftex.def Info: ./images/BytesWeekZoom.png used on input line 228. (pdftex.def) Requested size: 466.13518pt x 113.81151pt. +LaTeX Font Info: Try loading font information for OMS+ptm on input line 238. 
-Underfull \hbox (badness 1221) in paragraph at lines 270--271 -\OT1/ptm/m/n/10 SMB's in-ter-nal man-age-ment would tackle - [] -[6] [7 ] -Underfull \hbox (badness 1077) in paragraph at lines 294--295 +("C:\Program Files\MiKTeX 2.9\tex\latex\psnfss\omsptm.fd" +File: omsptm.fd +) +LaTeX Font Info: Font shape `OMS/ptm/m/n' in size <10> not available +(Font) Font shape `OMS/cmsy/m/n' tried instead on input line 238. +Missing character: There is no â in font ptmr7t! +Missing character: There is no € in font ptmr7t! +Missing character: There is no ™ in font ptmr7t! + [6] [7 ] +Underfull \hbox (badness 1077) in paragraph at lines 254--255 \OT1/ptm/m/n/10 not only pull out in-for-ma-tion per-ta-nent to the [] -Underfull \vbox (badness 2393) has occurred while \output is active [] - - -Underfull \hbox (badness 10000) in paragraph at lines 336--337 +Underfull \hbox (badness 10000) in paragraph at lines 298--299 []\OT1/ptm/m/it/10 Common In-ter-net File Sys-tem (CIFS) Pro- [] -Underfull \hbox (badness 10000) in paragraph at lines 336--337 +Underfull \hbox (badness 10000) in paragraph at lines 298--299 \OT1/ptm/m/it/10 to-col\OT1/ptm/m/n/10 , urlhttp://msdn.microsoft.com/en- [] -Underfull \hbox (badness 10000) in paragraph at lines 338--339 +Underfull \hbox (badness 10000) in paragraph at lines 300--301 []\OT1/ptm/m/it/10 Server Mes-sage Block (SMB) Pro-to- [] -Underfull \hbox (badness 10000) in paragraph at lines 338--339 +Underfull \hbox (badness 10000) in paragraph at lines 300--301 \OT1/ptm/m/it/10 col\OT1/ptm/m/n/10 , urlhttp://msdn.microsoft.com/en- [] -[8] -Underfull \hbox (badness 10000) in paragraph at lines 353--355 + +Underfull \hbox (badness 10000) in paragraph at lines 315--317 []\OT1/ptm/m/it/10 PF[]RING User Guide\OT1/ptm/m/n/10 , url- [] -Overfull \hbox (61.33023pt too wide) in paragraph at lines 353--355 +Overfull \hbox (61.33023pt too wide) in paragraph at lines 315--317 \OT1/ptm/m/n/10 https://svn.ntop.org/svn/ntop/trunk/PF[]RING/doc/UsersGuide.pdf [] -[9 +[8] [9 -] (C:\Users\Wortman\Documents\UConn\TracingPaper\TracingPaper.aux) +] (C:\UConn\TracingPaper\TracingPaper.aux) LaTeX Warning: There were multiply-defined labels. ) Here is how much of TeX's memory you used: - 1498 strings out of 493705 - 20319 string characters out of 3144563 + 1505 strings out of 493705 + 20348 string characters out of 3144575 83807 words of memory out of 3000000 - 4819 multiletter control sequences out of 15000+200000 + 4824 multiletter control sequences out of 15000+200000 20443 words of font info for 42 fonts, out of 3000000 for 9000 1025 hyphenation exceptions out of 8191 - 34i,8n,21p,2172b,437s stack positions out of 5000i,500n,10000p,200000b,50000s + 34i,8n,21p,2111b,437s stack positions out of 5000i,500n,10000p,200000b,50000s {C:/Program Files/MiKTeX 2.9/fonts/enc/dvips/fontname/8r.enc} -Output written on TracingPaper.pdf (9 pages, 369942 bytes). +Output written on TracingPaper.pdf (9 pages, 390552 bytes). PDF statistics: - 61 PDF objects out of 1000 (max. 8388607) + 62 PDF objects out of 1000 (max. 8388607) 0 named destinations out of 1000 (max. 500000) - 21 words of extra memory for PDF output out of 10000 (max. 10000000) + 26 words of extra memory for PDF output out of 10000 (max. 
10000000) diff --git a/TracingPaper.pdf b/TracingPaper.pdf index 62d6110..8d87c75 100644 Binary files a/TracingPaper.pdf and b/TracingPaper.pdf differ diff --git a/TracingPaper.synctex.gz b/TracingPaper.synctex.gz index 1d324ab..e802b05 100644 Binary files a/TracingPaper.synctex.gz and b/TracingPaper.synctex.gz differ diff --git a/TracingPaper.tex b/TracingPaper.tex index 886f089..956673d 100644 --- a/TracingPaper.tex +++ b/TracingPaper.tex @@ -78,7 +78,7 @@ \maketitle \begin{abstract} -Traces are an important and necessary part of systems research because this works leads to a better understanding of the behavior of protocols, architectures, or even entire networks. Further more the lessons learned through examining these traces can lead to the development of better benchmarks which will in turns allow for more accurate testing and advancement of technologies and their performance. Some key findings found from the examination of CIFS traces include \textbf{ADD ONCE KEY FINDINGS HAVE BEEN FOUND}. +Traces are an important and necessary part of systems research because this work leads to a better understanding of the behavior of protocols, architectures, or even entire networks. Furthermore, the lessons learned through examining these traces can lead to the development of better benchmarks, which will in turn allow for more accurate testing and advancement of technologies and their performance. Some key findings from the examination of CIFS traces include \textbf{ADD ONCE KEY FINDINGS HAVE BEEN FOUND}. \\ %NOTE: Perhaps bring up at the end of the paper when mentioning why trace work is important in a grander scheme %With any sort of benchmark, there are inherent oversimplifications that are taken into account when first designing these watermarks for advancing technology. In the case of networking benchmarks, many of these simplifications occur when dealing with the low level operations of the system; spatial/temporal scaling, timestamping, IO and system behavior. While these simplifications were acceptable for past systems being tested, this facile outlook is no longer acceptable for supplying worthwhile information. Without taking into account the intricacies of current day machines, technology will only be able to progress in the avenues that we know of, while never being able to tackle the bottlenecks that are made apparent through more accurate benchmarking. @@ -88,7 +88,7 @@ Traces are an important and necessary part of systems research because this work \label{Introduction} Traces are important for the purpose of developing and taking accurate metrics of current technologies. One must determine which aspects of the trace are most representative of what occurred during the tracing of the system, while figuring out which are representative of the habits and patterns of said system. This discovered information is used to produce a benchmark, either by running a repeat of the captured traces or by using a synthetic benchmark created from the trends detailed within the captured tracing data~\cite{Anderson2004}. -As seen in previous trace work done [Leung et al, Ellard et al, Roselli et al], the general perceptions of how computer systems are being used versus their initial purpose have allowed for great strides in eliminating actual bottlenecks rather than spending unnecessary time working on imagined bottlenecks. Leung's \textit{et. 
al.} work led to a series of obervations, from the fact that files are rarely re-opened to finding that read-write access patterns are more frequent ~\cite{Leung2008}. Without illumination of these underlying actions (e.g. read-write ratios, file death rates, file access rates) these issues can not be readily tackled. +As seen in previous trace work [Leung et al., Ellard et al., Roselli et al.], the general perceptions of how computer systems are being used versus their initial purpose have allowed for great strides in eliminating actual bottlenecks rather than spending unnecessary time working on imagined bottlenecks. The work of Leung \textit{et al.} led to a series of observations, from the fact that files are rarely re-opened to the finding that read-write access patterns are more frequent~\cite{Leung2008}. Without illumination of these underlying actions (e.g. read-write ratios, file death rates, file access rates) these issues cannot be readily \& effectively tackled. \\ %\textbf{NOT SURE IF KEEP OR NEEDED} I/O benchmarking, the process of comparing I/O systems by subjecting them to known workloads, is a widespread practice in the storage industry and serves as the basis for purchasing decisions, performance tuning studies, and marketing campaigns~\cite{Anderson2004}. @@ -108,7 +108,7 @@ Tracing collection and analysis has proved its worth in time from previous studi Previous tracing work has shown that one of the largest \& broadest hurdles to tackle is that traces (and benchmarks) must be tailored (to every extent) to the system being tested. There are always some generalizations taken into account but these generalizations can also be a major source of error~\cite{Ellard2003,EllardLedlie2003,Anderson2004,Orosz2013,Dabir2008,Skopko2012,Vogels1999,Traeger2008,Ruemmler1993}. To produce a benchmark with high fidelity one needs to understand not only the technology being used but how it is being implemented within the system being traced \& benchmarked~\cite{Roselli2000,Traeger2008,Ruemmler1993}. All of these aspects will lend to the behavior of the system; from timing \& resource elements to how the managing software governs actions~\cite{Ellard2003,EllardLedlie2003,Douceur1999}. Furthermore, in pursuing this work one may find unexpected results and learn new things through examination~\cite{Leung2008,Ellard2003,Roselli2000}. \\ These studies are required in order to evaluate the development of technologies and methodologies along with furthering knowledge of different system aspects and capabilities. As has been pointed out by past work, the design of systems is usually guided by an understanding of the file system workloads and user behavior~\cite{Leung2008}. It is for that reason that new studies are constantly performed by the science community, from large scale studies to individual protocol studies~\cite{Leung2008,Ellard2003,Anderson2004,Roselli2000,Vogels1999}. Even within these studies, the information gleaned is only as meaningful as the considerations of how the data is handled. \\ -A detailed overview of the tracings and analysis system can be seen in section ~\ref{Trace Collection}. The hope is to further the progress made with benchmarks \& tracing in the hope that it too will lend to improving and deepening the knowledge and understanding of these systems so that as a result the technology and methodology is bettered as a whole. +A detailed overview of the tracing and analysis system can be seen in Section~\ref{Trace Collection}. 
\subsection{Contributions} \label{Contributions} Out of all the elements that make up the tracing system used for this research, % PF\_RING section The addition of PF\_RING lends to the tracing system by minimizing the copying of packets which, in turn, allows for more accurate timestamping of incoming traffic packets being captured~\cite{Orosz2013,Skopko2012,PFRING,PFRINGMan}. PF\_RING acts as a kernel module which allows for kernel-based capture and sampling that limits packet loss and timestamping overhead, leading to faster packet capture while efficiently preserving CPU cycles~\cite{PFRING}. This aids in minimizing packet loss/timestamping issues by not passing packets through the kernel data structures~\cite{PFRINGMan}. The other reason PF\_RING is instrumental is that it functions with the 10Gb/s hardware that was installed into the Trace1 server, allowing for full throughput from the network tap on the UITS system. \\ % DataSeries + Code section -The tweaks and code additions to the existing DataSeries work are filtering for specific CIFS/SMB protocol fields along with the writing of analysis tools to parse and dissect the captured packets. Specific fields were chosen to be the interesting fields to be kept for analysis. It should be noted that this was done arbitrarily and changes/additions have been made as the value of certain fields are determined to be worth examining. \textbf{ADD BIT ABOUT FIELDS' VALUE AND WORTH/IMPACT}. The code written for analysis of the captured DataSeries format packets focuses on read/write events, ID tracking (PID/MID/TID/UID), and OpLock information. The future vision for this information is to combine ID tracking with the OpLock information in order to track resource sharing of the different clients on the network. +The tweaks and code additions to the existing DataSeries work consist of filtering for specific CIFS/SMB protocol fields along with the writing of analysis tools to parse and dissect the captured packets. Interesting fields were chosen to be kept for analysis. It should be noted that this selection was made somewhat arbitrarily, and changes/additions have been made as the value of certain fields is determined to be worth examining. The code written for analysis of the captured DataSeries format packets focuses on read/write events, ID tracking (PID/MID/TID/UID), and OpLock information. The future vision for this information is to combine ID tracking with the OpLock information in order to track resource sharing among the different clients on the network. %\subsection{My Work} %\label{My Work} @@ -146,14 +146,19 @@ The trace analysis in performed by an analysis module code that both processes t \label{ID Tracking} \textit{\textbf{Note:} It should be noted that this system is currently not in use due to the poor quality of its original implementation. The new concept for this ID tracking is to combine the MID/PID/TID/UID tuple tracking along with FID tracking to know what files are opened, by whom (i.e. tuple identification), and tracking of file sizes for files that are created with an initial size of zero. The purpose of this tracking will be to follow the habits of individual users. While initially simplistic (drawing a connection between FIDs and tuple IDs), this aspect of the research will be developed in future work to be more inclusive.} \\ All commands sent over the network are coupled to an identifying MID/PID/TID/UID tuple. 
Since the only commands being examined are read or write commands, the identifying characteristic distinguishing a request command packet from a response command packet is the addition of an FID field within the sent packet. It is examination of the packets for this FID field that allows the analysis code to distinguish between request \& response command packets. The pairing is done by examining the identifying tuple and assuming that each tuple-identified system will only send one command at a time (awaiting a response before sending the next command of that same type). -\\Following these process IDs is as a way to check for intercommunication between two or more processes. In particular, we examine the compute time \& I/O (input/output) time (i.e. time spent in communication; between information arrivals). This is done by examining the inter-arrival times (IAT) between the server \& the client. This is interesting because this information will give us a realistic sense of the data transit time of the network connections being used (e.g. ethernet, firewire, fibre, etc.). Other pertinent information would be how often the client makes requests \& how often this event occurs per client process ID, identifiable by their PID/MID tuple. One could also track the amount of sharing that is occurring between users. The PID is the process identifier and the MID is the multiplex identifier, which is set by the client and is to be used for identifying groups of commands belonging to the same logical thread of operation on the client node. +\\One could also track the amount of sharing that is occurring between users. The PID is the process identifier and the MID is the multiplex identifier, which is set by the client and is to be used for identifying groups of commands belonging to the same logical thread of operation on the client node. \\The per client process ID can be used to map the activity of given programs, thus allowing for finer granularity in the produced benchmark (e.g. control down to the process types run by individual clients). Other features of interest are the time between an open \& close, or how many opens/closes occurred in a window (e.g. a period of time). This information could be used as a gauge of current-day trends in filesystem usage \& its consequent taxation on the surrounding network. It would also allow for greater insight into the r/w habits of users on a network along with a rough comparison between other registered events that occur on the network. Lastly, though no less important, it would allow us to look at how many occurrences there are of shared files between different users. One must note that the type of sharing may differ and there can be an issue of resource locking (e.g. shared files) that needs to be taken into account. This is preliminarily addressed by monitoring any oplock flags that are sent for reads \& writes. This information also helps provide a preliminary mapping of how the network is used and what sort of traffic populates the communication. +\begin{figure*} + \includegraphics[width=\textwidth,height=4cm]{./images/SMBHeader.jpg} + \caption{SMB Header Specification.} + \label{fig:SMBSpec} +\end{figure*} + \subsection{SMB} \label{SMB} Server Message Block (SMB) is the modern dialect of Common Internet File System (CIFS). The most important aspect of SAMBA (i.e. SMB) is that it is a stateful protocol, i.e. one where the information being sent via SMB has identifying fields that allow for process ID tracking. 
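As a purely illustrative sketch of the tuple-based pairing described in the ID Tracking subsection above (the names below are hypothetical and this is not the project's actual DataSeries-based AnalysisModule), the per-tuple request/response matching and inter-arrival-time (IAT) accumulation could look roughly like the following C++, under the same one-outstanding-command-per-tuple assumption:

// Hypothetical sketch only; assumes the request/response classification (e.g. via
// the FID field) has already been done upstream by the packet dissection stage.
#include <cstddef>
#include <cstdint>
#include <unordered_map>

struct SmbTuple {                 // identifying tuple carried in every SMB header
    uint32_t pid, mid, tid, uid;
    bool operator==(const SmbTuple &o) const {
        return pid == o.pid && mid == o.mid && tid == o.tid && uid == o.uid;
    }
};

struct SmbTupleHash {
    std::size_t operator()(const SmbTuple &t) const {
        std::size_t h = t.pid;
        h = h * 31 + t.mid;
        h = h * 31 + t.tid;
        h = h * 31 + t.uid;
        return h;
    }
};

struct EventData {                // per-tuple read/write statistics
    uint64_t lastRequestNs = 0;   // timestamp of the outstanding request, 0 if none
    uint64_t totalIatNs    = 0;   // accumulated request-to-response inter-arrival time
    uint64_t pairCount     = 0;   // number of matched request/response pairs
};

std::unordered_map<SmbTuple, EventData, SmbTupleHash> tracker;

// One outstanding command per tuple is assumed, mirroring the pairing rule above.
void onReadWritePacket(const SmbTuple &id, uint64_t timestampNs, bool isRequest) {
    EventData &e = tracker[id];
    if (isRequest) {
        e.lastRequestNs = timestampNs;
    } else if (e.lastRequestNs != 0) {
        e.totalIatNs += timestampNs - e.lastRequestNs;
        e.pairCount  += 1;
        e.lastRequestNs = 0;      // the tuple may now issue its next command
    }
}

A real implementation would also need to handle retransmissions, unmatched responses, and the PID/MID versus PID/MID/TID/UID distinction discussed above; the sketch only shows the core bookkeeping.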
-\\The structure for sending message payloads in SMB is as follows: each SMB message is split into three blocks. The first block is a fixed-length SMB header. The second block is made up of two variable-length blocks called the SMB parameters. The third block is made up of the SMB data. Depending on the transaction occurring these different blocks are used in different manners. The purpose of the SMB header is particularly important because the header identifies the message as an SMB message payload~\cite{MS-CIFS}. When used in a response message the header also includes status information that indicates whether and how the command succeeded or failed. The most important aspects of the SMB header, which the tracing system constantly examines, are the PID/MID tuple (for the purpose of identifying a client/server) and the commands value which is passed (notifying our tracing system of the actions taking place on the network). It is through this command field that the process ID tracking system is able to follow the different commands (read/write/general event) that occur and try to find patterns in these network communications. -\\\textit{\textbf{Expectations}}: SMB will be heavily used by students to access their network accounts from any networked computer, along with network access to shared file systems and connected printers. Oplocks will be in heavy use and cause a slowdown of the system for multiuser shared storage space. Authentication of network computers could bottleneck during moments of high traffic (e.g. students all logging in for a class). +\\The structure for sending message payloads in SMB is as follows: each SMB message is split into three blocks. The first block is a fixed-length SMB header. The second block is made up of two variable-length blocks called the SMB parameters. The third block is made up of the SMB data. Depending on the transaction occurring these different blocks are used in different manners. The purpose of the SMB header is particularly important because the header identifies the message as an SMB message payload~\cite{MS-CIFS}. The most important aspects of the SMB header, which the tracing system constantly examines, are the PID/MID tuple (for the purpose of identifying a client/server) and the commands value which is passed (notifying our tracing system of the actions taking place on the network). It is through this command field that the process ID tracking system is able to follow the different commands (read/write/general event) that occur and try to find patterns in these network communications. \subsection{System Limitations and Challenges} \label{System Limitations and Challenges} @@ -162,64 +167,36 @@ An other concern was whether or not the system would be able to function optimal %About Challenges of system While the limitations of the system were concerns, there were other challenges that were tackled in the development of this research. -One glaring challenge with building this tracing system was using code written by others; tshark \& DataSeries. While these programs are used within the tracing structure there are some issues when working with them. These issues ranged from data type limitations of the code to hash value \& checksum miscalculations due to encryption of specific fields/data. Attempt was made to dig and correct these issues, but they were so inherrent to the code being worked with that hacks and workaround were developed to minimize their effect. 
Other challenges centralize around selection, intrepretations and distribution scope of the data collected. Which fields should be filtered out from the original packet capture? What data is most prophetic to the form and function of the network being traced? What should be the scope, with respect to time, of the data being examined? Where will the most interesting information appear? As each obstacle was tackled, new information and ways of examining the data reveal themselves and with each development different alterations \& corrections are made. +One glaring challenge with building this tracing system was using code written by others: tshark \& DataSeries. While these programs are used within the tracing structure, there are some issues when working with them. These issues were unfortunately so inherent to the code that hacks and workarounds were developed to minimize their effect. Other challenges center around the selection, interpretation and distribution scope of the data collected. A researcher must select which information is important or which information will give the greatest insight into the workings of the network. Too little information can lead to incorrect conclusions being drawn about the workings of the system, while too much information (and not knowing which information is pertinent) can lead to erroneous conclusions as well. There is a need to strike a balance between what information is important enough to capture (so as not to slow down the capturing process through needless processing) while still obtaining enough information to acquire the bigger picture of what is going on. Every step of the tracing process requires a degree of human input to decide what network information will end up providing the most complete picture of the network communication and how to interpret that data into meaningful graphs and tables. This can lead to findings around the focus of the work being done, or even to discoveries of other phenomena that end up having far more impact on the overall performance of the system~\cite{Ellard2003}. %About interpretation of data To some degree these interpretations are easy to make (e.g. file system behavior \& user behavior~\cite{Leung2008}) while others are more complicated (e.g. temporal scaling of occurrences of read/write), but in all scenarios there is still the requirement for human interpretation of the data. While having humans do the interpretations can be advantageous, a lack of all the "background" information can also lead to incorrectly interpreting the information. -%About scope of interpretation (affect of time on data seen) -A researcher must select which information is important or which information will give the greatest insight to the workings on the network. Too little information can lead to incorrect conclusions being drawn about the workings on the system, while too much information (and not knowing which information is pertinent) can lead to erroneous conclusions as well. There is a need to strike a balance between what information is important enough to capture (so as not to slow down the capturing process through needless processing) while still obtaining enough information to acquire the bigger picture of what is going on. Every step of the tracing process requires a degree of human input to decide what network information will end up providing the most complete picture of the network communication and how to interpret that data into meaningful graphs and tables. 
This can lead to either finds around the focus of the work being done, or even lead to discoveries of other phenomena that end up having far more impact on the overall performance of the system~\cite{Ellard2003}. - -Even when all the information is collected and the most important data has been selected, there is still the issue of what lens should be used to view this information. Because the data being collected is from an active network, there will be differing activity depending on the time of day, week, and scholastic year. For example, although the first week or so of the year may contain a lot of traffic, this does not mean that trends of that period of time will occur for every week of the year (except perhaps the final week of the semester). The trends and habits of the network will change based on the time of year, time of day, and even depend on the exam schedule. Truly interesting examination of data requires looking at all different periods of time to see how all these factors play into the communications of the network. +Because the data being collected is from an active network, there will be differing activity depending on the time of day, week, and scholastic year. For example, although the first week or so of the year may contain a lot of traffic, this does not mean that trends of that period of time will occur for every week of the year (except perhaps the final week of the semester). The trends and habits of the network will change based on the time of year, time of day, and even depend on the exam schedule. Truly interesting examination of data requires looking at all different periods of time to see how all these factors play into the communications of the network. \section{Trace Analysis} \label{Trace Analysis} -The trace analysis is performed by an AnalysisModule code that both processes the ds-files for extraction of information to an event\_data structure and also outputs meaningful information (such as the IAT times between request and response packets) to a file that can be used for further analysis. - -%\subsection{Other (e.g. HTML)} -%\label{Other (e.g. HTML)} -% -%\subsection{Process ID Tracking} -%\label{Process ID Tracking} -% -%\begin{figure}[htbp] -%\begin{centering} -%\epsfig{file=communications_sketch, width=2.50in} -%\small\itshape -%\caption{\small\itshape Rough Sketch of Communication} -%\label{fig-communication} -%\end{centering} -%\end{figure} -% -%Figure~\ref{fig-communication} shows a rough sketch of communication between a client \& server. The general order that constitutes a full tracking is as follows: (client) computation [process to filesystem], (client) communication [SMB protocol used to send data client→server], (server) timestamping + service [server gets data, logs it, performs service], (server) communication [SMB data send server→client], (client) next computation. -% -%Currently the focus of process ID tracking is to see the number of reads, writes and events that occur due to the actions of clients on the network. This is done by using a tuple of the PID \& MID fields which allows for the identification of client. Since these values are unique and \textbf{MUST} be sent with each packet, this tuple is used as the key for the unordered map that is used to track this information. The structure is as follows: the tuple functions as the key for the pairing of the identifying tuple \& corresponding event\_data structure; which is used to house pertinent information about reads/writes/events. 
The information stored in the structure is the last time a read/write/event occurred, the total IAT of the observed read/write/events, and the total number of reads/writes/events that have occurred for the identified tuple. The purpose for tracking this information is to profile the read/write “habits” of the users on the network as well as comparing this information against the general events’ inter-arrival times, thus allowing one to see if the read \& write events are being processed differently (e.g. longer or shorter IATs) than the rest of the events occurring on the network. -% -%One should note that there are separate purposes to the PID/MID tuple from the PID/MID/TID/UID tuple. The first tuple (2-tuple) is used to uniquely identify groups of commands belonging to the same logical thread of operation on the client node, while the latter tuple (4-tuple) allows for unique identification for request \& responses that are part of the same transaction. While the PID/MID tuple is mainly what we are interested in, since this allows the following of a single logical thread, there is some interest in making use of the TID/UID tuple because this would allow us to count the number of transactions that occur in a single logical thread. This information could provide interesting information on how the computer systems on the network may be deciding to handle/send commands over the network; e.g. sending multiple commands per transaction, multiple packet commands per transaction, etc. -% -%\subsubsection{event\_data Structure Tracking} -%\label{event_data Structure Tracking} -%The purpose of the event\_data structure is to maintain a list of the interesting information associated with each PID/MID/TID/UID tuple seen on the network. It is through this structure that the read \& write times, IATs, and even number of occurances are tracked, along with the request/response IAT pairings. In this manner each tuple has the following information tracked, and both the packet processing is performed and the meaningful data is output from the AnalysisModule code. \textit{\textbf{ADD LIST OF event\_data INFORMATION HERE}}. Although there is a large number of aspects that can be examined when dealing with all of this network information, the current focus of this paper is to examine the possible read/write commands that can occur in via SMB protcols and the IAT times of the request and response packets for these commands. \textit{\textbf{Note:}} Eventually the addition of resource locks WILL be included because it is through this information that we can gain any sort of idea as to the interaction between users/other programs with the resources on the network. +The trace analysis is performed by an AnalysisModule code that both processes the DataSeries(ds) files for extraction of information to an event\_data structure and also outputs meaningful information to a file that can be used for further analysis. \subsection{System Information and Predictions} \label{System Information and Predictions} -It is important to detail out any benchmakring system so that when the results of one's research are being examined, they can be properly understood with the correct background information and understanding that lead the originating author to those results~\cite{Traeger2008}. The following is an explination the UITS system from which trace1 pulls it's packet traffic information along with predicitions of how the data will look along with the reasoning behind the shape of the information. 
+It is important to detail any benchmarking system so that when the results of one's research are being examined, they can be properly understood with the correct background information and understanding that led the originating author to those results~\cite{Traeger2008}. The following is an explanation of the UITS system from which Trace1 pulls its packet traffic information, along with predictions of how the data will look and the reasoning behind the shape of the information. The UITS system consists of five Microsoft file server cluster nodes. These blade servers are used to host home directories for all UConn users within a list of 88 departments. These home directories are used to provide personal drive share space to faculty, staff and students, along with at least one small group of users. Each server is capable of handling 1Gb/s of traffic in each direction (e.g. outbound and inbound traffic). Altogether the five blade server system can in theory handle 10Gb/s of receiving and transmitting data. Some of these blade servers have local storage but the majority do not have any. To the understanding of this paper, the blade servers are purposed purely for dealing with incoming traffic to the SAN storage nodes that sit behind them. This system does not currently implement load balancing; instead, the servers are set up to spread the traffic load among four of the active cluster nodes while the fifth node is passive and purposed to take over in the case that any of the other nodes go down (e.g. become inoperable or crash). \\ From the information at hand I theorized the following behaviors and attributes that would be seen in the system. First are the predictions based on what was learned from talking to people within UITS; after that are my general predictions. -From conversations with UITS, the understanding of the file server system behavior is that there are spikes of traffic that tend to happen during the night time. Our assumption is that the majority of this traffic will occur between 2am and 6am because this is when backups occur to the SAN system. It is important to note that, however, it is not expected that we would see any of this traffic as the protocol used is not the SMB/CIFS protocol that is being analyzed by this paper. The reasoning for this is that this traffic would be encrypted, therefore this traffic would appear as some other protocol. Further more, any traffic that does occur during the duration of "day time hours" (i.e. 9am to 5pm) would be soley due to the actions taken by the users of this system (e.g facutly, staff, students). If there is an automatic backup manager we would expect to see traffic caused by it pulling cached information from the machine of users across the network. \\ -Assumptions: -\begin{itemize} -\item Some backup traffic will be seen because traffic will be generated as the data being stored using this "oneline storage" is backed up to the SAN system. Note: Any traffic past moving the data to the SAN system will \textbf{not} be seen. This would be because the traffic would show up as an other protocol. -\item All backup will be performed late night/early morning (e.g. 11pm-5am) -\item One general assumption is that these blade servers are "rock solid" and therefore should \textbf{not} ever go down. If this is the case then the expectation is that we should see at most a transfer rate of 8Gb/s since the fifth server will not be in operation. 
If we do find that there is a greater rate of transfer of data, then this means that the fifth server is actually helping with the traffic, not just acting as a backup in the case that one of the other blade servers crashes or "goes down". -\end{itemize} +From conversations with UITS, the understanding of the file server system behavior is that there are spikes of traffic that tend to happen during the night. Our assumption is that the majority of this traffic will occur between 2am and 6am because this is when backups occur to the SAN system. It is important to note, however, that we do not expect to see any of this traffic, as it would be encrypted; therefore the protocol observed would not be the SMB/CIFS protocol that is being analyzed by this paper. +Furthermore, any traffic that does occur during "daytime hours" (i.e. 9am to 5pm) would be solely due to the actions taken by the users of this system (e.g. faculty, staff, students). If there is an automatic backup manager we would expect to see traffic caused by it pulling cached information from the machines of users across the network. \\ +One general assumption is that these blade servers should not ever fail, thus the greatest transfer rate observed should be 8Gb/s (i.e. the four active servers at 1Gb/s in each direction). If there is a greater rate than that, it means that the fifth server is aiding with traffic rather than only acting as a backup should another server fail. + +\textit{\textbf{SMB Expectations}}: SMB will be heavily used by students to access their network accounts from any networked computer, along with network access to shared file systems and connected printers. Oplocks will be in heavy use and cause a slowdown of the system for multiuser shared storage space. Authentication of network computers could bottleneck during moments of high traffic (e.g. students all logging in for a class). + +Expectation would dictate that there will be spikes of activity the week leading up to the beginning of the semester, the first few days after the semester starts, and during any moments of maintenance that make use of the network; valleys/low points of activity right before (2 days leading up to) the beginning of the semester, on holidays (9/1, Labor Day), and a general progression of less traffic as the semester progresses. Spikes would be expected to be moments either when new files are being added onto the system (mainly writes), or when a large number of users are attempting to access files (mainly reads). Valleys would most likely show periods of low to no activity, although both the IO \& Bytes graphs should be examined to confirm inactivity rather than something else. \subsection{Run Patterns} \label{Run Patterns} -Interpreting the data collected is done by producing three separate graphs showing three important areas of interest for being able to understand the traffic being examined. These areas are: \textbf{1)} the total number of read/write IO events, \textbf{2)} the occurrrences of different file sizes being read/written, \textbf{3)} the total number of bytes (combined read \& write) that are communicated over the traffic captured by the trace1 system. The reason for needing these three areas of information is that combined together one is able to better interpret all the collected and dissected information. By comparing the byte traffic to the IO information this is how we are able to tell not only when the times of greatest times of traffic are but which types of IO interactions dominate these periods. 
It should be noted that unfortunately the analysis program does not include a granularity to allow knowledge on whether the read, or write, events are responsible for the most data transfer (via communication) but that is one of the many future additions to this work. From early examinations the data leads us to interpret that the majority of reads are due to client exploring the shared drives \& queries about directories and directory contents, along with users copying large directories of data (most likely for their own purposes). The file size information allows for interpretation of the size of the information that is being passed between the UITS servers and clients (e.g. buffer sizes). Although the granualrity for this information is far corser (24 hours versus the 15 minute time window) it still shows which variations of data lengths were most encountered over the period of the given day. This information coupled with the byte and IO information reflects a priliminary protrait of how the UITS file server system is used, which can be compared to the internal network information that the UITS department keeps for their own maintenance and troubleshooting purposes. The hope is to essentially replicate the data that UITS keeps for their own records. +Interpreting the data collected is done by producing three separate graphs showing three important areas of interest for being able to understand the traffic being examined. These areas are: \textbf{1)} the total number of read/write IO events, \textbf{2)} the occurrences of different file sizes being read/written, \textbf{3)} the total number of bytes (combined read \& write) that are communicated over the traffic captured by the Trace1 system. The reason for needing these three areas of information is that, combined together, one is able to better interpret all the collected information. By comparing the byte traffic to the IO information, we are able to tell not only when the times of greatest traffic are but also which types of IO interactions dominate these periods. It should be noted that, unfortunately, the analysis program does not yet provide the granularity to determine whether the read or the write events are responsible for the most data transfer (via communication), but that is one of the many future additions to this work. From early examinations, the data leads us to interpret that the majority of reads are due to clients exploring the shared drives \& making queries about directories and directory contents, along with users copying large directories of data (most likely for their own purposes). The file size information allows for interpretation of the size of the information that is being passed between the UITS servers and clients (e.g. buffer sizes). Although the granularity for this information is far coarser (24 hours versus the tracked 15-minute time window), it still shows which variations of data lengths were most encountered over the period of the given day. This information, coupled with the byte and IO information, reflects a preliminary portrait of how the UITS file server system is used, which can be compared to the internal network information that the UITS department keeps for their own maintenance and troubleshooting purposes. The hope is to essentially replicate the data that UITS keeps for their own records. 
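For concreteness, the following is a minimal, hypothetical sketch (assumed names, not the project's actual analysis code) of how paired read/write events could be binned into the 15-minute windows behind the IO and byte graphs; it also mirrors the limitation noted above that read bytes and write bytes are not yet separated:

// Hypothetical sketch: accumulate per-window IO counts and byte totals that,
// written out one row per window, would feed the IO and byte plots described above.
#include <cstdint>
#include <map>

struct WindowStats {
    uint64_t readIos  = 0;        // number of read IO events in the window
    uint64_t writeIos = 0;        // number of write IO events in the window
    uint64_t bytes    = 0;        // bytes moved, reads and writes combined
};

constexpr uint64_t kWindowSec = 15 * 60;          // 15-minute bins

std::map<uint64_t, WindowStats> windows;          // keyed by window start (epoch seconds)

void recordIo(uint64_t timestampSec, bool isWrite, uint64_t byteCount) {
    uint64_t windowStart = timestampSec - (timestampSec % kWindowSec);
    WindowStats &w = windows[windowStart];
    if (isWrite) { w.writeIos++; } else { w.readIos++; }
    w.bytes += byteCount;         // not split by read/write, matching the stated limitation
}

Iterating over the windows map in order and emitting one row per window would reproduce the kind of 15-minute time series plotted in the IO and byte figures; a coarser 24-hour pass over the same events would give the per-day file-size histogram.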
\subsection{Locating Performance Bottlenecks} \label{Locating Performance Bottlenecks} @@ -229,8 +206,6 @@ When examinging the data produced from this research, one has to look for a limi \label{Intuition Confirm/Change} In order ot interpret the data being analyzed and dissected, the first step was to understand how to pair the byte throughput and IO event frequency into an understanding of the system. This was achieved by including examination of the data relative to the surrounding behavior. Pairing the information in this manner shows not only the bytes \& IO behavior but preliminary understanding of how much throughput is being generated by each IO event; giving an outline of client behvaior on the system.\\ -Expectation would dicatate that there will be spikes of activity the week leading up the beginning of the semester, the first few days after the semester starts, and during any moments of maintenance that make use of the network; valleys/low points of activity right before (2 days leading up to) the beginning of the semester, holidays (9/1 - labor day), and general progression of less traffic as the semester progresses. Spikes would be expected to be moments either when new files are being added onto the system (mainly writes), or when a large number of users are attempting to access files (mainly reads). Ditches would most likely show periods of low to no activity, although both the IO \& Bytes graphs should be examined to confirm inactivity rather than something else. - \begin{figure*} \includegraphics[width=\textwidth,height=4cm]{./images/IOWeek.png} \caption{R/W IO over three weeks.} @@ -257,36 +232,21 @@ Expectation would dicatate that there will be spikes of activity the week leadin \subsection{Characterizations of Different Behaviors} \label{Characterizations of Different Behaviors} -Different bahvioral situations (seen using pairing of bytes and IO graphs/behavior) are imaged as follows: +Different behavioral situations (seen using pairing of bytes and IO graphs/behavior) are imaged as follows: \textit{Assumptions:} \begin{itemize} \item Bytes - Amount of data being passed \item IOs - Number of interactions occurring on network \end{itemize} -\textit{Combinations:} -\begin{itemize} -\item Large number of IO \& Small number of Bytes: - \begin{itemize} - \item Small number of bytes being pushed over the network \textbf{but} there are a large number of IO events being processed. If bottlenecks are seen they will most likely be due to management (software?) attempting to deal with the large volume of client interactions. Race conditions and concerns about "age of request" could be an issue, but ideally OpLocks and SMB's internal management would tackle these issues. - \end{itemize} -\item Small number of IO \& Large number of Bytes: - \begin{itemize} - \item Few clients interacting \textbf{but} large number of bytes being pushed across the network. If bottlenecks are seen they will most likely be due to physical limitations of the system; wires, switches, etc. - \end{itemize} -\item Similar number of IO \& number of Bytes: - \begin{itemize} - \item Both high - High user traffic and throughput. System is taxed in both aspects; bottlenecks could come from management of clients or from physical limitations of network hardware (e.g. wires). - \item Both low - Low user traffic and throughput. System is at relaxed state; no bottlenecks should be seen. 
- \end{itemize} -\end{itemize} +Three scenarios observed were (1) a large number of IOs and a small number of bytes, (2) a small number of IOs and a large number of bytes, and (3) a similar number of IOs and bytes transferred. This data is interpreted as follows: the number of bytes directly correlates to the data being transferred over the wire, and the number of IOs correlates to the number of clients interacting over the network. Scenario (1) has high client interaction, where expected bottlenecks would be due to management software dealing with the large volume of client interactions. Race conditions and the age of requests could be an issue, but ideally OpLocks and SMB's internal management tackle these issues. Scenario (2) has high traffic throughput, where any bottleneck would most likely be due to physical limitations of the system; wires, switches, etc. The last scenario (3) has equal measures of each: if both are high, the system is taxed and bottlenecks could occur in multiple aspects; if both are low, the system is in a relaxed state. \textbf{ADD FINDINGS FROM BUFFER SIZES; e.g. ~4K size.} %Test test of ref~\ref{fig:IOWeek} but not of the other ref \ref{fig:BytesWeek}. \section{Conclusion} \label{Conclusion} \textit{Do the results show a continuation in the trend of traditional computer science workloads?} -On the outset of this work it was believed that the data collected and analyzed would follow similar behavior patterns seen in previous papers ~\cite{Douceur1999, RuemmlerWilkes1993, Bolosky2007, EllardLedlie2003}. The expectation is that certain aspect of the data, such as transfer/buffer sizes, will produce a bell-shape and be centralized around a larger size than previous papers' findings. The number of I/O operations was expected to peak during noctural hours and fall during day time hours. On top of that the expectation is that a greater number of reads will be seen over the course of a day, where the majority of writes will occur near the expected times of UITS' backup (e.g. 2am to 6am). Granted, one must recall that the expectation is that any backup traffic that is seen will be due to a fetching of user's cahces inorder to preserve fidelity of any shared data.\\ -One oddity was that during the day one would see a greater increase in writes instead of reads. The first assumption was that this is due to the system and how users interact with everything. -I believe that the greater number of writes comes from students doing intro work for different classes in which students are constantly saving their work while reading instructions from a single source. The early traffic is most likely due to professors preparing for classes. One must also recall that this data itself has limited interpretation because only a small three week windows of infomration is being examined. A better, and far more complete, image can be constructed using data captured from the following months, or more ideally, from an entire year's worth of data. An other limitation of the results is the scope of the analysis is curbed and does not yet fully dissect all of the fields being passed in network communication. +At the outset of this work it was believed that the data collected and analyzed would follow similar behavior patterns seen in previous papers~\cite{Douceur1999, RuemmlerWilkes1993, Bolosky2007, EllardLedlie2003}. 
The expectation is that certain aspects of the data, such as transfer/buffer sizes, will produce a bell shape centered around a larger size than previous papers' findings. The number of I/O operations was expected to peak during nocturnal hours and fall during daytime hours. On top of that, the expectation is that a greater number of reads will be seen over the course of a day, where the majority of writes will occur near the expected times of UITS' backup (e.g. 2am to 6am). Granted, one must recall that the expectation is that any backup traffic that is seen will be due to a fetching of users' caches in order to preserve the fidelity of any shared data.\\ +One oddity was that during the day one would see a greater increase in writes instead of reads. The first assumption was that this is due to the system and how users interact with it. +I believe that the greater number of writes comes from students doing introductory work for different classes, in which students are constantly saving their work while reading instructions from a single source. The early traffic is most likely due to professors preparing for classes. One must also recall that this data itself has limited interpretation because only a small three-week window of information is being examined. A better, and far more complete, image can be constructed using data captured from the following months, or more ideally, from an entire year's worth of data. Another limitation of the results is that the scope of the analysis is curbed and does not yet fully dissect all of the fields being passed in network communication. The future work of this project would be to \begin{itemize} \item 1. Complete the dissection analysis to include all captured fields from the originating pcap files. @@ -303,6 +263,8 @@ The future work of this project would be to \end{itemize} \end{itemize} +The hope is to further the progress made with benchmarks \& tracing so that this work too will lend to improving and deepening the knowledge and understanding of these systems, and so that, as a result, the technology and methodology are bettered as a whole. + %references section %\bibliographystyle{plain} %\bibliography{body}