diff --git a/sigproc.bib b/sigproc.bib index 95501e9..d975791 100644 --- a/sigproc.bib +++ b/sigproc.bib @@ -552,16 +552,20 @@ year={2016} publisher={North University of Baia Mare} } -@article{seltzer2003nfs, +@inproceedings{seltzer2003nfs, title="{NFS} Tricks and Benchmarking Traps", author={Seltzer, Margo and Ellard, Daniel}, + booktitle={Proceedings of the FREENIX Track, USENIX Annual Technical Conference}, year={2003}, publisher={USENIX Association} } -@article{malkani2003passive, +@inproceedings{malkani2003passive, title={Passive {NFS} tracing of email and research workloads}, author={Malkani, Pia and Ellard, Daniel and Ledlie, Jonathan and Seltzer, Margo}, + booktitle={Proceedings of the USENIX Conference on File and Storage Technologies}, + month=mar, + pages={203--216}, year={2003}, publisher={USENIX Association} } @@ -663,7 +667,7 @@ year={2016} } @article{ousterhout1985trace, - title={A trace-driven analysis of the UNIX 4.2 BSD file system}, + title={A trace-driven analysis of the {UNIX 4.2 BSD} file system}, author={Ousterhout, John K and Da Costa, Herve and Harrison, David and Kunze, John A and Kupfer, Mike and Thompson, James G}, journal={ACM SIGOPS Operating Systems Review}, volume={19}, @@ -674,7 +678,7 @@ year={2016} } @inproceedings{ramakrishnan1992analysis, - title={Analysis of file I/O traces in commercial computing environments}, + title={Analysis of file {I/O} traces in commercial computing environments}, author={Ramakrishnan, KK and Biswas, Prabuddha and Karedla, Ramakrishna}, booktitle={ACM SIGMETRICS Performance Evaluation Review}, volume={20}, @@ -744,16 +748,16 @@ year={2016} } @misc{SMB1Spec, - author="Microsoft Corporation", - title="[{MS-SMB}]Server Message Block ({SMB})Protocol", + author="{Microsoft Corporation}", + title="[{MS-SMB}]: Server Message Block ({SMB}) Protocol", url="https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-smb/f210069c-7086-4dc2-885e-861d837df688", month=sep, year="2018" } @misc{SMB2Spec, - author="Microsoft Corporation", - title="[{MS-SMB2}]Server Message Block ({SMB})Protocol Versions 2 and 3", + author="{Microsoft Corporation}", + title="[{MS-SMB2}]: Server Message Block ({SMB}) Protocol Versions 2 and 3", url="https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-smb2/5606ad47-5ee0-437a-817e-70c366052962", month=sep, year="2019" diff --git a/trackingPaper.tex b/trackingPaper.tex index 9b8c659..230c659 100644 --- a/trackingPaper.tex +++ b/trackingPaper.tex @@ -128,10 +128,10 @@ \begin{abstract} Storage system traces are important for examining real-world applications, studying potential bottlenecks, as well as driving benchmarks in the evaluation of new system designs. While file system traces have been well-studied in earlier work, it has been some time since the last examination of the SMB network file system. -The purpose of this work is to continue previous SMB studies to better understand the use of the protocol in a real-world production system in use at \textcolor{red}{the University of Connecticut}. +The purpose of this work is to continue previous SMB studies to better understand the use of the protocol in a real-world production system in use at \textcolor{green}{a major research university}.%\textcolor{red}{the University of Connecticut}. The main contribution of our work is the exploration of I/O behavior in modern file system workloads as well as new examinations of the inter-arrival times and run times for I/O events. 
We further investigate if the recent standard models for traffic remain accurate. -Our findings reveal interesting data relating to the number of read and write events. We notice that the number of read and write events is significantly less than creates and \textcolor{blue}{that average number of bytes exchanged per I/O has reduced.} +Our findings reveal interesting data relating to the number of read and write events. We notice that the number of read and write events is significantly less than creates and \textcolor{green}{that the} \textcolor{blue}{average number of bytes exchanged per I/O} \textcolor{green}{is much smaller than what has been seen in previous studies}. %the average of bytes transferred over the wire is much smaller than what has been seen in previous studies. Furthermore we find an increase in the use of metadata for overall network communication that can be taken advantage of through the use of smart storage devices. \end{abstract} @@ -162,7 +162,9 @@ Since an SMB-based trace study has not been undertaken recently, we took a look at its current implementation and use in a large university network. %Due to the sensitivity of the captured information, we ensure that all sensitive information is hashed and that the original network captures are not saved. -Our study is based on network packet traces collected on \textcolor{red}{the University of Connecticut}'s centralized storage facility over a period of three weeks in May 2019. This trace-driven analysis can help in the design of future storage products as well as providing data for future performance benchmarks. +Our study is based on network packet traces collected on \textcolor{green}{a major research university}'s +%\textcolor{red}{the University of Connecticut}'s +centralized storage facility over a period of three weeks in May 2019. This trace-driven analysis can help in the design of future storage products as well as provide data for future performance benchmarks. %Benchmarks are important for the purpose of developing technologies as well as taking accurate metrics. The reasoning behind this tracing capture work is to eventually better develop accurate benchmarks for network protocol evaluation. Benchmarks allow for the stress testing of various aspects of a system (e.g. network, single system). Aggregate data analysis collected from traces can lead to the development of synthetic benchmarks. Traces can also expose systems patterns that can also be reflected in synthetic benchmarks. Finally, the traces themselves can drive system simulations that can be used to evaluate prospective storage architectures. @@ -182,7 +184,9 @@ Benchmarks allow for the stress testing of various aspects of a system (e.g. network, single system). % \end{enumerate} %\end{itemize} -We created a new tracing system to collect data from the \textcolor{red}{UConn} storage network system. The tracing system was built around the high-speed PF\_RING packet capture system and required the use of proper hardware and software to handle incoming data\textcolor{blue}{; however interaction with later third-party code did require re-design for processing of the information}. We also created a new trace capture format derived on the DataSeries structured data format developed by HP~\cite{DataSeries}. +We created a new tracing system to collect data from the \textcolor{green}{university} +%\textcolor{red}{UConn} +storage network system. 
The tracing system was built around the high-speed PF\_RING packet capture system and required the use of proper hardware and software to handle incoming data\textcolor{blue}{; however, interaction with later third-party code did require a re-design of the information processing}. We also created a new trace capture format derived from the DataSeries structured data format developed by HP~\cite{DataSeries}. % PF\_RING section %The addition of PF\_RING lends to the tracing system by minimizing the copying of packets which, in turn, allows for more accurate timestamping of incoming traffic packets being captured ~\cite{Orosz2013,skopko2012loss,pfringWebsite,PFRINGMan}. PF\_RING acts as a kernel module that aids in minimizing packet loss/timestamping issues by not passing packets through the kernel data structures~\cite{PFRINGMan}. @@ -240,7 +244,7 @@ The 2004 paper by Anderson et al.~~\cite{anderson2004buttress} has the following %I/O benchmarking widespread practice in storage industry and serves as basis for purchasing decisions, performance tuning studies and marketing campaigns. Issues of inaccuracies in scheduling I/O can result in as much as a factor 3.5 difference in measured response time and factor of 26 in measured queue sizes. These inaccuracies pose too much of an issue to ignore. -Orosz and Skopko examined the effect of the kernel on packet loss in their 2013 paper~\cite{Orosz2013}. Their work showed that when taking network measurements the precision of the timestamping of packets is a more important criterion than low clock offset, especially when measuring packet inter-arrival times and round-trip delays at a single point of the network. One \textcolor{blue}{solution for network capture is the tool Dumpcap, however the} concern \textcolor{blue}{with} Dumpcap is \textcolor{blue}{that it is a} single threaded application and was suspected to be unable to handle new arriving packets due to a small size of the kernel buffer. Work by Dabir and Matrawy, in 2008~\cite{dabir2007bottleneck}, attempted to overcome this limitation by using two semaphores to buffer incoming strings and improve the writing of packet information to disk. +Orosz and Skopko examined the effect of the kernel on packet loss in their 2013 paper~\cite{Orosz2013}. Their work showed that when taking network measurements the precision of the timestamping of packets is a more important criterion than low clock offset, especially when measuring packet inter-arrival times and round-trip delays at a single point of the network. One \textcolor{blue}{solution for network capture is the tool Dumpcap. However, the} concern \textcolor{blue}{with} Dumpcap is \textcolor{blue}{that it is a} single threaded application and was suspected to be unable to handle newly arriving packets due to \textcolor{green}{the} small size of the kernel buffer. Work by Dabir and Matrawy, in 2008~\cite{dabir2007bottleneck}, attempted to overcome this limitation by using two semaphores to buffer incoming strings and improve the writing of packet information to disk. Narayan and Chandy examined the concerns of distributed I/O and the different models of parallel application I/O. %There are five major models of parallel application I/O. (1) Single output file shared by multiple nodes. (2) Large sequential reads by a single node at the beginning of computation and large sequential writes by a single node at the end of computation. (3) Checkpointing of states. (4) Metadata and read intensive (e.g. small data I/O and frequent directory lookups for reads). 
@@ -257,7 +261,7 @@ The Server Message Block (SMB) is an application-layer network protocol mainly u The SMB 1.0 protocol~\cite{SMB1Spec} has been found to have high/significant impact on performance due to latency issues. Monitoring revealed a high degree of ``chattiness'' and disregard of network latency between hosts. Solutions to this problem were included in the updated SMB 2.0 protocol which decreases ``chattiness'' by reducing commands and sub-commands from over a hundred to nineteen~\cite{SMB2Spec}. Additional changes, most significantly being increased security, were implemented in SMB 3.0 protocol (previously named SMB 2.2). % XXX citations for SMB specs for different versions? %\textcolor{red}{\textbf{Add information about SMB 2.X/3?}} -The rough order of communication for SMB session file interaction contains about five steps. First is a negotiation where a Microsoft SMB Protocol dialect is determined. Next a session is established to determine the share-level security. After this the Tree ID (TID) is determined for the share to be connected to as well as a file ID (FID) for a file requested by the client. From this establishment, I/O operations are performed using the FID given in the previous step. +The rough order of communication for SMB session file interaction consists of five steps. First is a negotiation where a Microsoft SMB Protocol dialect is determined. Next, a session is established to determine the share-level security. After this, the Tree ID (TID) is determined for the share to be connected to as well as a file ID (FID) for a file requested by the client. From this establishment, I/O operations are performed using the FID given in the previous step. \textcolor{green}{The SMB packet header is shown in Figure~\ref{fig:smbPacket}.} % Information relating to the capturing of SMB information The only data that needs to be tracked from the SMB traces are the UID (User ID) and TID for each session. The SMB commands also include a MID (Multiplex ID) value that is used for tracking individual packets in each established session, and a PID (Process ID) that tracks the process running the command or series of commands on a host. @@ -272,11 +276,11 @@ Some nuances of SMB protocol I/O to note are that SMB/SMB2 write requests are th \textcolor{red}{Add writing about the type of packets used by SMB. Include information about the response time of R/W/C/General (to introduce them formally; not sure what this means.... Also can bring up the relation between close and other requests.} -\textcolor{blue}{It is worth noting that for the SMB2 protocol, the close request packet is used by clients to close instances of file that was openned with a previous create request packet.} +\textcolor{blue}{It is worth noting that for the SMB2 protocol, the close request packet is used by clients to close instances of \textcolor{green}{a} file that \textcolor{green}{were opened} with a previous create request packet.} \begin{figure} \includegraphics[width=0.5\textwidth]{./images/smbPacket.jpg} - \caption{Visualization of SMB Packet} + \caption{SMB Packet \textcolor{green}{Header Format}} \label{fig:smbPacket} \end{figure} @@ -289,6 +293,12 @@ Dealing with timing accuracy and high throughput involves three challenges. (1) Temporal scaling refers to the need to account for the nuances of timing with respect to the run time of commands; consisting of computation, communication and service. 
A temporally scalable benchmarking system would take these subtleties into account when expanding its operation across multiple machines in a network. While these temporal issues have been tackled for a single processor (and even somewhat for cases of multi-processor), these same timing issues are not properly handled when dealing with inter-network communication. Inaccuracies in packet timestamping can be caused due to overhead in generic kernel-time based solutions, as well as use of the kernel data structures ~\cite{PFRINGMan,Orosz2013}. +\begin{figure*} + \includegraphics[width=\textwidth]{./images/packetcapturetopology.png} + \caption{Visualization of Packet Capturing System} + \label{fig:captureTopology} +\end{figure*} + Spatial scaling refers to the need to account for the nuances of expanding a benchmark to incorporate a number of machines over a network. A system that properly incorporates spatial scaling is one that would be able to incorporate communication (even in varying intensities) between all the machines on a system, thus stress testing all communicative actions and aspects (e.g. resource locks, queueing) on the network. \section{Packet Capturing System} @@ -296,14 +306,14 @@ In this section, we describe the packet capturing system as well as decisions ma % and on the python dissection code we wrote for performing traffic analysis. -\begin{figure*} - \includegraphics[width=\textwidth]{./images/packetcapturetopology.png} - \caption{Visualization of Packet Capturing System} - \label{fig:captureTopology} -\end{figure*} - -\subsection{\textcolor{red}{UITS} System Overview} -We collected traces from \textcolor{red}{the University of Connecticut University Information Technology Services (UITS)} centralized storage server. The \textcolor{red}{UITS system} consists of five Microsoft file server cluster nodes. These blade servers are used to host SMB file shares for various departments at \textcolor{red}{UConn} as well as personal drive share space for faculty, staff and students, along with at least one small group of users. Each server is capable of handling 1~Gb/s of traffic in each direction (e.g. outbound and inbound traffic). Altogether, the five-blade server system can in theory handle 5~Gb/s of data traffic in each direction. +\subsection{\textcolor{green}{University Storage} System Overview} +We collected traces from \textcolor{green}{the university} +%\textcolor{red}{the University of Connecticut University Information Technology Services (UITS)} +centralized storage server%The \textcolor{red}{UITS system} +\textcolor{green}{, which} consists of five Microsoft file server cluster nodes. These blade servers are used to host SMB file shares for various departments at +\textcolor{green}{the university} +%\textcolor{red}{UConn} +as well as personal drive share space for faculty, staff and students, along with at least one small group of users. Each server is capable of handling 1~Gb/s of traffic in each direction (i.e. outbound and inbound traffic). Altogether, the five-blade server system can in theory handle 5~Gb/s of data traffic in each direction. %Some of these blade servers have local storage but the majority do not have any. The blade servers serve as SMB heads, but the actual storage is served by SAN storage nodes that sit behind them. This system does not currently implement load balancing. 
Instead, the servers are set up to spread the traffic load with a static distribution among four of the active cluster nodes while the fifth node is passive and purposed to take over in the case that any of the other nodes go down (e.g. become inoperable or crash). @@ -330,7 +340,7 @@ The filesize used was in a ring buffer where each file captured was 64000 kB. The \texttt{.pcap} files from \texttt{tshark} do not lend themselves to easy data analysis, so we translate these files into the DataSeries~\cite{DataSeries} format. HP developed DataSeries, an XML-based structured data format, that was designed to be self-descriptive, storage and access efficient, and highly flexible. The system for taking captured \texttt{.pcap} files and writing them into the DataSeries format (i.e. \texttt{.ds}) does so by first creating a structure (based on a pre-written determination of the data desired to capture). Once the code builds this structure, it then reads through the capture traffic packets while dissecting and filling in the prepared structure with the desired information and format. -Due to the fundamental nature of this work, there is no need to track every piece of information that is exchanged, only that information which illuminates the behavior of the clients and servers that interact over the network (i.e. I/O transactions). It should also be noted that all sensitive information being captured by the tracing system is hashed to protect the users whose information is examined by the tracing system. Furthermore, the DataSeries file retains only the first 512 bytes of the SMB packet - enough to capture the SMB header information that contains the I/O information we seek, while the body of the SMB traffic is not retained in order to better ensure security of the university's network communications. \textcolor{blue}{The reasoning for this limit was to allow for capture of longer SMB AndX message chains due to negotiated \textit{MaxBufferSize}.} It is worth noting that in the case of larger SMB headers, some information is lost, however this is a trade-off by the university to provide, on average, the correct sized SMB header but does lead to scenarios where some information may be captured incompletely. \textcolor{blue}{This scenario only occurs in the cases of large AndX Chains in the SMB protocol, since the SMB header for SMB 2 is fixed at 72 bytes. In those scenarios the AndX messages specify only a sinlge SMB header with the rest of the AndX Chain attached in a series of block pairs.} +Due to the fundamental nature of this work, there is no need to track every piece of information that is exchanged, only that information which illuminates the behavior of the clients and servers that interact over the network (i.e. I/O transactions). It should also be noted that all sensitive information being captured by the tracing system is hashed to protect the users whose information is examined. Furthermore, the DataSeries file retains only the first 512 bytes of the SMB packet -- enough to capture the SMB header information that contains the I/O information we seek, while the body of the SMB traffic is not retained in order to better ensure \textcolor{green}{the privacy} of the university's network communications. 
\textcolor{blue}{The reasoning for this limit was to allow for capture of longer SMB AndX message chains due to negotiated \textit{MaxBufferSize}.} It is worth noting that in the case of larger SMB headers, some information is lost; however, this is a trade-off by the university to provide, on average, the correctly sized SMB header, but it does lead to scenarios where some information may be captured incompletely. \textcolor{blue}{This scenario only occurs in the cases of large AndX Chains in the SMB protocol, since the SMB header for SMB 2 is fixed at 72 bytes. In those scenarios the AndX messages specify only a single SMB header with the rest of the AndX Chain attached in a series of block pairs.} \subsection{DataSeries Analysis} @@ -379,7 +389,8 @@ Average Write Size (B) & 63 \\ \hline % NOTE: Not sure but this reference keeps referencing the WRONG table Table~\ref{tbl:TraceSummaryTotal} -show a summary of the SMB traffic captured, statistics of the I/O operations, and read/write data exchange observed for the network filesystem. This information is further detailed in Table~\ref{tbl:SMBCommands}, which illustrates that the majority of I/O operations are general (74.87\%). As shown in the bottom part of Table~\ref{tbl:SMBCommands} general I/O includes metadata commands such as connect, close, query info, etc. +shows a summary of the SMB traffic captured, statistics of the I/O operations, and read/write data exchange observed for the network filesystem. This information is further detailed in Table~\ref{tbl:SMBCommands}, which illustrates that the majority of I/O operations are general (74.87\%). As shown in %the bottom part of +Table~\ref{tbl:SMBCommands2}, general I/O includes metadata commands such as connect, close, query info, etc. Our examination of the collected network filesystem data revealed interesting patterns for the current use of CIFS/SMB in a large engineering academic setting. The first is that there is a major shift away from read and write operations towards more metadata-based ones. This matches the last CIFS observations made by Leung et.~al.~that files were being generated and accessed infrequently. The change in operations are due to a movement of use activity from reading and writing data to simply checking file and directory metadata. However, since the earlier study, SMB has transitioned to the SMB2 protocol which was supposed to be less "chatty" and thus we would expect fewer general SMB operations. Table~\ref{tbl:SMBCommands} shows a breakdown of SMB and SMB2 usage over the time period of May. From this table, one can see that the SMB2 protocol makes up $99.14$\% of total network operations compared to just $0.86$\% for SMB, indicating that most clients have upgraded to SMB2. However, $74.66$\% of SMB2 I/O are still general operations. Contrary to the purpose of implementing the SMB2 protocol, there is still a large amount of general I/O. %While CIFS/SMB protocol has less metadata operations, this is due to a depreciation of the SMB protocol commands, therefore we would expect to see less total operations (e.g. $0.04$\% of total operations). @@ -387,7 +398,8 @@ %\textcolor{red}{XXX we are going to get questioned on this. its not likely that there are no IATs for reads and writes} General operations happen at very high frequency with inter arrival times that were found to be relatively short (1317$\mu$s on average), as shown in Table~\ref{tbl:PercentageTraceSummary}. 
-Taking a deeper look at the SMB2 operations, shown in the bottom half of Table~\ref{tbl:SMBCommands}, we see that $9.06$\% of the general operations are negotiate commands. These are commands sent by the client to notify the server which dialects of the SMB2 protocol the client can understand. The three most common commands are close, tree connect, and query info. +Taking a deeper look at the SMB2 operations, shown in %the bottom half of +Table~\ref{tbl:SMBCommands2}, we see that $9.06$\% of the general operations are negotiate commands. These are commands sent by the client to notify the server which dialects of the SMB2 protocol the client can understand. The three most common commands are close, tree connect, and query info. The latter two relate to metadata information of shares and files accessed, however the close operation relates to the create operations relayed over the network. Note that the create command is also used as an open file. The first thing one will notice is that the number of closes is greater than the total number of create operations; by $9.35$\%. These extra close operations are most likely due to applications doing multiple closes that do not need to be done. \begin{table} @@ -405,12 +417,13 @@ Write Operations & 303 & 7871916 & 7872219 \\ Write \% & 0.01\% & 2.82\% & 2.80\% \\ \hline Combine Protocol Operations & 2421214 & 278998472 & 281419686 \\ Combined Protocols \% & 0.86\% & 99.14\% & 100\% \\ \hline -%\end{tabular} -%\caption{\label{tbl:SMBCommands}Percentage of SMB and SMB2 Protocol Commands on March 15th} -%\end{table} -%\begin{table} -%\centering -%\begin{tabular}{|l|c|c|} +\end{tabular} +\caption{\label{tbl:SMBCommands}Percentage of SMB and SMB2 Protocol Commands from April 30th, 2019 to May 20th, 2019} +\end{table} + +\begin{table} +\centering +\begin{tabular}{|l|c|c|c|} \hline \hline SMB2 General Operation & \multicolumn{2}{|c|}{Occurrences} & Percentage of Total \\ \hline Close & \multicolumn{2}{|c|}{80114256} & 28.71\% \\ @@ -431,7 +444,7 @@ Echo & \multicolumn{2}{|c|}{4715} & 0.002\% \\ Cancel & \multicolumn{2}{|c|}{0} & 0.00\% \\ \hline \end{tabular} -\caption{\label{tbl:SMBCommands}Percentage of SMB and SMB2 Protocol Commands from April 30th, 2019 to May 20th, 2019. Breakdown of General Operations for SMB2} +\caption{\label{tbl:SMBCommands2}Breakdown of General Operations for SMB2 from April 30th, 2019 to May 20th, 2019.} \vspace{-2em} \end{table}
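
The capture configuration described in the trackingPaper.tex hunks above (tshark writing a ring buffer of 64000 kB files and retaining only the first 512 bytes of each packet) maps onto standard tshark options. A minimal sketch of the invocation, assuming tshark is installed and that the interface name, output base name, and the choice to filter on direct-hosted SMB over TCP port 445 are site-specific assumptions rather than details taken from the paper:

import subprocess

# Sketch of the capture step: rotate 64000 kB files in a ring buffer and
# keep only the first 512 bytes of each packet (enough for the SMB header).
# The interface, filter, and file name below are illustrative assumptions.
capture_cmd = [
    "tshark",
    "-i", "eth0",            # mirrored capture interface (assumed name)
    "-s", "512",             # snaplen: retain only the first 512 bytes
    "-f", "tcp port 445",    # direct-hosted SMB; port 139 may also be needed
    "-b", "filesize:64000",  # ring buffer: rotate files at 64000 kB
    "-w", "smb_trace.pcap",  # base name for ring-buffer files (assumed)
]

if __name__ == "__main__":
    subprocess.run(capture_cmd, check=True)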
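
The tracing system hashes all sensitive fields (file names, share names, user identifiers) before they are written into the .ds records. One plausible way to do that is with a salted digest; the salt value, digest choice, and truncation length below are illustrative assumptions, not the paper's actual scheme:

import hashlib

# Replace a sensitive string with a stable, irreversible token. A
# site-secret salt hinders dictionary attacks on common path names.
SALT = b"site-specific-secret"  # placeholder value

def anonymize(field: str) -> str:
    return hashlib.sha256(SALT + field.encode("utf-8")).hexdigest()[:16]

# The same input always maps to the same token, so per-file and per-user
# access patterns survive anonymization.
print(anonymize(r"\\server\homes\alice\thesis.tex"))
print(anonymize(r"\\server\homes\alice\thesis.tex"))  # identical output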
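
The analysis itself rests on pairing each SMB/SMB2 request with its response: the paper tracks the UID and TID per session and uses the MID to match individual commands, which is what yields the run times and inter-arrival times (e.g. the 1317 microsecond average for general operations) reported in the tables. A minimal sketch of that bookkeeping over dissected header fields; the field layout and helper are hypothetical stand-ins for the actual DataSeries schema:

import statistics
from collections import defaultdict

pending = {}                        # (uid, tid, mid) -> request timestamp
response_times = defaultdict(list)  # command -> run times in seconds
inter_arrivals = []                 # gaps between successive requests
last_request_ts = None

def on_packet(ts, uid, tid, mid, command, is_response):
    """Feed one dissected SMB header to the tracker (fields assumed)."""
    global last_request_ts
    key = (uid, tid, mid)
    if not is_response:
        if last_request_ts is not None:
            inter_arrivals.append(ts - last_request_ts)
        last_request_ts = ts
        pending[key] = ts
    elif key in pending:
        # Run time of a command = response timestamp - request timestamp.
        response_times[command].append(ts - pending.pop(key))

# Synthetic example: a create request/response pair, then a close request.
on_packet(0.000000, 100, 7, 1, "create", is_response=False)
on_packet(0.000450, 100, 7, 1, "create", is_response=True)
on_packet(0.001317, 100, 7, 2, "close", is_response=False)

for cmd, times in response_times.items():
    print(cmd, "mean run time (s):", statistics.mean(times))
print("mean inter-arrival (s):", statistics.mean(inter_arrivals))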