From f2868e3d3205efd7eab75a3b075f8e7dc33a51c1 Mon Sep 17 00:00:00 2001
From: Tony Pham
Date: Tue, 16 May 2017 01:06:44 -0400
Subject: [PATCH] Add files and write portion that generates the compressed version of file

---
 Compress.H   |  49 ++++++++++++++++++++++
 Compress.cpp | 115 +++++++++++++++++++++++++++++++++++++++++++++++++++
 Huffman.H    |  48 +++++++++++++++++++++
 Huffman.cpp  |  24 +++++++++++
 Makefile     |  12 ++++++
 main.cpp     |  36 ++++++++++++++++
 output.txt   |   2 +
 test.txt     |   1 +
 test2.txt    |   2 +
 9 files changed, 289 insertions(+)
 create mode 100644 Compress.H
 create mode 100644 Compress.cpp
 create mode 100644 Huffman.H
 create mode 100644 Huffman.cpp
 create mode 100644 Makefile
 create mode 100644 main.cpp
 create mode 100644 output.txt
 create mode 100644 test.txt
 create mode 100644 test2.txt

diff --git a/Compress.H b/Compress.H
new file mode 100644
index 0000000..bb47def
--- /dev/null
+++ b/Compress.H
@@ -0,0 +1,49 @@
+#ifndef COMPRESS_H
+#define COMPRESS_H
+
+#include "Huffman.H"
+
+#include <cstddef>
+#include <map>
+#include <string>
+
+using namespace std; // NOTE: kept so that code relying on it keeps compiling
+
+// Huffman compressor: builds the code tree from the byte frequencies of the
+// input file and emits a textual header followed by the bit-packed body.
+class Compress : public Huffman{
+    size_t _counter; // Number of meaningful bits in the body (needed for the header)
+
+    map<char, size_t> countLetters();
+    string generateHeader();
+    string generateBody();
+public:
+
+    Compress(string fname) : Huffman(fname), _counter(0){}
+    ~Compress(){}
+
+    // Builds _tree from the input file; leaves it null when the file is empty.
+    void generateTree() override;
+    // Prints the header line and then the packed body to stdout.
+    // NOTE: outFile is accepted but not used yet.
+    void compressFile(string outFile);
+
+};
+
+// Serializes a tree in pre-order: "0" introduces an internal node and is
+// followed by its left and right subtrees, "1" introduces a leaf and is
+// followed by its byte. Emitting the marker before the children keeps the
+// string unambiguous, so the tree shape can be rebuilt from it.
+// A null tree serializes to the empty string.
+template <typename Ptr>
+string preOrder(Ptr curr){
+    if (!curr){
+        return string();
+    }
+    if (curr->isLeaf()){
+        return "1" + string(1, curr->letter);
+    }
+    return "0" + preOrder(curr->left) + preOrder(curr->right);
+}
+
+#endif
diff --git a/Compress.cpp b/Compress.cpp
new file mode 100644
index 0000000..659cc1d
--- /dev/null
+++ b/Compress.cpp
@@ -0,0 +1,115 @@
+#include "Compress.H"
+
+#include <fstream>
+#include <iostream>
+#include <queue>
+#include <string>
+#include <vector>
+
+using namespace std;
+
+// Counts how often every byte occurs in the input file. The file is read byte
+// by byte in binary mode so that line breaks are counted (and later encoded)
+// like any other symbol.
+// Throws std::string when the file cannot be opened.
+map<char, size_t> Compress::countLetters(){
+    ifstream inpFile(_fname, ios::binary);
+    if (!inpFile.is_open()){
+        throw string("[Error] Could not open file " + _fname + " for countLetters() \n");
+    }
+
+    map<char, size_t> freq;
+    char c;
+    while (inpFile.get(c)){
+        ++freq[c];
+    }
+    return freq;
+}
+
+// Builds the Huffman tree by repeatedly merging the two least frequent nodes.
+// An empty input file leaves _tree null.
+void Compress::generateTree(){
+    typedef shared_ptr<Node> NodePtr;
+
+    // Min-heap on frequency: priority_queue keeps the "largest" element on top,
+    // so the comparison is inverted.
+    auto moreFrequent = [](const NodePtr &a, const NodePtr &b){ return a->value > b->value; };
+    priority_queue<NodePtr, vector<NodePtr>, decltype(moreFrequent)> heap(moreFrequent);
+
+    for (const auto &entry : countLetters()){
+        heap.push(make_shared<Node>(static_cast<double>(entry.second), entry.first, nullptr, nullptr));
+    }
+
+    _tree.reset();
+    if (heap.empty()){ // Empty file: there is nothing to build
+        return;
+    }
+
+    while (heap.size() > 1){
+        NodePtr smallest = heap.top();
+        heap.pop();
+        NodePtr next = heap.top();
+        heap.pop();
+        heap.push(make_shared<Node>(smallest->value + next->value, '\0', smallest, next));
+    }
+    _tree = heap.top();
+}
+
+// Encodes the input file with _bitMap and packs the bits into bytes, most
+// significant bit first. A trailing partial byte is padded with zero bits on
+// the right; _counter records how many bits are meaningful.
+// Throws std::string when the file cannot be opened, when _bitMap is empty or
+// when the file contains a byte that has no code.
+string Compress::generateBody(){
+    ifstream inpFile(_fname, ios::binary);
+    if (!inpFile.is_open()){
+        throw string("[Error] Could not open file " + _fname + " for generateBody()");
+    }
+    if (_bitMap.empty()){
+        // Thrown as std::string, not as a string literal, so that a
+        // catch(string&) handler sees it.
+        throw string("[Error in generateBody()] _bitMap is empty");
+    }
+
+    string compressed;         // Packed output bytes
+    unsigned char pending = 0; // Bits collected for the byte being assembled
+    int filled = 0;            // Number of bits currently held in pending
+    _counter = 0;
+
+    char c;
+    while (inpFile.get(c)){
+        const auto code = _bitMap.find(c);
+        if (code == _bitMap.end()){
+            throw string("[Error in generateBody()] No code for a byte of " + _fname);
+        }
+        for (char bit : code->second){
+            // Compare against '1' directly: atoi() needs a NUL-terminated string
+            pending = static_cast<unsigned char>((pending << 1) | (bit == '1' ? 1 : 0));
+            ++_counter;
+            if (++filled == 8){
+                compressed += static_cast<char>(pending);
+                pending = 0;
+                filled = 0;
+            }
+        }
+    }
+
+    if (filled){ // Left-align the remaining bits in the last byte
+        compressed += static_cast<char>(pending << (8 - filled));
+    }
+    return compressed;
+}
+
+// Prints the header line and then the packed body to stdout.
+// The body is generated first because the header needs the bit count it sets.
+void Compress::compressFile(string outFile){
+    (void)outFile; // TODO: write to outFile; output still goes to stdout for now
+    const string body = generateBody();
+    const string header = generateHeader();
+    cout << header << '\n' << body << endl;
+}
+
+// Header layout: "<number of body bits> <pre-order serialized tree>".
+string Compress::generateHeader(){
+    return to_string(_counter) + " " + preOrder(_tree);
+}
diff --git a/Huffman.H b/Huffman.H
new file mode 100644
index 0000000..383456c
--- /dev/null
+++ b/Huffman.H
@@ -0,0 +1,48 @@
+#ifndef HUFFMAN_H
+#define HUFFMAN_H
+
+#include <map>
+#include <memory>
+#include <string>
+
+using namespace std; // NOTE: kept so that code relying on it keeps compiling
+struct Node;
+
+// State shared by the Huffman tools: the file name, the code tree and the
+// byte -> bit-string table derived from that tree.
+class Huffman {
+protected:
+    std::string _fname;
+    std::shared_ptr<Node> _tree;
+    std::map<char, std::string> _bitMap;
+
+    void generateBitMapHelper(shared_ptr<Node> curr, string bin);
+public:
+
+    Huffman(string fname): _fname(fname) , _bitMap(){}
+
+    virtual ~Huffman(){}
+
+    virtual void generateTree(){} // Will be different for compress and decompress
+
+    // Rebuilds _bitMap from _tree; the table stays empty while _tree is null.
+    void generateBitMap();
+
+    const map<char, string> & getBitMap() const { return _bitMap; }
+    string getFileName() const { return _fname; }
+};
+
+// Tree node. value holds the frequency of the subtree rooted here; letter is
+// the encoded byte on a leaf.
+struct Node{
+    double value;
+    char letter;
+    std::shared_ptr<Node> left;
+    std::shared_ptr<Node> right;
+
+    Node(double _value, char _letter, std::shared_ptr<Node> _left, std::shared_ptr<Node> _right) : value(_value), letter(_letter), left(_left), right(_right) {}
+
+    bool isLeaf() const { return (!left && !right); }
+};
+
+#endif
diff --git a/Huffman.cpp b/Huffman.cpp
new file mode 100644
index 0000000..dcd6e58
--- /dev/null
+++ b/Huffman.cpp
@@ -0,0 +1,24 @@
+#include "Huffman.H"
+
+// Depth-first walk that records the path to every leaf: "0" for a left edge,
+// "1" for a right edge.
+void Huffman::generateBitMapHelper(std::shared_ptr<Node> curr, std::string bin){
+    if(!curr){ // Nothing to record for a missing subtree
+        return;
+    }
+    if(curr->isLeaf()){
+        // A tree made of a single leaf has an empty path; give that symbol a
+        // one-bit code so every input byte still produces output bits.
+        _bitMap[curr->letter] = bin.empty() ? "0" : bin;
+    }
+    else{
+        generateBitMapHelper(curr->left,bin+"0");
+        generateBitMapHelper(curr->right,bin+"1");
+    }
+}
+
+// Wrapper that starts the recursive walk at the root with an empty path.
+void Huffman::generateBitMap(){
+    _bitMap.clear(); // Drop codes left over from a previous tree
+    generateBitMapHelper(_tree,"");
+}
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..3797698
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,12 @@
+EXFILES= compress
+FILES= main.o Huffman.o Compress.o
+FLAGS= -std=c++14 -g
+
+compress: $(FILES)
+	g++ $(FLAGS) $^ -o $@
+
+clean:
+	-rm -f *.o $(EXFILES)
+
+%.o: %.cpp
+	g++ $(FLAGS) -c $< -o $@
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..53887a9
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,36 @@
+#include "Huffman.H"
+#include "Compress.H"
+
+#include <exception>
+#include <iostream>
+#include <string>
+
+// Usage: compress <input-file>
+// Prints the compressed header and body to stdout; errors go to stderr and
+// yield a non-zero exit status.
+int main(int argc, char *argv[]){
+    using namespace std;
+    if (argc < 2){ // Without this check argv[1] is a null pointer
+        cerr << "Usage: compress <input-file>\n";
+        return 1;
+    }
+    try{
+        Compress conv(argv[1]);
+        conv.generateTree();
+        conv.generateBitMap();
+        conv.compressFile("random");
+    }
+    catch(const string &e){
+        cerr << e << '\n';
+        return 1;
+    }
+    catch(const char *e){
+        cerr << e << '\n';
+        return 1;
+    }
+    catch(const exception &e){
+        cerr << "[Error] " << e.what() << '\n';
+        return 1;
+    }
+    return 0;
+}
diff --git a/output.txt b/output.txt
new file mode 100644
index 0000000..be4cc65
--- /dev/null
+++ b/output.txt
@@ -0,0 +1,2 @@
+836 1 01a01u01y01e01m01.01M01T01j01i01s01b01p01g01r01t01w01f01d01v01c01o01h01l01n
+g]H2h}eA.F>=?qݚ&Vc36NuT,w9dO&R{,gQOo-]=