diff --git a/Compress.H b/Compress.H
new file mode 100644
index 0000000..bb47def
--- /dev/null
+++ b/Compress.H
@@ -0,0 +1,51 @@
+#ifndef COMPRESS_H
+#define COMPRESS_H
+
+#include "Huffman.H"
+
+#include <cstddef>
+#include <map>
+#include <string>
+
+// Kept from the original header: code that includes it may depend on the
+// unqualified std names.
+using namespace std;
+
+// Huffman encoder for the file named at construction.
+// Intended call order (see main.cpp): generateTree(), generateBitMap(),
+// compressFile().
+class Compress : public Huffman{
+    std::size_t _counter; // Number of meaningful bits in the encoded body (needed for the header)
+
+    map<char, int> countLetters();
+    string generateHeader();
+    string generateBody();
+public:
+
+    Compress(string fname) : Huffman(fname), _counter(0){}
+    ~Compress() override {}
+
+    void generateTree() override;
+    void compressFile(string outFile);
+
+};
+
+// Serialises the tree rooted at `curr` in pre-order: a leaf is written as
+// '1' followed by its letter, an internal node as '0' followed by its left
+// and then its right subtree. An empty pointer yields an empty string.
+//
+// The marker is written BEFORE the children on purpose: placed between them,
+// ((a,b),c) and (a,(b,c)) would both come out as "1a01b01c" and the tree
+// could not be rebuilt from the header.
+template <typename Ptr>
+string preOrder(Ptr curr){
+    if (!curr){
+        return string();
+    }
+    if (curr->isLeaf()){
+        return "1" + string(1, curr->letter);
+    }
+    return "0" + preOrder(curr->left) + preOrder(curr->right);
+}
+
+#endif
diff --git a/Compress.cpp b/Compress.cpp
new file mode 100644
index 0000000..659cc1d
--- /dev/null
+++ b/Compress.cpp
@@ -0,0 +1,157 @@
+#include "Compress.H"
+
+#include <cstddef>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <memory>
+#include <queue>
+#include <string>
+#include <vector>
+
+using namespace std;
+
+// Counts how often every byte occurs in the input file.
+// The file is read byte by byte in binary mode so that line breaks are counted
+// (and later encoded) like any other character.
+// Throws std::string if the file cannot be opened.
+map<char, int> Compress::countLetters(){
+    std::ifstream inpFile(_fname, std::ios::binary);
+    if (!inpFile.is_open()){
+        throw(std::string("[Error] Could not open file " + _fname + " for countLetters() \n"));
+    }
+
+    map<char, int> freq;
+    for (std::istreambuf_iterator<char> it(inpFile), last; it != last; ++it){
+        ++freq[*it];
+    }
+    return freq;
+}
+
+// Builds the Huffman tree for the input file and stores its root in _tree.
+// The two lightest nodes are merged until one remains; the lighter of the
+// two becomes the left child. Nodes of equal weight are taken in creation
+// order, so the same input always produces the same tree.
+// Throws std::string if the file cannot be opened or contains no data.
+void Compress::generateTree(){
+    const map<char, int> freq = countLetters();
+    if (freq.empty()){
+        // Without this guard the root would be read from an empty container.
+        throw(std::string("[Error in generateTree()] " + _fname + " is empty \n"));
+    }
+
+    struct Pending{
+        double weight;              // Node::value of `node`
+        std::size_t order;          // Creation index, used only to break ties
+        std::shared_ptr<Node> node;
+    };
+    // priority_queue keeps the "largest" element on top, so the comparison is
+    // inverted to get the lightest (and, on ties, oldest) node first.
+    auto popsLater = [](const Pending &a, const Pending &b){
+        if (a.weight != b.weight){
+            return a.weight > b.weight;
+        }
+        return a.order > b.order;
+    };
+    std::priority_queue<Pending, std::vector<Pending>, decltype(popsLater)> heap(popsLater);
+
+    std::size_t created = 0;
+    for (const auto &entry : freq){
+        heap.push(Pending{static_cast<double>(entry.second), created++,
+                          std::make_shared<Node>(entry.second, entry.first, nullptr, nullptr)});
+    }
+
+    while (heap.size() > 1){
+        const Pending lighter = heap.top();
+        heap.pop();
+        const Pending heavier = heap.top();
+        heap.pop();
+
+        const double weight = lighter.weight + heavier.weight;
+        heap.push(Pending{weight, created++,
+                          std::make_shared<Node>(weight, '\0', lighter.node, heavier.node)});
+    }
+
+    _tree = heap.top().node;
+}
+
+// Encodes the input file with the codes in _bitMap and packs the resulting
+// bits into bytes, most significant bit first. A final, partially filled byte
+// is padded with zero bits on the right. _counter is set to the number of
+// meaningful bits so that a reader can ignore the padding.
+// Throws std::string if the file cannot be opened, if _bitMap is empty, or if
+// the file contains a byte that has no code.
+string Compress::generateBody(){
+    ifstream inpFile(_fname, std::ios::binary);
+    if (!inpFile.is_open()){
+        throw(std::string("[Error] Could not open file " + _fname + " for generateBody()"));
+    }
+    if (_bitMap.empty()){
+        // Thrown as std::string (not a string literal) like every other error
+        // here, so that a single catch(std::string) handler sees all of them.
+        throw(std::string("[Error in generateBody()] _bitMap is empty"));
+    }
+
+    string compressed;          // Packed output bytes
+    unsigned char current = 0;  // Byte being assembled
+    int filled = 0;             // Number of bits already placed in `current`
+    std::size_t bits = 0;       // Total number of code bits emitted
+
+    for (std::istreambuf_iterator<char> it(inpFile), last; it != last; ++it){
+        const auto code = _bitMap.find(*it);
+        if (code == _bitMap.end()){
+            throw(std::string("[Error in generateBody()] no code for a byte of " + _fname));
+        }
+        for (const char bit : code->second){
+            // The codes are strings of '0'/'1' characters; compare instead of
+            // handing a lone char to atoi(), which expects a terminated string.
+            current = static_cast<unsigned char>((current << 1) | (bit == '1' ? 1 : 0));
+            ++bits;
+            if (++filled == 8){
+                compressed += static_cast<char>(current);
+                current = 0;
+                filled = 0;
+            }
+        }
+    }
+
+    if (filled){
+        compressed += static_cast<char>(current << (8 - filled));
+    }
+
+    _counter = bits;
+    return compressed;
+}
+
+// Writes "<header>\n<body>\n" to `outFile`, or to standard output when
+// `outFile` is empty.
+// Throws std::string if the input cannot be read or the output cannot be written.
+void Compress::compressFile(string outFile){
+    // generateBody() must run first: it sets _counter, which the header reports.
+    const string body = generateBody();
+    const string header = generateHeader();
+
+    if (outFile.empty()){
+        cout << header << '\n' << body << '\n';
+        cout.flush();
+        return;
+    }
+
+    // Binary mode keeps the packed bytes free of newline translation.
+    std::ofstream out(outFile, std::ios::binary | std::ios::trunc);
+    if (!out.is_open()){
+        throw(std::string("[Error] Could not open file " + outFile + " for compressFile()"));
+    }
+    out << header << '\n' << body << '\n';
+    out.flush();
+    if (!out){
+        throw(std::string("[Error] Could not write file " + outFile + " in compressFile()"));
+    }
+}
+
+// Returns the header: the bit count of the body, a space, then the tree as
+// written by preOrder(). generateBody() has to run first, otherwise the bit
+// count is still 0.
+string Compress::generateHeader(){
+    return to_string(_counter) + " " + preOrder(_tree);
+}
diff --git a/Huffman.H b/Huffman.H
new file mode 100644
index 0000000..383456c
--- /dev/null
+++ b/Huffman.H
@@ -0,0 +1,47 @@
+#ifndef HUFFMAN_H
+#define HUFFMAN_H
+
+#include <map>
+#include <memory>
+#include <string>
+
+using namespace std; // Kept from the original header; includers may depend on it.
+struct Node;
+
+// State shared by the Huffman tools: the file name, the code tree and the
+// letter -> bit string table derived from the tree.
+class Huffman {
+protected:
+    std::string _fname;
+    std::shared_ptr<Node> _tree;
+    std::map<char, std::string> _bitMap;
+
+    void generateBitMapHelper(shared_ptr<Node> curr, string bin);
+public:
+
+    Huffman(string fname): _fname(fname) , _bitMap(){}
+
+    virtual ~Huffman(){}
+
+    virtual void generateTree(){} // Will be different for compress and decompress
+
+    void generateBitMap();
+
+    const map<char, string> & getBitMap() const { return _bitMap;}
+    string getFileName() const {return _fname;}
+};
+
+// One node of the code tree. A node without children is a leaf and carries a
+// letter; Compress stores '\0' as the letter of the internal nodes it creates.
+struct Node{
+    double value;
+    char letter;
+    std::shared_ptr<Node> left;
+    std::shared_ptr<Node> right;
+
+    Node(double _value, char _letter, std::shared_ptr<Node> _left, std::shared_ptr<Node> _right) : value(_value), letter(_letter), left(_left), right(_right) {}
+
+    bool isLeaf() const {return (!left && !right);}
+};
+
+#endif
diff --git a/Huffman.cpp b/Huffman.cpp
new file mode 100644
index 0000000..dcd6e58
--- /dev/null
+++ b/Huffman.cpp
@@ -0,0 +1,31 @@
+#include "Huffman.H"
+
+// Depth-first walk that records the path to every leaf below `curr` in
+// _bitMap: "0" is appended for a step to the left, "1" for a step to the right.
+// `bin` is the path from the root to `curr`. Empty pointers are skipped.
+void Huffman::generateBitMapHelper(std::shared_ptr<Node> curr, std::string bin){
+    if (!curr){
+        return;
+    }
+    if (curr->isLeaf()){
+        _bitMap[curr->letter] = bin;
+        return;
+    }
+    generateBitMapHelper(curr->left, bin + "0");
+    generateBitMapHelper(curr->right, bin + "1");
+}
+
+// Rebuilds _bitMap from _tree. Without a tree the table is left empty.
+void Huffman::generateBitMap(){
+    _bitMap.clear();
+    if (!_tree){
+        return;
+    }
+    if (_tree->isLeaf()){
+        // A tree made of one leaf would give its letter the empty code and
+        // the encoded body would hold no bits at all; use one bit instead.
+        _bitMap[_tree->letter] = "0";
+        return;
+    }
+    generateBitMapHelper(_tree, "");
+}
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..3797698
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,16 @@
+EXFILES= compress
+FILES= main.o Huffman.o Compress.o
+HEADERS= Huffman.H Compress.H
+FLAGS= -std=c++14 -g
+
+.PHONY: clean
+
+compress: $(FILES)
+	g++ $(FLAGS) $^ -o $@
+
+clean:
+	-rm -f *.o $(EXFILES)
+
+# Every object depends on the headers so that a header change triggers a rebuild.
+%.o: %.cpp $(HEADERS)
+	g++ $(FLAGS) -c $< -o $@
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..53887a9
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,27 @@
+#include "Huffman.H"
+#include "Compress.H"
+
+#include <iostream>
+#include <string>
+
+// Usage: compress <input file> [output file]
+// Without an output file the result goes to standard output.
+int main(int argc, char *argv[]){
+    using namespace std;
+    if (argc < 2){
+        cerr << "Usage: " << (argc > 0 ? argv[0] : "compress") << " <input file> [output file]\n";
+        return 1;
+    }
+    try{
+        Compress conv(argv[1]);
+        conv.generateTree();
+        conv.generateBitMap();
+        conv.compressFile(argc > 2 ? argv[2] : "");
+    }
+    catch(const string &e){
+        // Report on stderr: stdout may be carrying the compressed data.
+        cerr << e << '\n';
+        return 1;
+    }
+    return 0;
+}
diff --git a/output.txt b/output.txt new file mode 100644 index 0000000..be4cc65 --- /dev/null +++ b/output.txt @@ -0,0 
+1,2 @@ +836 1 01a01u01y01e01m01.01M01T01j01i01s01b01p01g01r01t01w01f01d01v01c01o01h01l01n +g]H2h}eA.F>=?qݚ&Vc36NuT,w9dO&R{,gQOo-]=