-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add files and write portion that generates the compressed version of …
…file
- Loading branch information
Tony Pham
authored and
Tony Pham
committed
May 16, 2017
1 parent
455d6d9
commit f2868e3
Showing
9 changed files
with
244 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
#ifndef __COMPRESS_H__ | ||
#define __COMPRESS_H__ | ||
|
||
#include "Huffman.H" | ||
|
||
using namespace std; | ||
|
||
class Compress : public Huffman{ | ||
int _counter; // Counts the number of bits used for compression (Needed for the header) | ||
|
||
map<char,int> countLetters(); | ||
string generateHeader(); | ||
string generateBody(); | ||
public: | ||
|
||
Compress(string fname) : Huffman(fname), _counter(0){} | ||
~Compress(){} | ||
|
||
void generateTree(); | ||
void compressFile(string outFile); | ||
|
||
}; | ||
|
||
|
||
// Declaration of the tree serialiser used to build the compressed header. | ||
// Ptr is a pointer-like handle to a tree node; the definition dereferences it | ||
// and uses the node's isLeaf(), letter, left and right members. | ||
// NOTE(review): only the declaration lives in this header; the definition sits | ||
// next to the Compress member functions, so the template can only be | ||
// instantiated from that translation unit. | ||
template <class Ptr> | ||
string preOrder(Ptr curr); | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
#include "Compress.H" | ||
|
||
#include <stdlib.h>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>
|
||
using namespace std; | ||
|
||
/// Counts how often every byte occurs in the input file `_fname`.
///
/// The file is read byte by byte in binary mode so that line terminators are
/// counted like any other character; a line-based read would drop them and
/// they could never receive a Huffman code.
///
/// @return map from character to its number of occurrences (empty for an
///         empty file).
/// @throws std::string if the input file cannot be opened.
map<char,int> Compress::countLetters(){
    std::ifstream inpFile(_fname, std::ios::binary);
    if (!inpFile.is_open()){
        throw(std::string("[Error] Could not open file " + _fname + " for countLetters() \n"));
    }

    std::map<char,int> freq;
    char c;
    while (inpFile.get(c)){
        ++freq[c]; // operator[] value-initialises a missing entry to 0
    }
    return freq;
}
|
||
void Compress::generateTree(){ | ||
|
||
map<char,int> freq = countLetters(); | ||
vector<shared_ptr<Node>> lst = vector<shared_ptr<Node>>(freq.size()); | ||
|
||
//Add the char and its frequency into a vector | ||
auto mapit = freq.begin(); | ||
for (auto it = lst.begin(); it != lst.end();it++,mapit++){ | ||
*it = std::make_shared<Node>(Node(mapit->second,mapit->first,NULL,NULL)); | ||
} | ||
|
||
while(lst.size() > 1){ //Will combine the two smallest Nodes until there is only one node | ||
std::sort(lst.begin(),lst.end(),[](std::shared_ptr<Node> l1, std::shared_ptr<Node> l2)->bool{return l1->value < l2->value;}); | ||
|
||
if (lst[0]->value < lst[1]->value){ | ||
lst[1] = std::make_shared<Node>(Node(lst[0]->value + lst[1]->value,'\0',lst[0],lst[1])); | ||
} | ||
else{ | ||
lst[1] = std::make_shared<Node>(Node(lst[0]->value + lst[1]->value,'\0',lst[1],lst[0])); | ||
} | ||
|
||
lst.erase(lst.begin()); | ||
} | ||
|
||
_tree = lst[0]; | ||
} | ||
|
||
/// Serialises the tree rooted at `curr` in pre-order.
///
/// Encoding: a leaf is written as '1' followed by its letter; an internal
/// node is written as '0' followed by its left subtree and then its right
/// subtree. Emitting the node marker before both children is what makes the
/// string uniquely decodable; placing the '0' between the children would
/// serialise differently shaped trees to the same text.
///
/// @param curr pointer-like handle to a node exposing isLeaf(), letter, left
///             and right; a null handle yields an empty string.
/// @return the serialised subtree.
template <class Ptr>
std::string preOrder(Ptr curr){
    if (!curr){
        return std::string();
    }
    if (curr->isLeaf()){
        return "1" + std::string(1, curr->letter);
    }
    return "0" + preOrder(curr->left) + preOrder(curr->right);
}
|
||
/// Encodes the input file with `_bitMap` and packs the resulting bits into
/// bytes.
///
/// Bits are packed most-significant-bit first. If the total bit count is not
/// a multiple of 8, the last byte is padded with zero bits on the right; the
/// exact number of meaningful bits is stored in `_counter` for the header.
/// The file is read byte by byte in binary mode so line terminators are seen.
/// Bytes that have no entry in `_bitMap` contribute no bits.
///
/// @return the packed body (may contain arbitrary byte values, including 0).
/// @throws std::string if the input file cannot be opened or `_bitMap` is
///         empty (generateBitMap() has not produced any codes).
string Compress::generateBody(){
    std::ifstream inpFile(_fname, std::ios::binary);
    if (!inpFile.is_open()){
        throw(std::string("[Error] Could not open file " + _fname + " for generateBody()"));
    }
    if (_bitMap.empty()){
        throw(std::string("[Error in generateBody()] _bitMap is empty"));
    }

    std::string compressed;     // packed output bytes
    unsigned char pending = 0;  // bits collected for the byte being built
    int filled = 0;             // number of bits currently held in `pending`
    int totalBits = 0;          // total payload bits emitted so far

    char c;
    while (inpFile.get(c)){
        const auto code = _bitMap.find(c);
        if (code == _bitMap.end()){
            continue; // no code for this byte: nothing to emit
        }
        for (const char bit : code->second){
            // Compare against '1' directly; the code string holds the
            // characters '0' and '1', one per bit.
            pending = static_cast<unsigned char>((pending << 1) | (bit == '1' ? 1 : 0));
            ++totalBits;
            if (++filled == 8){
                compressed += static_cast<char>(pending);
                pending = 0;
                filled = 0;
            }
        }
    }

    if (filled != 0){
        // Left-align the leftover bits so every byte is read MSB first.
        compressed += static_cast<char>(static_cast<unsigned char>(pending << (8 - filled)));
    }

    _counter = totalBits;
    return compressed;
}
|
||
/// Writes the compressed representation of the input file to `outFile`.
///
/// Layout: the header line, a newline, the packed body, and a final newline.
/// The output is opened in binary mode because the body may contain any
/// byte value.
///
/// @param outFile path of the file to create or overwrite.
/// @throws std::string if `outFile` cannot be opened or written, plus
///         whatever generateBody() throws.
void Compress::compressFile(string outFile){
    // Order matters: generateBody() sets _counter, which generateHeader() reads.
    const std::string body = generateBody();
    const std::string header = generateHeader();

    std::ofstream out(outFile, std::ios::binary);
    if (!out.is_open()){
        throw(std::string("[Error] Could not open file " + outFile + " for compressFile() \n"));
    }

    out << header << '\n' << body << '\n';
    if (!out){
        throw(std::string("[Error] Could not write to file " + outFile + " in compressFile() \n"));
    }
}
|
||
// Assembles the header text: the number of payload bits recorded in _counter,
// a single space, then the serialised form of _tree.
string Compress::generateHeader(){
    const string bitCount = to_string(_counter);
    const string treeText = preOrder(_tree);
    return bitCount + " " + treeText;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#ifndef __HUFFMAN_H__ | ||
#define __HUFFMAN_H__ | ||
|
||
#include <map> | ||
#include <memory> | ||
#include <string> | ||
|
||
using namespace std; | ||
struct Node;

/// Shared state and helpers for Huffman coding of one file.
///
/// Holds the file name, the root of the code tree and the per-character bit
/// strings derived from that tree. Derived classes supply generateTree().
class Huffman {
protected:
    std::string _fname;                     // path of the file being processed
    std::shared_ptr<Node> _tree;            // root of the Huffman tree (null until built)
    std::map<char,std::string> _bitMap;     // character -> code as a string of '0'/'1'

    // Depth-first walk below `curr`; `bin` is the code accumulated so far.
    void generateBitMapHelper(std::shared_ptr<Node> curr, std::string bin);
public:

    // fname: path of the file to work on.
    Huffman(std::string fname): _fname(fname) , _bitMap(){}

    virtual ~Huffman() = default;

    // Builds _tree; differs between compression and decompression, so the
    // base implementation does nothing.
    virtual void generateTree(){}

    // Fills _bitMap from _tree.
    void generateBitMap();

    // Read-only accessors; const so they work through const references.
    const std::map<char,std::string> & getBitMap() const { return _bitMap;}
    std::string getFileName() const {return _fname;}
};
|
||
/// One node of the Huffman tree.
///
/// Leaves carry a character and its weight; internal nodes carry the summed
/// weight of their children and are created with the letter '\0'.
struct Node{
    double value;                   // weight of this node
    char letter;                    // character stored at a leaf
    std::shared_ptr<Node> left;     // left child, null when absent
    std::shared_ptr<Node> right;    // right child, null when absent

    Node(double _value, char _letter, std::shared_ptr<Node> _left, std::shared_ptr<Node> _right)
        : value(_value), letter(_letter), left(_left), right(_right) {}

    // A node is a leaf only when it has neither child. Const so it can be
    // queried through const references and pointers-to-const.
    bool isLeaf() const {return (!left && !right);}
};
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
#include "Huffman.H" | ||
|
||
void Huffman::generateBitMapHelper(std::shared_ptr<Node> curr, std::string bin){ | ||
if(curr->isLeaf()){ | ||
_bitMap[curr->letter] = bin; | ||
} | ||
else{ | ||
generateBitMapHelper(curr->left,bin+"0"); | ||
generateBitMapHelper(curr->right,bin+"1"); | ||
} | ||
} | ||
|
||
void Huffman::generateBitMap(){ //Wrapper class to recursively call enumerateTreeHelper with DFS | ||
generateBitMapHelper(_tree,""); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# Build rules for the Huffman `compress` executable.
EXFILES= compress
FILES= main.o Huffman.o Compress.o
# Headers included by the sources; listed so that editing one triggers a rebuild.
HEADERS= Huffman.H Compress.H
FLAGS= -std=c++14 -g

# `clean` names an action, not a file.
.PHONY: clean

compress: $(FILES)
	g++ $(FLAGS) $^ -o $@

clean:
	-rm -f *.o $(EXFILES)

%.o: %.cpp $(HEADERS)
	g++ $(FLAGS) -c $< -o $@
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
#include "Huffman.H" | ||
#include "Compress.H" | ||
|
||
#include <iostream> | ||
|
||
|
||
int main(int argc, char *argv[]){ | ||
using namespace std; | ||
try{ | ||
shared_ptr<Compress> conv = make_shared<Compress>(Compress(argv[1])); | ||
conv->generateTree(); | ||
conv->generateBitMap(); | ||
conv->compressFile("random"); | ||
} | ||
catch(string &e){cout << e;} | ||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
836 1 01a01u01y01e01m01.01M01T01j01i01s01b01p01g01r01t01w01f01d01v01c01o01h01l01n | ||
���g]��H2h�}e���A.��F����>�=?qݚ&��Vc�3�6N�u�T,w9�d�O�&�R{�,g�Q�O��o�-]�=���<�Q��pJ4~�nw2pC.�?���c� |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
abc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
hello there my name is tony pham how are you friend. new line. This is just a test to see if the number of bits will actually be less than the compressed version. | ||
Might as well give it a try and find out |