Skip to content

Commit

Permalink
Add files and write portion that generates the compressed version of …
Browse files Browse the repository at this point in the history
…file
  • Loading branch information
Tony Pham authored and Tony Pham committed May 16, 2017
1 parent 455d6d9 commit f2868e3
Show file tree
Hide file tree
Showing 9 changed files with 244 additions and 0 deletions.
28 changes: 28 additions & 0 deletions Compress.H
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#ifndef __COMPRESS_H__
#define __COMPRESS_H__

#include "Huffman.H"

using namespace std;

class Compress : public Huffman{
int _counter; // Counts the number of bits used for compression (Needed for the header)

map<char,int> countLetters();
string generateHeader();
string generateBody();
public:

Compress(string fname) : Huffman(fname), _counter(0){}
~Compress(){}

void generateTree();
void compressFile(string outFile);

};


// Serializes the tree rooted at `curr` into a string; used when building the
// compressed file's header.
// NOTE(review): only the declaration lives in this header. The definition is in
// Compress.cpp, so other translation units that call this template will
// presumably fail to link unless an explicit instantiation is provided there.
template <class Ptr>
string preOrder(Ptr curr);

#endif
124 changes: 124 additions & 0 deletions Compress.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#include "Compress.H"

#include <stdlib.h>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

using namespace std;

// Builds a histogram of the characters found in the file named by _fname.
//
// Returns a map from each character seen to its number of occurrences.
// Throws a std::string message when the file cannot be opened.
//
// NOTE(review): the file is consumed line by line with getline(), which
// discards the line delimiter, so newline characters never appear in the
// result -- confirm whether that is intended.
map<char,int> Compress::countLetters(){
    std::ifstream source(_fname);

    // Bail out early when the input is unavailable.
    if (!source.is_open()){
        throw(std::string("[Error] Could not open file " + _fname + " for countLetters() \n"));
    }

    map<char,int> tally;
    std::string row;
    while (getline(source, row)){
        for (auto pos = row.begin(); pos != row.end(); ++pos){
            ++tally[*pos];
        }
    }
    return tally;
}

void Compress::generateTree(){

map<char,int> freq = countLetters();
vector<shared_ptr<Node>> lst = vector<shared_ptr<Node>>(freq.size());

//Add the char and its frequency into a vector
auto mapit = freq.begin();
for (auto it = lst.begin(); it != lst.end();it++,mapit++){
*it = std::make_shared<Node>(Node(mapit->second,mapit->first,NULL,NULL));
}

while(lst.size() > 1){ //Will combine the two smallest Nodes until there is only one node
std::sort(lst.begin(),lst.end(),[](std::shared_ptr<Node> l1, std::shared_ptr<Node> l2)->bool{return l1->value < l2->value;});

if (lst[0]->value < lst[1]->value){
lst[1] = std::make_shared<Node>(Node(lst[0]->value + lst[1]->value,'\0',lst[0],lst[1]));
}
else{
lst[1] = std::make_shared<Node>(Node(lst[0]->value + lst[1]->value,'\0',lst[1],lst[0]));
}

lst.erase(lst.begin());
}

_tree = lst[0];
}

// Serializes the tree rooted at `curr` in pre-order.
//
// A leaf is written as "1" followed by its letter; an internal node is written
// as "0" followed by its left subtree and then its right subtree. Emitting the
// node marker BEFORE both children is what makes the shape recoverable: with
// the marker placed between the children, every tree with the same leaf order
// would serialize to the same string.
//
// `Ptr` is any pointer-like type whose pointee provides isLeaf(), letter, left
// and right. `curr` must not be null, and internal nodes must have both
// children.
template <class Ptr>
std::string preOrder(Ptr curr){
    if (curr->isLeaf()){
        return "1" + std::string(1, curr->letter);
    }
    return "0" + preOrder(curr->left) + preOrder(curr->right);
}

//NEED NUMBER OF BITS FOR BODY
string Compress::generateBody(){
ifstream inpFile(_fname);
if(inpFile.is_open()){
if (_bitMap.size()){
string line; // Reads input file
string compressed = ""; // Will hold compressed body
string uncompressed = ""; // Will hold binary string representation of compression
while(getline(inpFile,line)){
for (char c : line){
uncompressed += _bitMap[c];
}
}

_counter = uncompressed.size();

//Start to divide the binary string into char
int index = 0;
char comp;
for (char c : uncompressed){
if (!index){
comp = 0x0;
}

comp = comp | atoi(&c);

if (index == 7){
compressed += comp;
}
else{
comp = comp << 1; // Shift over one bit
}
index = (index + 1) % 8;
}

if(index){ // If body is not a multiple of 8, then the last few bits need to be copied into compressed
compressed += comp;
}

return compressed;
}
else{
throw("[Error in generateBody()] _bitMap is empty");
}
}
else{
throw("[Error] Could not open file " + _fname + " for generateBody()");
}
}

// Emits the compressed representation: the header on one line, then the body.
//
// NOTE(review): `outFile` is currently unused -- the result is written to
// standard output. Confirm whether it should be written to that file instead.
void Compress::compressFile(string outFile){
    // The body has to be produced first: generateBody() updates _counter,
    // and generateHeader() embeds that value.
    const string payload = generateBody();
    const string preamble = generateHeader();

    cout << preamble << endl;
    cout << payload << endl;
}

string Compress::generateHeader(){
return to_string(_counter) + " " + preOrder(_tree);;
}
43 changes: 43 additions & 0 deletions Huffman.H
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#ifndef __HUFFMAN_H__
#define __HUFFMAN_H__

#include <map>
#include <memory>
#include <string>

using namespace std;
struct Node;

// Shared state and helpers for the Huffman codec: the file being processed,
// the code tree, and the character -> bit-string table derived from the tree.
class Huffman {
protected:
    std::string _fname;                  // Name of the file being processed
    std::shared_ptr<Node> _tree;         // Root of the code tree; null until a subclass builds it
    std::map<char,std::string> _bitMap;  // Character -> code, stored as a string of '0'/'1' characters

    // Recursive worker for generateBitMap(); `bin` is the code accumulated on
    // the path from the root down to `curr`.
    void generateBitMapHelper(std::shared_ptr<Node> curr, std::string bin);
public:

    Huffman(std::string fname): _fname(fname) , _bitMap(){}

    virtual ~Huffman() = default;

    // Builds _tree. Does nothing here; will be different for compress and
    // decompress.
    virtual void generateTree(){}

    // Fills _bitMap from _tree.
    void generateBitMap();

    // Read-only accessors; const so they are usable on const objects.
    const std::map<char,std::string> & getBitMap() const { return _bitMap;}
    std::string getFileName() const {return _fname;}
};

// One node of the Huffman tree.
struct Node{
    double value;                  // Weight: a character's frequency, or the sum of the children's weights
    char letter;                   // Character stored in a leaf ('\0' is used for internal nodes)
    std::shared_ptr<Node> left;    // Left child, null for a leaf
    std::shared_ptr<Node> right;   // Right child, null for a leaf

    Node(double _value, char _letter, std::shared_ptr<Node> _left, std::shared_ptr<Node> _right) : value(_value), letter(_letter), left(_left), right(_right) {}

    // True when the node has no children. Const so it can be called through
    // const references and pointers to const.
    bool isLeaf() const {return (!left && !right);}
};

#endif
15 changes: 15 additions & 0 deletions Huffman.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include "Huffman.H"

// Depth-first walk that records the code of every leaf below `curr` in _bitMap.
//
// `bin` is the code accumulated so far: "0" is appended when descending into
// the left child and "1" when descending into the right child.
void Huffman::generateBitMapHelper(std::shared_ptr<Node> curr, std::string bin){
    if (!curr){
        return; // Missing child: nothing to record on this branch
    }

    if (curr->isLeaf()){
        // A tree that consists of a single leaf reaches this point with an
        // empty code, which would encode every character in zero bits. Give
        // that lone symbol the one-bit code "0" instead.
        _bitMap[curr->letter] = bin.empty() ? std::string("0") : bin;
        return;
    }

    generateBitMapHelper(curr->left, bin + "0");
    generateBitMapHelper(curr->right, bin + "1");
}

void Huffman::generateBitMap(){ //Wrapper class to recursively call enumerateTreeHelper with DFS
generateBitMapHelper(_tree,"");
}
12 changes: 12 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Build rules for the Huffman `compress` tool.
EXFILES= compress
FILES= main.o Huffman.o Compress.o
FLAGS= -std=c++14 -g

# `clean` is a command, not a file: keep it working even if a file named
# `clean` ever appears in the directory.
.PHONY: clean

# Link the executable (default goal).
compress: $(FILES)
	g++ $(FLAGS) $^ -o $@

# Remove the build products.
clean:
	-rm -f *.o $(EXFILES)

# Compile each translation unit.
%.o: %.cpp
	g++ $(FLAGS) -c $< -o $@

# Header dependencies, so that editing a header rebuilds the objects using it.
main.o Compress.o: Compress.H Huffman.H
Huffman.o: Huffman.H
17 changes: 17 additions & 0 deletions main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include "Huffman.H"
#include "Compress.H"

#include <iostream>


// Entry point: compresses the file named by the first command-line argument.
//
// Returns 0 on success and 1 when the argument is missing or any step of the
// compression reports an error.
int main(int argc, char *argv[]){
    using namespace std;

    // Without this check argv[1] is a null pointer, and building a
    // std::string from it is undefined behaviour.
    if (argc < 2){
        cerr << "Usage: " << (argc > 0 ? argv[0] : "compress") << " <input-file>" << '\n';
        return 1;
    }

    try{
        Compress conv(argv[1]); // Plain local object: no shared ownership is needed here
        conv.generateTree();
        conv.generateBitMap();
        conv.compressFile("random");
    }
    catch(const string &e){
        cerr << e << '\n';
        return 1;
    }
    catch(const char *e){ // Errors thrown as string literals
        cerr << e << '\n';
        return 1;
    }
    return 0;
}
2 changes: 2 additions & 0 deletions output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
836 1 01a01u01y01e01m01.01M01T01j01i01s01b01p01g01r01t01w01f01d01v01c01o01h01l01n
���g]��H2h�}e���A.��F����>�=?qݚ&��Vc�3�6N�u�T,w9�d�O�&�R{�,g�Q�O��o�-]�=���<�Q��pJ4~�nw2pC.�?���c�
1 change: 1 addition & 0 deletions test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
abc
2 changes: 2 additions & 0 deletions test2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
hello there my name is tony pham how are you friend. new line. This is just a test to see if the number of bits will actually be less than the compressed version.
Might as well give it a try and find out

0 comments on commit f2868e3

Please sign in to comment.