Skip to content

Add files and write portion that generates the compressed version of … #1

Merged
merged 1 commit into from
May 16, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions Compress.H
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#ifndef __COMPRESS_H__
#define __COMPRESS_H__

#include "Huffman.H"

using namespace std;

class Compress : public Huffman{
int _counter; // Counts the number of bits used for compression (Needed for the header)

map<char,int> countLetters();
string generateHeader();
string generateBody();
public:

Compress(string fname) : Huffman(fname), _counter(0){}
~Compress(){}

void generateTree();
void compressFile(string outFile);

};


// Serializes the tree rooted at curr into a string; a leaf contributes "1"
// followed by its letter. Only declared here: the definition lives in
// Compress.cpp, so the template can only be instantiated from that file.
template <class Ptr>
string preOrder(Ptr curr);

#endif
124 changes: 124 additions & 0 deletions Compress.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#include "Compress.H"

#include <stdlib.h>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

using namespace std;

// Builds a frequency table of the characters found in the input file (_fname).
// The file is read line by line with getline, so the line-break characters
// themselves are not counted.
// Throws std::string when the file cannot be opened.
map<char,int> Compress::countLetters(){
    std::ifstream source(_fname);
    if (!source.is_open()){
        throw(std::string("[Error] Could not open file " + _fname + " for countLetters() \n"));
    }

    map<char,int> occurrences;
    std::string currentLine;
    while (getline(source, currentLine)){
        for (const char symbol : currentLine){
            ++occurrences[symbol];
        }
    }
    return occurrences;
}

void Compress::generateTree(){

map<char,int> freq = countLetters();
vector<shared_ptr<Node>> lst = vector<shared_ptr<Node>>(freq.size());

//Add the char and its frequency into a vector
auto mapit = freq.begin();
for (auto it = lst.begin(); it != lst.end();it++,mapit++){
*it = std::make_shared<Node>(Node(mapit->second,mapit->first,NULL,NULL));
}

while(lst.size() > 1){ //Will combine the two smallest Nodes until there is only one node
std::sort(lst.begin(),lst.end(),[](std::shared_ptr<Node> l1, std::shared_ptr<Node> l2)->bool{return l1->value < l2->value;});

if (lst[0]->value < lst[1]->value){
lst[1] = std::make_shared<Node>(Node(lst[0]->value + lst[1]->value,'\0',lst[0],lst[1]));
}
else{
lst[1] = std::make_shared<Node>(Node(lst[0]->value + lst[1]->value,'\0',lst[1],lst[0]));
}

lst.erase(lst.begin());
}

_tree = lst[0];
}

// Serializes the tree rooted at curr in pre-order so that its shape can be
// rebuilt unambiguously:
//   leaf          -> "1" followed by the leaf's letter
//   internal node -> "0" followed by the left subtree, then the right subtree
// An empty (null) tree serializes to the empty string.
// Emitting the "0" marker between the subtrees (in-order) would make every
// tree with the same leaf sequence look identical, so the marker comes first.
template <class Ptr>
std::string preOrder(Ptr curr){
    if (!curr){
        return std::string();
    }
    if (curr->isLeaf()){
        return "1" + std::string(1,curr->letter);
    }
    return "0" + preOrder(curr->left) + preOrder(curr->right);
}

// Encodes the input file with the codes in _bitMap and packs the bits into bytes.
// Side effect: _counter is set to the total number of code bits, which the
// header needs in order to tell real bits from padding in the last byte.
// The file is read line by line with getline, so line breaks are not encoded.
// Byte layout: complete bytes hold eight bits, first bit in the most
// significant position. A trailing partial byte of k bits keeps them in bit
// positions k..1 (shifted left once), matching the existing output format.
// Throws std::string when the file cannot be opened, when _bitMap is empty, or
// when a character of the file has no code in _bitMap.
string Compress::generateBody(){
    ifstream inpFile(_fname);
    if (!inpFile.is_open()){
        throw(std::string("[Error] Could not open file " + _fname + " for generateBody()"));
    }
    if (_bitMap.empty()){
        // Thrown as std::string (not a string literal) so main's handler catches it.
        throw(std::string("[Error in generateBody()] _bitMap is empty"));
    }

    // Concatenate the '0'/'1' code of every character in the file.
    string bits;
    string line;
    while(getline(inpFile,line)){
        for (const char c : line){
            const auto code = _bitMap.find(c); // find() avoids inserting empty codes
            if (code == _bitMap.end()){
                throw(std::string("[Error in generateBody()] No code in _bitMap for a character of " + _fname));
            }
            bits += code->second;
        }
    }

    _counter = static_cast<int>(bits.size());

    // Pack the bit characters into bytes.
    string compressed;
    compressed.reserve(bits.size() / 8 + 1);
    unsigned char current = 0; // unsigned so the shifts are well defined
    int filled = 0;            // number of bits currently held in `current`
    for (const char bit : bits){
        // bit is '0' or '1'; subtracting '0' yields its numeric value.
        current = static_cast<unsigned char>((current << 1) | (bit - '0'));
        if (++filled == 8){
            compressed += static_cast<char>(current);
            current = 0;
            filled = 0;
        }
    }

    if (filled){ // Bit count is not a multiple of 8: flush the partial byte
        compressed += static_cast<char>(current << 1);
    }

    return compressed;
}

// Produces the compressed representation and prints it to standard output:
// the header on one line, then the body on the next.
// NOTE: outFile is not used; nothing is written to a file here.
void Compress::compressFile(string outFile){
    // The body must be generated first: generateBody() sets _counter, which
    // generateHeader() writes into the header.
    const string encodedBody = generateBody();
    const string headerText = generateHeader();
    cout << headerText << endl;
    cout << encodedBody << endl;
}

// Builds the header: the value of _counter, a single space, then the
// serialized form of _tree as produced by preOrder().
string Compress::generateHeader(){
    string headerText = to_string(_counter);
    headerText += " ";
    headerText += preOrder(_tree);
    return headerText;
}
43 changes: 43 additions & 0 deletions Huffman.H
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#ifndef __HUFFMAN_H__
#define __HUFFMAN_H__

#include <map>
#include <memory>
#include <string>

using namespace std;
struct Node;

// Base class holding the state shared by the Huffman tools: the file name,
// the code tree and the character -> bit-string table derived from the tree.
class Huffman {
protected:
    std::string _fname;                  // Path of the file being processed
    std::shared_ptr<Node> _tree;         // Root of the Huffman tree (null until a tree is generated)
    std::map<char,std::string> _bitMap;  // Code of each character as a string of '0'/'1'

    // Recursive helper for generateBitMap(): bin is the path from the root to curr.
    void generateBitMapHelper(std::shared_ptr<Node> curr, std::string bin);
public:

    Huffman(std::string fname): _fname(fname) , _bitMap(){}

    virtual ~Huffman(){}

    // Builds _tree; the base implementation does nothing.
    virtual void generateTree(){} // Will be different for compress and decompress

    // Fills _bitMap from _tree.
    void generateBitMap();

    // Read-only accessors; const so they can be called on a const Huffman.
    const std::map<char,std::string> & getBitMap() const { return _bitMap;}
    std::string getFileName() const {return _fname;}
};

// A node of the Huffman tree. Leaves carry a letter; internal nodes own their
// two children through shared pointers.
struct Node{
    double value;                 // Weight of the node (frequency, or the sum of the children's weights)
    char letter;                  // Character stored in the node
    std::shared_ptr<Node> left;   // Left child, null if absent
    std::shared_ptr<Node> right;  // Right child, null if absent

    Node(double _value, char _letter, std::shared_ptr<Node> _left, std::shared_ptr<Node> _right) : value(_value), letter(_letter), left(_left), right(_right) {}

    // True when the node has no children. Const so it can be queried on a const Node.
    bool isLeaf() const {return (!left && !right);}
};

#endif
15 changes: 15 additions & 0 deletions Huffman.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include "Huffman.H"

// Walks the tree depth-first and records the code of every leaf in _bitMap.
// bin is the path taken from the root to curr: "0" is appended for a step to
// the left child and "1" for a step to the right child.
// A null curr is ignored, so a node with a single child does not crash.
void Huffman::generateBitMapHelper(std::shared_ptr<Node> curr, std::string bin){
    if(!curr){
        return;
    }
    if(curr->isLeaf()){
        // A tree consisting of one leaf would otherwise get an empty code and
        // the encoded body would contain no bits at all; give it the code "0".
        _bitMap[curr->letter] = bin.empty() ? std::string("0") : bin;
    }
    else{
        generateBitMapHelper(curr->left,bin+"0");
        generateBitMapHelper(curr->right,bin+"1");
    }
}

void Huffman::generateBitMap(){ //Wrapper class to recursively call enumerateTreeHelper with DFS
generateBitMapHelper(_tree,"");
}
12 changes: 12 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Name of the executable produced by the build (removed by `clean`).
EXFILES= compress
# Object files that are linked into the executable.
FILES= main.o Huffman.o Compress.o
# Compiler flags: C++14 with debug information.
FLAGS= -std=c++14 -g

# Link all object files into the `compress` executable.
compress: $(FILES)
g++ $(FLAGS) $^ -o $@

# Remove object files and the executable; the leading '-' makes make ignore errors from rm.
clean:
-rm -f *.o $(EXFILES)

# Pattern rule: compile each .cpp file into the matching .o file.
%.o: %.cpp
g++ $(FLAGS) -c $< -o $@
17 changes: 17 additions & 0 deletions main.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#include "Huffman.H"
#include "Compress.H"

#include <iostream>


// Entry point: compresses the file named by the first command-line argument
// and prints the result to standard output.
// Returns 0 on success and 1 when the argument is missing or compression fails.
int main(int argc, char *argv[]){
    using namespace std;
    if (argc < 2){
        // argv[1] would be null here; constructing a string from it is undefined.
        cerr << "Usage: " << (argc > 0 ? argv[0] : "compress") << " <input file>\n";
        return 1;
    }
    try{
        Compress conv(argv[1]);
        conv.generateTree();
        conv.generateBitMap();
        conv.compressFile("random");
    }
    catch(const string &e){
        cout << e;
        return 1;
    }
    catch(const char *e){ // Errors thrown as plain string literals
        cout << e;
        return 1;
    }
    return 0;
}
2 changes: 2 additions & 0 deletions output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
836 1 01a01u01y01e01m01.01M01T01j01i01s01b01p01g01r01t01w01f01d01v01c01o01h01l01n
���g]��H2h�}e���A.��F����>�=?qݚ&��Vc�3�6N�u�T,w9�d�O�&�R{�,g�Q�O��o�-]�=���<�Q��pJ4~�nw2pC.�?���c�
1 change: 1 addition & 0 deletions test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
abc
2 changes: 2 additions & 0 deletions test2.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
hello there my name is tony pham how are you friend. new line. This is just a test to see if the number of bits will actually be less than the compressed version.
Might as well give it a try and find out