From b124ca3432738179b242f17bc2eec12b77cab828 Mon Sep 17 00:00:00 2001 From: cag-uconn Date: Tue, 29 Aug 2023 10:59:17 -0400 Subject: [PATCH] . --- assembler/README.md | 37 +- assembler/assembler.c | 47 +- assembler/file_parser.c | 1060 +++++++++++++++++---------------------- assembler/file_parser.h | 9 +- 4 files changed, 517 insertions(+), 636 deletions(-) diff --git a/assembler/README.md b/assembler/README.md index c841ef1..3e88b66 100644 --- a/assembler/README.md +++ b/assembler/README.md @@ -1,41 +1,52 @@ -Mips Assembler +RISC-V Assembler ============== -An assembler for a subset of the MIPS like instruction set architecture +An assembler for a subset of the RISC-V instruction set architecture # How to use -The assembler will take a file written in assembly language as input on the command line and will produce an output file containing the MIPS machine code. The input file should be in ASCII text. Each line in the input assembly file contains either a mnemonic, a section header (such as .data) or a label (jump or branch target. The maximum length of a line is 4 bytes. Section headers such as .data and .text should be in a line by themselves with no other assembly mnemonic. Similarly, branch targets such as loop: will be on a line by themselves with no other assembly mnemonic. The input assembly file should only contain one data section and one text section. The first section in the file will be the .text section, followed by the .data section. +The assembler will take a file written in assembly language as an input on the command line, and will produce an output file containing the RISC-V machine code. The input file should be in ASCII text. Each line in the input assembly file contains either a mnemonic, a section header (such as .data) or a label (jump or branch target). The maximum length of a line is 4 bytes. Section headers such as .data and .text should be in a line by themselves with no other assembly mnemonic. 
Similarly, branch targets such as loop: will be on a line by themselves with no other assembly mnemonic. The input assembly file should only contain one data section and one text section. The first section in the file will be the .text section, followed by the .data section. The assembler supports the following instruction set: R-Type - add - sub -- and -- or -- sll - slt +- sll - srl -- jr +- and +- or +- xor I-Type -- lw -- sw -- andi +- jalr - addi - slti +- andi - ori -- lui +- xori +- slli +- srli +- lw + +S-Type +- sw + +B-Type - beq - bne +- blt +- bge J-Type -- j - jal +U-Type +- lui + # Run to compile the assembler - $ ./make + $ make to run the assembler on a nop.asm assembly file to write machine code in nop.out $ ./assembler nop.asm nop.out diff --git a/assembler/assembler.c b/assembler/assembler.c index 08d3ef6..1114641 100644 --- a/assembler/assembler.c +++ b/assembler/assembler.c @@ -14,27 +14,33 @@ int search(char *instruction); // Array that holds the supported instructions char *instructions[] = { - "la", // 0 - "lui", // 1 - "lw", // 2 - "sw", // 3 - "add", // 4 - "sub", // 5 - "addi", // 6 + "add", // 0 + "sub", // 1 + "slt", // 3 + "sll", // 4 + "srl", // 5 + "and", // 6 "or", // 7 - "and", // 8 - "ori", // 9 - "andi", // 10 - "slt", // 11 - "slti", // 12 - "sll", // 13 - "srl", // 14 - "beq", // 15 - "bne", //16 - "j", //17 - "jr", //18 - "jal" //19 - }; + "xor", // 8 + "jalr", // 9 + "addi", // 19 + "slti", // 11 + "andi", // 12 + "ori", // 12 + "xori", // 13 + "slli", // 14 + "srli", // 15 + "lw", // 16 + "beq", // 17 + "bne", // 18 + "blt", // 19 + "bge", // 20 + "sw", // 21 + "jal", // 22 + "lui" // 23 +}; + + // Size of array size_t inst_len = sizeof(instructions)/sizeof(char *); @@ -44,7 +50,6 @@ int search(char *instruction) { int found = 0; for (int i = 0; i < inst_len; i++) { - if (strcmp(instruction, instructions[i]) == 0) { found = 1; return i; diff --git a/assembler/file_parser.c b/assembler/file_parser.c index 60c845e..25ce8c7 
100644 --- a/assembler/file_parser.c +++ b/assembler/file_parser.c @@ -17,113 +17,148 @@ int search(char *instruction); int incr = 0; // Struct that stores registers and their respective binary reference +//Fall 2023 update: changes for RISC-V ISA struct { + const char *reg; const char *name; char *address; } registerMap[] = { - { "zero", "00000" }, - { "at", "00001" }, - { "v0", "00010" }, - { "v1", "00011" }, - { "a0", "00100" }, - { "a1", "00101" }, - { "a2", "00110" }, - { "a3", "00111" }, - { "t0", "01000" }, - { "t1", "01001" }, - { "t2", "01010" }, - { "t3", "01011" }, - { "t4", "01100" }, - { "t5", "01101" }, - { "t6", "01110" }, - { "t7", "01111" }, - { "s0", "10000" }, - { "s1", "10001" }, - { "s2", "10010" }, - { "s3", "10011" }, - { "s4", "10100" }, - { "s5", "10101" }, - { "s6", "10110" }, - { "s7", "10111" }, - { "t8", "11000" }, - { "t9", "11001" }, - { "ra", "11111" }, - { NULL, 0 } }; + { "x0", "zero", "00000" }, + { "x1", "ra", "00001" }, + { "x2", "sp", "00010" }, + { "x3", "gp", "00011" }, + { "x4", "tp", "00100" }, + { "x5", "t0", "00101" }, + { "x6", "t1", "00110" }, + { "x7", "t2", "00111" }, + { "x8", "s0", "01000" }, + { "x9", "s1", "01001" }, + { "x10", "a0", "01010" }, + { "x11", "a1", "01011" }, + { "x12", "a2", "01100" }, + { "x13", "a3", "01101" }, + { "x14", "a4", "01110" }, + { "x15", "a5", "01111" }, + { "x16", "a6", "10000" }, + { "x17", "a7", "10001" }, + { "x18", "s2", "10010" }, + { "x19", "s3", "10011" }, + { "x20", "s4", "10100" }, + { "x21", "s5", "10101" }, + { "x22", "s6", "10110" }, + { "x23", "s7", "10111" }, + { "x24", "s8", "11000" }, + { "x25", "s9", "11001" }, + { "x26", "s10", "11010" }, + { "x27", "s11", "11011" }, + { "x28", "t3", "11100" }, + { "x29", "t4", "11101" }, + { "x30", "t5", "11110" }, + { "x31", "t6", "11111" }, + { NULL, NULL, 0 } +}; // Struct for R-Type instructions mapping for the 'function' field in the instruction struct { const char *name; - char *function; + char *f7; + char *f3; + char *op; } 
rMap[] = { - { "add", "100000" }, - { "sub", "100001" }, - { "and", "100100" }, - { "or", "100101" }, - { "sll", "000000" }, - { "slt", "101010" }, - { "srl", "000010" }, - { "jr", "001000" }, - { NULL, 0 } }; + { "add", "0000000", "000", "0110011" }, + { "sub", "0100000", "000", "0110011" }, + { "slt", "0000000", "010", "0110011" }, + { "sll", "0000000", "001", "0110011" }, + { "srl", "0000000", "101", "0110011" }, + { "and", "0000000", "111", "0110011" }, + { "or", "0000000", "110", "0110011" }, + { "xor", "0000000", "100", "0110011" }, + { NULL, 0 } +}; // Struct for I-Type instructions struct { const char *name; - char *address; + char *f3; + char *op; } iMap[] = { - { "lw", "100011" }, - { "sw", "101011" }, - { "andi", "001100" }, - { "ori", "001101" }, - { "lui", "001111" }, - { "beq", "000100" }, - { "bne", "000101" }, - { "slti", "001010" }, - { "addi", "001000" }, - { NULL, 0 } }; + { "jalr", "000", "1100111" }, + { "addi", "000", "0010011" }, + { "slti", "010", "0010011" }, + { "andi", "111", "0010011" }, + { "ori", "110", "0010011" }, + { "xori", "100", "0010011" }, + { "slli", "001", "0010011" }, + { "srli", "101", "0010011" }, + { "lw", "010", "0000011" }, + { NULL, 0, 0 } +}; + +// Struct for B-Type instructions +struct { + const char *name; + char *f3; + char *op; +} bMap[] = { + { "beq", "000", "1100011" }, + { "bne", "001", "1100011" }, + { "blt", "100", "1100011" }, + { "bge", "101", "1100011" }, + { NULL, 0, 0 } +}; + +//Struct for S-type instructions +struct { + const char *name; + char *f3; + char *op; +} sMap[] = { + { "sw", "010", "0100011" }, + { NULL, 0, 0} +}; + // Struct for J-Type instructions struct { const char *name; - char *address; + char *op; } jMap[] = { - { "j", "000010" }, - { "jal", "000011" }, - { NULL, 0 } }; + { "jal", "1101111" }, + { NULL, 0 } +}; + +// Struct for U-Type instructions +struct { + const char *name; + char *op; +} uMap[] = { + { "lui", "0110111" }, + { NULL, 0 } +}; int memory_location = 0; void 
parse_file(FILE *fptr, int pass, char *instructions[], size_t inst_len, hash_table_t *hash_table, FILE *Out) { - + char line[MAX_LINE_LENGTH + 1]; char *tok_ptr, *ret, *token = NULL; int32_t line_num = 1; int32_t instruction_count = 0x00000000; int data_reached = 0; - //FILE *fptr; - - /*fptr = fopen(src_file, "r"); - if (fptr == NULL) { - fprintf(Out, "unable to open file %s. aborting ...\n", src_file); - exit(-1); - }*/ while (1) { - if ((ret = fgets(line, MAX_LINE_LENGTH, fptr)) == NULL) - break; + if ((ret = fgets(line, MAX_LINE_LENGTH, fptr)) == NULL) break; line[MAX_LINE_LENGTH] = 0; - tok_ptr = line; if (strlen(line) == MAX_LINE_LENGTH) { - fprintf(Out, - "line %d: line is too long. ignoring line ...\n", line_num); + fprintf(Out, "line %d: line is too long. ignoring line ...\n", line_num); line_num++; continue; } /* parse the tokens within a line */ while (1) { - - token = parse_token(tok_ptr, " \n\t$,", &tok_ptr, NULL); + token = parse_token(tok_ptr, " \n\t$,()", &tok_ptr, NULL); /* blank line or comment begins here. go to the next line */ if (token == NULL || *token == '#') { @@ -139,7 +174,6 @@ void parse_file(FILE *fptr, int pass, char *instructions[], size_t inst_len, has * increment by 4. 
*/ int x = search(token); - //int x = (binarySearch(instructions, 0, inst_len, token)); if (x >= 0) { if (strcmp(token, "la") == 0) instruction_count = instruction_count + 8; @@ -149,9 +183,8 @@ void parse_file(FILE *fptr, int pass, char *instructions[], size_t inst_len, has // If token is ".data", reset instruction to .data starting address else if (strcmp(token, ".data") == 0) { - if(incr==1) - fprintf(Out, "11111111111111111111111111111111\n"); - incr++; + if(incr==1) fprintf(Out, "11111111111111111111111111111111\n"); + incr++; instruction_count = 0x00002000; data_reached = 1; } @@ -286,8 +319,8 @@ void parse_file(FILE *fptr, int pass, char *instructions[], size_t inst_len, has } } } - // If second pass, then interpret + else if (pass == 2) { printf("############ Pass 2 ##############\n"); @@ -311,527 +344,250 @@ void parse_file(FILE *fptr, int pass, char *instructions[], size_t inst_len, has // Determine instruction type inst_type = instruction_type(token); - if (inst_type == 'r') { - - // R-Type with $rd, $rs, $rt format - if (strcmp(token, "add") == 0 || strcmp(token, "sub") == 0 - || strcmp(token, "and") == 0 - || strcmp(token, "or") == 0 || strcmp(token, "slt") == 0) { - - // Parse the instructio - get rd, rs, rt registers - char *inst_ptr = tok_ptr; - char *reg = NULL; - // Create an array of char* that stores rd, rs, rt respectively - char **reg_store; - reg_store = malloc(3 * sizeof(char*)); - if (reg_store == NULL) { - fprintf(Out, "Out of memory\n"); - exit(1); - } - - for (int i = 0; i < 3; i++) { - reg_store[i] = malloc(2 * sizeof(char)); - if (reg_store[i] == NULL) { - fprintf(Out, "Out of memory\n"); - exit(1); - } - } - - // Keeps a reference to which register has been parsed for storage - int count = 0; - while (1) { - - reg = parse_token(inst_ptr, " $,\n\t", &inst_ptr, NULL); - - if (reg == NULL || *reg == '#') { - break; - } - - strcpy(reg_store[count], reg); - count++; - free(reg); - } + if (inst_type == 'r') { - // Send reg_store for output 
- // rd is in position 0, rs is in position 1 and rt is in position 2 - rtype_instruction(token, reg_store[1], reg_store[2], reg_store[0], 0, Out); + // Parse the instructio - get rd, rs, rt registers + char *inst_ptr = tok_ptr; + char *reg = NULL; - // Dealloc reg_store - for (int i = 0; i < 3; i++) { - free(reg_store[i]); - } - free(reg_store); + // Create an array of char* that stores rd, rs, rt respectively + char **reg_store; + reg_store = malloc(3 * sizeof(char*)); + if (reg_store == NULL) { + fprintf(Out, "Out of memory\n"); + exit(1); } - // R-Type with $rd, $rs, shamt format - else if (strcmp(token, "sll") == 0 || strcmp(token, "srl") == 0) { - - // Parse the instructio - get rd, rs, rt registers - char *inst_ptr = tok_ptr; - char *reg = NULL; - - // Create an array of char* that stores rd, rs and shamt - char **reg_store; - reg_store = malloc(3 * sizeof(char*)); - if (reg_store == NULL) { + for (int i = 0; i < 3; i++) { + reg_store[i] = malloc(2 * sizeof(char)); + if (reg_store[i] == NULL) { fprintf(Out, "Out of memory\n"); exit(1); } + } - for (int i = 0; i < 3; i++) { - reg_store[i] = malloc(2 * sizeof(char)); - if (reg_store[i] == NULL) { - fprintf(Out, "Out of memory\n"); - exit(1); - } - } - - // Keeps a reference to which register has been parsed for storage - int count = 0; - while (1) { - - reg = parse_token(inst_ptr, " $,\n\t", &inst_ptr, NULL); + // Keeps a reference to which register has been parsed for storage + int count = 0; + while (1) { - if (reg == NULL || *reg == '#') { - break; - } + reg = parse_token(inst_ptr, " $,\n\t()", &inst_ptr, NULL); - strcpy(reg_store[count], reg); - count++; - free(reg); + if (reg == NULL || *reg == '#') { + break; } - // Send reg_store for output - // rd is in position 0, rs is in position 1 and shamt is in position 2 - rtype_instruction(token, "00000", reg_store[1], reg_store[0], atoi(reg_store[2]), Out); - - // Dealloc reg_store - for (int i = 0; i < 3; i++) { - free(reg_store[i]); - } - free(reg_store); + 
strcpy(reg_store[count], reg); + count++; + free(reg); } - else if (strcmp(token, "jr") == 0) { + // Send reg_store for output + // rd is in position 0, rs1 is in position 1 and rs2 is in position 2 + rtype_instruction(token, reg_store[1], reg_store[2], reg_store[0], Out); - // Parse the instruction - rs is in tok_ptr - char *inst_ptr = tok_ptr; - char *reg = NULL; - reg = parse_token(inst_ptr, " $,\n\t", &inst_ptr, NULL); - - rtype_instruction(token, reg, "00000", "00000", 0, Out); + // Dealloc reg_store + for (int i = 0; i < 3; i++) { + free(reg_store[i]); } - } - - // I-Type - else if (inst_type == 'i') { + free(reg_store); - // la is pseudo instruction for lui and ori - // Convert to lui and ori and pass those instructions - if (strcmp(token, "la") == 0) { + // I-Type or S-type + } else if ((inst_type == 'i') || (inst_type == 's')) { + // Parse the instruction - rt, rs, immediate + char *inst_ptr = tok_ptr; + char *reg = NULL; - // Parse the instruction - get register & immediate - char *inst_ptr = tok_ptr; - char *reg = NULL; + // Create an array of char* that stores rt, rs, immediate + char **reg_store; + reg_store = malloc(3 * sizeof(char*)); + if (reg_store == NULL) { + fprintf(Out, "Out of memory\n"); + exit(1); + } - // Create an array of char* that stores rd, rs and shamt - char **reg_store; - reg_store = malloc(2 * sizeof(char*)); - if (reg_store == NULL) { + for (int i = 0; i < 3; i++) { + reg_store[i] = malloc(3 * sizeof(char)); + if (reg_store[i] == NULL) { fprintf(Out, "Out of memory\n"); exit(1); } + } - for (int i = 0; i < 2; i++) { - reg_store[i] = malloc(2 * sizeof(char)); - if (reg_store[i] == NULL) { - fprintf(Out, "Out of memory\n"); - exit(1); - } - } - - // Keeps a reference to which register has been parsed for storage - int count = 0; - while (1) { - - reg = parse_token(inst_ptr, " $,\n\t", &inst_ptr, NULL); - - if (reg == NULL || *reg == '#') { - break; - } + // Keeps a reference to which register has been parsed for storage + int count 
= 0; + while (1) { - strcpy(reg_store[count], reg); - count++; - free(reg); - } + reg = parse_token(inst_ptr, " $,\n\t()", &inst_ptr, NULL); - // Interpret la instruction. - // The register is at reg_store[0] and the variable is at reg_store[1] - - printf("\n reg_store[0]: %s\n",reg_store[0]); - // Find address of label in hash table - int *address = hash_find(hash_table, reg_store[1], strlen(reg_store[1])+1); - - // Convert address to binary in char* - char addressBinary[33]; - getBin(*address, addressBinary, 32); - - // Get upper and lower bits of address - char upperBits[16]; - char lowerBits[16]; - - for (int i = 0; i < 32; i++) { - if (i < 16) - lowerBits[i] = addressBinary[i]; - else - upperBits[i-16] = addressBinary[i]; + if (reg == NULL || *reg == '#') { + break; } - // Call the lui instruction with: lui $reg, upperBits - // Convert upperBits binary to int - int immediate = getDec(upperBits); - //immediate = memory_location; - //memory_location++; - printf("\n LA1: %d \n",immediate); - itype_instruction("lui", "00000", reg_store[0], immediate, Out); - - // Call the ori instruction with: ori $reg, $reg, lowerBits - // Convert lowerBits binary to int - immediate = getDec(lowerBits); - printf("\n LA2: %d",immediate); - itype_instruction("ori", reg_store[0], reg_store[0], immediate, Out); - - // Dealloc reg_store - for (int i = 0; i < 2; i++) { - free(reg_store[i]); - } - free(reg_store); + strcpy(reg_store[count], reg); + count++; + free(reg); } - // I-Type $rt, i($rs) - else if (strcmp(token, "lw") == 0 || strcmp(token, "sw") == 0) { - - // Parse the instructio - rt, immediate and rs - char *inst_ptr = tok_ptr; - char *reg = NULL; - // - // Create an array of char* that stores rd, rs, rt respectively - char **reg_store; - reg_store = malloc(3 * sizeof(char*)); - if (reg_store == NULL) { - fprintf(Out, "Out of memory\n"); - exit(1); - } - - for (int i = 0; i < 3; i++) { - reg_store[i] = malloc(2 * sizeof(char)); - if (reg_store[i] == NULL) { - fprintf(Out, 
"Out of memory\n"); - exit(1); - } - } - - // Keeps a reference to which register has been parsed for storage - int count = 0; - while (1) { - - reg = parse_token(inst_ptr, " $,\n\t()", &inst_ptr, NULL); - - if (reg == NULL || *reg == '#') { - break; - } - - strcpy(reg_store[count], reg); - count++; - free(reg); + int immediate; + //Handle i + if (inst_type == 'i'){ + // rd in position 0, immediate in position 1, and rs1 in position 2 for lw + if (strcmp(token, "lw") == 0){ + immediate = strtol(reg_store[1], NULL, 0); + itype_instruction(token, reg_store[2], reg_store[0], immediate, Out); + //Other i type have rd position 0, rs1 position 1, immediate position 2 + }else{ + immediate = strtol(reg_store[2], NULL, 0); + itype_instruction(token, reg_store[1], reg_store[0], immediate, Out); } - // rt in position 0, immediate in position 1 and rs in position2 - int immediate = atoi(reg_store[1]); - itype_instruction(token, reg_store[2], reg_store[0], immediate, Out); + //Handle s + }else{ + immediate = strtol(reg_store[1], NULL, 0); + stype_instruction(token, reg_store[2], reg_store[0], immediate, Out); + } - // Dealloc reg_store - for (int i = 0; i < 3; i++) { - free(reg_store[i]); - } - free(reg_store); + // Dealloc reg_store + for (int i = 0; i < 3; i++) { + free(reg_store[i]); } + free(reg_store); - // I-Type rt, rs, im - else if (strcmp(token, "andi") == 0 || strcmp( token, "ori") == 0 - || strcmp(token, "slti") == 0 || strcmp(token, "addi") == 0) { + //B-Type + } else if (inst_type == 'b') { + // Parse the instruction - rt, rs, immediate + char *inst_ptr = tok_ptr; + char *reg = NULL; - // Parse the instruction - rt, rs, immediate - char *inst_ptr = tok_ptr; - char *reg = NULL; + // Create an array of char* that stores rt, rs + char **reg_store; + reg_store = malloc(2 * sizeof(char*)); + if (reg_store == NULL) { + fprintf(Out, "Out of memory\n"); + exit(1); + } - // Create an array of char* that stores rt, rs - char **reg_store; - reg_store = malloc(3 * 
sizeof(char*)); - if (reg_store == NULL) { + for (int i = 0; i < 2; i++) { + reg_store[i] = malloc(3 * sizeof(char)); + if (reg_store[i] == NULL) { fprintf(Out, "Out of memory\n"); exit(1); } + } - for (int i = 0; i < 3; i++) { - reg_store[i] = malloc(2 * sizeof(char)); - if (reg_store[i] == NULL) { - fprintf(Out, "Out of memory\n"); - exit(1); - } - } - - // Keeps a reference to which register has been parsed for storage - int count = 0; - while (1) { - - reg = parse_token(inst_ptr, " $,\n\t", &inst_ptr, NULL); + // Keeps a reference to which register has been parsed for storage + int count = 0; + while (1) { - if (reg == NULL || *reg == '#') { - break; - } + reg = parse_token(inst_ptr, " $,\n\t()", &inst_ptr, NULL); - strcpy(reg_store[count], reg); - count++; - free(reg); + if (reg == NULL || *reg == '#') { + break; } - // rt in position 0, rs in position 1 and immediate in position 2 - int immediate = atoi(reg_store[2]); - itype_instruction(token, reg_store[1], reg_store[0], immediate, Out); + strcpy(reg_store[count], reg); + count++; + free(reg); + + if (count == 2) break; - // Dealloc reg_store - for (int i = 0; i < 3; i++) { - free(reg_store[i]); - } - free(reg_store); } - // I-Type $rt, immediate - else if (strcmp(token, "lui") == 0) { + //Getting the label + reg = parse_token(inst_ptr, " $,\n\t()", &inst_ptr, NULL); + printf("label is %s\n", reg); - // Parse the insturction, rt - immediate - char *inst_ptr = tok_ptr; - char *reg = NULL; + // Find hash address for a register and put in an immediate + int *address = hash_find(hash_table, reg, strlen(reg)+1); - // Create an array of char* that stores rs, rt - char **reg_store; - reg_store = malloc(2 * sizeof(char*)); - if (reg_store == NULL) { - fprintf(Out, "Out of memory\n"); - exit(1); - } + // rs1 in position 0, rs2 in position 1 + int immediate = (*address - instruction_count + 4) >> 1; //What in tarnation? 
+ btype_instruction(token, reg_store[1], reg_store[0], immediate, Out); - for (int i = 0; i < 2; i++) { - reg_store[i] = malloc(2 * sizeof(char)); - if (reg_store[i] == NULL) { - fprintf(Out, "Out of memory\n"); - exit(1); - } - } + // Dealloc reg_store + for (int i = 0; i < 2; i++) { + free(reg_store[i]); + } + free(reg_store); + + + } else if (inst_type == 'j') { + // Parse the instruction - rd, immediate + char *inst_ptr = tok_ptr; + char *reg = NULL, *label = NULL; - // Keeps a reference to which register has been parsed for storage - int count = 0; - while (1) { + //Get rd + reg = parse_token(inst_ptr, " $,\n\t()", &inst_ptr, NULL); - reg = parse_token(inst_ptr, " $,\n\t", &inst_ptr, NULL); + //Get label + label = parse_token(inst_ptr, " $,\n\t()", &inst_ptr, NULL); + printf("reg is %s\n", reg); + printf("label is %s\n", label); - if (reg == NULL || *reg == '#') { - break; - } + + // Find hash address for a register and put in an immediate + int *address = hash_find(hash_table, label, strlen(label)+1); - strcpy(reg_store[count], reg); - count++; - free(reg); - } + int immediate = (*address - instruction_count + 4) >> 1; //What in tarnation? + printf("\n %d %d\n", immediate, instruction_count); + jtype_instruction(token, reg, immediate, Out); - // rt in position 0, immediate in position 1 - int immediate = atoi(reg_store[1]); - itype_instruction(token, "00000", reg_store[0], immediate, Out); + //Free memory + free(reg); + free(label); - //OK: I changes i < 3 to i < 2 in line 654 below! 
- // Dealloc reg_store - for (int i = 0; i < 2; i++) { - free(reg_store[i]); - } - free(reg_store); + } else if (inst_type == 'u') { + // Parse the instruction - rd, immediate + char *inst_ptr = tok_ptr; + char *reg = NULL; + + // Create an array of char* that stores rd, immediate + char **reg_store; + reg_store = malloc(2 * sizeof(char*)); + if (reg_store == NULL) { + fprintf(Out, "Out of memory\n"); + exit(1); } - // I-Type $rs, $rt, label - else if (strcmp(token, "beq") == 0) { - printf("\n In BEQ\n"); - // Parse the instruction - rs, rt - char *inst_ptr = tok_ptr; - char *reg = NULL; - - // Create an array of char* that stores rs, rt - char **reg_store; - reg_store = malloc(2 * sizeof(char*)); - if (reg_store == NULL) { + for (int i = 0; i < 2; i++) { + reg_store[i] = malloc(2 * sizeof(char)); + if (reg_store[i] == NULL) { fprintf(Out, "Out of memory\n"); exit(1); } - - for (int i = 0; i < 2; i++) { - reg_store[i] = malloc(2 * sizeof(char)); - if (reg_store[i] == NULL) { - fprintf(Out, "Out of memory\n"); - exit(1); - } - } - - // Keeps a reference to which register has been parsed for storage - int count = 0; - while (1) { - - reg = parse_token(inst_ptr, " $,\n\t", &inst_ptr, NULL); - - if (reg == NULL || *reg == '#') { - break; - } - - strcpy(reg_store[count], reg); - count++; - free(reg); - - if (count == 2) - break; - } - - reg = parse_token(inst_ptr, " $,\n\t", &inst_ptr, NULL); - - // Find hash address for a register and put in an immediate - int *address = hash_find(hash_table, reg, strlen(reg)+1); - - int immediate = *address - instruction_count; //+ - printf("\n OFFSET pre = %d\n ", immediate); - - // OK: this offset modification is wrong! 
- /* - if(immediate < 0) - immediate = immediate*-1; - printf("\n OFFSET = %d\n ", immediate); - */ - - // Send instruction to itype function - itype_instruction(token, reg_store[0], reg_store[1], immediate, Out); - - // Dealloc reg_store - for (int i = 0; i < 2; i++) { - free(reg_store[i]); - } - free(reg_store); } - // I-Type $rs, $rt, label - else if (strcmp(token, "bne") == 0) { - - printf("\n In BNE\n"); - // Parse the instruction - rs, rt - char *inst_ptr = tok_ptr; - char *reg = NULL; - - // Create an array of char* that stores rs, rt - char **reg_store; - reg_store = malloc(2 * sizeof(char*)); - if (reg_store == NULL) { - fprintf(Out, "Out of memory\n"); - exit(1); - } - - for (int i = 0; i < 2; i++) { - reg_store[i] = malloc(2 * sizeof(char)); - if (reg_store[i] == NULL) { - fprintf(Out, "Out of memory\n"); - exit(1); - } - } - // Keeps a reference to which register has been parsed for storage - int count = 0; - while (1) { - - reg = parse_token(inst_ptr, " $,\n\t", &inst_ptr, NULL); - - if (reg == NULL || *reg == '#') { - break; - } - - strcpy(reg_store[count], reg); - count++; - free(reg); - - if (count == 2) - break; - } - - reg = parse_token(inst_ptr, " $,\n\t", &inst_ptr, NULL); - - // Find hash address for a register and put in an immediate - int *address = hash_find(hash_table, reg, strlen(reg)+1); - printf("\n %d %d\n",*address,instruction_count); - //int immediate = *address + instruction_count; - int immediate = *address - instruction_count; //+ - - // OK: this offset modification is wrong! 
- /* - if(immediate < 0) - immediate = immediate*-1; - printf("\n OFFSET = %d\n ", immediate); - */ - - // Send instruction to itype function - itype_instruction(token, reg_store[0], reg_store[1], immediate, Out); - - // Dealloc reg_store - for (int i = 0; i < 2; i++) { - free(reg_store[i]); - } - free(reg_store); - } - } - - // J-Type - else if (inst_type == 'j') { + // Keeps a reference to which register has been parsed for storage + int count = 0; + while (1) { - // Parse the instruction - get label - char *inst_ptr = tok_ptr; + reg = parse_token(inst_ptr, " $,\n\t()", &inst_ptr, NULL); - // If comment, extract the label alone - char *comment = strchr(inst_ptr, '#'); - if (comment != NULL) { - - int str_len_count = 0; - for (int i = 0; i < strlen(inst_ptr); i++) { - if (inst_ptr[i] != ' ') - str_len_count++; - else - break; + if (reg == NULL || *reg == '#') { + break; } - char new_label[str_len_count+1]; - for (int i = 0; i < str_len_count; i++) - new_label[i] = inst_ptr[i]; - new_label[str_len_count] = '\0'; - - strcpy(inst_ptr, new_label); + strcpy(reg_store[count], reg); + count++; + free(reg); } - else { printf("NO COMMENT\n"); - inst_ptr[strlen(inst_ptr)-1] = '\0'; - } + // rd in position 0, immediate in position 1 + int immediate = strtol(reg_store[1], NULL, 0); + utype_instruction(token, reg_store[0], immediate, Out); - // Find hash address for a label and put in an immediate - int *address = hash_find(hash_table, inst_ptr, strlen(inst_ptr)+1); - - // Send to jtype function - jtype_instruction(token, *address, Out); + // Dealloc reg_store + for (int i = 0; i < 2; i++) { + free(reg_store[i]); + } + free(reg_store); } - } - if (strcmp(token, "nop") == 0) { - fprintf(Out, "00000000000000000000000000000000\n"); + if (strcmp(token, "nop") == 0) { + fprintf(Out, "00000000000000000000000000000000\n"); + } } } @@ -848,7 +604,6 @@ void parse_file(FILE *fptr, int pass, char *instructions[], size_t inst_len, has // Variable is array if (strstr(var_tok_ptr, ":")) { - 
// Store the number in var_tok and the occurance in var_tok_ptr var_tok = parse_token(var_tok_ptr, ":", &var_tok_ptr, NULL); @@ -866,18 +621,14 @@ void parse_file(FILE *fptr, int pass, char *instructions[], size_t inst_len, has // Variable is a single variable else { - printf("\n%s\n ", var_tok_ptr); - char * pch; - pch = strtok (var_tok_ptr," "); - pch = strtok (NULL, " "); - var_value = atoi(pch); - printf ("\n INTGER: %d\n",var_value); - // Extract variable value - //sscanf(var_tok_ptr, "%*s %d", &var_value); //.word 10 - - // Variable is in var_value. Send to binary rep function + printf("\n%s\n ", var_tok_ptr); + char * pch; + pch = strtok (var_tok_ptr," "); + pch = strtok (NULL, " "); + var_value = strtol(pch, NULL, 0); + printf ("\n INTGER: %d\n",var_value); word_rep(var_value, Out); - //printf("\n VAR: %d\n, var_value"); + } } @@ -900,7 +651,6 @@ void parse_file(FILE *fptr, int pass, char *instructions[], size_t inst_len, has } } } - free(token); } } @@ -938,28 +688,36 @@ int binarySearch(char *instructions[], int low, int high, char *string) { // Determine Instruction Type char instruction_type(char *instruction) { - if (strcmp(instruction, "add") == 0 || strcmp(instruction, "sub") == 0 - || strcmp(instruction, "and") == 0 || strcmp(instruction, "or") - == 0 || strcmp(instruction, "sll") == 0 || strcmp(instruction, - "slt") == 0 || strcmp(instruction, "srl") == 0 || strcmp( - instruction, "jr") == 0) { - - return 'r'; - } - - else if (strcmp(instruction, "lw") == 0 || strcmp(instruction, "sw") == 0 - || strcmp(instruction, "andi") == 0 || strcmp(instruction, "ori") - == 0 || strcmp(instruction, "lui") == 0 || strcmp(instruction, - "beq") == 0 || strcmp(instruction,"bne") == 0 || strcmp(instruction, "slti") == 0 || strcmp( - instruction, "addi") == 0 || strcmp(instruction, "la") == 0) { - - return 'i'; - } - - else if (strcmp(instruction, "j") == 0 || strcmp(instruction, "jal") == 0) { - return 'j'; - } - + if (strcmp(instruction, "add") == 0 || + 
strcmp(instruction, "sub") == 0 || + strcmp(instruction, "sll") == 0 || + strcmp(instruction, "srl") == 0 || + strcmp(instruction, "and") == 0 || + strcmp(instruction, "or") == 0 || + strcmp(instruction, "xor") == 0 || + strcmp(instruction, "slt") == 0) return 'r'; + + else if (strcmp(instruction, "jalr") == 0 || + strcmp(instruction, "addi") == 0 || + strcmp(instruction, "slti") == 0 || + strcmp(instruction, "andi") == 0 || + strcmp(instruction, "ori") == 0 || + strcmp(instruction, "xori") == 0 || + strcmp(instruction, "slli") == 0 || + strcmp(instruction, "srli") == 0 || + strcmp(instruction, "lw") == 0 ) return 'i'; + + else if (strcmp(instruction, "beq") == 0 || + strcmp(instruction, "bne") == 0 || + strcmp(instruction, "blt") == 0 || + strcmp(instruction, "bge") == 0 ) return 'b'; + + else if (strcmp(instruction, "sw") == 0) return 's'; + + else if (strcmp(instruction, "jal") == 0) return 'j'; + + else if (strcmp(instruction, "lui") == 0) return 'u'; + // Failsafe return statement return 0; } @@ -968,8 +726,9 @@ char instruction_type(char *instruction) { char *register_address(char *registerName) { size_t i; + //Fall 2023 update: Now also check for the register name for (i = 0; registerMap[i].name != NULL; i++) { - if (strcmp(registerName, registerMap[i].name) == 0) { + if ( (strcmp(registerName, registerMap[i].name) == 0) || (strcmp(registerName, registerMap[i].reg) == 0) ) { return registerMap[i].address; } } @@ -978,72 +737,159 @@ char *register_address(char *registerName) { } // Write out the R-Type instruction -void rtype_instruction(char *instruction, char *rs, char *rt, char *rd, int shamt, FILE *Out) { +void rtype_instruction(char *instruction, char *rs1, char *rs2, char *rd, FILE *Out) { - // Set the instruction bits - char *opcode = "000000"; + //Stores the opcode and func bits + char *opcode = NULL, *f3 = NULL, *f7 = NULL; + for (int i = 0; rMap[i].name != NULL; i++) { + if (strcmp(instruction, rMap[i].name) == 0) { + opcode = rMap[i].op; + f3 = 
rMap[i].f3; + f7 = rMap[i].f7; + break; + } + } + + //Stores the binary representation of registers + char *rdBin, *rs1Bin, *rs2Bin; + rdBin = register_address(rd); + rs1Bin = register_address(rs1); + rs2Bin = register_address(rs2); + + // Print out the instruction to the file + fprintf(Out, "%s%s%s%s%s%s\n", f7, rs2Bin, rs1Bin, f3, rdBin, opcode); +} + +// Write out the I-Type instruction +void itype_instruction(char *instruction, char *rs1, char *rd, int immediate, FILE *Out) { + + int imm = immediate; + + //Stores the opcode and func bits + char *opcode = NULL, *f3 = NULL; + for (int i = 0; iMap[i].name != NULL; i++) { + if (strcmp(instruction, iMap[i].name) == 0) { + opcode = iMap[i].op; + f3 = iMap[i].f3; + break; + } + } - char *rdBin = "00000"; - if (strcmp(rd, "00000") != 0) - rdBin = register_address(rd); + //Clear the upper 7 immediate bits for SLLI or SRLI + /* + if ( strcmp(instruction, "slli") == 0 || + strcmp(instruction, "srli") == 0 ) imm &= 0x01FFFFFF; + */ + // Set the instruction bits + char *rs1Bin, *rdBin; + rs1Bin = register_address(rs1); + rdBin = register_address(rd); - char *rsBin = "00000"; - if (strcmp(rs, "00000") != 0) - rsBin = register_address(rs); + // Convert immediate to binary string + char iimmediate[13]; + getBin(imm, iimmediate, 12); + printf("%s\n", iimmediate); - char *rtBin = "00000"; - if (strcmp(rt, "00000") != 0) - rtBin = register_address(rt); + // Print out the instruction to the file + fprintf(Out, "%s%s%s%s%s\n", iimmediate, rs1Bin, f3, rdBin, opcode); +} - char *func = NULL; - char shamtBin[6]; - // Convert shamt to binary and put in shamtBin as a char* - getBin(shamt, shamtBin, 5); - size_t i; - for (i = 0; rMap[i].name != NULL; i++) { - if (strcmp(instruction, rMap[i].name) == 0) { - func = rMap[i].function; +// Write out the S-Type instruction +void stype_instruction(char *instruction, char *rs1, char *rs2, int immediate, FILE *Out) { + + //Stores the opcode and func bits + char *opcode = NULL, *f3 = NULL; + for 
(int i = 0; sMap[i].name != NULL; i++) { + if (strcmp(instruction, sMap[i].name) == 0) { + opcode = sMap[i].op; + f3 = sMap[i].f3; + break; } } + // Set the instruction bits + char *rs1Bin, *rs2Bin; + rs1Bin = register_address(rs1); + rs2Bin = register_address(rs2); + + //S-type instructions split the immediate into imm[11:5] and imm[4:0] + int upperimm_bits, lowerimm_bits; + upperimm_bits = ((immediate & 0xFE0) >> 5); + lowerimm_bits = immediate & 0x01F; + + // Convert immediate to binary string + char immediate_upper[8], immediate_lower[6]; + getBin(upperimm_bits, immediate_upper, 7); + getBin(lowerimm_bits, immediate_lower, 5); + // Print out the instruction to the file - fprintf(Out, "%s%s%s%s%s%s\n", opcode, rsBin, rtBin, rdBin, shamtBin, func); + fprintf(Out, "%s%s%s%s%s%s\n", immediate_upper, rs2Bin, rs1Bin, f3, immediate_lower, opcode); } // Write out the I-Type instruction -void itype_instruction(char *instruction, char *rs, char *rt, int immediateNum, FILE *Out) { +void btype_instruction(char *instruction, char *rs1, char *rs2, int immediate, FILE *Out) { + + //Stores the opcode and func bits + char *opcode = NULL, *f3 = NULL; + for (int i = 0; bMap[i].name != NULL; i++) { + if (strcmp(instruction, bMap[i].name) == 0) { + opcode = bMap[i].op; + f3 = bMap[i].f3; + break; + } + } // Set the instruction bits - char *rsBin = "00000"; - if (strcmp(rs, "00000") != 0) - rsBin = register_address(rs); + char *rs1Bin, *rs2Bin; + rs1Bin = register_address(rs1); + rs2Bin = register_address(rs2); + + //B-type instructions break up immediates weirdly + int upperimm_bits, lowerimm_bits; + upperimm_bits = ((immediate & 0x800) >> 5) | + (immediate & 0x3F0) >> 4; + + lowerimm_bits = ((immediate & 0x400) >> 10) | + ((immediate & 0x00F) << 1); + + // Convert immediate to binary string + char immediate_upper[8], immediate_lower[6]; + getBin(upperimm_bits, immediate_upper, 7); + getBin(lowerimm_bits, immediate_lower, 5); - char *rtBin = "00000"; - if (strcmp(rt, "00000") != 0) - rtBin = 
register_address(rt); + // Print out the instruction to the file + fprintf(Out, "%s%s%s%s%s%s\n", immediate_upper, rs2Bin, rs1Bin, f3, immediate_lower, opcode); +} + +// Write out the U-Type instruction +void utype_instruction(char *instruction, char *rd, int immediate, FILE *Out) { + // Set the instruction bits char *opcode = NULL; - char immediate[17]; + // Get opcode bits size_t i; - for (i = 0; iMap[i].name != NULL; i++) { - if (strcmp(instruction, iMap[i].name) == 0) { - opcode = iMap[i].address; + for (i = 0; uMap[i].name != NULL; i++) { + if (strcmp(instruction, uMap[i].name) == 0) { + opcode = uMap[i].op; } } + char *rdBin; + rdBin = register_address(rd); + // Convert immediate to binary - getBin(immediateNum, immediate, 16); + char immediateStr[21]; + getBin(immediate, immediateStr, 20); - // Print out the instruction to the file - //printf("\n IMM: %s \n",immediate); - fprintf(Out, "%s%s%s%s\n", opcode, rsBin, rtBin, immediate); + // Print out instruction to file + fprintf(Out, "%s%s%s\n", immediateStr, rdBin, opcode); } // Write out the J-Type instruction -void jtype_instruction(char *instruction, int immediate, FILE *Out) { +void jtype_instruction(char *instruction, char *rd, int immediate, FILE *Out) { // Set the instruction bits char *opcode = NULL; @@ -1052,16 +898,23 @@ void jtype_instruction(char *instruction, int immediate, FILE *Out) { size_t i; for (i = 0; jMap[i].name != NULL; i++) { if (strcmp(instruction, jMap[i].name) == 0) { - opcode = jMap[i].address; + opcode = jMap[i].op; } } + char *rdBin; + rdBin = register_address(rd); + int jump_bits = (immediate & 0x80000) | + ((immediate & 0x003FF) << 9) | + ((immediate & 0x00400) >> 2) | + ((immediate & 0x7F800) >> 11); + // Convert immediate to binary - char immediateStr[27]; - getBin(immediate, immediateStr, 26); + char immediateStr[21]; + getBin(jump_bits, immediateStr, 20); // Print out instruction to file - fprintf(Out, "%s%s%s\n", opcode, immediateStr); + fprintf(Out, "%s%s%s\n", immediateStr, 
rdBin, opcode); } // Write out the variable in binary @@ -1144,8 +997,16 @@ void ascii_rep(char string[], FILE *Out) { sep_str = NULL; } -void getBin(int num, char *str, int padding) { - +void getBin(int num, char *str, int size) { + int n = num; + char *lsb = str; + for (int i = 0; i < size; i++){ + if (n & 1) lsb[size-i-1] = '1'; else lsb[size-i-1] = '0'; + n >>= 1; + } + str[size] = '\0'; +} +/* *(str + padding) = '\0'; long pos; @@ -1162,6 +1023,7 @@ void getBin(int num, char *str, int padding) { while (mask >>= 1) *str++ = !!(mask & num) + '0'; } +*/ // Convert a binary string to a decimal value int getDec(char *bin) { diff --git a/assembler/file_parser.h b/assembler/file_parser.h index b0ceae3..01771b7 100644 --- a/assembler/file_parser.h +++ b/assembler/file_parser.h @@ -16,9 +16,12 @@ void parse_file(FILE *fptr, int pass, char *instructions[], size_t inst_len, has int binarySearch(char *instructions[], int low, int high, char *string); char instruction_type(char *instruction); char *register_address(char *registerName); -void rtype_instruction(char *instruction, char *rs, char *rt, char *rd, int shamt, FILE *Out); -void itype_instruction(char *instruction, char *rs, char *rt, int immediate, FILE *Out); -void jtype_instruction(char *instruction, int immediate, FILE *Out); +void rtype_instruction(char *instruction, char *rs1, char *rs2, char *rd, FILE *Out); +void itype_instruction(char *instruction, char *rs1, char *rd, int immediate, FILE *Out); +void stype_instruction(char *instruction, char *rs1, char *rs2, int immediate, FILE *Out); +void btype_instruction(char *instruction, char *rs1, char *rs2, int immediate, FILE *Out); +void utype_instruction(char *instruction, char *rd, int immediate, FILE *Out); +void jtype_instruction(char *instruction, char *rd, int immediate, FILE *Out); void word_rep(int binary_rep, FILE *Out); void ascii_rep(char string[], FILE *Out); void getBin(int num, char *str, int padding);