Return Styles: Pseud0ch, Terminal, Valhalla, NES, Geocities, Blue Moon. Entire thread

My first Brainfuck

Name: The Amazing Anus !Anus3nMVO2 2009-08-31 1:43

I was bored as fuck so I decided to implement a Brainfuck interpreter in C.

/* brainfuck.c ... version 0.9
 *
 * usage: brainfuck <file>
 *
 */

 
#include <stdio.h>

/* 64kb -1b each
 *
 * The Brainfuck spec says that memory should be 30000 bytes, but I prefer to
 * use a whole 64kb for both code and memory.
 *
 */
#define BRAINF_CODESIZE   65535
#define BRAINF_MEMSIZE   65535


FILE* fp;

unsigned char* code, *memory;
short unsigned int codeptr, memptr;


int main(int argc, char* argv[])
{
     if (argc != 2)
          exit(1);

     if ((fp = fopen(argv[1], "r")) == NULL)
          exit(1);

     code = malloc(BRAINF_CODESIZE);
     memory = malloc(BRAINF_MEMSIZE);

     memset(code, 0, BRAINF_CODESIZE);
     memset(memory, 0, BRAINF_MEMSIZE);

     codeptr = 0;
     memptr = 0;

     while (!feof(fp))
     {
          fread(code, BRAINF_CODESIZE -1, 1, fp);
     }


     /*** Interpreting begins ***/
     while (code[codeptr])
     {
          switch (code[codeptr])
          {
               case '>':
                    ++memptr;
                    ++codeptr;
                    break;

               case '<':
                    --memptr;
                    ++codeptr;
                    break;

               case '+':
                    ++memory[memptr];
                    ++codeptr;
                    break;

               case '-':
                    --memory[memptr];
                    ++codeptr;
                    break;

               case '.':
                    putchar(memory[memptr]);
                    ++codeptr;
                    break;

               case ',':
                    memory[memptr] = getchar();
                    ++codeptr;
                    break;

               /* :NOTE: Some kind of error handling should take
                * place if no ']' is found.
                */
               case '[':
                    if (!memory[memptr])
                    {
                         while ((code[codeptr] != ']') && (code[codeptr]))
                         {
                              ++codeptr;
                         }
                    }
                    else
                         ++codeptr;
                    break;


               /* :NOTE: Some kind of error handling should take
                * place if no '[' is found.
                */
               case ']':
                    if (memory[memptr])
                    {
                         while ((code[codeptr] != '[') && (codeptr))
                              --codeptr;
                    }
                    else
                         ++codeptr;
                    break;

               /* invalid instruction
                * For now we will just continue executing when this happens.
                * It should ignore characters 10, 13, & 32 anyways.
                */
               default:
                    ++codeptr;
                    break;
          }
     }

     free(code);
}

Name: Anonymous 2009-08-31 1:54

I was bored as fuck so I decided to read /prog/

Name: Anonymous 2009-08-31 2:01

fuck off and die, you dirty tripfag

Name: The Amazing Anus !Anus3nMVO2 2009-08-31 2:05

Please give me feedback if I did anything wrong (eg. that is poor style), or what you would have done that would have been more efficient or readable or whatever.

Name: Anonymous 2009-08-31 2:14

>>1
Your implementation of the loop is kind of pathetic. Instead, you should push the location of the last '[' onto a stack, so you can jump directly to it.

Tomorrow I'll see if I can find the one I wrote for an embedded MIPS emulator a few years ago, and show you how it's done.

Name: Anonymous 2009-08-31 2:15

Now write an ANSI C interpreter in brainfuck.

Name: The Amazing Anus !Anus3nMVO2 2009-08-31 2:18

>>5
Ok, thanks!

Name: Anonymous 2009-08-31 2:19

>>4
Your pointer semantics are counter-intuitive. I'm no fan of K&R, but that's another argument for another day.

Name: Anonymous 2009-08-31 2:33

fucking fail.  Loops are done so horridly I want to shoot myself.  Not only are they ugly as fuck but you can't nest them like in BF.
[ is while(memory[memptr]){
] is }

if you had something like this:
[[[ ]]]
if the first bracket failed you wouldn't just to the first ] you see, you would go to the matching one.

learn2jumpstack

Name: The Amazing Anus !Anus3nMVO2 2009-08-31 3:04

>>9
Ah yes, thanks for pointing that out.  I see I'll need to redesign this thing more than I thought.

Name: Anonymous 2009-08-31 3:48


; Common lisp

(defconstant +bf-memory+ 30000)              
(defparameter *memory* (make-array +bf-memory+      
                                   :initial-element 0     
                                   :element-type '(unsigned byte 8)))
(defun bf (str &optional (index 0)
           (position 0))
  (loop for c across (subseq str index)
     for index from index
     do    
     (case c                               
       (#\+ (incf (aref *memory* position)))
       (#\- (decf (aref *memory* position)))    
       (#\> (when (= (incf position) +bf-memory+)
              (setq position 0)))
       (#\< (if (zerop position)
                (setq position (1- +bf-memory+))
                (decf position)))                       
       (#\. (princ (code-char (aref *memory* position))))          
       (#\, (setf (aref *memory* position) (char-code (read-char))))
       (#\[ (unless (zerop (aref *memory* position))            
              (loop until (zerop (bf str (1+ index) position)))))
       (#\] (return (aref *memory* position))))))


Written just now. Use like (bf "++<>+>-"). Memory is circular.

Name: Anonymous 2009-08-31 6:32

If this was inspired by Xarn's latest blog post, why didn't you just look at the via-C compiler he posted for direction?

Name: Haxus the Brainfucker 2009-08-31 13:58

Haxus the Brainfucker

Name: xarnic dongs 2009-08-31 14:03

xarnic dongs

Name: Anonymous 2009-08-31 16:02

- Codesize and memsize should probably be 65536 instead of 65535. Remember that a size of N means that you can use elements 0 .. (N-1).
- You're relying on integer wraparound at 65536, which may or may not be correct depending on architecture. (Actually, you're relying on integer wraparound at 65535; see above.) You can use uint16_t instead if you need an unsigned integer that is always 16 bits.
- You're not checking the result of the two malloc calls. Shame on you.
- The code reading is wrong. It may read, say, 1024 bytes in the first iteration, stop on a (temporary) read error, notice that it's not yet at EOF, start again, and overwrite these 1024 bytes in the next fread call. Either stop in case of a read error and remove the loop, or handle it properly and keep tabs on the number of bytes successfully read. Also, the -1 here is wrong (the second argument to fread is a size, not a maximum value, so there's no need to subtract 1 from the real size).
- The '[' and ']' handlers are wrong, as has already been pointed out. >>5 is a good solution, but it requires you to keep a more complex state than you're currently using; if you want your implementation to be as simple and stateless as possible, you could use something like the following:


   case '[':
        if (!memory[memptr])
        {
             int bracketdepth = 1;
             while (backetdepth != 0)
             {
                 ++codeptr;
                 if (code[codeptr] == ']') bracketdepth--;
                 if (code[codeptr] == '[') bracketdepth++;
             }
             ++codeptr;
        }
        else
            ++codeptr;
        break;


It IS still a bit ugly though (although that may be more or less the point of anything brainfuck-related ;) )
- A minor stylistic issue: your code becomes somewhat clearer if you increment codeptr at the very end of the main loop, instead of at each branch. This has the downside of making [ and ] somewhat more complicated, so you have to decide for yourself whether it's actually an improvement.
- I'm pretty sure all characters other than +-<>.,[] are well-defined as being comments, so your default: clause is perfectly fine.
- Finally, you don't return a value (in particular, the value 0) at the end of main, something the compiler will probably have warned you about.

Name: Anonymous 2009-08-31 16:27

Remember that nice Brainfuck implementations really only need to guarantee 9999 cells, so unless you're writing it specifically to accommodate some non-conforming Brainfuck code, you don't even have to fuck around with mallocating.

Name: The Amazing Anus !Anus3nMVO2 2009-08-31 16:37

>>15
ooh very good!  I'll fix all of these things today.

Name: Anonymous 2009-08-31 16:58

You can use uint16_t instead if you need an unsigned integer that is always 16 bits.
This doesn't work for me.

brainfuck.c [Warning] data definition has no type or storage class

Apparently uint16_t isn't defined anywhere for my shitty setup.

Name: Anonymous 2009-08-31 17:33

>>18
You need to include <stdint.h> to use these types. For details, see http://linux.die.net/man/3/uint16_t .

Name: The Amazing Anus !Anus3nMVO2 2009-08-31 18:54

>>8
You're right.  I should have named "code" and "memory" with the letters "ptr" at the end, instead of naming the array index like that, shouldn't I?  Please explain anything else I did wrong with pointer semantics.

>>19
Thanks.

Just knocked off a few of those things.  I'll fix the whole nested brackets thing later.

Name: Anonymous 2009-08-31 19:04

>>20
>I should have named "code" and "memory" with the letters "ptr" at the end, instead of naming the array index like that, shouldn't I?

I (>>15) would suggest keeping "code" and "memory" as they are, and replacing "codeptr" and "memoryptr" by "codeindex" and "memoryindex", respectively. The ptr variables are wrong, because they aren't pointers, whereas "code" and "memory" describe their functions perfectly.

Name: Anonymous 2009-08-31 23:52

>>20
I mean the way you've declared your pointers.

unsigned char* bob, jill;

This is counter-intuitive, because it creates one unsigned char and one pointer to an unsigned char. jill is a regular unsigned char. What I mean is you should be defining your pointers as such:

unsigned char *bob, *jill;

Name: Anonymous 2009-09-01 7:36

>>22
Of course, the simple solution to this problem is to never declare multiple variables in a single declaration, which is often considered bad style anyway.

Name: Anonymous 2009-09-01 7:56

>>23
True, but after I switched to >>22's way of things, I stopped making that mistake altogether.

Name: Anonymous 2009-09-01 8:36

>>23
That still doesn't cover the usage semantics. You dereference by using unsigned char bobval = *bob; and then you have a straight-up variable that's copied from the data that bob points to.

Pointers are not proper types, they are variable modifiers. They are used on a per-variable basis, and as such, should always be hugging the variable they modify semantically.

Name: Anonymous 2009-09-01 8:45

>>25
What load of bollocks.
Pointers are not proper types,
In C, there are complete and incomplete types. Pointers belong to the former.
[pointers] should always be hugging the variable they modify semantically.
Pointers might 'hug' (what fucked up term is that? where did you find the relation between point and hug?), the address of something that is not an object. Consider the pointer to the incomplete type int[]: int (*foo)[];

Name: Anonymous 2009-09-01 9:46

>>26
Not bollocks. Pointers are not proper types. You cannot define a purely pointer variable. There must be some type to point to, if nothing else, void *. They might be complete or incomplete, it doesn't really matter. That's irrelevant to what I was saying.

There is no relationship between point and hug. There is a spacial relationship between the operator and the operand which constitutes hugging.

Name: Anonymous 2009-09-01 14:53

>>22
unsigned char* bob, jill;
Where the fuck are you looking?
>>1
unsigned char* code, *memory;

Name: The Amazing Anus !Anus3nMVO2 2009-09-02 19:37

Ok, finally got around to fixing it.

/* brainfuck.c ... version 0.91
 *
 * usage: brainfuck <file>
 *
 */

#include <stdio.h>
#include <stdint.h>

/* 64kb each
 *
 * The Brainfuck spec says that memory should be 30000 bytes, but I prefer to
 * use a whole 64kb for both code and memory.
 *
 */
#define BRAINF_CODESIZE   65536
#define BRAINF_MEMSIZE   65536


FILE* fp;

uint8_t* code, *memory;
uint16_t codeindex, memindex;
int16_t bracketdepth;


int main(int argc, char* argv[])
{
     if (argc != 2)
     {
          printf("usage: brainfuck <file>\n");
          exit(1);
     }

     if ((fp = fopen(argv[1], "r")) == NULL)
          exit(1);

     if ((code = malloc(BRAINF_CODESIZE)) == NULL)
          exit(1);

     if ((memory = malloc(BRAINF_MEMSIZE)) == NULL)
          exit(1);

     memset(code, 0, BRAINF_CODESIZE);
     memset(memory, 0, BRAINF_MEMSIZE);

     codeindex = 0;
     memindex = 0;

     fread(code, BRAINF_CODESIZE, 1, fp);

     if (ferror(fp))
          exit(1);

     bracketdepth = 0;


     /*** Interpreting begins.  Ends on NULL in codebyte. ***/
     while (code[codeindex])
     {
          switch (code[codeindex])
          {
               case '>':
                    ++memindex;
                    ++codeindex;
                    break;

               case '<':
                    --memindex;
                    ++codeindex;
                    break;

               case '+':
                    ++memory[memindex];
                    ++codeindex;
                    break;

               case '-':
                    --memory[memindex];
                    ++codeindex;
                    break;

               case '.':
                    putchar(memory[memindex]);
                    ++codeindex;
                    break;

               case ',':
                    memory[memindex] = getchar();
                    ++codeindex;
                    break;


               case '[':
                    if (!memory[memindex])
                    {
                         ++bracketdepth;
                         while (bracketdepth)
                         {
                              if (!code[++codeindex])  /* Exit if NULL */
                                   exit(1);

                              if (code[codeindex] == '[')
                                   ++bracketdepth;

                              if (code[codeindex] == ']')
                                   --bracketdepth;
                         }
                    }
                    else
                         ++codeindex;
                    break;


               case ']':
                    if (memory[memindex])
                    {
                         --bracketdepth;
                         while (bracketdepth)
                         {
                              if (codeindex-- == 0)  /* Exit if need to go */
                                   exit(1);          /* past first codebyte */

                              if (code[codeindex] == '[')
                                   ++bracketdepth;

                              if (code[codeindex] == ']')
                                   --bracketdepth;
                         }
                    }
                    else
                         ++codeindex;
                    break;


               /* non-instruction character */
               default:
                    ++codeindex;
                    break;
          }
     }

     free(code);

     return 0;
}


Again, tell me if I did anything wrong (eg. that is poor style), or what I could have done better, like more efficiently or would be more readable.


And here's a little program I found that will print out the Fibonacci numbers:

>++++++++++>+>+[
    [+++++[>++++++++<-]>.<++++++[>--------<-]+<<<]>.>>[
        [-]<[>+<-]>>[<<+>+>-]<[>+<-[>+<-[>+<-[>+<-[>+<-[>+<-
            [>+<-[>+<-[>+<-[>[-]>+>+<<<-[>+<-]]]]]]]]]]]+>>>
    ]<<<
]


That code doesn't end itself so you'll have to terminate the process yourself if you run it.

Name: Anonymous 2009-09-02 19:41

>>11
That's a pretty clean implementation. Good Job!

Name: Anonymous 2009-09-02 22:40

>>28
You are a dense person.

Name: Anonymous 2009-09-02 23:13

>>31
i am actually quite sparse

Name: Anonymous 2009-09-02 23:52

>>28
unsigned char* is a problem semantically. I used an example to illustrate why this is a problem. Since you had difficulty understanding this, I must conclude that IHBT.

>>29
Global variables are considered poor form in many cases, just a note. It's fine for this particular program for a number of reasons, but that may not always be the case.

You could also abstract the logic into separate function(s) so as to implement some OOP PRINCIPLES, though that's largely superfluous and stupid in this case.

Having different exit codes to indicate the exit point is generally a good idea.

Another idea if you're interested would be checking the validity of the Brainfuck program first and issue warnings of some sort if it lacks validity. That's a bit harder than simply executing the program, but as far as lexical processing, it's hard to get any easier than that.

Name: Anonymous 2009-09-03 3:07

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

int main(int argc, char **argv)
{
    if (!(argc == 2 || argc == 3))
    {
    fprintf(stderr, "Usage: ./brainfuck source [memory]\n");
    exit(1);
    }

    FILE *inFP;

    if ((inFP = fopen(argv[1], "r")) == NULL)
    {
    fprintf(stderr, "Cannot open source file\n");
    exit(1);
    }

    char *sigma = "+-<>.,[]";
    int lb = 0, rb = 0, count = 0, i;
    char temp;

    while ((temp = fgetc(inFP)) != EOF)
    {
    for (i = 0; i < strlen(sigma); i++)
        if (sigma[i] == temp)
        count++;
    if (temp == '[')
        lb++;
    if (temp == ']')
    {
        rb++;
        if (lb - rb < 0)
        {
        fprintf(stderr, "Syntax error: bracket underflow\n");
        exit(1);
        }
    }
    }
    if (lb - rb > 0)
    {
    fprintf(stderr, "Syntax error: bracket overflow\n");
    exit(1);
    }

    rewind(inFP);

    char *source = (char *)malloc(sizeof(char) * count);
    int pos = 0;
    while ((temp = fgetc(inFP)) != EOF)
    for (i = 0; i < strlen(sigma); i++)
        if (sigma[i] == temp)
        source[pos++] = temp;

    int msize = 30000;
    if (argc == 3)
    msize = atoi(argv[2]);
    char *memory = (char *)malloc(sizeof(int) * msize);
    for (i = 0; i < msize; i++)
    memory[i] = 0;
    char *mptr = memory;
    char *sptr = source;
    char **stack = (char **)malloc(sizeof(char *) * lb);
    int stack_size = 0;
    while (sptr < source + count)
    {
    temp = *sptr;
    if (temp == '+') (*mptr)++;
    if (temp == '-') (*mptr)--;
    if (temp == '>') mptr++;
    if (temp == '<') mptr--;
    if (temp == ',') *mptr = fgetc(stdin);
    if (temp == '.') fputc(*mptr, stdout);
    if (temp == '[')
    {
        if (*mptr > 0)
        stack[stack_size++] = sptr;
        else
        while (*sptr != ']')
            sptr++;
    }
    if (temp == ']')
    {
        if (*mptr > 0)
        sptr = stack[stack_size-1];
        else
        --stack_size;
    }
    sptr++;
    }
   
    free(stack);
    free(memory);
    free(source);
}

Name: The Amazing Anus !Anus3nMVO2 2009-09-03 3:32

>>34
Congrats anon, your code is faster than both mine and some random interpreter I found online after coding mine.

$ time bf bench.bf
real    0.709 secs

$ time qdbf bench.bf
real    0.220 secs

$time bfanon bench.bf
real    0.190 secs


bf is my own interpreter.  qdbf is some shit I found online.  bfanon is what I called yours >>34.

Name: The Amazing Anus !Anus3nMVO2 2009-09-03 4:02

>>35
So as you can see, my interpreter sucks ass.

HOWEVER, when purely performing +-<> operations, my bf and the qdbf can crank out about 60,000 instructions in ~0.03 seconds.  Yours (bfanon) takes 0.15 seconds to do the exact same thing.

That makes >>34-chan's interpreter 5 times slower at executing instructions than the other 2.  Which is funny because mine takes 0.709 seconds to do the little benchmark thingy I wrote, whereas anon's does it fastest with qdbf close behind.  I guess this just means that my bracket code really sucks.

Name: Anonymous 2009-09-03 6:44

>>29
All your global variables are in fact local to the main function. This is pretty pedantic in this specific case, since main is the only function, but it's a bad habit to develop - avoid global variables unless you really, really can't.

The bracketdepth variable doesn't even need to be declared in the whole main function - you only need it in the [ and ] branches, without saving its state between two executions of [ or ]. In general, try to have a variable declared in the smallest possible scope; that way, I don't have to read the code in detail to find out whether the variable is used outside the relevant blocks. This greatly enhances readability and maintainability.

Oh, and you forgot to free(memory) at the end.


>>36
So as you can see, my interpreter sucks ass.
It's slowest, but that doesn't mean it sucks ass. For instance, it's also the simplest and easiest to read interpreter. Running speed is not the main goal of an interpreter, after all. If you want raw speed, write a compiler instead.

Name: Anonymous 2009-09-03 9:05

Let's see some benchmarks.

#include <stdio.h>
#include <stdlib.h>
#define CODESIZE 50000
int main(int argc, char** argv) {
    FILE *f = fopen(*++argv, "r");
    char c, d, code[CODESIZE];
    int syms[256], *j, count = 0, pos = 0, l;
    for(j = syms+256; j > syms; *--j = 0);
    syms['>'] = 1;
    syms['<'] = 1;
    syms['+'] = 1;
    syms['-'] = 1;
    syms['.'] = 1;
    syms[','] = 1;
    syms['['] = 1;
    syms[']'] = 1;
    for(;;) {
        while(count < pos) {
            for(;;) {
                c = fgetc(f);
                if(c == EOF) exit(1); /* Unexpected EOF */
                if(syms[c]) break;
            }
            *(code+count++) = c;
        }
        if(count > pos) c = *(code+pos++);
        else {
            for(;;) {
                c = fgetc(f);
                if(c == EOF) exit(0); /* End of program */
                if(syms[c]) break;
            }
            *(code+count++) = c;
            ++pos;
        }
        switch(c) {
            case '+': ++*j; break;
            case '-': --*j; break;
            case '.': fputc(*j, stdout); break;
            case '<': --j; break;
            case '>': ++j; break;
            case ',': *j = fgetc(stdin); break;
            case '[':
                if(!*j) {
                    l = 1;
                    for(;;) {
                        if(count > pos) d = *(code+pos++);
                        else {
                            for(;;) {
                                d = fgetc(f);
                                if(d == EOF) exit(1); /* Unexpected EOF */
                                if(syms[d]) break;
                            }
                            *(code+count++) = d;
                            ++pos;
                        }
                        if(d == '[') ++l;
                        else if(d == ']') {
                            if(--l < 0) exit(2); /* Unmatched open bracket */
                            else if(l == 0) break;
                        }
                    }
                }
                break;
            case ']':
                if(*j) {
                    l = 1;
                    --pos;
                    for(;;) {
                        if(pos <= 0) exit(3); /* Unmatched close bracket */
                        d = *(code+--pos);
                        if(d == ']') ++l;
                        else if(d == '[') {
                            if(--l < 0) exit(3); /* Unmatched close bracket */
                            else if(l == 0) break;
                        }
                    }
                }
                break;
            default: break;
        }
    }
}

Name: Anonymous 2009-09-03 9:11

>>38
What's this? Is this qdbf, or your attempt to make a fast implementation?

Name: Anonymous 2009-09-03 9:26

>>39
Neither. It was just a learning exercise because I wasn't familiar with the semantics of brainfuck before now.

Newer Posts
Don't change these.
Name: Email:
Entire Thread Thread List