Claudio Cherubino's blog Life of a Googler

12Feb/092

Brainfuck interpreter in F#

Brainfuck is an esoteric programming language whose grammar consists of only eight commands, written as a single character each.

Besides this minimalistic structure, the language is Turing-complete but writing (and reading) Brainfuck code is very hard.

The Brainfuck machine uses a finite-length tape (usually 30000 byte cells long) and two pointers: an instruction pointer and a data pointer.

The former scans the source code and executes one instruction at a time, while the latter is used to increase or decrease the value of the cell that it is pointing.

This structure can be represented by the following F# type, called BFState:

type BFState =
  { mutable program : string ;        // program being interpreted
    mutable memory : int[] ;          // memory
    mutable pc : int ;                // current program counter
    mutable pos : int }               // current pointer position

We define then a function to initialize our Brainfuck machine given the size of the memory tape and the source code, that basically zeroes all memory cells and the two pointers:

let initState memSize code  =
  { program = code;
    memory = Array.zero_create memSize;
    pc = 0;
    pos = 0; }

We reach the end of the program when the program counter steps beyond the end of the code (i.e. the length of the string containing the source):

let isEnd (state : BFState) =
  state.pc >= state.program.Length

The following two functions are used to move the program counter one step towards the end or the beginning of the source code:

let nextCommand (state : BFState) =
  { program = state.program ; memory = state.memory ; pc = state.pc + 1 ; pos = state.pos }

let previousCommand (state : BFState) =
  { program = state.program ; memory = state.memory ; pc = state.pc - 1 ; pos = state.pos }

getMem and setMem are two auxiliary functions that can be used to retrieve or set the value of the memory cell pointed by the data pointer:

let getMem (state : BFState) =
  state.memory.[state.pos]

let setMem (state : BFState) value =
  state.memory.[state.pos] <- value
  state.memory

Similarly, we have two functions to read and write from the console, which is the behavior of the ',' and '.' commands respectively:

let outputByte (state : BFState) =
  Console.Write (Convert.ToChar(state.memory.[state.pos]))
  nextCommand state

let readByte (state : BFState) =
  state.memory.[state.pos] <- Console.Read()
  nextCommand state

When the command to perform is '[', if the byte at the data pointer is zero we have to jump to the first matching ']' character following the current position:

let rec moveToForwardMatch (state : BFState) =
  match state.program.[state.pc] with
  | ']' -> (nextCommand state)
  | _ -> moveToForwardMatch (nextCommand state)

The complementary command is ']', that goes back to the matching '[' character when the byte at the data pointer is non-zero:

let rec moveToPreviousMatch (state : BFState) =
  match state.program.[state.pc] with
  | '[' -> (nextCommand state)
  | _ -> moveToPreviousMatch (previousCommand state)

We have now all the tools required to parse Brainfuck code, so we only need to write the logic to select the action to perform according to the command analyzed in a single step.

Each command takes a BFState and returns a new BFState. Any character different from the eight allowed will be simply ignored:

let step (state : BFState) =
  match state.program.[state.pc] with
  | '+' -> { program = state.program ; memory = setMem state (getMem state + 1) ; pc = state.pc + 1 ; pos = state.pos }
  | '-' -> { program = state.program ; memory = setMem state (getMem state - 1) ; pc = state.pc + 1 ; pos = state.pos }
  | '<' -> { program = state.program ; memory = state.memory ; pc = state.pc + 1 ; pos = state.pos - 1}
  | '>' -> { program = state.program ; memory = state.memory ; pc = state.pc + 1 ; pos = state.pos + 1}
  | '[' -> if (state.memory.[state.pos] = 0) then moveToForwardMatch state else nextCommand state
  | ']' -> if (state.memory.[state.pos] <> 0) then moveToPreviousMatch state else nextCommand state
  | '.' -> outputByte state
  | ',' -> readByte state
  | _ -> nextCommand state // ignore any non-command character

The parser, which could be the only publicly available function, will take care of initializing the state and recursively step through the code, one instruction at a time, until the end of the source:

let parse memSize code =
  let rec run (state : BFState) =
    if (not (isEnd state)) then
       run (step state)
  initState memSize code |> run

You can find a repository of Brainfuck sample programs at this site: http://esoteric.sange.fi/brainfuck/bf-source/prog/, the following code contains the classic Hello World! and how to parse it using the interpreter we just wrote:

let code = ">+++++++++[<++++++++>-]<.>+++++++[<++++>-]<+.+++++++..+++.>>>++++++++[<++++>-]<.>>>++++++++++[<+++++++++>-]<---.<<<<.+++.------.--------.>>+."

parse 30000 code

The whole Brainfuck interpreter code can be found just below, feel free to take it and dissect it:

#light
open System

type BFState =
  { mutable program : string ;        // program being interpreted
    mutable memory : int[] ;          // memory
    mutable pc : int ;                // current program counter
    mutable pos : int }               // current pointer position

let initState memSize code  =
  { program = code;
    memory = Array.zero_create memSize;
    pc = 0;
    pos = 0; }

let isEnd (state : BFState) =
  state.pc >= state.program.Length

let nextCommand (state : BFState) =
  { program = state.program ; memory = state.memory ; pc = state.pc + 1 ; pos = state.pos }

let previousCommand (state : BFState) =
  { program = state.program ; memory = state.memory ; pc = state.pc - 1 ; pos = state.pos }

let getMem (state : BFState) =
  state.memory.[state.pos]

let setMem (state : BFState) value =
  state.memory.[state.pos] <- value
  state.memory

let rec moveToForwardMatch (state : BFState) =
  match state.program.[state.pc] with
  | ']' -> (nextCommand state)
  | _ -> moveToForwardMatch (nextCommand state)

let rec moveToPreviousMatch (state : BFState) =
  match state.program.[state.pc] with
  | '[' -> (nextCommand state)
  | _ -> moveToPreviousMatch (previousCommand state)

let outputByte (state : BFState) =
  Console.Write (Convert.ToChar(state.memory.[state.pos]))
  nextCommand state

let readByte (state : BFState) =
  state.memory.[state.pos] <- Console.Read()
  nextCommand state

let step (state : BFState) =
  match state.program.[state.pc] with
  | '+' -> { program = state.program ; memory = setMem state (getMem state + 1) ; pc = state.pc + 1 ; pos = state.pos }
  | '-' -> { program = state.program ; memory = setMem state (getMem state - 1) ; pc = state.pc + 1 ; pos = state.pos }
  | '<' -> { program = state.program ; memory = state.memory ; pc = state.pc + 1 ; pos = state.pos - 1}
  | '>' -> { program = state.program ; memory = state.memory ; pc = state.pc + 1 ; pos = state.pos + 1}
  | '[' -> if (state.memory.[state.pos] = 0) then moveToForwardMatch state else nextCommand state
  | ']' -> if (state.memory.[state.pos] <> 0) then moveToPreviousMatch state else nextCommand state
  | '.' -> outputByte state
  | ',' -> readByte state
  | _ -> nextCommand state // ignore any non-command character

let parse memSize code =
  let rec run (state : BFState) =
    if (not (isEnd state)) then
       run (step state)
  initState memSize code |> run
Comments (2) Trackbacks (0)
  1. Excellent article!
    Now show us how to do the opposite :)

  2. It is straightforward, just read the code bottom-up! :)


Leave a comment

(required)

No trackbacks yet.