# unlambda v1 interpreter in sed
# 2003-02-19 - Laurent Vogel - http://lvogel/free.fr
# GPL version 2 or later (see http://www.gnu.org/copyleft/gpl.html)
# This is an interpreter for unlambda version 1.
# Unlambda is an esoteric language invented by David Madore in 1999.
# You should really refer to the unlambda page for the complete description,
# of the language, including a tutorial and other things:
# http://www.eleves.ens.fr:8080/home/madore/programs/unlambda/unlambda.html
# but nevertheless here is a sketchy description of unlambda:
#
# The only data type in unlambda are functions. A function, when applied
# to another function, yields a third function, and so on:
# `<f><g> is the notation for the application of <f> to <g>.
# When evaluating `<f><g>, <f> is first evaluated, then <g>
# (unless <f> did evaluate to the builtin d) then <f> is applied to <g>.
# The unlambda builtin functions are:
# i the identity function: `i<a> returns <a>
# k the constant generator function: `k<a> returns a function <f>
# such as `<f><b> returns <a>. In short: ``k<a><b> returns <a>
# s the substitution function: ```s<a><b><c> returns ``<a><c>`<b><c>
# v a shortcut for ```s``s`kskk``s``s`kskk: `v<a> returns v
# .x (for any ASCII char x): `.x<a> outputs x then returns <a>
# r another notation for .<newline>
# d `d<a> does not evaluate <a>, but ``d<a><b> evaluates `<a><b>
# c ("call with current continuation") is difficult to explain:
# `c<a> returns `<a><cont> for a new function <cont> called a
# continuation; if later `<cont><b> is evaluated, then it is as if at
# the time `c<a> was evaluated <b> was returned instead of `<a><cont>.
# An unlambda program consists of one expression. Spaces and comments
# (everything between a sharp sign '#' and the end of line) are allowed
# and ignored.
#
# This version of the interpreter does not implement the unlambda v2
# builtins. There are no unlambda v2 programs small enough anyway.
#
# The following Programs from the CUAN (Comprehensive Unlambda Archive
# Network) run successfully:
# Hello, Square, power2, count, count2, trivial, trivial2, trivial3,
# fibo, Triangle, pattern(*)
# Jean.Marot/Quine, Quine2
# Denis.Auroux/q2, q3, q5
# Panu.Kalliokoski/Quine2
# (*) modified to print stars and newlines.
# Bigger programs were not tested.
# Sed note
# --------
# Some attempt has been made to make this script portable. It is known to
# run on the following sed interpreters:
# GNU sed 3.02 and newer: if the last line of the unlambda program does
# not end with a newline, then no newlines are ever printed. I consider
# this to be a bug in GNU sed.
# HHsed: runs up to seven times faster than GNU sed-4.0.5 on my machine,
# once the following two lines are commented out in sedexec.c:
# if (++jumpcnt >= JUMPLIMIT)
# ABORT2(INFLOOP, lnum);
# You will also need to increase MAXBUF for some programs.
# When parsing the program, the hold buffer contains:
# the line number, then a space, then the program parsed up to now
x
# Initialize the hold buffer
1 s/^/0 /
# Increment line number
s/ /i /
:inc
s/^[^ ]*.i/&0123456789i0_/
s/^i/1/
s/\(.\)i[^_]*\1\(i*.\)[^_]*_/\2/
/^[^ ]*i/ b inc
# Back to the pattern space
x
# Obtain a newline at the beginning of the pattern space. Successfully
# parsed input will go left of the newline. A space is added first for
# buggy sed whose G does not add a newline over an empty pattern space.
s/^/ /
G
s/^\([^\n]*\)\(\n\).*/\2\1/
:parse
# Remove space and TABs
s/\(\n\)[ ]*/\1/
# Remove comments
s/\n#.*//
# Rename '.x' for some special chars x into ',y', y being a letter.
# Special chars, and reason why:
# - ':@();|<>_':!#{} used as special markers in [^x]* sed patterns
# - '`': made special to allow s/`i//g
/\n\.[:@();|<>_`!#{}]/ {
s//&:a@b(c)d;e|f<g>h_i`j!k#l{m}n~/
s/\(\n\)\.\(.\)[^~]*\2\([^~]\)[^~]*~/,\3\1/
}
# .<newline>
s/\n\.$/r/
# .x in the default case
s/\(\n\)\(\..\)/\2\1/
# Plain tokens
s/\(\n\)\([cdikrsv`]*\)/\2\1/
# Illegal input
/\n[^cdikrsv.`# ]/ {
# remove anything before the first offending char
s/.*\n//
# print an error message including the line number
x
s/^\([0-9]*\).*/Syntax error line \1:/p
x
q
}
# Continue while not at the end of this line
/\n./ b parse
# Accumulate to the hold buffer
H
# Go on parsing while the last line is not reached
$ !d
# Get back program from the hold buffer, and remove the line number.
# The hold buffer will now contain pending output. Since some sed
# interpreters have weird behaviour when the hold buffer is empty,
# make sure there is always a space in it.
s/.*/ /
x
s/[^ ]* //
s/\n//g
# Verify that the program contains one, and only one, expression.
# This is done by skipping one expression to the right. After skipping
# error messages are printed if the program is too short or too long,
# else we come back at :valid
s/^/?>/
s/$/|##a/
b skip
:valid
# Initialize the continuation database.
# A continuation is represented in the pattern space as
# |[I];[N];{A};{B}
# [I] is the continuation unique identifier ([I] matches /^aa*$/);
# [N] is the reference counter of the continuation (matches /^a*$/);
# [N] is '' when only one continuation reference exists, and it
# is increased by one 'a' per additional reference. The refcounts
# are updated when copying or deleting parts of the pattern space.
# {A} and {B} are the 'before' and 'after' part of the unlambda
# state at the time c was applied (i.e. the unlambda state was
# {A}`c{expr}{B})
# Continuations are deleted when no longer referenced, but the
# unique identifier remains in the base in the form |[i] ready
# to be re-used to serve as the unique identifier of another
# continuation. For instance, after evaluating `cc
# `cc -> `c<c1> -> `<c1><c2> -> <c2>
# we obtain the following pattern space:
# <%aa%|aa;;;|a|
# where 'aa' is the ID of a continuation referenced once (%aa%)
# and 'a' is a free ID.
# (see also the explanation on continuations below)
# Position the evaluation cursor at the beginning of the unlambda program.
s/^/_/
# Meaning of special chars:
# Evaluation steps
# ----------------
# '_' is the evaluation cursor, at the left of the expression to evaluate.
# 'x>' is the skip right operator (x is any char except @).
# branching to :skip will drop '>x' to the right skipping exactly
# one (balanced) unlambda expression.
# '<' is the move left operator. Branching to :left will move '<' to the
# left until ':' is reached
# ':' is used to block <
#
# They are used as follows: '_' first before the expression to eval.
# during eval it may be necessary to leave a token 'x:' and skip right over
# expressions. We then use 'y>' and branch to :skip which moves '>y'
# farther to the right. '>y' proceeds and ultimately should return with a
# '<' which goes back to some 'z:' yielding 'z:<' which itself eventually:
# - either drops a '_' and branches to :eval
# - or keeps a '<' and branches to :left
# - or goes right by dropping a 't>' and branching to :skip
#
# Because of this, treatment of builtin 'x' will typically involve code
# fragments in different places:
# - handling '_`x' located after :eval
# - handling '>x' located after :skip
# - handling 'x:<' located after :left
#
# At any time there is at most one of '_', '<' and '>' in the pattern space
#
# other special chars
# -------------------
# '(...)' are used to delimit zones to delete (by branching to :delete) or
# to scan after copy to increase the refcount of any continuation
# references found in them (by branching to :incref).
# A '<' must be present somewhere in the data outside '(...)', as
# both :delete and :incref will branch to :left when done.
# '|' is used to delimit continuations in the continuation database.
# ';' is used to delimit fields in the continuation database, and to
# serve as marker for copies during the execution of complex builtins.
# '#' is used to delimit the skip reference free IDs
# '!' is used by :skip internally
# '{}' are used to note 'skip references', a way of caching the execution
# of skip to speed things up (three times faster under HHsed)
#
# Encoding of unlambda expressions
# --------------------------------
# '`' is the apply operator
# cdikrsv are builtins
# '.x' and ',x' are the printing builtins
# 'D<expr>' is the promise `d<expr>
# 'K<expr>' is the result of evaluating `k<expr>
# 'S<expr>' is the result of evaluating `s<expr>
# 'T<e1><e2>' is the result of evaluating ``s<e1><e2>
# '%aaa%' is a continuation reference
# eval/apply
:eval
# `i is a no-op
s/`i//g
# Uncomment the following line when debugging to print the state from
# time to time (the state is not printed after each eval, because
# several evaluations might occur between two jumps to :eval)
#p
# Expressions other than apply evaluate to themselves, with or without
# skip reference
s/_\([^{`]\)/<\1/
s/_\({[^,]*,[^`]\)/<\1/
# kill the skip reference
s/_\(`*\){\([^,]*,\)\(.*\)}\2\([^#]*#\)/_\1\3\4\2/
# <expr> is not evaluated when evaluating `d<expr>
s/_`d/<D/
/</ b left
# ``dAB - go evaluate B with 'e>', leaving '`:' to evaluate `AB
s/_`D/`:e>/
# ``kAB - leave A, go delete B with 'K>'
s/_`K/K>/
# ``sAB - replace S with T, go evaluate B with 'e>'
s/_`S/Te>/
# ```sABC - leave '`:`' behind to form the first ``A of ``AC`BC,
# and go over A activate 'f>' that will transform BC in C`BC
s/_`T/`:`f>/
/>/ b skip
# `k and `s just eval their argument and store it in K<expr> or S<expr>.
# the '<' ultimately resulting from evaluating the argument will move
# through 'K' or 'S', giving 'K<expr>' or 'S<expr>' as return value.
s/_`k/K_/
s/_`s/S_/
# These evaluate their argument, leaving an 'x:' token to do the proper
# job after '<' returns. See at the bottom of the file, after :left,
# what is done then.
s/_``/`:_`/
s/_`\([rc]\)/\1:_/
# .x becomes x.: to distinguish easily z.:< (.z) from z:< (delete)
s/_`\([.,]\)\(.\)/\2\1:_/
# Prepare leaving 'v', evaluate expr, then delete it using 'z:<'
s/_`v/vz:_/
s/_`\(%a*%\)/\1:_/
/_/ b eval
# Skip right 1 expression. Starting with '>@' we move this token to
# the right, adding one '@' per '`' or 'T', going over any number of
# 'S', 'D', 'K', and removing one @ per builtin, until no more @ is left.
# Note: since ',' is always followed by a lowercase letter, any number of
# ',' can also be skipped together with 'S', 'D', 'K' (because one cannot
# encounter ',`', ',T' ...)
# Some caching is implemented: long expressions are surrounded by opening
# and closing "skip references": {ab, ... }ab, . At any time, for each
# skip-ref ID "ab", either there is exactly one opening and one closing
# skip-ref with this ID in the entire pattern space, or there are no
# skip-refs with this ID and this ID is in the free-skip-ref-ID-list.
# skip-refs are mutated when copying (see :incref below) and go to the
# free list when deleted (see :delete)
#
:skip
# remember the start of the skip
s/>/!>@/
:skiploop
# increase depth for each '`' or 'T'
s/>\(>*@@*\)\([SDK]*[`T]\)/\2>>\1@/
# decrease depth for each builtin
s/>\(>*@*\)@\([SDK,]*[a-z]\)/\2>>\1/
s/>\(>*@*\)@\([SDK]*\..\)/\2>>\1/
s/>\(>*@*\)@\([SDK]*%a*%\)/\2>>\1/
# jump over a skip ref
s/>\(>*@*\)@\([SDK]*\){\([^,]*,\)\(.*\)}\3/\2{\3\4}\3>\1/
# if the overall number of skip steps done this time is big, recurse.
# There is a tradeof here between doing the least steps in :skip
# but increasing the number of skiprefs makes it slower to match
# them in pairs, and also increases the size of the pattern space,
# thus making *all* failing pattern matching slower.
# the number of '>' below can be changed. The optimum value probably
# depends on the sed interpreter and the machine.
/>>>>>>>>>>>>>>>>>>>>>*@/ {
s/>>*\(@@*\)/\1!>@/
b skiploop
}
# If we haven't finished skipping one expression at the end of the
# pattern space, either there is a bug in this script, or the program
# does not start with a complete unlambda expression.
/@|/ {
s/.*/Syntax error: unexpected end of file/
q
}
/>>*@@*[^@]/ b skiploop
# end of recurse
/@!/ {
# we need a new skip ref.
/##/ {
:newskref
# copy the next one in place
s/#\([^#][^;]*\)/\1,&/
# generate the next one
s/$/;/
:inca
s/#;/#a/
s/#[^#;]*[^#;];/&abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ;a~/
s/#\([^#;]*\)\([^#;]\);[^~]*\2\(;*[^~]\)[^~]*~/#\1\3/
/#[^;]*;/ b inca
# if coming from incref, go back there
/^</ b mutate
}
# get a skip ref among the free skip ref list and replace the !
s/@\(@*\)!\([^@!>]*\)>>*\([^#]*#\)\([^,]*,\)/{\4\2}\4>\1\3/
b skiploop
}
# end of skip - get the action char from last !
s/\(.\)!\([^>]*\)>*/\2>\1/
# After :skip
# '>e' for eval
s/>e/_/
# Evaluate the expression, then delete it with 'z:<'
s/>K/z:_/
# >f, >g, >h are part of the ```s[A][B][C] evaluation explained below.
s/>g/g:_/
/_/ b eval
# ```s[A][B][C] evaluation
# ------------------------
# _`T[A][B][C] -> `:`f>[A][B][C] -> `:`[A]>f[B][C]
# the remaining task is to evaluate [C] and replace [B][C] by [C]`[B][C].
# >f[B][C] -> f:g>[B][C] -> f:[B]>g[C] -> (eval C) f:[B]g:<[C]
# -> f:[B];h>[C] -> f:[B];[C]>h -> <[C]`[B]([C]) -> (incref) <[C]`[B][C]
# Note that ';' can only be inserted after [C] was evaluated, because
# evaluating [C] can need ';' too.
/>f/ {
s//f:g>/
b skip
}
/>h/ {
s//;/
s/f:\([^;:]*\);\([^;:]*\);/<\2`\1(\2)/
b incref
}
# delete
/>z/ {
s//)</
b delete
}
# Continuation (1) - `c<expr>
# ---------------------------
# `c<expr> first evaluates the expression, then surrounds it with ';'
# _`c[E] -> c:_[E] -> (eval E) c:<[E] -> ;[E]c> -> ;[E];
# At this stage the pattern space is either:
# {A};[E];{B}|...|[I]|...
# if there is a free ID [I], or:
# {A};[E];{B}|[I-1];...
# if all IDs from 1 to I-1 are taken.
# {A} and {B} denote the before and after parts of the unlambda state.
# The unlambda state will then become
# ({A})`:<[E]%[I]%({B})|...
# with a new continuation (added, or replacing the free ID) being:
# |[I];;{A};{B}
# Finally we jump to incref over ({A}) and ({B}) as they have been copied.
#
/>c/ {
s//;/
# try reusing a free ID
/^\([^;]*\);\([^;]*\);\([^|]*\)\(.*\)|\(a*\)|/ {
s//(\1)`:<\2%\5%(\3)\4|\5;;\1;\3|/
b incref
}
# else create our ID as being 1 plus the biggest (leftmost) ID
s/^\([^;]*\);\([^;]*\);\([^|]*\)|\(a*\)/(\1)`:<\2%\4a%(\3)|\4a;;\1;\3|\4/
b incref
}
# Continuation (2) - `<cont><expr>
# --------------------------------
# _`%[I]%[E] -> %[I]%_[E] -> (eval E) %[I]%<[E] -> ;[I];C>[E] -> ;[I];[E];
# At this stage the pattern space is:
# {a};[I];[E];{b}|...|[I];[N];{A};{B}|...
# where {a} and {b} represent parts of the current unlambda state,
# We first erase {a} and {b}, so that the refcount [N] of continuation [I]
# has the right value:
# -> C:<[I];[E]({a}{b})|...|[I];[N];{A};{B}|...
# -> C:<[I];[E]|...|[I];[N];{A};{B}|...
# Now, if '[N]' equals '', we will remove the continuation from the base,
# keeping [I] as a free identifier.
# -> [I];[E]|...|[I];;{A};{B}|...
# -> {A}<[E]{B}|...|[I]|...
# else if '[N]' is '[n]a', the continuation stays, so me must incref over
# the copied {A} and {B}:
# -> [I];[E]|...|[I];[n]a;{A};{B}|...
# -> ({A})<[E]({B})|...|[I];[n];{A};{B}|...
#
/>C/ {
s//;/
# first erase current state
s/\([^;]*\);\(a*;[^;]*\);\([^|]*\)/C:<\2(\1\3)/
b delete
}
# Called before running to check that the program contains exactly one
# expression
/>?/ {
/>?|/ {
s/>?//
b valid
}
s/.*>?//
s/|.*//
# revert ',y' into '.x'
s/,[a-n]/&:a@b(c)d;e|f<g>h_i`j!k#l{m}n~/g
s/,\([a-n]\)[^~]*\([^~]\)\1[^~]*~/.\2/g
# exit with error message
i\
Syntax error: trailing garbage
q
}
b done
# Increment the refcount of any continuation whose reference lays
# between ( ). Called for each copied zone after copy. Remove ( )
# and branch to :left when done.
:incref
s/(\([^{%)]*%\(a*\)%[^%)]*\)\(.*|\2;\)/\1(\3a/
s/(\([^{%)]*\))/\1/
# skip-ref mutation: {ab,...}ab, -> {cd,...}cd,
/([^{%)]*{/ {
# generate a new skip-ref if needed
/##/ {
s/^/</
b newskref
:mutate
s/^<//
}
# replace current skip ref by a new one for the copy
# the [^)]* is essential here, in case the original is still
# on the right from here (needed when copying continuation
# parts as the continuation refcounts are only updated to
# the right).
s/(\([^{%)]*{\)\([^,]*,\)\([^)]*}\)\2\([^#]*#\)\([^,]*,\)/\1\5(\3\5\4/
}
/(/ b incref
b left
# Delete between ( and ) included. If continuation references are found,
# decrement their refcount and mark them for deletion too (but keeping
# the free ID) if it goes below zero; branch to :left when done
:delete
s/([^}%)]*%\(a*\)%[^%)]*\(.*|\1;\)a/(\2/
s/([^}%)]*%\(a*\)%[^%)]*\(.*\)|\1;;\([^|]*\)|/(\2|\1(\3)|/
s/([^}%)]*)//
s/([^}%)]*}\([^,]*,\)\([^#]*#\)/(\2\1/
/(/ b delete
# < moves left over commands.
:left
s/:\([^:]*\)</:<\1/
/:</ !b done
# After left
# Delete one expression:
# z:<[A] -> (z>[A] -> ([A]>z -> ([A])< -> (delete) <
s/z:</(z>/
# Part of ```s[A][B][C] handling, see explanation above.
s/g:</;h>/
# Continuation handling
s/c:</;c>/
s/%\(a*\)%:</;\1;C>/
/>/ b skip
# `: means re-eval ` when next eval is done
/`:</ {
s//_`/
b eval
}
# Output functions all leave a '<'
# Print the hold buffer and empty it.
/r:</ {
s//</
x
s/.//
p
s/.*/ /
x
}
# Append the char to the hold buffer
/\(.\)\.:</ {
s//<\1/
H
s/<./</
x
s/\n[^<]*<\(.\).*/\1/
x
}
# Append the converted char to the hold buffer
/\(.\),:</ {
s//<\1/
H
s/<./</
x
# handle ',y' -> x conversion
s/\(\n\)[^<]*<\(.\).*/\1\2:a@b(c)d;e|f<g>h_i`j!k#l{m}n/
s/\n\(.\).*\(.\)\1.*/\2/
x
}
# Part of `<cont><expr> handling - see explanation above
/C:</ {
# if the reference count does not goes below zero, keep the continuation
# and incref over the duplicated parts.
/C:<\(a*;\)\([^|]*\)\(.*\)|\1a\(a*;\)\([^;]*\);\([^|]*\)/ {
s//(\5)<\2(\6)\3|\1\4\5;\6/
b incref
}
# else remove the continuation but keep the free ID.
s/C:<\(a*\);\([^|]*\)\(.*\)|\1;;\([^;]*\);\([^|]*\)/\4<\2\5\3|\1/
}
/</ b left
:done
# flush output if pending
x
/../ {
# remove the dumb first char
s/^ //
p
}
# quit
d
### colorized by sedsed, a debugger and code formatter for sed scripts
### original script: ftp://quatramaran.ens.fr/pub/madore/unlambda/contrib/unlambda.sed