#! /bin/sed -f

# Remove overstrikes produced by troff, replacing them with
# *...* (boldface) and _..._ (underline), which are suitable, for
# example, for enclosing man page extracts in source code.
#
# Requirements:
#   - GNU sed: the script uses the `\+' and `\xHH' extensions.
#   - A locale in which `°' is a single character, because `y' needs
#     source and destination strings of equal length.

# Converting backspaces to degree signs makes it easier to
# look at the sed script with less.  The backspace is spelled \x08
# so that no raw control character has to survive in this file.
y/\x08/°/

# Keep an untouched copy of the line in hold space; the justification
# part uses it to learn the width of the line as troff displayed it.
h

# --------------------------- first part, underlines

# No overstrikes at all: only justification may be needed.
/°/ !b justify

# No underlined character (_ BS x): go straight to the boldfaces.
/_°./ !b bold

# Add a _ at the beginning of underlined sequences
# \1 is a non-underlined character in the second regexp
s/^_°./_&/g
s/\([^°].\)\(_°.\)/\1_\2/g

# Remove the underlining sequence from all the characters
# but the last
: rem_
s/_°\(.\)\(_°.\)/\1\2/
t rem_

# Remove the underlining sequence from the last character
# too, and add a _ after it.
s/_°\(.\)/\1_/g

# --------------------------- second part, boldfaces

# Nothing overstruck is left: skip the boldface handling.
/°/ !b finish

: bold

# Add a * at the beginning of boldfaced sequences (x BS x)
# \1 is a non-boldfaced character in the second regexp
s/^\(.\)°\1/*&/g
s/\([^°].\)\(.\)°\2/\1*\2°\2/g

# Remove the boldfacing sequence from all the characters
# but the last
: rembold
s/\(.\)°\1\(.\)°\2/\1\2°\2/
t rembold

# Remove the boldfacing sequence from the last character
# too, and add a * after it.
s/\(.\)°\1/\1*/g

# --------------------------- finishing touches...

# *as*(*1*) --> *as(1)*
s/\*\([a-zA-Z0-9]*\)\*(\*\([a-zA-Z0-9]*\)\*)/*\1(\2)*/g

: finish

# _abc_ _def_ ---> _abc def_
# *abc* *def* ---> *abc def*
#     ^^^
s/\([*_]\)\( \+\)\1/\2/g

# Re-align TP paragraphs whose first line would be mis-aligned.
# NOTE(review): the runs of spaces in these patterns may have been
# collapsed to single spaces when the script was extracted from its
# HTML rendering -- confirm the intended widths against the original.
/^ [*_]/ {
  s/\( [*_].[*_] \) /\1/
  s/\( [*_]..[*_] \) /\1/
  s/\( [*_]...[*_] \) /\1/
  s/\( [*_]....[*_] \) /\1/
}

# ---------------------------- third part, justification
: justify

# In hold space, remove the sequences the easy way.  If the
# line was not 65 characters long, we're done.
x
s/.°\(.\)/\1/g
/^.\{65\}$/ !{
  # Bring the converted line back into pattern space and end the cycle.
  x
  b
}
x

# We had a 65 characters long line.  Re-justify it to
# 71 chars per line, to compensate for the *'s and _'s we
# introduced

# No spaces apart from the left margin?  If so, nothing to do.
/^ *[^ ]* / !b

# If the line is too long, shorten it.  This is a rare
# case (>3 bold-faces or underlines), so don't go to great
# lengths to ensure uniform spacing (like we do below).
: reduce
/^.\{72\}/ {
  s/^\( *[^ ].* \) /\1/
  t reduce
}

: widen
/^.\{71\}/ !{
  # Add a placeholder (a degree sign, standing for another space) into
  # the last run of spaces still present in the line.  The `^ *[^ ]'
  # in the regexp makes sure that the left margin is not touched.
  s/^\( *[^ ].*\)\( \+\)/\1°\2/
  t tilde

  # If not found, every run has been widened once: turn the
  # placeholders back into spaces and restart from the end of the line.
  y/°/ /
  b widen

  # Change the rest of that run of spaces to placeholders, so that
  # the next pass moves on to the previous run.
  : tilde
  s/° /°°/
  t tilde
  b widen
}

# The line is long enough; we still have to convert the placeholders
# back to spaces
y/°/ /

### colorized by sedsed, a sed script debugger/indenter/tokenizer/HTMLizer
### original script: http://pcsiwa.rett.polimi.it/~paolob/seders/scripts/untroff.sed