#!/usr/bin/awk -f

#-
# Copyright (C) 2006-2008 Oliver Fromme
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# Script to generate a binary file from an ASCII description.
# This is a simplified and stripped-down awk version of bincodec.c.
# It has not been written with efficiency in mind, but it
# works fine for small files (< 100K) like fonts and icons.
#

#
# Print the message 'msg' on standard error and terminate
# the script with exit status 1.
#
function err (msg)
{
    print msg > "/dev/stderr"
    exit 1
}

#
# Handle a getline result that did not deliver a record.
#   result      - the value returned by getline (0 means end of
#                 input, anything else is treated as a read error).
#   eof_allowed - if true, end of input is a regular end of the
#                 run and the script exits with status 0.
# This function never returns to its caller.
#
function short_read (result, eof_allowed)
{
    # Anything other than a clean EOF is a read error.
    if (result != 0)
        err("Error reading from stdin!")

    # Clean EOF at a position where the format permits it.
    if (eof_allowed)
        exit(0)

    err("Unexpected end of input, " \
        "format spec requires more data!")
}

#
# Get a numerical value from the format string and return it.
# The variable chars_used is set to the number of characters
# swallowed from the string.
# The value can be any one of the following:
#   - A decimal value which must NOT begin with "0".
#   - A hexadecimal value starting with "0x".
#   - A binary value starting with "0b".
#   - An octal value starting with "0".
#
# Leading white space (blank, tab, CR, LF) is skipped and is
# included in chars_used.  Parsing stops at the first character
# that does not belong to the number; that character is not
# consumed.  The global 'fp' is only read, to report the format
# string position in the error message.
#
function getvalue (str    , c, tmp, digit)
{
    chars_used = 0
    while (chars_used < length(str)) {
        c = substr(str, ++chars_used, 1)
        if (c == " " || c == "\t" || c == "\n" || c == "\r")
            continue

        # Decimal: first digit is 1-9.
        if (c >= "1" && c <= "9") {
            tmp = substr(str, chars_used)
            match(tmp, /^[0-9]+/)
            # The first digit has already been counted.
            chars_used += RLENGTH - 1
            return substr(tmp, 1, RLENGTH) + 0
        }

        if (c != "0")
            err("Unrecognized numerical value (begins with '" c \
                "') at format string position " fp "!")

        # A leading "0": look at the next character to pick the base.
        c = substr(str, ++chars_used, 1)

        # Binary: "0b" followed by 0/1 digits.
        if (c == "b") {
            tmp = 0
            while (chars_used < length(str)) {
                c = substr(str, ++chars_used, 1)
                # Stop at the first character that is neither
                # "0" nor "1".  (This used to be "||", which is
                # always true and made every binary value 0.)
                if (c != "0" && c != "1") {
                    chars_used--
                    break
                }
                tmp = tmp * 2 + c
            }
            return tmp
        }

        # Hexadecimal: "0x" followed by hex digits.  The digits are
        # converted explicitly, because awk's string-to-number
        # conversion would read them as a decimal number.
        if (c == "x") {
            tmp = 0
            while (chars_used < length(str)) {
                c = substr(str, ++chars_used, 1)
                digit = index("0123456789abcdef", tolower(c))
                if (digit == 0) {
                    chars_used--
                    break
                }
                tmp = tmp * 16 + (digit - 1)
            }
            return tmp
        }

        # Octal: the "0" is followed by zero or more digits 0-7.
        # 'c' already holds the first candidate digit.
        tmp = 0
        for (;;) {
            if (c < "0" || c > "7") {
                chars_used--
                break
            }
            tmp = tmp * 8 + c
            if (chars_used >= length(str))
                break
            c = substr(str, ++chars_used, 1)
        }
        return tmp
    }
    err("Unexpected end of format string (expected numerical value)!")
}

#
# The putdata() function is used for binary I/O.
# It writes to stdout.  The data size is specified
# in bytes (at most 8 bytes).
#
# A negative value is written as its two's complement
# within 'size' bytes.
#
# TODO: Currently only little-endian byte order
# is supported (a.k.a. intel byte order).
#
# NOTE(review): awk numbers are normally doubles, so 8-byte
# values above 2^53 cannot be represented exactly.  Some awk
# implementations also emit a multi-byte sequence for
# printf "%c" with values >= 128 in UTF-8 locales; running
# with LC_ALL=C is presumably required there -- confirm for
# the awk in use.
#
function putdata (value, size)
{
    if (value < 0)
        value = (256 ^ size) + value
    while (size--) {
        # Emit the least significant byte first.
        printf "%c", value % 256
        value = int(value / 256)
    }
}

#
# Return the next word from stdin (words are white-space separated).
# Skips comments.  Updates the global variable 'lineno'.
#
# 'eof_allowed' is handed to short_read() when no further line
# can be read.  The words of the current line are kept in the
# global array 'words' (indexed by 'curword', counted by
# 'numwords').  Everything from the first COMMENT character to
# the end of a line is ignored.
#
function getword (eof_allowed    , result, cpos)
{
    curword++
    while (curword > numwords) {
        if ((result = getline) <= 0)
            short_read(result, eof_allowed)
        lineno++
        result = $0
        if ((cpos = index(result, COMMENT)) > 0)
            result = substr(result, 1, cpos - 1)
        numwords = split(result, words)
        curword = 1
    }
    return words[curword]
}

#
# Functions to encode a value read from stdin.
#
# Each of them reads one word with getword(), validates it and
# returns its numerical value.  'size' is the object size in
# bytes; 'eof_allowed' is passed through to getword().
#

#
# Binary representation: exactly 8 * size characters, each being
# CHAR_0 or CHAR_1 (looked up byte-wise in the bin2dec table).
#
function encode_bin (size, eof_allowed    , word, i, c, tmp)
{
    word = getword(eof_allowed)
    size *= 8
    if (length(word) != size)
        err("Line " lineno ": Expected " size \
            " digits for binary value, got " length(word) "!")
    tmp = 0
    for (i = 1; i < size; i += 8) {
        c = substr(word, i, 8)
        if (!(c in bin2dec))
            err("Line " lineno ": Illegal binary byte '" c "'!")
        tmp = (tmp * 256) + bin2dec[c]
    }
    return tmp
}

#
# Hexadecimal representation: exactly 2 * size hex digits,
# optionally preceded by "0x" or "0X".
#
function encode_hex (size, eof_allowed    , word, i, c, tmp)
{
    word = getword(eof_allowed)
    if (word ~ /^0[Xx]/)
        word = substr(word, 3)
    size *= 2
    if (length(word) != size)
        err("Line " lineno ": Expected " size \
            " digits for hex value, got " length(word) "!")
    tmp = 0
    for (i = 1; i <= size; i++) {
        c = substr(word, i, 1)
        if (c in hex2dec)
            tmp = (tmp * 16) + hex2dec[c]
        else
            err("Line " lineno ": Character '" c \
                "' illegal for hex value!")
    }
    return tmp
}

#
# Unsigned decimal representation.  The value must fit
# into 'size' bytes.
#
function encode_udec (size, eof_allowed    , word)
{
    word = getword(eof_allowed)
    if (word !~ /^[0-9]+$/)
        err("Line " lineno ": '" word \
            "' is not a valid unsigned decimal value!")
    if (word + 0 >= 256 ^ size)
        err("Line " lineno ": Decimal value " word \
            " too large for " size " bytes!")
    return word + 0
}

#
# Signed decimal representation.  Any value that fits into
# 'size' bytes, either as a signed or as an unsigned number,
# is accepted; putdata() cannot represent anything else.
#
function encode_sdec (size, eof_allowed    , word, limit)
{
    word = getword(eof_allowed)
    if (word !~ /^[-+]?[0-9]+$/)
        err("Line " lineno ": '" word \
            "' is not a valid signed decimal value!")
    limit = 256 ^ size
    if (word + 0 < -(limit / 2) || word + 0 >= limit)
        err("Line " lineno ": Decimal value " word \
            " out of range for " size " bytes!")
    return word + 0
}

#
# Octal representation: one or more digits 0-7.  The value
# must fit into 'size' bytes.
#
function encode_oct (size, eof_allowed    , word, i, c, tmp)
{
    word = getword(eof_allowed)
    if (length(word) < 1)
        err("Line " lineno ": Expected octal value, got nothing!")
    tmp = 0
    for (i = 1; i <= length(word); i++) {
        c = substr(word, i, 1)
        if (c >= "0" && c <= "7")
            tmp = (tmp * 8) + c
        else
            err("Line " lineno ": Character '" c \
                "' illegal for octal value!")
    }
    if (tmp >= 256 ^ size)
        err("Line " lineno ": Octal value " word \
            " too large for " size " bytes!")
    return tmp
}

#
# This is the usage() function from the C program
# from which this awk script was derived.
# I'm keeping this as a reference for future improvements.
# NOTE(review): the argument placeholders after -b and -f
# appear to have been lost from this copy.
#
#void
#usage(const char *msg)
#{
#    if (msg)
#        warnx(msg);
#    fprintf(stderr, "Usage: %s [-d] [-b ] [-f ]\n", me);
#    fprintf(stderr, "-d decode (default is to encode)\n");
#    fprintf(stderr, "-b specify characters for binary digits 0 and 1 "
#        "('%c' and '%c')\n", DEFAULT_CHAR_0, DEFAULT_CHAR_1);
#    fprintf(stderr, "-f specify format for encoding and decoding "
#        "(default is \"%s\")\n", DEFAULT_FORMAT);
#    fprintf(stderr, "Input is read from stdin, then encoded "
#        "(or decoded if the -d flag is present),\n"
#        "and finally written to stdout.\n");
#    exit(EX_USAGE);
#}
#

#
# Begin of the main program.
#
# The single command line argument is the format string.  It is
# compiled into the parallel arrays opcode[], optype[] and
# opargs[] and then executed against the words read from stdin.
#
BEGIN {
    CHAR_0 = "."
    CHAR_1 = "x"
    COMMENT = ";"
    MAX_LOOP_LEVELS = 42    # Arbitrary.

    # Opcodes for the compiled format string.
    OP_LOOP = 1
    OP_ENDLOOP = 2
    OP_GETBIN = 11
    OP_GETHEX = 12
    OP_GETUDEC = 13
    OP_GETSDEC = 14
    OP_GETOCT = 15
    OP_ADD = 20
    OP_SUB = 21
    OP_RSUB = 22
    OP_MUL = 23
    OP_DIV = 24
    OP_MOD = 25
    OP_AND = 26
    OP_OR = 27
    OP_XOR = 28
    OP_ASSIGN = 29

    lineno = 0
    curword = 0
    numwords = 0

    #
    # Create lookup tables for hex and binary values.
    #
    for (i = 0; i < 16; i++) {
        hex2dec[sprintf("%x", i)] = i
        hex2dec[sprintf("%X", i)] = i
    }
    # dec2bin[i] is built one bit per pass, most significant bit first.
    mod = 256
    while (mod > 1) {
        bit = mod / 2
        for (i = 0; i < 256; i++)
            dec2bin[i] = dec2bin[i] \
                ((i % mod < bit) ? CHAR_0 : CHAR_1)
        mod = bit
    }
    for (i = 0; i < 256; i++)
        bin2dec[dec2bin[i]] = i

    if (ARGC != 2 || ARGV[1] ~ /^-/)
        err("Usage: bincoder.awk <format>")
    format = ARGV[1]
    # Drop the argument so that getline reads from stdin.
    ARGC = 1

    #
    # Parse and compile the format string.
    #
    # fp   - position of the next character in the format string
    # ip   - index of the next instruction to be emitted
    # loop - current loop nesting level (-1 = outside any loop)
    #
    loop = -1
    fp = 1
    ip = 0
    for (;;) {
        ch = substr(format, fp++, 1)
        if (ch == "") {
            if (loop >= 0)
                err("Missing ')' at end of format string!")
            break
        }
        #
        # Ignore any whitespace and commas
        # between storage objects.
        #
        if (index(" \t\r\n,", ch))
            continue
        #
        # Parse first character of storage object format:
        # '(' - begin loop (followed by loop count)
        # ')' - end loop
        # 'B' - Byte
        # 'W' - Word (2 bytes)
        # 'L' - Longword (4 bytes)
        # 'Q' - Quadword (8 bytes)
        # Loops can be nested.  The loop count is either a
        # decimal value, a variable name (lower-case letter),
        # or an asterisk ('*') which means that the loop
        # extends until EOF.  At most one asterisk loop is
        # allowed, which must be top-level (i.e. not nested
        # within another loop), and it must be the last
        # object in the format string.  A loop count of
        # zero is permitted, causing the loop to be skipped.
        #
        if (ch == "(") {
            # Begin loop.
            if (++loop >= MAX_LOOP_LEVELS)
                err("maximum nested loop levels " \
                    MAX_LOOP_LEVELS " exceeded at " \
                    "format string position " (fp-1) "!")
            ch = substr(format, fp, 1)
            if (ch == "*") {
                # Loop until EOF.
                optype[ip] = 0
                fp++
            }
            else if (ch >= "a" && ch <= "z") {
                # Loop count taken from a variable.
                optype[ip] = 1
                opargs[ip] = ch
                fp++
            }
            else {
                # Literal loop count.
                optype[ip] = 2
                opargs[ip] = getvalue(substr(format, fp))
                fp += chars_used
            }
            opcode[ip++] = OP_LOOP
            continue
        }
        if (ch == ")") {
            # End loop.
            if (loop-- < 0)
                err("Parentheses are not balanced, " \
                    "missing '(' for ')' at format string " \
                    "position " (fp-1) "!")
            opcode[ip++] = OP_ENDLOOP
            continue
        }
        # For storage objects, optype[] holds the size in bytes.
        if (ch == "B")          # Byte
            optype[ip] = 1
        else if (ch == "W")     # Word (2 bytes)
            optype[ip] = 2
        else if (ch == "L")     # Long (4 bytes)
            optype[ip] = 4
        else if (ch == "Q")     # Quadword (8 bytes)
            optype[ip] = 8
        else
            err("Unexpected character '" ch \
                "' at format string position " (fp-1) "!")
        #
        # Check if the object specifier (Byte, Word etc.)
        # is followed by an optional base specifier:
        # 'b' - binary representation
        # 'x' - sedecimal ("hex") representation
        # 'u' - unsigned decimal representation
        # 'i' - signed decimal representation
        # 'o' - octal representation
        # If no base specifier is present, the default
        # is to assume binary representation.
        #
        ch = substr(format, fp++, 1)
        if (ch == "b")
            opcode[ip++] = OP_GETBIN
        else if (ch == "x")
            opcode[ip++] = OP_GETHEX
        else if (ch == "u")
            opcode[ip++] = OP_GETUDEC
        else if (ch == "i")
            opcode[ip++] = OP_GETSDEC
        else if (ch == "o")
            opcode[ip++] = OP_GETOCT
        else {
            # No base specifier; un-read the character.
            opcode[ip++] = OP_GETBIN
            fp--
        }
        #
        # Finally, an optional arithmetic expression may
        # be present (including variable assignment).
        # It consists of a sequence of <op><arg> pairs,
        # which is always executed from left to right.
        # <op> can be one of the usual arithmetic operators:
        # '+' - add
        # '-' - subtract
        # '_' - reverse subtract
        # '*' - multiply by
        # '/' - divide by
        # '%' - remainder of division by (i.e. modulo)
        # '&' - binary "and" with
        # '|' - binary "or" with
        # '^' - binary "xor" (exclusive-or) with
        # (The three binary operators are not implemented
        # in this awk version; see the disabled lines below.)
        # All operators work on unsigned 64bit integer values
        # (even if the current object is smaller than 64bit).
        # <arg> can be a number (decimal, or hexadecimal if
        # beginning with "0x", or binary if beginning with
        # "0b", or octal if beginning with "0") or a variable
        # name (lower-case letter).
        #
        # If <op> is '=', it must be followed by a variable
        # name, to which the current value is assigned.
        #
        # For example: "Wx=i+0x18*55=k" reads a word ('W')
        # using hex representation ('x') -- i.e. 4 digits --,
        # then assigns it to variable i ("=i"), then adds 0x18
        # ("+0x18"), then multiplies it by 55 ("*55"), and
        # finally assigns the result to variable k ("=k").
        #
        # Note that the syntax currently has no negation or
        # other unary operators.  Negation can be achieved
        # using a temporary variable, e.g.: "=t-t-t"
        # or by reverse subtraction from 0: "_0"
        #
        for (;;) {
            ch = substr(format, fp++, 1)
            if (ch == "+")      opcode[ip] = OP_ADD
            else if (ch == "-") opcode[ip] = OP_SUB
            else if (ch == "_") opcode[ip] = OP_RSUB
            else if (ch == "*") opcode[ip] = OP_MUL
            else if (ch == "/") opcode[ip] = OP_DIV
            else if (ch == "%") opcode[ip] = OP_MOD
            #else if (ch == "&") opcode[ip] = OP_AND
            #else if (ch == "|") opcode[ip] = OP_OR
            #else if (ch == "^") opcode[ip] = OP_XOR
            else if (ch == "=") {
                opcode[ip] = OP_ASSIGN
                ch = substr(format, fp++, 1)
                if (ch < "a" || ch > "z")
                    err("Illegal variable '" ch \
                        "' in assignment (not a " \
                        "letter) at format string " \
                        "position " (fp-1) "!")
                opargs[ip++] = ch
                continue
            }
            else {
                # Not an operator: end of this expression.
                fp--
                break
            }
            # Operand: optype 1 = variable, optype 2 = literal.
            ch = substr(format, fp, 1)
            if (ch >= "a" && ch <= "z") {
                optype[ip] = 1
                opargs[ip++] = ch
                fp++
            }
            else {
                optype[ip] = 2
                opargs[ip++] = getvalue(substr(format, fp))
                fp += chars_used
            }
        }
    }
    progsize = ip

    #
    # Execute the compiled format specification.
    #
    # skip >= 0 means that a loop with count zero is being
    # skipped; it holds the nesting level of that loop.
    # Note that each object is written by putdata() as soon as
    # it has been read; the arithmetic operators only change
    # 'value' for subsequent variable assignments.
    #
    ip = -1
    loop = -1
    skip = -1
    eof_allowed = 0
    for (;;) {
        ip++
        if (ip >= progsize) {
            getword(1)  # Try dummy read.
            err("Reached end of format spec, but there's " \
                "still input left (line number " lineno ")!")
        }
        op = opcode[ip]
        if (op == OP_LOOP) {
            loop++
            if (skip >= 0)
                continue
            if (optype[ip] == 0)
                loopcount[loop] = -1
            else if (optype[ip] == 1)
                loopcount[loop] = vars[opargs[ip]] + 0
            else
                loopcount[loop] = opargs[ip]
            if (loopcount[loop] == 0) {
                # Skip everything up to the matching ')'.
                skip = loop
                loopcount[loop] = 1
            }
            else {
                loopstart[loop] = ip
                if (loopcount[loop] == -1)
                    eof_allowed = 1
            }
            continue
        }
        if (op == OP_ENDLOOP) {
            if (skip >= 0) {
                if (loop-- == skip)
                    skip = -1
                continue
            }
            if (loopcount[loop] < 0 || --loopcount[loop]) {
                # Another iteration; the ip++ at the top of
                # the main loop steps past the OP_LOOP.
                ip = loopstart[loop]
                if (loopcount[loop] == -1)
                    eof_allowed = 1
            }
            else
                loop--
            continue
        }
        if (skip >= 0)
            continue
        if (op >= OP_GETBIN && op < OP_ADD) {
            size = optype[ip]
            if (op == OP_GETBIN)
                value = encode_bin(size, eof_allowed)
            else if (op == OP_GETHEX)
                value = encode_hex(size, eof_allowed)
            else if (op == OP_GETUDEC)
                value = encode_udec(size, eof_allowed)
            else if (op == OP_GETSDEC)
                value = encode_sdec(size, eof_allowed)
            else
                value = encode_oct(size, eof_allowed)
            putdata(value, size)
            eof_allowed = 0
            continue
        }
        eof_allowed = 0
        if (op == OP_ASSIGN) {
            vars[opargs[ip]] = value
            continue
        }
        if (optype[ip] == 1)
            arg = vars[opargs[ip]] + 0
        else
            arg = opargs[ip]
        if ((op == OP_DIV || op == OP_MOD) && arg == 0)
            err("Line " lineno ": Division by zero in " \
                "format expression!")
        if (op == OP_ADD)       value += arg
        else if (op == OP_SUB)  value -= arg
        # Reverse subtract: the operand minus the current value.
        else if (op == OP_RSUB) value = arg - value
        else if (op == OP_MUL)  value *= arg
        # The operators are documented to work on integers.
        else if (op == OP_DIV)  value = int(value / arg)
        else if (op == OP_MOD)  value %= arg
        #else if (op == OP_AND) value &= arg
        #else if (op == OP_OR)  value |= arg
        #else if (op == OP_XOR) value ^= arg
    }
}

# End.