diff options
-rwxr-xr-x | ansi2unicode.sh | 1 | ||||
-rw-r--r-- | insertbom.c | 81 |
2 files changed, 82 insertions, 0 deletions
/*
 * insertbom.c - prepend a UTF-16 byte-order mark (BOM) to files lacking one.
 *
 * Usage: insertbom FILE...
 *
 * Companion change in ansi2unicode.sh: inside its per-file loop, after
 *
 *     iconv -f $FROM -t $TO "$i" > "$TMP"
 *     mv "$TMP" "$i"
 *
 * the script now also runs
 *
 *     insertbom "$i"
 */
#include <ctype.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h> /* off_t, even when compiling in strict ISO C mode */

/* Number of bytes in a UTF-16 byte-order mark. */
enum { BOM_LEN = 2 };

/*
 * Return the size in bytes of the file called `name`, or -1 if stat()
 * fails (errno is left as set by stat()).
 */
static off_t Size(const char *name)
{
    struct stat st = {0};

    if (stat(name, &st) != 0)
    {
        return -1;
    }

    return st.st_size;
}

/*
 * Inspect the file called `name` and, if it looks like BOM-less UTF-16,
 * rewrite it with the matching BOM in front of the original content.
 *
 * The guess is made from the first two bytes only: a zero byte followed by
 * a printable byte is taken as big-endian, a printable byte followed by a
 * zero byte as little-endian. Anything else is left untouched.
 *
 * The file is rewritten in place (opened with "wb"), so a failure while
 * writing can leave it truncated.
 *
 * Returns 0 when the file was handled (rewritten or deliberately skipped)
 * and -1 on an I/O or allocation error, which is reported on stderr.
 */
static int Handle(const char *name)
{
    FILE *fp = NULL;
    unsigned char *buff = NULL;
    unsigned char *data = NULL;
    size_t len = 0;
    size_t count = 0;
    int rc = -1;
    off_t size = Size(name);

    if (size < 0)
    {
        fprintf(stderr, "%s: %s\n", name, strerror(errno));
        return -1;
    }

    if (size < 3)
    {
        printf("%s: too short\n", name);
        return 0;
    }

    /* Make sure content plus BOM is representable as a size_t. */
    if ((uintmax_t)size > (uintmax_t)SIZE_MAX - BOM_LEN)
    {
        fprintf(stderr, "%s: too large\n", name);
        return -1;
    }
    len = (size_t)size;

    buff = malloc(len + BOM_LEN);
    if (buff == NULL)
    {
        fprintf(stderr, "%s: out of memory\n", name);
        return -1;
    }

    /*
     * Read the content BOM_LEN bytes into the buffer so the BOM can be
     * placed in front of it without shifting (and possibly clobbering)
     * any data.
     */
    data = buff + BOM_LEN;

    fp = fopen(name, "rb");
    if (fp == NULL)
    {
        fprintf(stderr, "%s: %s\n", name, strerror(errno));
        goto out;
    }
    count = fread(data, 1, len, fp);
    fclose(fp);
    if (count != len)
    {
        fprintf(stderr, "%s: short read\n", name);
        goto out;
    }

    if ((data[0] == 0xffu && data[1] == 0xfeu) ||
        (data[0] == 0xfeu && data[1] == 0xffu))
    {
        printf("%s: already has BOM\n", name);
        rc = 0;
        goto out;
    }

    if (data[0] == 0 && isprint(data[1]))
    {
        printf("%s: Guessing BE UTF-16\n", name);
        buff[0] = 0xfe;
        buff[1] = 0xff;
    }
    else if (data[1] == 0 && isprint(data[0]))
    {
        printf("%s: Guessing LE UTF-16\n", name);
        buff[0] = 0xff;
        buff[1] = 0xfe;
    }
    else
    {
        printf("%s: Leaving alone\n", name);
        rc = 0;
        goto out;
    }

    fp = fopen(name, "wb");
    if (fp == NULL)
    {
        fprintf(stderr, "%s: %s\n", name, strerror(errno));
        goto out;
    }
    count = fwrite(buff, 1, len + BOM_LEN, fp);
    /* fclose() flushes, so its result matters as much as fwrite()'s. */
    if (fclose(fp) != 0 || count != len + BOM_LEN)
    {
        fprintf(stderr, "%s: write failed\n", name);
        goto out;
    }

    rc = 0;

out:
    free(buff);
    return rc;
}

/*
 * Process every file named on the command line. Exits with EXIT_FAILURE
 * if any file could not be processed, EXIT_SUCCESS otherwise.
 */
int main(int argc, char *argv[])
{
    int status = EXIT_SUCCESS;
    int f;

    for (f = 1; f < argc; f++)
    {
        if (Handle(argv[f]) != 0)
        {
            status = EXIT_FAILURE;
        }
    }

    return status;
}