|
-- Converts an ANSI-encoded file to UTF-8.
-- Usage: luajit ansitoutf8.lua inputfile.iss [encoding] > outputfile.iss
-- The default encoding is code page 1252.
| 4 | + |
local ffi = require("ffi")

-- Win32 conversion routines (exported by kernel32.dll, which LuaJIT binds
-- into ffi.C on Windows). Input-only pointers are declared const to match
-- the documented prototypes.
ffi.cdef[[
  int MultiByteToWideChar(unsigned int CodePage, unsigned int dwFlags, const char* lpMultiByteStr, int cbMultiByte, wchar_t* lpWideCharStr, int cchWideChar);
  int WideCharToMultiByte(unsigned int CodePage, unsigned int dwFlags, const wchar_t* lpWideCharStr, int cchWideChar, char* lpMultiByteStr, int cbMultiByte, const char* lpDefaultChar, int* lpUsedDefaultChar);
]]

-- Windows code page identifier for UTF-8. Kept local so it does not leak
-- into the global environment; functions defined below capture it as an
-- upvalue.
local CP_UTF8 = 65001
| 12 | + |
--- Converts a string from a Windows code page to UTF-8.
-- Goes through UTF-16 using the Win32 MultiByteToWideChar and
-- WideCharToMultiByte functions. Both buffers are sized by first asking the
-- API for the required length, so input of any length is converted in full.
-- @param str      string encoded in `codepage`
-- @param codepage numeric Windows code page identifier of `str`
-- @return the UTF-8 encoded string
-- Raises an error if either Win32 conversion call reports failure.
function ansitoutf8(str, codepage)
  local len = #str
  -- Both Win32 calls reject zero-length input; there is nothing to convert.
  if len == 0 then
    return ""
  end

  local C = ffi.C

  -- Pass 1: with a zero-sized output buffer the call only reports how many
  -- UTF-16 code units the converted text needs.
  local wlen = C.MultiByteToWideChar(codepage, 0, str, len, nil, 0)
  if wlen <= 0 then
    error("MultiByteToWideChar failed for code page " .. tostring(codepage), 2)
  end
  local widestr = ffi.new("wchar_t[?]", wlen)
  wlen = C.MultiByteToWideChar(codepage, 0, str, len, widestr, wlen)
  if wlen <= 0 then
    error("MultiByteToWideChar failed for code page " .. tostring(codepage), 2)
  end

  -- Pass 2: same size-query-then-convert pattern for UTF-16 -> UTF-8.
  -- The explicit length (instead of -1) means no terminator is required and
  -- embedded NULs survive. lpDefaultChar and lpUsedDefaultChar must both be
  -- NULL when the target code page is CP_UTF8.
  local ulen = C.WideCharToMultiByte(CP_UTF8, 0, widestr, wlen, nil, 0, nil, nil)
  if ulen <= 0 then
    error("WideCharToMultiByte failed converting to UTF-8", 2)
  end
  local utf8str = ffi.new("char[?]", ulen)
  ulen = C.WideCharToMultiByte(CP_UTF8, 0, widestr, wlen, utf8str, ulen, nil, nil)
  if ulen <= 0 then
    error("WideCharToMultiByte failed converting to UTF-8", 2)
  end

  -- Use the reported byte count rather than scanning for a NUL terminator.
  return ffi.string(utf8str, ulen)
end
| 23 | + |
--- Strips a leading UTF-8 byte order mark from a string.
-- @param s string to inspect
-- @return `s` without its first three bytes when they are EF BB BF,
--         otherwise `s` unchanged
function removeBOM(s)
  local bom = "\239\187\191" -- bytes 0xEF 0xBB 0xBF
  if s:sub(1, #bom) ~= bom then
    return s
  end
  return s:sub(#bom + 1)
end
| 31 | + |
-- Script entry point: reads the file named by the first command-line
-- argument line by line, converts each line from the given code page
-- (second argument, default 1252) to UTF-8 and writes it to stdout.
local args = {...}
local filename = args[1]
local encoding = tonumber(args[2]) or 1252

if filename == nil then
  -- stdout is normally redirected into the output file, so the usage text
  -- goes to stderr and the script exits with a failure status.
  io.stderr:write("Usage: luajit ansitoutf8.lua filename [encoding]\n")
  os.exit(1)
end

local f, err = io.open(filename, "r")
if not f then
  io.stderr:write("Cannot open input file: ", tostring(err), "\n")
  os.exit(1)
end

-- A byte order mark is only meaningful at the very start of the file, so it
-- is stripped from the first line only; the same three bytes at the start of
-- a later line are ordinary text in the source code page.
local first = true
for l in f:lines() do
  if first then
    l = removeBOM(l)
    first = false
  end
  io.write(ansitoutf8(l, encoding), "\n")
end

f:close()
0 commit comments