Julius 4.2
|
00001 00035 /* 00036 * Copyright (c) 1991-2011 Kawahara Lab., Kyoto University 00037 * Copyright (c) 2000-2005 Shikano Lab., Nara Institute of Science and Technology 00038 * Copyright (c) 2005-2011 Julius project team, Nagoya Institute of Technology 00039 * All rights reserved 00040 */ 00041 00042 #include "app.h" 00043 00044 #ifdef CHARACTER_CONVERSION 00045 00046 #ifdef USE_WIN32_MULTIBYTE 00047 00048 #include <windows.h> 00049 #include <winnls.h> 00050 #include "libjcode/jlib.h" 00051 00052 static boolean euctosjis = FALSE; 00053 static boolean only_euc_conv = FALSE; 00054 00055 static unsigned int from_cp; 00056 static unsigned int to_cp; 00057 00067 boolean 00068 charconv_win32_setup(char *fromcode, char *tocode, boolean *enable_conv) 00069 { 00070 unsigned int src_p, dst_p; 00071 00072 if (tocode == NULL) { 00073 /* just disable conversion */ 00074 *enable_conv = FALSE; 00075 } else { 00076 /* determine source character set */ 00077 if (fromcode == NULL) { 00078 jlog("Error: charconv_win32: charset names of both input and output should be given.\n"); 00079 jlog("Error: charconv_win32: use \"-charconv from to\" instead of \"-kanji\".\n"); 00080 *enable_conv = FALSE; 00081 return FALSE; 00082 } 00083 euctosjis = FALSE; 00084 if (strmatch(fromcode, "euc-jp") 00085 || strmatch(fromcode, "euc") 00086 || strmatch(fromcode, "eucjp")) { 00087 /* pre-convert Japanese euc to Shift-jis */ 00088 euctosjis = TRUE; 00089 /* input = Shift_jis (codepage 932) */ 00090 from_cp = 932; 00091 } else if (strmatch(fromcode, "ansi")) { 00092 /* ANSI codepage (MBCS) ex. shift-jis in Windows XP Japanese edition.*/ 00093 from_cp = CP_ACP; 00094 } else if (strmatch(fromcode, "mac")) { 00095 /* Macintosh codepage */ 00096 from_cp = CP_MACCP; 00097 } else if (strmatch(fromcode, "oem")) { 00098 /* OEM localized default codepage */ 00099 from_cp = CP_OEMCP; 00100 } else if (strmatch(fromcode, "utf-7")) { 00101 /* UTF-7 codepage */ 00102 from_cp = CP_UTF7; 00103 } else if (strmatch(fromcode, "utf-8")) { 00104 /* UTF-8 codepage */ 00105 from_cp = CP_UTF8; 00106 } else if (strmatch(fromcode, "sjis") 00107 || strmatch(fromcode, "sjis-win") 00108 || strmatch(fromcode, "shift-jis") 00109 || strmatch(fromcode, "shift_jis")) { 00110 /* sjis codepage = 932 */ 00111 from_cp = 932; 00112 } else if (fromcode[0] >= '0' && fromcode[0] <= '9') { 00113 /* codepage number */ 00114 from_cp = atoi(fromcode); 00115 if (! IsValidCodePage(from_cp)) { 00116 jlog("Error: charconv_win32: codepage #%d not found\n", from_cp); 00117 *enable_conv = FALSE; 00118 return FALSE; 00119 } 00120 } else { 00121 jlog("Error: charconv_win32: unknown source codepage \"%s\"\n", fromcode); 00122 jlog("Error: charconv_win32: valids are \"ansi\", \"mac\", \"oem\", \"utf-7\", \"utf-8\" and codepage number\n"); 00123 jlog("Error: charconv_win32: the default local charcode can be speicified by \"ansi\".\n"); 00124 *enable_conv = FALSE; 00125 return FALSE; 00126 } 00127 /* determine the target character set */ 00128 if (strmatch(tocode, "ansi")) { 00129 /* ANSI codepage (MBCS) ex. shift-jis in Windows XP Japanese edition.*/ 00130 to_cp = CP_ACP; 00131 } else if (strmatch(tocode, "mac")) { 00132 /* Macintosh codepage */ 00133 to_cp = CP_MACCP; 00134 } else if (strmatch(tocode, "oem")) { 00135 /* OEM codepage */ 00136 to_cp = CP_OEMCP; 00137 } else if (strmatch(tocode, "utf-7")) { 00138 /* UTF-7 codepage */ 00139 to_cp = CP_UTF7; 00140 } else if (strmatch(tocode, "utf-8")) { 00141 /* UTF-8 codepage */ 00142 to_cp = CP_UTF8; 00143 } else if (strmatch(tocode, "sjis") 00144 || strmatch(tocode, "sjis-win") 00145 || strmatch(tocode, "shift-jis") 00146 || strmatch(tocode, "shift_jis")) { 00147 /* sjis codepage = 932 */ 00148 to_cp = 932; 00149 } else if (tocode[0] >= '0' && tocode[0] <= '9') { 00150 /* codepage number */ 00151 to_cp = atoi(tocode); 00152 if (! IsValidCodePage(to_cp)) { 00153 jlog("Error: charconv_win32: codepage #%d not found\n", to_cp); 00154 *enable_conv = FALSE; 00155 return FALSE; 00156 } 00157 } else { 00158 jlog("Error: charconv_win32: unknown target codepage \"%s\"\n", tocode); 00159 jlog("Error: charconv_win32: valids are \"ansi\", \"mac\", \"oem\", \"utf-7\", \"utf-8\" and codepage number\n"); 00160 jlog("Error: charconv_win32: the default local charcode can be speicified by \"ansi\".\n"); 00161 *enable_conv = FALSE; 00162 return FALSE; 00163 } 00164 00165 /* check whether the actual conversion is needed */ 00166 src_p = from_cp; 00167 dst_p = to_cp; 00168 if (src_p == CP_ACP) src_p = GetACP(); 00169 if (dst_p == CP_ACP) dst_p = GetACP(); 00170 if (src_p == CP_OEMCP) src_p = GetOEMCP(); 00171 if (dst_p == CP_OEMCP) dst_p = GetOEMCP(); 00172 00173 if (src_p == dst_p) { 00174 if (euctosjis == FALSE) { 00175 only_euc_conv = FALSE; 00176 *enable_conv = FALSE; 00177 } else { 00178 only_euc_conv = TRUE; 00179 *enable_conv = TRUE; 00180 } 00181 } else { 00182 only_euc_conv = FALSE; 00183 *enable_conv = TRUE; 00184 } 00185 } 00186 00187 return TRUE; 00188 } 00189 00190 #define UNICODE_BUFFER_SIZE 4096 ///< Buffer length to use for unicode conversion 00191 static wchar_t unibuf[UNICODE_BUFFER_SIZE]; 00192 00203 char * 00204 charconv_win32(char *instr, char *outstr, int maxoutlen) 00205 { 00206 int unilen, newlen; 00207 char *srcbuf; 00208 00209 srcbuf = instr; 00210 if (euctosjis == TRUE) { 00211 /* euc->sjis conversion */ 00212 //toStringSJIS(instr, outstr, maxoutlen); 00213 EUCtoSJIS(instr, outstr, maxoutlen); 00214 srcbuf = outstr; 00215 if (only_euc_conv) { 00216 return(outstr); 00217 } 00218 } 00219 00220 /* get length of unicode string */ 00221 unilen = MultiByteToWideChar(from_cp, 0, srcbuf, -1, NULL, 0); 00222 if (unilen <= 0) { 00223 jlog("Error: charconv_win32: conversion error?\n"); 00224 return(instr); 00225 } 00226 if (unilen > UNICODE_BUFFER_SIZE) { 00227 jlog("Error: charconv_win32: unicode buffer size exceeded (%d > %d)!\n", unilen, UNICODE_BUFFER_SIZE); 00228 return(instr); 00229 } 00230 /* convert source string to unicode */ 00231 MultiByteToWideChar(from_cp, 0, srcbuf, -1, unibuf, unilen); 00232 /* get length of target string */ 00233 newlen = WideCharToMultiByte(to_cp, 0, unibuf, -1, outstr, 0, NULL, NULL); 00234 if (newlen <= 0) { 00235 jlog("Error: charconv_win32: conversion error?\n"); 00236 return(instr); 00237 } 00238 if (newlen > maxoutlen) { 00239 jlog("Error: charconv_win32: target buffer size exceeded (%d > %d)!\n", newlen, maxoutlen); 00240 return(instr); 00241 } 00242 /* convert unicode to target string */ 00243 WideCharToMultiByte(to_cp, 0, unibuf, -1, outstr, newlen, NULL, NULL); 00244 00245 return(outstr); 00246 } 00247 00248 #endif /* USE_WIN32_MULTIBYTE */ 00249 00250 #endif /* CHARACTER_CONVERSION */