/*  HomeBank -- Free, easy, personal accounting for everyone.
 *  Copyright (C) 1995-2014 Maxime DOYEN
 *
 *  This file is part of HomeBank.
 *
 *  HomeBank is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  HomeBank is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */

#include "homebank.h"
#include "hb-encoding.h"

#define MYDEBUG 0

#if MYDEBUG
#define DB(x) (x);
#else
#define DB(x);
#endif

/* our global data */
extern struct HomeBank *GLOBALS;

/* = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = */

/*
 * The original versions of the following tables are taken from profterm
 *
 * Copyright (C) 2002 Red Hat, Inc.
 */

/* Descriptor returned by gedit_encoding_get_utf8(). */
static const GeditEncoding utf8_encoding = {
	GEDIT_ENCODING_UTF_8,
	"UTF-8",
	"Unicode"
};

/*
 * Candidate source encodings, tried in this order by homebank_utf8_convert().
 * gedit_encoding_get_from_index() indexes this table directly.
 * NOTE(review): this presumes the entries follow the order of the
 * GEDIT_ENCODING_* constants declared in hb-encoding.h -- confirm there.
 */
static const GeditEncoding encodings [] = {

	{ GEDIT_ENCODING_ISO_8859_1, "ISO-8859-1", "Western" },
	{ GEDIT_ENCODING_ISO_8859_2, "ISO-8859-2", "Central European" },
	{ GEDIT_ENCODING_ISO_8859_3, "ISO-8859-3", "South European" },
	{ GEDIT_ENCODING_ISO_8859_4, "ISO-8859-4", "Baltic" },
	{ GEDIT_ENCODING_ISO_8859_5, "ISO-8859-5", "Cyrillic" },
	{ GEDIT_ENCODING_ISO_8859_6, "ISO-8859-6", "Arabic" },
	{ GEDIT_ENCODING_ISO_8859_7, "ISO-8859-7", "Greek" },
	{ GEDIT_ENCODING_ISO_8859_8, "ISO-8859-8", "Hebrew Visual" },
	{ GEDIT_ENCODING_ISO_8859_8_I, "ISO-8859-8-I", "Hebrew" },
	{ GEDIT_ENCODING_ISO_8859_9, "ISO-8859-9", "Turkish" },
	{ GEDIT_ENCODING_ISO_8859_10, "ISO-8859-10", "Nordic" },
	{ GEDIT_ENCODING_ISO_8859_13, "ISO-8859-13", "Baltic" },
	{ GEDIT_ENCODING_ISO_8859_14, "ISO-8859-14", "Celtic" },
	{ GEDIT_ENCODING_ISO_8859_15, "ISO-8859-15", "Western" },
	{ GEDIT_ENCODING_ISO_8859_16, "ISO-8859-16", "Romanian" },

	{ GEDIT_ENCODING_UTF_7, "UTF-7", "Unicode" },
	{ GEDIT_ENCODING_UTF_16, "UTF-16", "Unicode" },
	{ GEDIT_ENCODING_UTF_16_BE, "UTF-16BE", "Unicode" },
	{ GEDIT_ENCODING_UTF_16_LE, "UTF-16LE", "Unicode" },
	{ GEDIT_ENCODING_UTF_32, "UTF-32", "Unicode" },
	{ GEDIT_ENCODING_UCS_2, "UCS-2", "Unicode" },
	{ GEDIT_ENCODING_UCS_4, "UCS-4", "Unicode" },

	{ GEDIT_ENCODING_ARMSCII_8, "ARMSCII-8", "Armenian" },
	{ GEDIT_ENCODING_BIG5, "BIG5", "Chinese Traditional" },
	{ GEDIT_ENCODING_BIG5_HKSCS, "BIG5-HKSCS", "Chinese Traditional" },
	{ GEDIT_ENCODING_CP_866, "CP866", "Cyrillic/Russian" },

	{ GEDIT_ENCODING_EUC_JP, "EUC-JP", "Japanese" },
	{ GEDIT_ENCODING_EUC_JP_MS, "EUC-JP-MS", "Japanese" },
	{ GEDIT_ENCODING_CP932, "CP932", "Japanese" },
	{ GEDIT_ENCODING_EUC_KR, "EUC-KR", "Korean" },
	{ GEDIT_ENCODING_EUC_TW, "EUC-TW", "Chinese Traditional" },

	{ GEDIT_ENCODING_GB18030, "GB18030", "Chinese Simplified" },
	{ GEDIT_ENCODING_GB2312, "GB2312", "Chinese Simplified" },
	{ GEDIT_ENCODING_GBK, "GBK", "Chinese Simplified" },
	{ GEDIT_ENCODING_GEOSTD8, "GEORGIAN-ACADEMY", "Georgian" }, /* FIXME GEOSTD8 ? */
	{ GEDIT_ENCODING_HZ, "HZ", "Chinese Simplified" },

	{ GEDIT_ENCODING_IBM_850, "IBM850", "Western" },
	{ GEDIT_ENCODING_IBM_852, "IBM852", "Central European" },
	{ GEDIT_ENCODING_IBM_855, "IBM855", "Cyrillic" },
	{ GEDIT_ENCODING_IBM_857, "IBM857", "Turkish" },
	{ GEDIT_ENCODING_IBM_862, "IBM862", "Hebrew" },
	{ GEDIT_ENCODING_IBM_864, "IBM864", "Arabic" },

	{ GEDIT_ENCODING_ISO_2022_JP, "ISO-2022-JP", "Japanese" },
	{ GEDIT_ENCODING_ISO_2022_KR, "ISO-2022-KR", "Korean" },
	{ GEDIT_ENCODING_ISO_IR_111, "ISO-IR-111", "Cyrillic" },
	{ GEDIT_ENCODING_JOHAB, "JOHAB", "Korean" },
	{ GEDIT_ENCODING_KOI8_R, "KOI8R", "Cyrillic" },
	{ GEDIT_ENCODING_KOI8__R, "KOI8-R", "Cyrillic" },
	{ GEDIT_ENCODING_KOI8_U, "KOI8U", "Cyrillic/Ukrainian" },

	{ GEDIT_ENCODING_SHIFT_JIS, "SHIFT_JIS", "Japanese" },
	{ GEDIT_ENCODING_TCVN, "TCVN", "Vietnamese" },
	{ GEDIT_ENCODING_TIS_620, "TIS-620", "Thai" },
	{ GEDIT_ENCODING_UHC, "UHC", "Korean" },
	{ GEDIT_ENCODING_VISCII, "VISCII", "Vietnamese" },

	{ GEDIT_ENCODING_WINDOWS_1250, "WINDOWS-1250", "Central European" },
	{ GEDIT_ENCODING_WINDOWS_1251, "WINDOWS-1251", "Cyrillic" },
	{ GEDIT_ENCODING_WINDOWS_1252, "WINDOWS-1252", "Western" },
	{ GEDIT_ENCODING_WINDOWS_1253, "WINDOWS-1253", "Greek" },
	{ GEDIT_ENCODING_WINDOWS_1254, "WINDOWS-1254", "Turkish" },
	{ GEDIT_ENCODING_WINDOWS_1255, "WINDOWS-1255", "Hebrew" },
	{ GEDIT_ENCODING_WINDOWS_1256, "WINDOWS-1256", "Arabic" },
	{ GEDIT_ENCODING_WINDOWS_1257, "WINDOWS-1257", "Baltic" },
	{ GEDIT_ENCODING_WINDOWS_1258, "WINDOWS-1258", "Vietnamese" }
};


/*
 * Look up an entry of the encodings table by position.
 *
 * index: position in the table, valid in [0, GEDIT_ENCODING_LAST).
 *
 * Returns a pointer into the static table (never to be freed), or NULL
 * when index is negative, not below GEDIT_ENCODING_LAST, or beyond the
 * end of the table.
 */
const GeditEncoding *
gedit_encoding_get_from_index (gint index)
{
	/* reject negative values too: they would read before the table */
	if (index < 0 || index >= GEDIT_ENCODING_LAST)
		return NULL;

	/* belt and braces in case the enum and the table ever diverge */
	if ((gsize) index >= G_N_ELEMENTS (encodings))
		return NULL;

	return &encodings [index];
}


/*
 * Returns the static UTF-8 encoding descriptor (never to be freed).
 */
const GeditEncoding *
gedit_encoding_get_utf8 (void)
{
	return &utf8_encoding;
}


/*
 * Try to convert a NUL-terminated buffer to UTF-8 by attempting each
 * encoding of the encodings table in order; the first conversion that
 * succeeds and yields valid UTF-8 wins.
 *
 * buffer:  NUL-terminated text in an unknown encoding; not modified or freed.
 * charset: optional out parameter; receives the charset name of the
 *          matching table entry (static string, do not free), or NULL
 *          when no encoding matched. May be NULL.
 *
 * Returns a newly allocated UTF-8 string the caller must g_free(), or
 * NULL when no candidate encoding worked.
 */
static gchar *homebank_utf8_convert(gchar *buffer, const gchar **charset)
{
gint i;

	DB( g_print("(homebank) homebank_utf8_convert\n") );

	for (i = 0; i < GEDIT_ENCODING_LAST; i++)
	{
	const GeditEncoding *enc;
	GError *conv_error = NULL;
	gchar *conv_buffer;

		enc = gedit_encoding_get_from_index(i);
		if (enc == NULL)
			break;

		DB( g_print(" -> should try %s\n", enc->charset) );

		conv_buffer = g_convert(buffer, -1, "UTF-8", enc->charset, NULL, NULL, &conv_error);

		/* only validate when there is a result: g_convert returns NULL on failure */
		if (conv_error == NULL && conv_buffer != NULL && g_utf8_validate(conv_buffer, -1, NULL))
		{
			DB( g_print (" -> file compatible with '%s'\n", enc->charset) );

			if (charset != NULL)
			{
				*charset = enc->charset;
			}
			return conv_buffer;
		}

		DB( g_print (" -> Couldn't convert from %s to UTF-8.\n", enc->charset) );

		/* release what this failed attempt allocated before trying the next one */
		g_clear_error(&conv_error);
		g_free(conv_buffer);
	}

	if (charset != NULL)
	{
		*charset = NULL;
	}
	return NULL;
}


/*
 * Ensure a buffer to be utf-8, and convert if necessary
 *
 * buffer: NUL-terminated string allocated with the GLib allocator, or NULL.
 *
 * Returns NULL when buffer is NULL. Returns buffer itself when it is
 * already valid UTF-8 or when no conversion could be found. Otherwise
 * buffer is freed and a newly allocated UTF-8 copy is returned, so the
 * caller must only use (and eventually g_free) the returned pointer.
 */
gchar *homebank_utf8_ensure(gchar *buffer)
{
gchar *converted;

	DB( g_print("(homebank) homebank_utf8_ensure\n") );

	if (buffer == NULL)
	{
		return NULL;
	}

	if (g_utf8_validate(buffer, -1, NULL))
	{
		DB( g_print(" -> is valid utf8: %d\n", TRUE) );
		return buffer;
	}

	DB( g_print(" -> is valid utf8: %d\n", FALSE) );

	converted = homebank_utf8_convert(buffer, NULL);
	if (converted == NULL)
	{
		/* no usable encoding found: hand the original back untouched */
		return buffer;
	}

	/* the converted copy replaces the original, which is released here */
	g_free(buffer);
	return converted;
}


/*
 * Guess the character encoding of a file from its content.
 *
 * filename: path of the file to read.
 *
 * Returns the charset name of the UTF-8 descriptor when the content is
 * valid UTF-8, the charset name of the first table entry that converts
 * the content to UTF-8 otherwise, or NULL when the file cannot be read
 * or no encoding matches. The returned string is static and must not
 * be freed.
 */
const gchar *homebank_file_getencoding(gchar *filename)
{
const gchar *charset = NULL;
gchar *buffer = NULL;
gsize length;
GError *error = NULL;

	DB( g_print("(homebank) test encoding\n") );

#if MYDEBUG
	{
	const gchar *locale_charset = NULL;

		/* the locale charset is only reported, it does not drive the detection */
		g_get_charset (&locale_charset);
		DB( g_print(" -> locale charset is '%s'\n", locale_charset) );
	}
#endif

	if (!g_file_get_contents (filename, &buffer, &length, &error))
	{
		/* unreadable file: drop the error object and report "unknown" */
		g_clear_error (&error);
		DB( g_print (" -> charset is '%s'\n", charset) );
		return NULL;
	}

	if (g_utf8_validate(buffer, -1, NULL))
	{
		DB( g_print(" -> is valid utf8: %d\n", TRUE) );
		charset = gedit_encoding_get_utf8()->charset;
	}
	else
	{
	gchar *converted;

		DB( g_print(" -> is valid utf8: %d\n", FALSE) );

		/* only the matching charset name matters here, not the converted text */
		converted = homebank_utf8_convert(buffer, &charset);
		DB( g_print(" -> converted charset match: '%s'\n", charset) );
		DB( g_print(" -> converted: '%p' %s\n", converted, converted) );
		g_free(converted);
	}

	g_free(buffer);

	DB( g_print (" -> charset is '%s'\n", charset) );

	return charset;
}