| /* NameDecoder.java -- Decodes names of OpenType and TrueType fonts. |
| Copyright (C) 2006 Free Software Foundation, Inc. |
| |
| This file is part of GNU Classpath. |
| |
| GNU Classpath is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 2, or (at your option) |
| any later version. |
| |
| GNU Classpath is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GNU Classpath; see the file COPYING. If not, write to the |
| Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
| 02110-1301 USA. |
| |
| Linking this library statically or dynamically with other modules is |
| making a combined work based on this library. Thus, the terms and |
| conditions of the GNU General Public License cover the whole |
| combination. |
| |
| As a special exception, the copyright holders of this library give you |
| permission to link this library with independent modules to produce an |
| executable, regardless of the license terms of these independent |
| modules, and to copy and distribute the resulting executable under |
| terms of your choice, provided that you also meet, for each linked |
| independent module, the terms and conditions of the license of that |
| module. An independent module is a module which is not derived from |
| or based on this library. If you modify this library, you may extend |
| this exception to your version of the library, but you are not |
| obligated to do so. If you do not wish to do so, delete this |
| exception statement from your version. */ |
| |
| package gnu.java.awt.font.opentype; |
| |
| import java.io.UnsupportedEncodingException; |
| import java.nio.ByteBuffer; |
| import java.util.Locale; |
| |
| |
/**
 * A utility class that helps with decoding the names of OpenType
 * and TrueType fonts.
 *
 * <p>All methods are static. The language tables in this class are
 * positional: the numeric language identifier of a platform is the
 * index of a two-letter entry. Unassigned identifiers are padded
 * with blanks, which is why the tables are assembled with explicit
 * padding calls instead of relying on whitespace inside literals.
 *
 * @author Sascha Brawer (brawer@dandelis.ch)
 */
public class NameDecoder
{
  /** Name ID 0: the copyright notice. */
  public static final int NAME_COPYRIGHT = 0;


  /**
   * Specifies the name of the family to which a font belongs, for
   * example “Univers”.
   */
  public static final int NAME_FAMILY = 1;


  /**
   * Specifies the name of the font inside its family, for
   * example “Light”.
   */
  public static final int NAME_SUBFAMILY = 2;


  /** Name ID 3: the unique font identifier. */
  public static final int NAME_UNIQUE = 3;


  /**
   * Specifies the full human-readable name of a font, for example
   * “Univers Light”
   */
  public static final int NAME_FULL = 4;


  /** Name ID 5: the version string. */
  public static final int NAME_VERSION = 5;


  /**
   * Specifies the PostScript name of a font, for example
   * “Univers-Light”.
   */
  public static final int NAME_POSTSCRIPT = 6;


  /* The remaining name IDs. The value 15 is not defined here. */
  public static final int NAME_TRADEMARK = 7;
  public static final int NAME_MANUFACTURER = 8;
  public static final int NAME_DESIGNER = 9;
  public static final int NAME_DESCRIPTION = 10;
  public static final int NAME_VENDOR_URL = 11;
  public static final int NAME_DESIGNER_URL = 12;
  public static final int NAME_LICENSE = 13;
  public static final int NAME_LICENSE_URL = 14;
  public static final int NAME_PREFERRED_FAMILY = 16;
  public static final int NAME_PREFERRED_SUBFAMILY = 17;
  public static final int NAME_FULL_MACCOMPATIBLE = 18;
  public static final int NAME_SAMPLE_TEXT = 19;
  public static final int NAME_POSTSCRIPT_CID = 20;


  private static final int PLATFORM_MACINTOSH = 1;
  private static final int PLATFORM_MICROSOFT = 3;


  /** Size in bytes of the name table header (format, count, offset). */
  private static final int HEADER_SIZE = 6;

  /** Size in bytes of one name record (six 16-bit fields). */
  private static final int RECORD_SIZE = 12;

  /**
   * Mask that extracts the primary language ID from a Microsoft
   * locale ID. The same mask is used for choosing the code page.
   */
  private static final int MS_PRIMARY_LANGUAGE_MASK = 0x3ff;


  /**
   * Retrieves a name from the name table of a font.
   *
   * <p>The table is scanned in record order. For every name other
   * than {@link #NAME_POSTSCRIPT}, the first record with the
   * requested name ID whose language matches <code>locale</code> and
   * whose text can be decoded is returned. PostScript names are
   * returned from the first record with that name ID, irrespective
   * of platform and language.
   *
   * <p>The position of <code>nameTable</code> is not modified; the
   * method works on a duplicate of the buffer.
   *
   * @param nameTable the content of the <code>name</code> table,
   * starting at index 0, or <code>null</code>.
   *
   * @param name the ID of the name to retrieve, for example {@link
   * #NAME_FAMILY}.
   *
   * @param locale the locale whose language is wanted, or
   * <code>null</code> to accept a record in any language.
   *
   * @return the decoded name, or <code>null</code> if
   * <code>nameTable</code> is <code>null</code>, is not in format 0,
   * is truncated, or contains no suitable record.
   */
  public static String getName(ByteBuffer nameTable,
                               int name, Locale locale)
  {
    ByteBuffer table;
    int numRecords;
    int macLanguage, msLanguage;
    int offset;
    int namePlatform, nameEncoding, nameLanguage, nameID, nameLen;
    int nameStart;
    String result;
    boolean match;

    if (nameTable == null)
      return null;

    /* A duplicate has its own position, so the caller's buffer is
     * left untouched. OpenType data is big-endian, which is the byte
     * order of a freshly duplicated buffer.
     */
    table = nameTable.duplicate();
    table.position(0);
    if (table.remaining() < HEADER_SIZE)
      return null;

    /* We understand only format 0 of the name table. */
    if (table.getShort() != 0)
      return null;

    macLanguage = getMacLanguageCode(locale);
    msLanguage = getMicrosoftLanguageCode(locale);

    /* All fields are unsigned 16-bit quantities; getShort() would
     * sign-extend values of 0x8000 and above.
     */
    numRecords = table.getShort() & 0xffff;
    offset = table.getShort() & 0xffff;

    for (int i = 0; i < numRecords; i++)
      {
        /* Stop at a truncated record array instead of underflowing. */
        if (table.remaining() < RECORD_SIZE)
          break;

        namePlatform = table.getShort() & 0xffff;
        nameEncoding = table.getShort() & 0xffff;
        nameLanguage = table.getShort() & 0xffff;
        nameID = table.getShort() & 0xffff;
        nameLen = table.getShort() & 0xffff;
        nameStart = offset + (table.getShort() & 0xffff);

        if (nameID != name)
          continue;

        // Handle PS separately as it can be only ASCII, although
        // possibly encoded as UTF-16BE.
        if (name == NAME_POSTSCRIPT)
          {
            // Peek at the top byte, but only if there is one.
            if ((nameLen > 0) && (nameStart < table.limit())
                && (table.get(nameStart) == 0))
              result = decodeName("UTF-16BE", table, nameStart, nameLen);
            else
              result = decodeName("ASCII", table, nameStart, nameLen);
            return result;
          }

        match = false;
        switch (namePlatform)
          {
          case PLATFORM_MACINTOSH:
            match = (locale == null)
              || (nameLanguage == macLanguage)
              || isMacScriptVariant(macLanguage, nameLanguage);
            break;

          case PLATFORM_MICROSOFT:
            match = (locale == null)
              || ((nameLanguage & MS_PRIMARY_LANGUAGE_MASK) == msLanguage);
            break;
          }

        if (match)
          {
            result = decodeName(namePlatform, nameEncoding, nameLanguage,
                                table, nameStart, nameLen);
            if (result != null)
              return result;
          }
      }

    return null;
  }


  /**
   * Checks whether a MacOS language code denotes the same language
   * as the code chosen by {@link #getMacLanguageCode}, but written
   * in a different script.
   *
   * @param macLanguage the code returned by {@link
   * #getMacLanguageCode}.
   *
   * @param nameLanguage the language code of a name record.
   *
   * @return <code>true</code> if <code>nameLanguage</code> is another
   * script variant of <code>macLanguage</code>.
   */
  private static boolean isMacScriptVariant(int macLanguage,
                                            int nameLanguage)
  {
    switch (macLanguage)
      {
      case 49: /* Azerbaijani/Cyrillic */
        return (nameLanguage == /* Azerbaijani/Arabic */ 50)
          || (nameLanguage == /* Azerbaijani/Roman */ 150);

      case 57: /* Mongolian/Mongolian */
        return nameLanguage == /* Mongolian/Cyrillic */ 58;

      case 83: /* Malay/Roman */
        return nameLanguage == /* Malay/Arabic */ 84;

      default:
        return false;
      }
  }


  /**
   * Creates a string consisting of blanks.
   *
   * @param count the number of blanks.
   */
  private static String spaces(int count)
  {
    StringBuffer buf = new StringBuffer(count);
    for (int i = 0; i < count; i++)
      buf.append(' ');
    return buf.toString();
  }


  /**
   * Creates the padding for a run of unassigned entries in one of
   * the two-letter language tables.
   *
   * @param entries the number of unassigned language identifiers.
   */
  private static String blankEntries(int entries)
  {
    return spaces(2 * entries);
  }


  /**
   * The language codes used by the Macintosh operating system. MacOS
   * defines numeric language identifiers in the range [0 .. 95] and
   * [128 .. 150]. To map this numeric identifier into an ISO 639
   * language code, multiply it by two and take the substring at that
   * position. Identifiers without a two-letter code are represented
   * by two blanks.
   *
   * <p>ISO 639 has revised the code for some languages, namely
   * <code>he</code> for Hebrew (formerly <code>iw</code>),
   * <code>yi</code> (formerly <code>ji</code>), and <code>id</code>
   * for Indonesian (formerly <code>in</code>). In those cases, this
   * table intentionally contains the older, obsolete code;
   * {@link #findLanguageCode} maps the revised codes onto them.
   *
   * @see <a href=
   * "http://www.unicode.org/unicode/onlinedat/languages.html"
   * >Language Codes: ISO 639, Microsoft and Macintosh</a>
   */
  private static final String macLanguageCodes
    //   0 ..   9
    = "enfrdeitnlsvesdaptno"
    //  10 ..  19
    + "iwjaarfielismttrhrzh"
    //  20 ..  28, then 29 unassigned
    + "urhithkoltplhuetlv" + blankEntries(1)
    //  30 ..  39
    + "fofaruzhnlgdsqrocssk"
    //  40 ..  49
    + "sljisrmkbgukbeuzkkaz"
    //  50 ..  59
    + "azhykamokytgtkmnmnps"
    //  60 ..  69
    + "kukssdbonesamrbnasgu"
    //  70 ..  79
    + "paormlkntatesimykmlo"
    //  80 ..  86, then 87 unassigned, then 88 .. 89
    + "viintlmsmsamti" + blankEntries(1) + "sosw"
    //  90 ..  91, then 92 unassigned, then 93 .. 94
    + "rwrn" + blankEntries(1) + "mgeo"
    //  95 .. 127 unassigned
    + blankEntries(33)
    // 128 .. 129
    + "cyeu"
    // 130 .. 139
    + "calaqugnayttugtsjwsu"
    // 140 .. 149
    + "glafbriugdgvgatoelkl"
    // 150
    + "az";


  /**
   * The primary language IDs used by the Microsoft operating systems.
   * The entry for a primary language ID starts at twice that ID;
   * IDs without a two-letter code are represented by two blanks.
   *
   * <p>ISO 639 has revised the code for some languages, namely
   * <code>he</code> for Hebrew (formerly <code>iw</code>),
   * <code>yi</code> (formerly <code>ji</code>), and <code>id</code>
   * for Indonesian (formerly <code>in</code>). In those cases, this
   * table intentionally contains the older, obsolete code;
   * {@link #findLanguageCode} maps the revised codes onto them.
   *
   * @see <a href=
   * "http://www.unicode.org/unicode/onlinedat/languages.html"
   * >Language Codes: ISO 639, Microsoft and Macintosh</a>
   */
  private static final String microsoftLanguageCodes
    //   0 unassigned, then 1 .. 9
    = blankEntries(1) + "arbgcazhcsdadeelen"
    //  10 ..  19
    + "esfifriwhuisitjakonl"
    //  20 ..  29
    + "noplptrmrorushsksqsv"
    //  30 ..  39
    + "thtrurinukbesletlvlt"
    //  40 ..  45, then 46 unassigned
    + "tgfavihyazeu" + blankEntries(1)
    //  47, then 48 unassigned, then 49, then 50 .. 51 unassigned
    + "mk" + blankEntries(1) + "ts" + blankEntries(2)
    //  52 ..  58, then 59 unassigned
    + "xhzuafkafohimt" + blankEntries(1)
    //  60 ..  69
    + "gajimskkkyswtkuzttbn"
    //  70 ..  79
    + "paguortateknmlasmrsa"
    //  80 ..  86, then 87 .. 88 unassigned, then 89
    + "mnbocykmlomygl" + blankEntries(2) + "sd"
    //  90 unassigned, 91, 92 unassigned, 93 .. 94, 95 unassigned
    + blankEntries(1) + "si" + blankEntries(1) + "iuam" + blankEntries(1)
    //  96 .. 100, then 101 .. 103 unassigned
    + "ksnefypstl" + blankEntries(3)
    // 104, then 105 unassigned, then 106, then 107 .. 113 unassigned
    + "ha" + blankEntries(1) + "yo" + blankEntries(7)
    // 114 .. 116, then 117 unassigned, then 118 .. 119
    + "omtign" + blankEntries(1) + "laso";


  /**
   * The Windows code page for each Microsoft primary language ID,
   * indexed by that ID. A blank means that no code page is known; a
   * question mark means that the code page depends on the complete
   * locale ID. See {@link #getMicrosoftCharsetName} for the meaning
   * of the other characters.
   *
   * <p>NOTE(review): entries 48 .. 99 were laid out by matching the
   * code page characters against the languages in {@link
   * #microsoftLanguageCodes} (af, fo, ms, kk, ky, sw, uz, tt, mn, cy,
   * gl). Please confirm against Microsoft's list of code pages per
   * language.
   */
  private static final String microsoftCodePages
    //   0 ..  39
    = " 612D022322225022EC2202201?002A462110777"
    //  40 ..  47
    + " 68 ?2 1"
    //  48 ..  53 unknown, then 54 .. 56
    + spaces(6) + "2 2"
    //  57 ..  61 unknown, then 62 .. 68
    + spaces(5) + "2112 ?1"
    //  69 ..  79 unknown, then 80 .. 82
    + spaces(11) + "1 2"
    //  83 ..  85 unknown, then 86, then 87 .. 99 unknown
    + spaces(3) + "2" + spaces(13);


  /**
   * Maps a Java Locale into a MacOS language code.
   *
   * <p>For languages that are written in several script systems,
   * MacOS defines multiple language codes. Java Locales have a
   * variant which could be used for that purpose, but a small
   * test program revealed that with Sun's JDK 1.4.1_01, only two
   * of 134 available Locales have a variant tag (namely no_NO_NY
   * and th_TH_TH).</p>
   *
   * <p>The following cases are problematic:
   *
   * <ul> <li>Azerbaijani (az): The MacOS language code is 49 if
   * Azerbaijani is written in the Cyrillic script; 50 if written in
   * the Arabic script; 150 if written in the Roman script. This
   * method will always return 49 for the Azerbaijani locale.</li>
   *
   * <li>Mongolian (mn): The MacOS language code is 57 if Mongolian is
   * written in the Mongolian script; 58 if written in the Cyrillic
   * script. This method will always return 57 for the Mongolian
   * locale.</li>
   *
   * <li>Malay (ms): The MacOS language code is 83 if Malay is written
   * in the Roman script; 84 if written in the Arabic script. This
   * method will always return 83 for the Malay locale.</li> </ul>
   *
   * @param loc the locale to map, or <code>null</code>.
   *
   * @return a MacOS language code, or -1 if <code>loc</code> is
   * <code>null</code> or if there is no such code for
   * <code>loc</code>’s language.
   */
  private static int getMacLanguageCode(Locale loc)
  {
    int code;
    String country;

    if (loc == null)
      return -1;

    code = findLanguageCode(loc.getLanguage(), macLanguageCodes);
    switch (code)
      {
      case 19:
        /* Traditional Chinese (MacOS language #19) and Simplified
         * Chinese (MacOS language #33) both have "zh" as their ISO 639
         * code. The country tells them apart; the comparison is made
         * on the country alone so that locales with a variant are
         * recognized as well.
         */
        country = loc.getCountry();
        if ("CN".equals(country) || "SG".equals(country))
          code = 33;
        break;

        // Other special cases would be 49, 57 and 83, but we do not
        // know what to do about them. See the method documentation for
        // details.
      }

    return code;
  }


  /**
   * Maps a Java Locale into a Microsoft language code.
   *
   * @param locale the locale to map, or <code>null</code>.
   *
   * @return the Microsoft primary language ID, or -1 if
   * <code>locale</code> is <code>null</code> or if its language is
   * not in the table.
   */
  private static int getMicrosoftLanguageCode(Locale locale)
  {
    String isoCode;
    int code;

    if (locale == null)
      return -1;

    isoCode = locale.getLanguage();
    code = findLanguageCode(isoCode, microsoftLanguageCodes);
    if (code == -1)
      {
        if (isoCode.equals("hr") || isoCode.equals("sr"))
          {
            /* Microsoft uses code 26 for "sh" (Serbo-Croatian),
             * "hr" (Croatian) and "sr" (Serbian). Our table contains
             * "sh".
             */
            code = 26;
          }
        else if (isoCode.equals("gd"))
          {
            /* Microsoft uses code 60 for "gd" (Scottish Gaelic) and
             * "ga" (Irish Gaelic). Our table contains "ga".
             */
            code = 60;
          }
      }
    return code;
  }


  /**
   * Looks up a two-letter language code in a positional table.
   *
   * <p>Only even positions are examined. Otherwise, we could match
   * with the second letter of one language and the first of another
   * one.
   *
   * @param lang the ISO 639 code to look for. The revised codes
   * <code>he</code>, <code>yi</code> and <code>id</code> are treated
   * like the obsolete codes <code>iw</code>, <code>ji</code> and
   * <code>in</code> that the tables contain.
   *
   * @param langCodes the table, with one two-letter entry per
   * numeric language identifier.
   *
   * @return the index of the first matching entry, or -1 if
   * <code>lang</code> is <code>null</code>, is not two letters long,
   * or is not in the table.
   */
  private static int findLanguageCode(String lang, String langCodes)
  {
    if ((lang == null) || (lang.length() != 2))
      return -1;

    /* Blanks mark unassigned entries and must never match. */
    if (lang.indexOf(' ') >= 0)
      return -1;

    /* Locale.getLanguage() returns either form, depending on the
     * version of the class library.
     */
    if (lang.equals("he"))
      lang = "iw";
    else if (lang.equals("yi"))
      lang = "ji";
    else if (lang.equals("id"))
      lang = "in";

    for (int pos = 0; pos + 2 <= langCodes.length(); pos += 2)
      {
        if (langCodes.regionMatches(pos, lang, 0, 2))
          return pos / 2;
      }

    return -1;
  }


  /**
   * Decodes the text of a name record whose charset is given by its
   * platform, encoding and language.
   *
   * @return the decoded text, or <code>null</code> if the charset is
   * not known or the text cannot be decoded.
   */
  private static String decodeName(int platform, int encoding, int language,
                                   ByteBuffer buffer, int offset, int len)
  {
    String charsetName = getCharsetName(platform, language, encoding);
    if (charsetName == null)
      return null;

    return decodeName(charsetName, buffer, offset, len);
  }


  /**
   * Decodes a range of bytes with the named charset. The position of
   * <code>buffer</code> is the same after the call as before.
   *
   * @param charsetName the name of the charset.
   *
   * @param buffer the buffer that contains the text.
   *
   * @param offset the index of the first byte of the text.
   *
   * @param len the length of the text in bytes.
   *
   * @return the decoded text, or <code>null</code> if the range does
   * not lie inside the buffer or if the charset is not supported.
   */
  private static String decodeName(String charsetName,
                                   ByteBuffer buffer, int offset, int len)
  {
    byte[] byteBuf;
    int oldPosition;

    /* Malformed fonts may point outside of the table. */
    if ((offset < 0) || (len < 0) || (len > buffer.limit() - offset))
      return null;

    byteBuf = new byte[len];
    oldPosition = buffer.position();
    try
      {
        buffer.position(offset);
        buffer.get(byteBuf);
      }
    finally
      {
        buffer.position(oldPosition);
      }

    try
      {
        return new String(byteBuf, charsetName);
      }
    catch (UnsupportedEncodingException ignored)
      {
        /* Deliberately ignored: the caller goes on with the next
         * name record, which may use a supported charset.
         */
        return null;
      }
  }


  /**
   * Maps a MacOS language code into a Java Locale.
   *
   * @param macLanguageCode the MacOS language code for
   * the language whose Java locale is to be retrieved.
   *
   * @return a suitable Locale, or <code>null</code> if
   * the mapping cannot be performed.
   */
  private static Locale getMacLocale(int macLanguageCode)
  {
    String isoCode;
    int start;

    switch (macLanguageCode)
      {
      case 0: return Locale.ENGLISH;
      case 1: return Locale.FRENCH;
      case 2: return Locale.GERMAN;
      case 3: return Locale.ITALIAN;
      case 11: return Locale.JAPANESE;
      case 23: return Locale.KOREAN;
      case 19: return Locale.TRADITIONAL_CHINESE;
      case 33: return Locale.SIMPLIFIED_CHINESE;
      }

    /* The table has one two-letter entry per language code. */
    if ((macLanguageCode < 0)
        || (macLanguageCode >= macLanguageCodes.length() / 2))
      return null;

    start = macLanguageCode << 1;
    isoCode = macLanguageCodes.substring(start, start + 2);
    if (isoCode.charAt(0) == ' ')
      return null;

    return new Locale(isoCode);
  }



  /**
   * Maps a Windows LCID into a Java Locale.
   *
   * @param lcid the Windows language ID whose Java locale
   * is to be retrieved.
   *
   * @return a suitable Locale, or <code>null</code> if
   * the mapping cannot be performed.
   */
  private static Locale getWindowsLocale(int lcid)
  {
    /* FIXME: This is grossly incomplete. */
    switch (lcid)
      {
      case 0x0407: return Locale.GERMAN;
      case 0x0408: return new Locale("el", "GR");
      case 0x0409: return Locale.ENGLISH;
      case 0x040b: return new Locale("fi");
      case 0x040c: return Locale.FRENCH;
      case 0x0416: return new Locale("pt", "BR");
      case 0x0807: return new Locale("de", "CH");
      case 0x0809: return Locale.UK; /* en_GB; "UK" is not a country code */
      case 0x080c: return new Locale("fr", "BE");
      case 0x0816: return new Locale("pt", "PT");
      case 0x0c07: return new Locale("de", "AT");
      case 0x0c09: return new Locale("en", "AU");
      case 0x0c0c: return new Locale("fr", "CA");
      case 0x1007: return new Locale("de", "LU");
      case 0x1009: return new Locale("en", "CA");
      case 0x100c: return new Locale("fr", "CH");
      case 0x1407: return new Locale("de", "LI");
      case 0x1409: return new Locale("en", "NZ");
      case 0x140c: return new Locale("fr", "LU");
      case 0x1809: return new Locale("en", "IE");

      default:
        return null;
      }
  }


  /**
   * Maps a Macintosh Script Manager code to the name of the
   * corresponding Java Charset.
   *
   * @param macScript a MacOS ScriptCode, for example
   * 6 for <code>smGreek</code>.
   *
   * @return a String that can be used to retrieve a Java
   * CharsetDecoder, for example <code>MacGreek</code>, or
   * <code>null</code> if <code>macScript</code> has an
   * unsupported value.
   */
  private static String getMacCharsetName(int macScript)
  {
    switch (macScript)
      {
      case 0: return "MacRoman";
      case 1: return "MacJapanese";
      case 2: return "MacKorean";
      case 3: return "MacTradChinese";
      case 4: return "MacArabic";
      case 5: return "MacHebrew";
      case 6: return "MacGreek";
      case 7: return "MacCyrillic";
      case 8: return "MacRSymbol";
      case 9: return "MacDevanagari";
      case 10: return "MacGurmukhi";
      case 11: return "MacGujarati";
      case 12: return "MacOriya";
      case 13: return "MacBengali";
      case 14: return "MacTamil";
      case 15: return "MacTelugu";
      case 16: return "MacKannada";
      case 17: return "MacMalayalam";
      case 18: return "MacSinhalese";
      case 19: return "MacBurmese";
      case 20: return "MacKhmer";
      case 21: return "MacThai";
      case 22: return "MacLao";
      case 23: return "MacGeorgian";
      case 24: return "MacArmenian";
      case 25: return "MacSimpChinese";
      case 26: return "MacTibetan";
      case 27: return "MacMongolian";
      case 28: return "MacEthiopic";
      case 29: return "MacCentralEurope";
      case 30: return "MacVietnamese";
      case 31: return "MacExtArabic";

      default: return null;
      }
  }


  /**
   * Maps a Microsoft locale ID (LCID) to the name of the
   * corresponding Java Charset.
   *
   * @param lcid the Microsoft locale ID.
   *
   * @return a String that can be used to retrieve a Java
   * CharsetDecoder, for example <code>windows-1252</code>, or
   * <code>null</code> if <code>lcid</code> has an unsupported value.
   */
  private static String getMicrosoftCharsetName(int lcid)
  {
    int lang;
    char codePage = '?';

    /* Extract the language code from the LCID. */
    lang = lcid & MS_PRIMARY_LANGUAGE_MASK;

    /* In the majority of cases, the language alone determines the
     * codepage. Languages beyond the end of the table are treated
     * like those marked with a question mark.
     */
    if (lang < microsoftCodePages.length())
      codePage = microsoftCodePages.charAt(lang);

    /* There are a few exceptions, however, where multiple code pages
     * are used for the same language. */
    if (codePage == '?')
      {
        switch (lcid)
          {
          case 0x041a: // Croatian --> Windows-1250 (Central Europe)
          case 0x081a: // Serbian (Latin) --> Windows-1250 (Central Europe)
            codePage = '0';
            break;

          case 0x42c: // Azeri (Latin) --> Windows-1254 (Turkish)
          case 0x443: // Uzbek (Latin) --> Windows-1254 (Turkish)
            codePage = '4';
            break;

          case 0x82c: // Azeri (Cyrillic) --> Windows-1251 (Cyrillic)
          case 0x843: // Uzbek (Cyrillic) --> Windows-1251 (Cyrillic)
          case 0xc1a: // Serbian (Cyrillic) --> Windows-1251 (Cyrillic)
            codePage = '1';
            break;
          }
      }

    switch (codePage)
      {
      case '0': return "windows-1250"; // Central Europe
      case '1': return "windows-1251"; // Cyrillic
      case '2': return "windows-1252"; // Latin 1
      case '3': return "windows-1253"; // Greek
      case '4': return "windows-1254"; // Turkish
      case '5': return "windows-1255"; // Hebrew
      case '6': return "windows-1256"; // Arabic
      case '7': return "windows-1257"; // Baltic
      case '8': return "windows-1258"; // Vietnam
      case 'A': return "windows-874"; // Thai
      case 'B': return "windows-936"; // Simplified Chinese, GBK
      case 'C': return "windows-949"; // Korean
      case 'D': return "windows-950"; // Traditional Chinese, Big5
      case 'E': return "windows-932"; // Japanese Shift-JIS
      default: return null;
      }
  }


  /**
   * Returns the Locale of an OpenType name.
   *
   * @param platform the OpenType platform ID.
   *
   * @param language the language tag of the OpenType name. If
   * <code>platform</code> is 1, this is the MacOS language code.
   * If <code>platform</code> is 3, this is the Windows LCID.
   *
   * @param encoding the encoding tag of the OpenType name. If
   * <code>platform</code> is 1, this is the MacOS script code.
   * The value is currently not used.
   *
   * @return the locale, or <code>null</code> if the platform or the
   * language is not known.
   */
  public static Locale getLocale(int platform, int language, int encoding)
  {
    switch (platform)
      {
      case PLATFORM_MACINTOSH:
        return getMacLocale(language);

      case PLATFORM_MICROSOFT:
        return getWindowsLocale(language);

      default:
        return null;
      }
  }


  /**
   * Determines the name of the charset for an OpenType font name.
   *
   * @param platform the OpenType platform ID.
   *
   * @param language the language tag of the OpenType name. If
   * <code>platform</code> is 1, this is the MacOS language code.
   * If <code>platform</code> is 3, this is the Windows LCID.
   *
   * @param encoding the encoding tag of the OpenType name. If
   * <code>platform</code> is 1, this is the MacOS script code.
   *
   * @return a charset name such as <code>"MacRoman"</code>,
   * or <code>null</code> if the combination is not known.
   */
  public static String getCharsetName(int platform, int language, int encoding)
  {
    switch (platform)
      {
      case PLATFORM_MACINTOSH:
        return getMacCharsetName(encoding);

      case PLATFORM_MICROSOFT:
        switch (encoding)
          {
          case 0: /* Symbol */
          case 1: /* Unicode BMP */
          case 10: /* Unicode full repertoire */
            /* Names with these encodings are stored as UTF-16BE,
             * whatever their language is.
             */
            return "UTF-16BE";

          default:
            /* NOTE(review): for the legacy encodings, the code page is
             * still guessed from the language. Current versions of
             * the OpenType specification ask for UTF-16BE in all
             * Windows platform names; confirm whether this fallback
             * is still wanted.
             */
            return getMicrosoftCharsetName(language);
          }

      default:
        return null;
      }
  }
}