| /* CharGlyphMap.java -- Manages the 'cmap' table of TrueType fonts |
| Copyright (C) 2006 Free Software Foundation, Inc. |
| |
| This file is part of GNU Classpath. |
| |
| GNU Classpath is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 2, or (at your option) |
| any later version. |
| |
| GNU Classpath is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GNU Classpath; see the file COPYING. If not, write to the |
| Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
| 02110-1301 USA. |
| |
| Linking this library statically or dynamically with other modules is |
| making a combined work based on this library. Thus, the terms and |
| conditions of the GNU General Public License cover the whole |
| combination. |
| |
| As a special exception, the copyright holders of this library give you |
| permission to link this library with independent modules to produce an |
| executable, regardless of the license terms of these independent |
| modules, and to copy and distribute the resulting executable under |
| terms of your choice, provided that you also meet, for each linked |
| independent module, the terms and conditions of the license of that |
| module. An independent module is a module which is not derived from |
| or based on this library. If you modify this library, you may extend |
| this exception to your version of the library, but you are not |
| obligated to do so. If you do not wish to do so, delete this |
| exception statement from your version. */ |
| |
| |
| package gnu.java.awt.font.opentype; |
| |
| import java.nio.ByteBuffer; |
| import java.nio.CharBuffer; |
| import java.nio.ShortBuffer; |
| import java.nio.IntBuffer; |
| |
| |
| /** |
| * A mapping from Unicode codepoints to glyphs. This mapping |
| * does not perform any re-ordering or decomposition, so it |
| * is not everything that is needed to support Unicode. |
| * |
| * <p>This class manages the <code>cmap</code> table of |
| * OpenType and TrueType fonts. |
| * |
| * @see <a href="http://partners.adobe.com/asn/tech/type/opentype/cmap.jsp"> |
| * the <code>cmap</code> part of Adobe’ OpenType Specification</a> |
| * |
| * @see <a href="http://developer.apple.com/fonts/TTRefMan/RM06/Chap6cmap.html"> |
| * the <code>cmap</code> section of Apple’s TrueType Reference |
| * Manual</a> |
| * |
| * @author Sascha Brawer (brawer@dandelis.ch) |
| */ |
| abstract class CharGlyphMap |
| { |
| private static final int PLATFORM_UNICODE = 0; |
| private static final int PLATFORM_MACINTOSH = 1; |
| private static final int PLATFORM_MICROSOFT = 3; |
| |
| |
| /** |
| * Determines the glyph index for a given Unicode codepoint. Users |
| * should be aware that the character-to-glyph mapping not not |
| * everything that is needed for full Unicode support. For example, |
| * the <code>cmap</code> table is not able to synthesize accented |
| * glyphs from the canonical decomposition sequence, even if the |
| * font would contain a glyph for the composed form. |
| * |
| * @param ucs4 the Unicode codepoint in UCS-4 encoding. Surrogates |
| * (U+D800 to U+DFFF) cannot be passed, they must be mapped to |
| * UCS-4 first. |
| * |
| * @return the glyph index, or 0 if the font does not contain |
| * a glyph for this codepoint. |
| */ |
| public abstract int getGlyph(int ucs4); |
| |
| |
| /** |
| * Reads a CharGlyphMap from an OpenType or TrueType <code>cmap</code> |
| * table. The current implementation works as follows: |
| * |
| * <p><ol><li>If the font has a type 4 cmap for the Unicode platform |
| * (encoding 0, 1, 2, 3 or 4), or a type 4 cmap for the Microsoft |
| * platform (encodings 1 or 10), that table is used to map Unicode |
| * codepoints to glyphs. Most recent fonts, both for Macintosh and |
| * Windows, should provide such a table.</li> |
| * |
| * <li>Otherwise, if the font has any type 0 cmap for the Macintosh |
| * platform, a Unicode-to-glyph mapping is synthesized from certain |
| * type 0 cmaps. The current implementation collects mappings from |
| * Roman, Icelandic, Turkish, Croatian, Romanian, Eastern European, |
| * Cyrillic, Greek, Hebrew, Arabic and Farsi cmaps.</li>.</ol> |
| * |
| * @param buf a buffer whose position is right at the start |
| * of the entire <code>cmap</code> table, and whose limit |
| * is at its end. |
| * |
| * @return a concrete subclass of <code>CharGlyphMap</code> |
| * that performs the mapping. |
| * |
| * @see <a href= |
| * "http://partners.adobe.com/asn/tech/type/opentype/cmap.jsp" |
| * >the <code>cmap</code> part of Adobe’ OpenType Specification</a> |
| * |
| * @see <a href= |
| * "http://developer.apple.com/fonts/TTRefMan/RM06/Chap6cmap.html" |
| * >the <code>cmap</code> section of Apple’s TrueType Reference |
| * Manual</a> |
| */ |
| public static CharGlyphMap forTable(ByteBuffer buf) |
| { |
| boolean hasType0 = false; |
| int start4 = -1, platform4 = 0, encoding4 = 0; |
| int start12 = -1, platform12 = 0, encoding12 = 0; |
| int version; |
| int numTables; |
| int tableStart = buf.position(); |
| int limit = buf.limit(); |
| int format, platform, language, encoding, length, offset; |
| |
| version = buf.getChar(); |
| if (version != 0) |
| return null; |
| |
| numTables = buf.getChar(); |
| for (int i = 0; i < numTables; i++) |
| { |
| buf.limit(limit).position(tableStart + 4 + i * 8); |
| platform = buf.getChar(); |
| encoding = buf.getChar(); |
| offset = tableStart + buf.getInt(); |
| |
| buf.position(offset); |
| format = buf.getChar(); |
| |
| switch (format) |
| { |
| case 0: |
| hasType0 = true; |
| break; |
| |
| case 4: |
| length = buf.getChar(); |
| language = buf.getChar(); |
| if ((start4 == -1) |
| && Type4.isSupported(platform, language, encoding)) |
| { |
| start4 = offset; |
| platform4 = platform; |
| encoding4 = encoding; |
| } |
| break; |
| |
| case 12: |
| if ((start12 == -1) && Type12.isSupported(platform, encoding)) |
| { |
| start12 = offset; |
| platform12 = platform; |
| encoding12 = encoding; |
| } |
| break; |
| } |
| } |
| |
| |
| if (start12 >= 0) |
| { |
| try |
| { |
| buf.limit(limit).position(start12); |
| return new Type12(buf, platform12, encoding12); |
| } |
| catch (Exception ex) |
| { |
| ex.printStackTrace(); |
| } |
| } |
| |
| if (start4 >= 0) |
| { |
| try |
| { |
| buf.limit(limit).position(start4); |
| return Type4.readTable(buf, platform4, encoding4); |
| } |
| catch (Exception ex) |
| { |
| } |
| } |
| |
| if (hasType0) |
| { |
| try |
| { |
| buf.limit(limit).position(tableStart); |
| return new Type0(buf); |
| } |
| catch (Exception ex) |
| { |
| } |
| } |
| |
| return new Dummy(); |
| } |
| |
| |
| /** |
| * A dummy mapping that maps anything to the undefined glyph. |
| * Used if no other cmap is understood in a font. |
| * |
| * @author Sascha Brawer (brawer@dandelis.ch) |
| */ |
| private static final class Dummy |
| extends CharGlyphMap |
| { |
| public int getGlyph(int ucs4) |
| { |
| return 0; |
| } |
| } |
| |
| |
| /** |
| * A mapping from Unicode code points to glyph IDs through CMAP Type |
| * 0 tables. These tables have serious limitations: Only the first |
| * 256 glyphs can be addressed, and the source of the mapping is not |
| * Unicode, but an encoding used on the Macintosh. |
| * |
| * <p>However, some fonts have only a Type 0 cmap. In this case, we |
| * process all the Type 0 tables we understand, and establish |
| * a reversed glyph-to-Unicode mapping. When a glyph is requested |
| * for a given Unicode character, we perform a linear search on the |
| * reversed table to find the glyph which maps to the requested |
| * character. While not blazingly fast, this gives a reasonable |
| * fallback for old fonts. |
| * |
| * @author Sascha Brawer (brawer@dandelis.ch) |
| */ |
| private static final class Type0 |
| extends CharGlyphMap |
| { |
| /** |
| * An array whose <code>i</code>-th element indicates the |
| * Unicode code point of glyph <code>i</code> in the font. |
| */ |
| private char[] glyphToUCS2 = new char[256]; |
| |
| |
| /** |
| * A String whose <code>charAt(i)</code> is the Unicode character |
| * that corresponds to the codepoint <code>i + 127</code> in the |
| * MacOS Arabic encoding. |
| * |
| * @see <a href= |
| * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ARABIC.TXT" |
| * >the Unicode mapping table for the MacOS Arabic encoding</a> |
| */ |
| private static final String UPPER_ARABIC |
| = "\u007e\u0000\u00c4\u00a0\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1" |
| + "\u00e0\u00e2\u00e4\u06ba\u00ab\u00e7\u00e9\u00e8\u00ea\u00eb" |
| + "\u00ed\u2026\u00ee\u00ef\u00f1\u00f3\u00bb\u00f4\u00f6\u00f7" |
| + "\u00fa\u00f9\u00fb\u00fc\u0020\u0021\"\u0023\u0024\u066a" |
| + "\u0026\u0027\u0028\u0029\u002a\u002b\u060c\u002d\u002e\u002f" |
| + "\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669" |
| + "\u003a\u061b\u003c\u003d\u003e\u061f\u274a\u0621\u0622\u0623" |
| + "\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d" |
| + "\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637" |
| + "\u0638\u0639\u063a\u005b\\\u005d\u005e\u005f\u0640\u0641" |
| + "\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u064b" |
| + "\u064c\u064d\u064e\u064f\u0650\u0651\u0652\u067e\u0679\u0686" |
| + "\u06d5\u06a4\u06af\u0688\u0691\u007b\u007c\u007d\u0698\u06d2"; |
| |
| |
| /** |
| * A String whose <code>charAt(i)</code> is the Unicode character |
| * that corresponds to the codepoint <code>i + 127</code> in the |
| * MacOS East European Roman encoding. |
| * |
| * @see <a href= |
| * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CENTEURO.TXT" |
| * >the Unicode mapping table for the MacOS Central European |
| * encoding</a> |
| */ |
| private static final String UPPER_EAST_EUROPEAN_ROMAN |
| = "\u007e\u0000\u00c4\u0100\u0101\u00c9\u0104\u00d6\u00dc\u00e1" |
| + "\u0105\u010c\u00e4\u010d\u0106\u0107\u00e9\u0179\u017a\u010e" |
| + "\u00ed\u010f\u0112\u0113\u0116\u00f3\u0117\u00f4\u00f6\u00f5" |
| + "\u00fa\u011a\u011b\u00fc\u2020\u00b0\u0118\u00a3\u00a7\u2022" |
| + "\u00b6\u00df\u00ae\u00a9\u2122\u0119\u00a8\u2260\u0123\u012e" |
| + "\u012f\u012a\u2264\u2265\u012b\u0136\u2202\u2211\u0142\u013b" |
| + "\u013c\u013d\u013e\u0139\u013a\u0145\u0146\u0143\u00ac\u221a" |
| + "\u0144\u0147\u2206\u00ab\u00bb\u2026\u00a0\u0148\u0150\u00d5" |
| + "\u0151\u014c\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca" |
| + "\u014d\u0154\u0155\u0158\u2039\u203a\u0159\u0156\u0157\u0160" |
| + "\u201a\u201e\u0161\u015a\u015b\u00c1\u0164\u0165\u00cd\u017d" |
| + "\u017e\u016a\u00d3\u00d4\u016b\u016e\u00da\u016f\u0170\u0171" |
| + "\u0172\u0173\u00dd\u00fd\u0137\u017b\u0141\u017c\u0122\u02c7"; |
| |
| |
| /** |
| * A String whose <code>charAt(i)</code> is the Unicode character |
| * that corresponds to the codepoint <code>i + 127</code> in the |
| * MacOS Roman encoding for the Croatian language. |
| * |
| * @see <a href= |
| * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CROATIAN.TXT" |
| * >the Unicode mapping table for the MacOS Croatian encoding</a> |
| */ |
| private static final String UPPER_CROATIAN |
| = "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1" |
| + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb" |
| + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5" |
| + "\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022" |
| + "\u00b6\u00df\u00ae\u0160\u2122\u00b4\u00a8\u2260\u017d\u00d8" |
| + "\u221e\u00b1\u2264\u2265\u2206\u00b5\u2202\u2211\u220f\u0161" |
| + "\u222b\u00aa\u00ba\u03a9\u017e\u00f8\u00bf\u00a1\u00ac\u221a" |
| + "\u0192\u2248\u0106\u00ab\u010c\u2026\u00a0\u00c0\u00c3\u00d5" |
| + "\u0152\u0153\u0110\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca" |
| + "\uf8ff\u00a9\u2044\u20ac\u2039\u203a\u00c6\u00bb\u2013\u00b7" |
| + "\u201a\u201e\u2030\u00c2\u0107\u00c1\u010d\u00c8\u00cd\u00ce" |
| + "\u00cf\u00cc\u00d3\u00d4\u0111\u00d2\u00da\u00db\u00d9\u0131" |
| + "\u02c6\u02dc\u00af\u03c0\u00cb\u02da\u00b8\u00ca\u00e6\u02c7"; |
| |
| |
| /** |
| * A String whose <code>charAt(i)</code> is the Unicode character |
| * that corresponds to the codepoint <code>i + 127</code> in the |
| * MacOS Cyrillic encoding. |
| * |
| * @see <a href= |
| * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/CYRILLIC.TXT" |
| * >the Unicode mapping table for the MacOS Cyrillic encoding</a> |
| */ |
| private static final String UPPER_CYRILLIC |
| = "\u007e\u0000\u0410\u0411\u0412\u0413\u0414\u0415\u0416\u0417" |
| + "\u0418\u0419\u041a\u041b\u041c\u041d\u041e\u041f\u0420\u0421" |
| + "\u0422\u0423\u0424\u0425\u0426\u0427\u0428\u0429\u042a\u042b" |
| + "\u042c\u042d\u042e\u042f\u2020\u00b0\u0490\u00a3\u00a7\u2022" |
| + "\u00b6\u0406\u00ae\u00a9\u2122\u0402\u0452\u2260\u0403\u0453" |
| + "\u221e\u00b1\u2264\u2265\u0456\u00b5\u0491\u0408\u0404\u0454" |
| + "\u0407\u0457\u0409\u0459\u040a\u045a\u0458\u0405\u00ac\u221a" |
| + "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u040b\u045b\u040c" |
| + "\u045c\u0455\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u201e" |
| + "\u040e\u045e\u040f\u045f\u2116\u0401\u0451\u044f\u0430\u0431" |
| + "\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b" |
| + "\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445" |
| + "\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u20ac"; |
| |
| |
| /** |
| * A String whose <code>charAt(i)</code> is the Unicode character |
| * that corresponds to the codepoint <code>i + 127</code> in the |
| * MacOS Arabic encoding with the Farsi language. |
| * |
| * @see <a href= |
| * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/FARSI.TXT" |
| * >the Unicode mapping table for the MacOS Farsi encoding</a> |
| */ |
| private static final String UPPER_FARSI |
| = "\u007e\u0000\u00c4\u00a0\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1" |
| + "\u00e0\u00e2\u00e4\u06ba\u00ab\u00e7\u00e9\u00e8\u00ea\u00eb" |
| + "\u00ed\u2026\u00ee\u00ef\u00f1\u00f3\u00bb\u00f4\u00f6\u00f7" |
| + "\u00fa\u00f9\u00fb\u00fc\u0020\u0021\"\u0023\u0024\u066a" |
| + "\u0026\u0027\u0028\u0029\u002a\u002b\u060c\u002d\u002e\u002f" |
| + "\u06f0\u06f1\u06f2\u06f3\u06f4\u06f5\u06f6\u06f7\u06f8\u06f9" |
| + "\u003a\u061b\u003c\u003d\u003e\u061f\u274a\u0621\u0622\u0623" |
| + "\u0624\u0625\u0626\u0627\u0628\u0629\u062a\u062b\u062c\u062d" |
| + "\u062e\u062f\u0630\u0631\u0632\u0633\u0634\u0635\u0636\u0637" |
| + "\u0638\u0639\u063a\u005b\\\u005d\u005e\u005f\u0640\u0641" |
| + "\u0642\u0643\u0644\u0645\u0646\u0647\u0648\u0649\u064a\u064b" |
| + "\u064c\u064d\u064e\u064f\u0650\u0651\u0652\u067e\u0679\u0686" |
| + "\u06d5\u06a4\u06af\u0688\u0691\u007b\u007c\u007d\u0698\u06d2"; |
| |
| |
| /** |
| * A String whose <code>charAt(i)</code> is the Unicode character |
| * that corresponds to the codepoint <code>i + 127</code> in the |
| * MacOS Greek encoding. |
| * |
| * @see <a |
| * href="http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/GREEK.TXT" |
| * >the Unicode mapping table for the MacOS Greek encoding</a> |
| */ |
| private static final String UPPER_GREEK |
| = "\u007e\u0000\u00c4\u00b9\u00b2\u00c9\u00b3\u00d6\u00dc\u0385" |
| + "\u00e0\u00e2\u00e4\u0384\u00a8\u00e7\u00e9\u00e8\u00ea\u00eb" |
| + "\u00a3\u2122\u00ee\u00ef\u2022\u00bd\u2030\u00f4\u00f6\u00a6" |
| + "\u20ac\u00f9\u00fb\u00fc\u2020\u0393\u0394\u0398\u039b\u039e" |
| + "\u03a0\u00df\u00ae\u00a9\u03a3\u03aa\u00a7\u2260\u00b0\u00b7" |
| + "\u0391\u00b1\u2264\u2265\u00a5\u0392\u0395\u0396\u0397\u0399" |
| + "\u039a\u039c\u03a6\u03ab\u03a8\u03a9\u03ac\u039d\u00ac\u039f" |
| + "\u03a1\u2248\u03a4\u00ab\u00bb\u2026\u00a0\u03a5\u03a7\u0386" |
| + "\u0388\u0153\u2013\u2015\u201c\u201d\u2018\u2019\u00f7\u0389" |
| + "\u038a\u038c\u038e\u03ad\u03ae\u03af\u03cc\u038f\u03cd\u03b1" |
| + "\u03b2\u03c8\u03b4\u03b5\u03c6\u03b3\u03b7\u03b9\u03be\u03ba" |
| + "\u03bb\u03bc\u03bd\u03bf\u03c0\u03ce\u03c1\u03c3\u03c4\u03b8" |
| + "\u03c9\u03c2\u03c7\u03c5\u03b6\u03ca\u03cb\u0390\u03b0\u00ad"; |
| |
| |
| /** |
| * A String whose <code>charAt(i)</code> is the Unicode character |
| * that corresponds to the codepoint <code>i + 127</code> in the |
| * MacOS Hebrew encoding. |
| * |
| * <p>The codepoint 0x81 (HEBREW LIGATURE YIDDISH YOD YOD PATAH) |
| * has no composed Unicode equivalent, but is expressed as the |
| * sequence U+05F2 U+05B7 in Unicode. A similar situation exists |
| * with the codepoint 0xC0 (HEBREW LIGATURE LAMED HOLAM), which |
| * MacOS converts to U+F86A U+05DC U+05B9. To correctly deal |
| * with these sequences, we probably should synthesize a ligature |
| * table if a Hebrew font only provides a Type 0 CMAP. |
| * |
| * @see <a href= |
| * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/HEBREW.TXT" |
| * >the Unicode mapping table for the MacOS Hebrew encoding</a> |
| */ |
| private static final String UPPER_HEBREW |
| = "\u007e\u0000\u00c4\u0000\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1" |
| + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb" |
| + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5" |
| + "\u00fa\u00f9\u00fb\u00fc\u0020\u0021\"\u0023\u0024\u0025" |
| + "\u20aa\u0027\u0029\u0028\u002a\u002b\u002c\u002d\u002e\u002f" |
| + "\u0030\u0031\u0032\u0033\u0034\u0035\u0036\u0037\u0038\u0039" |
| + "\u003a\u003b\u003c\u003d\u003e\u003f\u0000\u201e\uf89b\uf89c" |
| + "\uf89d\uf89e\u05bc\ufb4b\ufb35\u2026\u00a0\u05b8\u05b7\u05b5" |
| + "\u05b6\u05b4\u2013\u2014\u201c\u201d\u2018\u2019\ufb2a\ufb2b" |
| + "\u05bf\u05b0\u05b2\u05b1\u05bb\u05b9\u0000\u05b3\u05d0\u05d1" |
| + "\u05d2\u05d3\u05d4\u05d5\u05d6\u05d7\u05d8\u05d9\u05da\u05db" |
| + "\u05dc\u05dd\u05de\u05df\u05e0\u05e1\u05e2\u05e3\u05e4\u05e5" |
| + "\u05e6\u05e7\u05e8\u05e9\u05ea\u007d\u005d\u007b\u005b\u007c"; |
| |
| |
| /** |
| * A String whose <code>charAt(i)</code> is the Unicode character |
| * that corresponds to the codepoint <code>i + 127</code> in the |
| * MacOS Roman encoding with the Icelandic language. |
| * |
| * @see <a href= |
| * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ICELAND.TXT" |
| * >the Unicode mapping table for the MacOS Icelandic encoding</a> |
| */ |
| private static final String UPPER_ICELANDIC |
| = "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1" |
| + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb" |
| + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5" |
| + "\u00fa\u00f9\u00fb\u00fc\u00dd\u00b0\u00a2\u00a3\u00a7\u2022" |
| + "\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8" |
| + "\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0" |
| + "\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a" |
| + "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5" |
| + "\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca" |
| + "\u00ff\u0178\u2044\u20ac\u00d0\u00f0\u00de\u00fe\u00fd\u00b7" |
| + "\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce" |
| + "\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131" |
| + "\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7"; |
| |
| |
| /** |
| * A String whose <code>charAt(i)</code> is the Unicode character |
| * that corresponds to the codepoint <code>i + 127</code> in the |
| * MacOS Roman encoding for most languages. Exceptions include |
| * Croatian, Icelandic, Romanian, and Turkish. |
| * |
| * @see <a |
| * href="http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT" |
| * >the Unicode mapping table for the MacOS Roman encoding</a> |
| */ |
| private static final String UPPER_ROMAN |
| = "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1" |
| + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb" |
| + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5" |
| + "\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022" |
| + "\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8" |
| + "\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0" |
| + "\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a" |
| + "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5" |
| + "\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca" |
| + "\u00ff\u0178\u2044\u20ac\u2039\u203a\ufb01\ufb02\u2021\u00b7" |
| + "\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce" |
| + "\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131" |
| + "\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7"; |
| |
| |
| /** |
| * A String whose <code>charAt(i)</code> is the Unicode character |
| * that corresponds to the codepoint <code>i + 127</code> in the |
| * MacOS Roman encoding with the Romanian language. |
| * |
| * @see <a href= |
| * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMANIAN.TXT" |
| * >the Unicode mapping table for the MacOS Romanian encoding</a> |
| */ |
| private static final String UPPER_ROMANIAN |
| = "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1" |
| + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb" |
| + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5" |
| + "\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022" |
| + "\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u0102\u0218" |
| + "\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0" |
| + "\u222b\u00aa\u00ba\u03a9\u0103\u0219\u00bf\u00a1\u00ac\u221a" |
| + "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5" |
| + "\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca" |
| + "\u00ff\u0178\u2044\u20ac\u2039\u203a\u021a\u021b\u2021\u00b7" |
| + "\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce" |
| + "\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\u0131" |
| + "\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7"; |
| |
| |
| /** |
| * A String whose <code>charAt(i)</code> is the Unicode character |
| * that corresponds to the codepoint <code>i + 127</code> in the |
| * MacOS Roman encoding with the Turkish language. |
| * |
| * @see <a href= |
| * "http://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/TURKISH.TXT" |
| * >the Unicode mapping table for the MacOS Turkish encoding</a> |
| */ |
| private static final String UPPER_TURKISH |
| = "\u007e\u0000\u00c4\u00c5\u00c7\u00c9\u00d1\u00d6\u00dc\u00e1" |
| + "\u00e0\u00e2\u00e4\u00e3\u00e5\u00e7\u00e9\u00e8\u00ea\u00eb" |
| + "\u00ed\u00ec\u00ee\u00ef\u00f1\u00f3\u00f2\u00f4\u00f6\u00f5" |
| + "\u00fa\u00f9\u00fb\u00fc\u2020\u00b0\u00a2\u00a3\u00a7\u2022" |
| + "\u00b6\u00df\u00ae\u00a9\u2122\u00b4\u00a8\u2260\u00c6\u00d8" |
| + "\u221e\u00b1\u2264\u2265\u00a5\u00b5\u2202\u2211\u220f\u03c0" |
| + "\u222b\u00aa\u00ba\u03a9\u00e6\u00f8\u00bf\u00a1\u00ac\u221a" |
| + "\u0192\u2248\u2206\u00ab\u00bb\u2026\u00a0\u00c0\u00c3\u00d5" |
| + "\u0152\u0153\u2013\u2014\u201c\u201d\u2018\u2019\u00f7\u25ca" |
| + "\u00ff\u0178\u011e\u011f\u0130\u0131\u015e\u015f\u2021\u00b7" |
| + "\u201a\u201e\u2030\u00c2\u00ca\u00c1\u00cb\u00c8\u00cd\u00ce" |
| + "\u00cf\u00cc\u00d3\u00d4\uf8ff\u00d2\u00da\u00db\u00d9\uf8a0" |
| + "\u02c6\u02dc\u00af\u02d8\u02d9\u02da\u00b8\u02dd\u02db\u02c7"; |
| |
| |
| /** |
| * Constructs a CharGlyphMap.Type0 from all type 0 cmaps provided |
| * by the font. The implementation is able to fuse multiple type |
| * 0 cmaps, such as the MacRoman, Turkish, Icelandic and Croatian |
| * encoding, into a single map from Unicode characters to glyph |
| * indices. |
| * |
| * @param buf a ByteBuffer whose position is right at the |
| * beginning of the entire cmap table of the font (<i>not</i> |
| * at some subtable). |
| */ |
| public Type0(ByteBuffer buf) |
| { |
| int numTables; |
| int tableStart = buf.position(); |
| int limit = buf.limit(); |
| |
| /* The CMAP version must be 0. */ |
| if (buf.getChar() != 0) |
| throw new IllegalStateException(); |
| |
| numTables = buf.getChar(); |
| for (int i = 0; i < numTables; i++) |
| { |
| buf.limit(limit).position(tableStart + 4 + i * 8); |
| int platform = buf.getChar(); |
| int encoding = buf.getChar(); |
| int offset = tableStart + buf.getInt(); |
| |
| buf.position(offset); |
| int format = buf.getChar(); |
| int length = buf.getChar(); |
| buf.limit(offset + length); |
| int language = buf.getChar(); |
| |
| if (format == 0) |
| readSingleTable(buf, platform, language, encoding); |
| } |
| } |
| |
| |
| /** |
| * Processes a CMAP Type 0 table whose platform, encoding and |
| * language are already known. |
| * |
| * @param buf the buffer to read the table from, positioned |
| * right after the language tag. |
| */ |
| private void readSingleTable(ByteBuffer buf, |
| int platform, int language, |
| int encoding) |
| { |
| String upper = getUpper129(platform, encoding, language); |
| if (upper == null) |
| return; |
| |
| /* Skip the MacOS codepoints [0 .. 31] because they do not |
| * correspond to any Unicode codepoint. |
| */ |
| buf.position(buf.position() + 32); |
| |
| /* Irrespective of script and language, the MacOS codepoints |
| * [32 .. 126] correspond to the same Unicode codepoint. |
| */ |
| for (int i = 32; i < 126; i++) |
| glyphToUCS2[buf.get() & 0xff] = (char) i; |
| |
| for (int i = 127; i < 256; i++) |
| glyphToUCS2[buf.get() & 0xff] = upper.charAt(i - 127); |
| |
| /* Glyph 0 is always the undefined character, which has |
| * no codepoint in Unicode. |
| */ |
| glyphToUCS2[0] = 0; |
| } |
| |
| |
| /** |
| * Determines the glyph index for a given Unicode codepoint. |
| * |
| * @param ucs4 the Unicode codepoint in UCS-4 encoding. |
| * |
| * @return the glyph index, or 0 if the font does not contain |
| * a glyph for this codepoint. |
| */ |
| public int getGlyph(int ucs4) |
| { |
| /* This linear search is not exactly super fast. However, |
| * only really ancient fonts have only a type 0 cmap, |
| * so it should not hurt in very many cases. If it shows |
| * to be a performance problem, one could do a binary search |
| * on a 256-entry table sorted by Unicode codepoint. The |
| * matching index of that table could then be used to look |
| * up the glyph ID at that position. |
| */ |
| for (int i = 0; i < 256; i++) |
| if (glyphToUCS2[i] == ucs4) |
| return i; |
| return 0; |
| } |
| |
| |
| /** |
| * Returns a String whose <code>charAt(i)</code> is the Unicode |
| * character that corresponds to the codepoint <code>i + |
| * 127</code> in the encoding specified by the platform, script |
| * and language tag of a Type 0 CMAP. |
| * |
| * @param language the language tag in the cmap subtable. For the |
| * Macintosh platform, this is 0 to indicate language-neutral |
| * encoding, or the MacOS language code <i>plus one.</i> The |
| * Apple documentation does not mention that one needs to be |
| * added, but the Adobe OpenType specification does. |
| * |
| * @return a String for mapping the top 129 characters to |
| * UCS-2. If <code>platform</code> is not <code>1</code> |
| * (indicating Macintosh), or if the combination of |
| * <code>script</code> and <code>language</code> is not |
| * recognized, <code>null</code> will be returned. |
| */ |
| private static String getUpper129(int platform, int script, int language) |
| { |
| if (platform != PLATFORM_MACINTOSH) |
| return null; |
| |
| switch (script) |
| { |
| case 0: /* smRoman */ |
| if (language == /* langIcelandic+1 */ 16) |
| return UPPER_ICELANDIC; |
| else if (language == /* langTurkish+1 */ 18) |
| return UPPER_TURKISH; |
| else if (language == /* langCroatian+1 */ 19) |
| return UPPER_CROATIAN; |
| else if (language == /* langRomanian+1 */ 38) |
| return UPPER_ROMANIAN; |
| else if (language == /* language-neutral */ 0) |
| return UPPER_ROMAN; |
| else |
| return null; |
| |
| case 4: /* smArabic */ |
| if (language == /* langFarsi+1 */ 32) |
| return UPPER_FARSI; |
| else |
| return UPPER_ARABIC; |
| |
| case 5: /* smHebrew */ |
| return UPPER_HEBREW; |
| |
| case 6: /* smGreek */ |
| return UPPER_GREEK; |
| |
| case 7: /* smCyrillic */ |
| return UPPER_CYRILLIC; |
| |
| case 29: /* smSlavic == smEastEurRoman */ |
| return UPPER_EAST_EUROPEAN_ROMAN; |
| } |
| |
| return null; |
| } |
| } |
| |
| |
| /** |
| * A mapping from Unicode code points to glyph IDs through CMAP Type |
| * 4 tables. These tables are able to map two-byte encoded text |
| * to glyph IDs, such as Unicode Basic Multilingual Plane which |
| * contains U+0000 .. U+FFFE without surrogates. |
| * |
| * @author Sascha Brawer (brawer@dandelis.ch) |
| */ |
| private static final class Type4 |
| extends CharGlyphMap |
| { |
| /** |
| * Determines whether this implementation supports a combination |
| * of platform, language and encoding is supported for a type 4 |
| * <code>cmap</code> table. |
| * |
| * <p>Currently, we support the following combinations: |
| * |
| * <ul><li>the Unicode platform in encodings 0, 1, 2, 3 and |
| * 4;</li> |
| * |
| * <li>the Microsoft platform in encodings 1 (Basic Multilingual |
| * Plane) and 10 (full Unicode).</li></ul> |
| * |
| * <p>Most recent Macintosh fonts provide a type 4 |
| * <code>cmap</code> for Unicode. Microsoft recommends providing a |
| * type 4 <code>cmap</code> for encoding 1 of the Microsoft |
| * platform. The implementation of GNU Classpath supports both |
| * variants. |
| * |
| * <p>Not supported are ShiftJIS, Big5, Wansung, Johab, and other |
| * non-Unicode encodings. Text can easily be converted to Unicode |
| * using the java.nio.charset package. |
| */ |
| static boolean isSupported(int platform, int language, int encoding) |
| { |
| switch (platform) |
| { |
| case PLATFORM_UNICODE: |
| return (encoding >= 0) && (encoding <= 4); |
| |
| case PLATFORM_MICROSOFT: |
| return (encoding == /* Basic Multilingual Plane */ 1) |
| || (encoding == /* Full Unicode */ 10); |
| } |
| |
| return false; |
| } |
| |
| |
| /** |
| * Processes a CMAP Type 4 table whose platform, encoding and |
| * language are already known. We understand the Unicode platform |
| * with encodings 0, 1, 2, 3 and 4, and the Microsoft platform |
| * with encodings 1 (Unicode BMP) and 10 (UCS-4). |
| * |
| * @param buf the buffer to read the table from, positioned at |
| * its beginning. |
| * |
| * @return a Type4 table, or <code>null</code> if the combination |
| * of platform and encoding is not understood. |
| */ |
| static Type4 readTable(ByteBuffer buf, |
| int platform, int encoding) |
| { |
| int tableStart = buf.position(); |
| char format = buf.getChar(); |
| int length = buf.getChar(); |
| int language = buf.getChar(); |
| |
| if ((format != 4) || !isSupported(platform, language, encoding)) |
| throw new IllegalArgumentException(); |
| |
| buf.limit(tableStart + length); |
| |
| int segCountX2 = buf.getChar(); |
| int segCount = segCountX2 / 2; |
| int searchRange = buf.getChar(); |
| int entrySelector = buf.getChar(); |
| int rangeShift = buf.getChar(); |
| |
| CharBuffer endCode, startCode, idRangeOffset_glyphID; |
| ShortBuffer idDelta; |
| |
| int pos = buf.position(); |
| endCode = buf.asCharBuffer(); |
| pos += segCountX2 + /* reservedPad */ 2; |
| |
| buf.position(pos); |
| startCode = buf.asCharBuffer(); |
| pos += segCountX2; |
| |
| buf.position(pos); |
| idDelta = buf.asShortBuffer(); |
| pos += segCountX2; |
| |
| buf.position(pos); |
| idRangeOffset_glyphID = buf.asCharBuffer(); |
| |
| endCode.limit(segCount); |
| startCode.limit(segCount); |
| idDelta.limit(segCount); |
| idRangeOffset_glyphID.limit((buf.limit() - pos) / 2); |
| |
| return new Type4(segCount, |
| endCode, startCode, idDelta, |
| idRangeOffset_glyphID); |
| } |
| |
| |
| private CharBuffer lastChar; |
| private CharBuffer firstChar; |
| private ShortBuffer idDelta; |
| private CharBuffer rangeID; |
| private int numSegments; |
| |
| private Type4(int numSegments, |
| CharBuffer lastChar, CharBuffer firstChar, |
| ShortBuffer idDelta, CharBuffer rangeID) |
| { |
| this.numSegments = numSegments; |
| this.lastChar = lastChar; |
| this.firstChar = firstChar; |
| this.idDelta = idDelta; |
| this.rangeID = rangeID; |
| } |
| |
| |
| /** |
| * Determines the glyph index for a given Unicode codepoint. |
| * |
| * @param ucs4 the Unicode codepoint in UCS-4 encoding. |
| * |
| * @return the glyph index, or 0 if the font does not contain |
| * a glyph for this codepoint. |
| */ |
| public int getGlyph(int ucs4) |
| { |
| char c, segStart; |
| int segment, idRangeOffset; |
| |
| if (ucs4 > 0xffff) |
| return 0; |
| |
| c = (char) ucs4; |
| segment = find(c); |
| segStart = firstChar.get(segment); |
| if ((c < segStart) || (c > lastChar.get(segment))) |
| return 0; |
| |
| /* |
| * System.out.println("seg " + segment |
| * + ", range=" + (int) rangeID[segment] |
| * + ", delta=" + delta[segment]); |
| */ |
| |
| idRangeOffset = rangeID.get(segment); |
| if (idRangeOffset == 0) |
| return (int) (char) (((int) c) + idDelta.get(segment)); |
| int result = rangeID.get((idRangeOffset >> 1) |
| + (c - segStart) + segment); |
| if (result == 0) |
| return 0; |
| return (int) (char) (result + idDelta.get(segment)); |
| } |
| |
| |
| private int find(char c) |
| { |
| int min, max, mid; |
| |
| min = 0; |
| max = numSegments - 1; |
| mid = max >> 1; |
| |
| while (min < max) |
| { |
| // System.out.println("(" + min + "," + max + ") " + mid); |
| char val = lastChar.get(mid); |
| if (val == c) |
| break; |
| else if (val < c) |
| min = mid + 1; |
| else if (val > c) |
| max = mid; |
| mid = (min + max) >> 1; |
| } |
| |
| return mid; |
| } |
| } |
| |
| |
| /** |
| * A mapping from Unicode code points to glyph IDs through CMAP Type |
| * 12 tables. These tables are able to map four-byte encoded text |
| * to glyph IDs, such as Unicode UCS-4. |
| * |
| * @author Sascha Brawer (brawer@dandelis.ch) |
| */ |
| private static final class Type12 |
| extends CharGlyphMap |
| { |
| int numGroups; |
| IntBuffer data; |
| |
| |
| /** |
| * Determines whether this implementation supports a combination |
| * of platform and encoding for a type 12 <code>cmap</code> table. |
| * |
| * <p>Currently, we support the following combinations: |
| * |
| * <ul><li>the Unicode platform in encodings 0, 1, 2, 3 and |
| * 4;</li> |
| * |
| * <li>the Microsoft platform in encodings 1 (Basic Multilingual |
| * Plane) and 10 (full Unicode).</li></ul> |
| */ |
| static boolean isSupported(int platform, int encoding) |
| { |
| switch (platform) |
| { |
| case PLATFORM_UNICODE: |
| return (encoding >= 0) && (encoding <= 4); |
| |
| case PLATFORM_MICROSOFT: |
| return (encoding == /* Basic Multilingual Plane */ 1) |
| || (encoding == /* Full Unicode */ 10); |
| } |
| |
| return false; |
| } |
| |
| |
| /** |
| * Constructs a <code>cmap</code> type 12 table whose platform and |
| * encoding are already known. We understand the Unicode platform |
| * with encodings 0, 1, 2, 3 and 4, and the Microsoft platform |
| * with encodings 1 (Unicode BMP) and 10 (UCS-4). |
| * |
| * @param buf the buffer to read the table from, positioned at |
| * its beginning. |
| */ |
| Type12(ByteBuffer buf, int platform, int encoding) |
| { |
| int tableStart = buf.position(); |
| int format = buf.getChar(); |
| if ((format != 12) || !isSupported(platform, encoding)) |
| throw new IllegalStateException(); |
| |
| buf.getChar(); // skip reserved field |
| buf.limit(tableStart + buf.getInt()); |
| int language = buf.getInt(); |
| numGroups = buf.getInt(); |
| data = buf.asIntBuffer(); |
| } |
| |
| |
| /** |
| * Determines the glyph index for a given Unicode codepoint. Users |
| * should be aware that the character-to-glyph mapping not not |
| * everything that is needed for full Unicode support. For example, |
| * the <code>cmap</code> table is not able to synthesize accented |
| * glyphs from the canonical decomposition sequence, even if the |
| * font would contain a glyph for the composed form. |
| * |
| * @param ucs4 the Unicode codepoint in UCS-4 encoding. Surrogates |
| * (U+D800 to U+DFFF) cannot be passed, they must be mapped to |
| * UCS-4 first. |
| * |
| * @return the glyph index, or 0 if the font does not contain |
| * a glyph for this codepoint. |
| */ |
| public int getGlyph(int ucs4) |
| { |
| int min, max, mid, startCharCode, endCharCode; |
| |
| min = 0; |
| max = numGroups - 1; |
| mid = max >> 1; |
| do |
| { |
| startCharCode = data.get(3 * mid); |
| endCharCode = data.get(3 * mid + 1); |
| |
| |
| /* |
| System.out.println("group " + mid + " (U+" |
| + Integer.toHexString(startCharCode) |
| + " .. U+" + Integer.toHexString(endCharCode) |
| + "): glyph " + (int) data.get(mid*3+2)); |
| */ |
| |
| if ((startCharCode <= ucs4) && (ucs4 <= endCharCode)) |
| return ucs4 |
| - startCharCode |
| + /* startGlyphID */ data.get(mid * 3 + 2); |
| |
| if (endCharCode < ucs4) |
| min = mid + 1; |
| else |
| max = mid; |
| mid = (min + max) >> 1; |
| } |
| while (min < max); |
| |
| startCharCode = data.get(3 * mid); |
| endCharCode = data.get(3 * mid + 1); |
| if ((startCharCode <= ucs4) && (ucs4 <= endCharCode)) |
| return ucs4 |
| - startCharCode |
| + /* startGlyphID */ data.get(mid * 3 + 2); |
| |
| return 0; |
| } |
| } |
| } |