View | Details | Raw Unified | Return to bug 21705
Collapse All | Expand All

(-)src/org/apache/poi/hssf/usermodel/contrib/HSSFCellUtil.java (-19 / +350 lines)
Lines 78-84 Link Here
78
public class HSSFCellUtil
78
public class HSSFCellUtil
79
{
79
{
80
80
81
    private static HashMap unicodeMappings = new HashMap();
81
    private static Map unicodeMappings = new HashMap();
82
83
    /**
84
     *  Return the Map of HTML escapes to Unicode mappings.
85
     *  Currently protected as only the test cases need access.
86
     *
87
     *@return  The map where the key is the html and value is the Unicode.
88
     */
89
    protected static Map getUnicodeMappings(){
90
	    return unicodeMappings;
91
	}
82
92
83
93
84
    /**
94
    /**
Lines 249-255 Link Here
249
259
250
    /**
260
    /**
251
     *  Looks for text in the cell that should be unicode, like &alpha; and provides the
261
     *  Looks for text in the cell that should be unicode, like &alpha; and provides the
252
     *  unicode version of it.
262
     *  unicode version of it.  Case matters: for instance, the HTML escapes
263
     *  &larr; and &lArr; both exist and are different.  Not sure if we want to deal
264
     *  with those or not...
253
     *
265
     *
254
     *@param  cell  The cell to check for unicode values
266
     *@param  cell  The cell to check for unicode values
255
     *@return       translated to unicode
267
     *@return       translated to unicode
Lines 264-270 Link Here
264
        {
276
        {
265
            Map.Entry entry = (Map.Entry) i.next();
277
            Map.Entry entry = (Map.Entry) i.next();
266
            String key = (String) entry.getKey();
278
            String key = (String) entry.getKey();
267
            if ( s.toLowerCase().indexOf( key ) != -1 )
279
            if ( s.indexOf( key ) != -1 )
268
            {
280
            {
269
                s = StringUtils.replace( s, key, "" + entry.getValue().toString() + "" );
281
                s = StringUtils.replace( s, key, "" + entry.getValue().toString() + "" );
270
                foundUnicode = true;
282
                foundUnicode = true;
Lines 278-300 Link Here
278
        return cell;
290
        return cell;
279
    }
291
    }
280
292
281
    
293
282
    static {
294
    static {
283
        unicodeMappings.put( "&alpha;",   "\u03B1" );
295
        // Portions © International Organization for Standardization 1986
284
        unicodeMappings.put( "&beta;",    "\u03B2" );
296
        // Permission to copy in any form is granted for use with
285
        unicodeMappings.put( "&gamma;",   "\u03B3" );
297
        // conforming SGML systems and applications as defined in
286
        unicodeMappings.put( "&delta;",   "\u03B4" );
298
        // ISO 8879, provided this notice is included in all copies.
287
        unicodeMappings.put( "&epsilon;", "\u03B5" );
299
        // Character entity set. Typical invocation:
288
        unicodeMappings.put( "&zeta;",    "\u03B6" );
300
        // <!ENTITY % HTMLlat1 PUBLIC
289
        unicodeMappings.put( "&eta;",     "\u03B7" );
301
        // "-//W3C//ENTITIES Latin 1//EN//HTML">
290
        unicodeMappings.put( "&theta;",   "\u03B8" );
302
        // %HTMLlat1;
291
        unicodeMappings.put( "&iota;",    "\u03B9" );
303
        unicodeMappings.put ("&nbsp;",     "\u00a0"); // no-break space = non-breaking space, U+00A0 ISOnum
292
        unicodeMappings.put( "&kappa;",   "\u03BA" );
304
        unicodeMappings.put ("&iexcl;",    "\u00a1"); // inverted exclamation mark, U+00A1 ISOnum
293
        unicodeMappings.put( "&lambda;",  "\u03BB" );
305
        unicodeMappings.put ("&cent;",     "\u00a2"); // cent sign, U+00A2 ISOnum
294
        unicodeMappings.put( "&mu;",      "\u03BC" );
306
        unicodeMappings.put ("&pound;",    "\u00a3"); // pound sign, U+00A3 ISOnum
295
        unicodeMappings.put( "&nu;",      "\u03BD" );
307
        unicodeMappings.put ("&curren;",   "\u00a4"); // currency sign, U+00A4 ISOnum
296
        unicodeMappings.put( "&xi;",      "\u03BE" );
308
        unicodeMappings.put ("&yen;",      "\u00a5"); // yen sign = yuan sign, U+00A5 ISOnum
297
        unicodeMappings.put( "&omicron;", "\u03BF" );
309
        unicodeMappings.put ("&brvbar;",   "\u00a6"); // broken bar = broken vertical bar, U+00A6 ISOnum
310
        unicodeMappings.put ("&sect;",     "\u00a7"); // section sign, U+00A7 ISOnum
311
        unicodeMappings.put ("&uml;",      "\u00a8"); // diaeresis = spacing diaeresis, U+00A8 ISOdia
312
        unicodeMappings.put ("&copy;",     "\u00a9"); // copyright sign, U+00A9 ISOnum
313
        unicodeMappings.put ("&ordf;",     "\u00aa"); // feminine ordinal indicator, U+00AA ISOnum
314
        unicodeMappings.put ("&laquo;",    "\u00ab"); // left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum
315
        unicodeMappings.put ("&not;",      "\u00ac"); // not sign, U+00AC ISOnum
316
        unicodeMappings.put ("&shy;",      "\u00ad"); // soft hyphen = discretionary hyphen, U+00AD ISOnum
317
        unicodeMappings.put ("&reg;",      "\u00ae"); // registered sign = registered trade mark sign, U+00AE ISOnum
318
        unicodeMappings.put ("&macr;",     "\u00af"); // macron = spacing macron = overline = APL overbar, U+00AF ISOdia
319
        unicodeMappings.put ("&deg;",      "\u00b0"); // degree sign, U+00B0 ISOnum
320
        unicodeMappings.put ("&plusmn;",   "\u00b1"); // plus-minus sign = plus-or-minus sign, U+00B1 ISOnum
321
        unicodeMappings.put ("&sup2;",     "\u00b2"); // superscript two = superscript digit two = squared, U+00B2 ISOnum
322
        unicodeMappings.put ("&sup3;",     "\u00b3"); // superscript three = superscript digit three = cubed, U+00B3 ISOnum
323
        unicodeMappings.put ("&acute;",    "\u00b4"); // acute accent = spacing acute, U+00B4 ISOdia
324
        unicodeMappings.put ("&micro;",    "\u00b5"); // micro sign, U+00B5 ISOnum
325
        unicodeMappings.put ("&para;",     "\u00b6"); // pilcrow sign = paragraph sign, U+00B6 ISOnum
326
        unicodeMappings.put ("&middot;",   "\u00b7"); // middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum
327
        unicodeMappings.put ("&cedil;",    "\u00b8"); // cedilla = spacing cedilla, U+00B8 ISOdia
328
        unicodeMappings.put ("&sup1;",     "\u00b9"); // superscript one = superscript digit one, U+00B9 ISOnum
329
        unicodeMappings.put ("&ordm;",     "\u00ba"); // masculine ordinal indicator, U+00BA ISOnum
330
        unicodeMappings.put ("&raquo;",    "\u00bb"); // right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum
331
        unicodeMappings.put ("&frac14;",   "\u00bc"); // vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum
332
        unicodeMappings.put ("&frac12;",   "\u00bd"); // vulgar fraction one half = fraction one half, U+00BD ISOnum
333
        unicodeMappings.put ("&frac34;",   "\u00be"); // vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum
334
        unicodeMappings.put ("&iquest;",   "\u00bf"); // inverted question mark = turned question mark, U+00BF ISOnum
335
        unicodeMappings.put ("&Agrave;",   "\u00c0"); // latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1
336
        unicodeMappings.put ("&Aacute;",   "\u00c1"); // latin capital letter A with acute, U+00C1 ISOlat1
337
        unicodeMappings.put ("&Acirc;",    "\u00c2"); // latin capital letter A with circumflex, U+00C2 ISOlat1
338
        unicodeMappings.put ("&Atilde;",   "\u00c3"); // latin capital letter A with tilde, U+00C3 ISOlat1
339
        unicodeMappings.put ("&Auml;",     "\u00c4"); // latin capital letter A with diaeresis, U+00C4 ISOlat1
340
        unicodeMappings.put ("&Aring;",    "\u00c5"); // latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1
341
        unicodeMappings.put ("&AElig;",    "\u00c6"); // latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1
342
        unicodeMappings.put ("&Ccedil;",   "\u00c7"); // latin capital letter C with cedilla, U+00C7 ISOlat1
343
        unicodeMappings.put ("&Egrave;",   "\u00c8"); // latin capital letter E with grave, U+00C8 ISOlat1
344
        unicodeMappings.put ("&Eacute;",   "\u00c9"); // latin capital letter E with acute, U+00C9 ISOlat1
345
        unicodeMappings.put ("&Ecirc;",    "\u00ca"); // latin capital letter E with circumflex, U+00CA ISOlat1
346
        unicodeMappings.put ("&Euml;",     "\u00cb"); // latin capital letter E with diaeresis, U+00CB ISOlat1
347
        unicodeMappings.put ("&Igrave;",   "\u00cc"); // latin capital letter I with grave, U+00CC ISOlat1
348
        unicodeMappings.put ("&Iacute;",   "\u00cd"); // latin capital letter I with acute, U+00CD ISOlat1
349
        unicodeMappings.put ("&Icirc;",    "\u00ce"); // latin capital letter I with circumflex, U+00CE ISOlat1
350
        unicodeMappings.put ("&Iuml;",     "\u00cf"); // latin capital letter I with diaeresis, U+00CF ISOlat1
351
        unicodeMappings.put ("&ETH;",      "\u00d0"); // latin capital letter ETH, U+00D0 ISOlat1
352
        unicodeMappings.put ("&Ntilde;",   "\u00d1"); // latin capital letter N with tilde, U+00D1 ISOlat1
353
        unicodeMappings.put ("&Ograve;",   "\u00d2"); // latin capital letter O with grave, U+00D2 ISOlat1
354
        unicodeMappings.put ("&Oacute;",   "\u00d3"); // latin capital letter O with acute, U+00D3 ISOlat1
355
        unicodeMappings.put ("&Ocirc;",    "\u00d4"); // latin capital letter O with circumflex, U+00D4 ISOlat1
356
        unicodeMappings.put ("&Otilde;",   "\u00d5"); // latin capital letter O with tilde, U+00D5 ISOlat1
357
        unicodeMappings.put ("&Ouml;",     "\u00d6"); // latin capital letter O with diaeresis, U+00D6 ISOlat1
358
        unicodeMappings.put ("&times;",    "\u00d7"); // multiplication sign, U+00D7 ISOnum
359
        unicodeMappings.put ("&Oslash;",   "\u00d8"); // latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1
360
        unicodeMappings.put ("&Ugrave;",   "\u00d9"); // latin capital letter U with grave, U+00D9 ISOlat1
361
        unicodeMappings.put ("&Uacute;",   "\u00da"); // latin capital letter U with acute, U+00DA ISOlat1
362
        unicodeMappings.put ("&Ucirc;",    "\u00db"); // latin capital letter U with circumflex, U+00DB ISOlat1
363
        unicodeMappings.put ("&Uuml;",     "\u00dc"); // latin capital letter U with diaeresis, U+00DC ISOlat1
364
        unicodeMappings.put ("&Yacute;",   "\u00dd"); // latin capital letter Y with acute, U+00DD ISOlat1
365
        unicodeMappings.put ("&THORN;",    "\u00de"); // latin capital letter THORN, U+00DE ISOlat1
366
        unicodeMappings.put ("&szlig;",    "\u00df"); // latin small letter sharp s = ess-zed, U+00DF ISOlat1
367
        unicodeMappings.put ("&agrave;",   "\u00e0"); // latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1
368
        unicodeMappings.put ("&aacute;",   "\u00e1"); // latin small letter a with acute, U+00E1 ISOlat1
369
        unicodeMappings.put ("&acirc;",    "\u00e2"); // latin small letter a with circumflex, U+00E2 ISOlat1
370
        unicodeMappings.put ("&atilde;",   "\u00e3"); // latin small letter a with tilde, U+00E3 ISOlat1
371
        unicodeMappings.put ("&auml;",     "\u00e4"); // latin small letter a with diaeresis, U+00E4 ISOlat1
372
        unicodeMappings.put ("&aring;",    "\u00e5"); // latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1
373
        unicodeMappings.put ("&aelig;",    "\u00e6"); // latin small letter ae = latin small ligature ae, U+00E6 ISOlat1
374
        unicodeMappings.put ("&ccedil;",   "\u00e7"); // latin small letter c with cedilla, U+00E7 ISOlat1
375
        unicodeMappings.put ("&egrave;",   "\u00e8"); // latin small letter e with grave, U+00E8 ISOlat1
376
        unicodeMappings.put ("&eacute;",   "\u00e9"); // latin small letter e with acute, U+00E9 ISOlat1
377
        unicodeMappings.put ("&ecirc;",    "\u00ea"); // latin small letter e with circumflex, U+00EA ISOlat1
378
        unicodeMappings.put ("&euml;",     "\u00eb"); // latin small letter e with diaeresis, U+00EB ISOlat1
379
        unicodeMappings.put ("&igrave;",   "\u00ec"); // latin small letter i with grave, U+00EC ISOlat1
380
        unicodeMappings.put ("&iacute;",   "\u00ed"); // latin small letter i with acute, U+00ED ISOlat1
381
        unicodeMappings.put ("&icirc;",    "\u00ee"); // latin small letter i with circumflex, U+00EE ISOlat1
382
        unicodeMappings.put ("&iuml;",     "\u00ef"); // latin small letter i with diaeresis, U+00EF ISOlat1
383
        unicodeMappings.put ("&eth;",      "\u00f0"); // latin small letter eth, U+00F0 ISOlat1
384
        unicodeMappings.put ("&ntilde;",   "\u00f1"); // latin small letter n with tilde, U+00F1 ISOlat1
385
        unicodeMappings.put ("&ograve;",   "\u00f2"); // latin small letter o with grave, U+00F2 ISOlat1
386
        unicodeMappings.put ("&oacute;",   "\u00f3"); // latin small letter o with acute, U+00F3 ISOlat1
387
        unicodeMappings.put ("&ocirc;",    "\u00f4"); // latin small letter o with circumflex, U+00F4 ISOlat1
388
        unicodeMappings.put ("&otilde;",   "\u00f5"); // latin small letter o with tilde, U+00F5 ISOlat1
389
        unicodeMappings.put ("&ouml;",     "\u00f6"); // latin small letter o with diaeresis, U+00F6 ISOlat1
390
        unicodeMappings.put ("&divide;",   "\u00f7"); // division sign, U+00F7 ISOnum
391
        unicodeMappings.put ("&oslash;",   "\u00f8"); // latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1
392
        unicodeMappings.put ("&ugrave;",   "\u00f9"); // latin small letter u with grave, U+00F9 ISOlat1
393
        unicodeMappings.put ("&uacute;",   "\u00fa"); // latin small letter u with acute, U+00FA ISOlat1
394
        unicodeMappings.put ("&ucirc;",    "\u00fb"); // latin small letter u with circumflex, U+00FB ISOlat1
395
        unicodeMappings.put ("&uuml;",     "\u00fc"); // latin small letter u with diaeresis, U+00FC ISOlat1
396
        unicodeMappings.put ("&yacute;",   "\u00fd"); // latin small letter y with acute, U+00FD ISOlat1
397
        unicodeMappings.put ("&thorn;",    "\u00fe"); // latin small letter thorn, U+00FE ISOlat1
398
        unicodeMappings.put ("&yuml;",     "\u00ff"); // latin small letter y with diaeresis, U+00FF ISOlat1
399
        // Mathematical, Greek and Symbolic characters for HTML
400
        // Character entity set. Typical invocation:
401
        // <!ENTITY % HTMLsymbol PUBLIC
402
        // "-//W3C//ENTITIES Symbols//EN//HTML">
403
        // %HTMLsymbol;
404
        // Portions © International Organization for Standardization 1986:
405
        // Permission to copy in any form is granted for use with
406
        // conforming SGML systems and applications as defined in
407
        // ISO 8879, provided this notice is included in all copies.
408
        // Relevant ISO entity set is given unless names are newly introduced.
409
        // New names (i.e., not in ISO 8879 list) do not clash with any
410
        // existing ISO 8879 entity names. ISO 10646 character numbers
411
        // are given for each character, in hex. CDATA values are decimal
412
        // conversions of the ISO 10646 values and refer to the document
413
        // character set. Names are ISO 10646 names.
414
        // Latin Extended-B
415
        unicodeMappings.put ("&fnof;",     "\u0192"); // latin small f with hook = function = florin, U+0192 ISOtech
416
        // Greek
417
        unicodeMappings.put ("&Alpha;",    "\u0391"); // greek capital letter alpha, U+0391
418
        unicodeMappings.put ("&Beta;",     "\u0392"); // greek capital letter beta, U+0392
419
        unicodeMappings.put ("&Gamma;",    "\u0393"); // greek capital letter gamma, U+0393 ISOgrk3
420
        unicodeMappings.put ("&Delta;",    "\u0394"); // greek capital letter delta, U+0394 ISOgrk3
421
        unicodeMappings.put ("&Epsilon;",  "\u0395"); // greek capital letter epsilon, U+0395
422
        unicodeMappings.put ("&Zeta;",     "\u0396"); // greek capital letter zeta, U+0396
423
        unicodeMappings.put ("&Eta;",      "\u0397"); // greek capital letter eta, U+0397
424
        unicodeMappings.put ("&Theta;",    "\u0398"); // greek capital letter theta, U+0398 ISOgrk3
425
        unicodeMappings.put ("&Iota;",     "\u0399"); // greek capital letter iota, U+0399
426
        unicodeMappings.put ("&Kappa;",    "\u039a"); // greek capital letter kappa, U+039A
427
        unicodeMappings.put ("&Lambda;",   "\u039b"); // greek capital letter lambda, U+039B ISOgrk3
428
        unicodeMappings.put ("&Mu;",       "\u039c"); // greek capital letter mu, U+039C
429
        unicodeMappings.put ("&Nu;",       "\u039d"); // greek capital letter nu, U+039D
430
        unicodeMappings.put ("&Xi;",       "\u039e"); // greek capital letter xi, U+039E ISOgrk3
431
        unicodeMappings.put ("&Omicron;",  "\u039f"); // greek capital letter omicron, U+039F
432
        unicodeMappings.put ("&Pi;",       "\u03a0"); // greek capital letter pi, U+03A0 ISOgrk3
433
        unicodeMappings.put ("&Rho;",      "\u03a1"); // greek capital letter rho, U+03A1
434
        // there is no Sigmaf, and no U+03A2 character either
435
        unicodeMappings.put ("&Sigma;",    "\u03a3"); // greek capital letter sigma, U+03A3 ISOgrk3
436
        unicodeMappings.put ("&Tau;",      "\u03a4"); // greek capital letter tau, U+03A4
437
        unicodeMappings.put ("&Upsilon;",  "\u03a5"); // greek capital letter upsilon, U+03A5 ISOgrk3
438
        unicodeMappings.put ("&Phi;",      "\u03a6"); // greek capital letter phi, U+03A6 ISOgrk3
439
        unicodeMappings.put ("&Chi;",      "\u03a7"); // greek capital letter chi, U+03A7
440
        unicodeMappings.put ("&Psi;",      "\u03a8"); // greek capital letter psi, U+03A8 ISOgrk3
441
        unicodeMappings.put ("&Omega;",    "\u03a9"); // greek capital letter omega, U+03A9 ISOgrk3
442
        unicodeMappings.put ("&alpha;",    "\u03b1"); // greek small letter alpha, U+03B1 ISOgrk3
443
        unicodeMappings.put ("&beta;",     "\u03b2"); // greek small letter beta, U+03B2 ISOgrk3
444
        unicodeMappings.put ("&gamma;",    "\u03b3"); // greek small letter gamma, U+03B3 ISOgrk3
445
        unicodeMappings.put ("&delta;",    "\u03b4"); // greek small letter delta, U+03B4 ISOgrk3
446
        unicodeMappings.put ("&epsilon;",  "\u03b5"); // greek small letter epsilon, U+03B5 ISOgrk3
447
        unicodeMappings.put ("&zeta;",     "\u03b6"); // greek small letter zeta, U+03B6 ISOgrk3
448
        unicodeMappings.put ("&eta;",      "\u03b7"); // greek small letter eta, U+03B7 ISOgrk3
449
        unicodeMappings.put ("&theta;",    "\u03b8"); // greek small letter theta, U+03B8 ISOgrk3
450
        unicodeMappings.put ("&iota;",     "\u03b9"); // greek small letter iota, U+03B9 ISOgrk3
451
        unicodeMappings.put ("&kappa;",    "\u03ba"); // greek small letter kappa, U+03BA ISOgrk3
452
        unicodeMappings.put ("&lambda;",   "\u03bb"); // greek small letter lambda, U+03BB ISOgrk3
453
        unicodeMappings.put ("&mu;",       "\u03bc"); // greek small letter mu, U+03BC ISOgrk3
454
        unicodeMappings.put ("&nu;",       "\u03bd"); // greek small letter nu, U+03BD ISOgrk3
455
        unicodeMappings.put ("&xi;",       "\u03be"); // greek small letter xi, U+03BE ISOgrk3
456
        unicodeMappings.put ("&omicron;",  "\u03bf"); // greek small letter omicron, U+03BF NEW
457
        unicodeMappings.put ("&pi;",       "\u03c0"); // greek small letter pi, U+03C0 ISOgrk3
458
        unicodeMappings.put ("&rho;",      "\u03c1"); // greek small letter rho, U+03C1 ISOgrk3
459
        unicodeMappings.put ("&sigmaf;",   "\u03c2"); // greek small letter final sigma, U+03C2 ISOgrk3
460
        unicodeMappings.put ("&sigma;",    "\u03c3"); // greek small letter sigma, U+03C3 ISOgrk3
461
        unicodeMappings.put ("&tau;",      "\u03c4"); // greek small letter tau, U+03C4 ISOgrk3
462
        unicodeMappings.put ("&upsilon;",  "\u03c5"); // greek small letter upsilon, U+03C5 ISOgrk3
463
        unicodeMappings.put ("&phi;",      "\u03c6"); // greek small letter phi, U+03C6 ISOgrk3
464
        unicodeMappings.put ("&chi;",      "\u03c7"); // greek small letter chi, U+03C7 ISOgrk3
465
        unicodeMappings.put ("&psi;",      "\u03c8"); // greek small letter psi, U+03C8 ISOgrk3
466
        unicodeMappings.put ("&omega;",    "\u03c9"); // greek small letter omega, U+03C9 ISOgrk3
467
        unicodeMappings.put ("&thetasym;", "\u03d1"); // greek small letter theta symbol, U+03D1 NEW
468
        unicodeMappings.put ("&upsih;",    "\u03d2"); // greek upsilon with hook symbol, U+03D2 NEW
469
        unicodeMappings.put ("&piv;",      "\u03d6"); // greek pi symbol, U+03D6 ISOgrk3
470
        // General Punctuation
471
        unicodeMappings.put ("&bull;",     "\u2022"); // bullet = black small circle, U+2022 ISOpub
472
        // bullet is NOT the same as bullet operator, U+2219
473
        unicodeMappings.put ("&hellip;",   "\u2026"); // horizontal ellipsis = three dot leader, U+2026 ISOpub
474
        unicodeMappings.put ("&prime;",    "\u2032"); // prime = minutes = feet, U+2032 ISOtech
475
        unicodeMappings.put ("&Prime;",    "\u2033"); // double prime = seconds = inches, U+2033 ISOtech
476
        unicodeMappings.put ("&oline;",    "\u203e"); // overline = spacing overscore, U+203E NEW
477
        unicodeMappings.put ("&frasl;",    "\u2044"); // fraction slash, U+2044 NEW
478
        // Letterlike Symbols
479
        unicodeMappings.put ("&weierp;",   "\u2118"); // script capital P = power set = Weierstrass p, U+2118 ISOamso
480
        unicodeMappings.put ("&image;",    "\u2111"); // blackletter capital I = imaginary part, U+2111 ISOamso
481
        unicodeMappings.put ("&real;",     "\u211c"); // blackletter capital R = real part symbol, U+211C ISOamso
482
        unicodeMappings.put ("&trade;",    "\u2122"); // trade mark sign, U+2122 ISOnum
483
        unicodeMappings.put ("&alefsym;",  "\u2135"); // alef symbol = first transfinite cardinal, U+2135 NEW
484
        // alef symbol is NOT the same as hebrew letter alef,
485
        // U+05D0 although the same glyph could be used to depict both characters
486
        // Arrows
487
        unicodeMappings.put ("&larr;",     "\u2190"); // leftwards arrow, U+2190 ISOnum
488
        unicodeMappings.put ("&uarr;",     "\u2191"); // upwards arrow, U+2191 ISOnum
489
        unicodeMappings.put ("&rarr;",     "\u2192"); // rightwards arrow, U+2192 ISOnum
490
        unicodeMappings.put ("&darr;",     "\u2193"); // downwards arrow, U+2193 ISOnum
491
        unicodeMappings.put ("&harr;",     "\u2194"); // left right arrow, U+2194 ISOamsa
492
        unicodeMappings.put ("&crarr;",    "\u21b5"); // downwards arrow with corner leftwards = carriage return, U+21B5 NEW
493
        unicodeMappings.put ("&lArr;",     "\u21d0"); // leftwards double arrow, U+21D0 ISOtech
494
        // ISO 10646 does not say that lArr is the same as the 'is implied by' arrow
495
        // but also does not have any other character for that function. So ? lArr can
496
        // be used for 'is implied by' as ISOtech suggests
497
        unicodeMappings.put ("&uArr;",     "\u21d1"); // upwards double arrow, U+21D1 ISOamsa
498
        unicodeMappings.put ("&rArr;",     "\u21d2"); // rightwards double arrow, U+21D2 ISOtech
499
        // ISO 10646 does not say this is the 'implies' character but does not have
500
        // another character with this function so ?
501
        // rArr can be used for 'implies' as ISOtech suggests
502
        unicodeMappings.put ("&dArr;",     "\u21d3"); // downwards double arrow, U+21D3 ISOamsa
503
        unicodeMappings.put ("&hArr;",     "\u21d4"); // left right double arrow, U+21D4 ISOamsa
504
        // Mathematical Operators
505
        unicodeMappings.put ("&forall;",   "\u2200"); // for all, U+2200 ISOtech
506
        unicodeMappings.put ("&part;",     "\u2202"); // partial differential, U+2202 ISOtech
507
        unicodeMappings.put ("&exist;",    "\u2203"); // there exists, U+2203 ISOtech
508
        unicodeMappings.put ("&empty;",    "\u2205"); // empty set = null set = diameter, U+2205 ISOamso
509
        unicodeMappings.put ("&nabla;",    "\u2207"); // nabla = backward difference, U+2207 ISOtech
510
        unicodeMappings.put ("&isin;",     "\u2208"); // element of, U+2208 ISOtech
511
        unicodeMappings.put ("&notin;",    "\u2209"); // not an element of, U+2209 ISOtech
512
        unicodeMappings.put ("&ni;",       "\u220b"); // contains as member, U+220B ISOtech
513
        // should there be a more memorable name than 'ni'?
514
        unicodeMappings.put ("&prod;",     "\u220f"); // n-ary product = product sign, U+220F ISOamsb
515
        // prod is NOT the same character as U+03A0 'greek capital letter pi' though
516
        // the same glyph might be used for both
517
        unicodeMappings.put ("&sum;",      "\u2211"); // n-ary summation, U+2211 ISOamsb
518
        // sum is NOT the same character as U+03A3 'greek capital letter sigma'
519
        // though the same glyph might be used for both
520
        unicodeMappings.put ("&minus;",    "\u2212"); // minus sign, U+2212 ISOtech
521
        unicodeMappings.put ("&lowast;",   "\u2217"); // asterisk operator, U+2217 ISOtech
522
        unicodeMappings.put ("&radic;",    "\u221a"); // square root = radical sign, U+221A ISOtech
523
        unicodeMappings.put ("&prop;",     "\u221d"); // proportional to, U+221D ISOtech
524
        unicodeMappings.put ("&infin;",    "\u221e"); // infinity, U+221E ISOtech
525
        unicodeMappings.put ("&ang;",      "\u2220"); // angle, U+2220 ISOamso
526
        unicodeMappings.put ("&and;",      "\u2227"); // logical and = wedge, U+2227 ISOtech
527
        unicodeMappings.put ("&or;",       "\u2228"); // logical or = vee, U+2228 ISOtech
528
        unicodeMappings.put ("&cap;",      "\u2229"); // intersection = cap, U+2229 ISOtech
529
        unicodeMappings.put ("&cup;",      "\u222a"); // union = cup, U+222A ISOtech
530
        unicodeMappings.put ("&int;",      "\u222b"); // integral, U+222B ISOtech
531
        unicodeMappings.put ("&there4;",   "\u2234"); // therefore, U+2234 ISOtech
532
        unicodeMappings.put ("&sim;",      "\u223c"); // tilde operator = varies with = similar to, U+223C ISOtech
533
        // tilde operator is NOT the same character as the tilde, U+007E,
534
        // although the same glyph might be used to represent both
535
        unicodeMappings.put ("&cong;",     "\u2245"); // approximately equal to, U+2245 ISOtech
536
        unicodeMappings.put ("&asymp;",    "\u2248"); // almost equal to = asymptotic to, U+2248 ISOamsr
537
        unicodeMappings.put ("&ne;",       "\u2260"); // not equal to, U+2260 ISOtech
538
        unicodeMappings.put ("&equiv;",    "\u2261"); // identical to, U+2261 ISOtech
539
        unicodeMappings.put ("&le;",       "\u2264"); // less-than or equal to, U+2264 ISOtech
540
        unicodeMappings.put ("&ge;",       "\u2265"); // greater-than or equal to, U+2265 ISOtech
541
        unicodeMappings.put ("&sub;",      "\u2282"); // subset of, U+2282 ISOtech
542
        unicodeMappings.put ("&sup;",      "\u2283"); // superset of, U+2283 ISOtech
543
        // note that nsup, 'not a superset of, U+2285' is not covered by the Symbol
544
        // font encoding and is not included. Should it be, for symmetry?
545
        // It is in ISOamsn
546
        unicodeMappings.put ("&nsub;",     "\u2284"); // not a subset of, U+2284 ISOamsn
547
        unicodeMappings.put ("&sube;",     "\u2286"); // subset of or equal to, U+2286 ISOtech
548
        unicodeMappings.put ("&supe;",     "\u2287"); // superset of or equal to, U+2287 ISOtech
549
        unicodeMappings.put ("&oplus;",    "\u2295"); // circled plus = direct sum, U+2295 ISOamsb
550
        unicodeMappings.put ("&otimes;",   "\u2297"); // circled times = vector product, U+2297 ISOamsb
551
        unicodeMappings.put ("&perp;",     "\u22a5"); // up tack = orthogonal to = perpendicular, U+22A5 ISOtech
552
        unicodeMappings.put ("&sdot;",     "\u22c5"); // dot operator, U+22C5 ISOamsb
553
        // dot operator is NOT the same character as U+00B7 middle dot
554
        // Miscellaneous Technical
555
        unicodeMappings.put ("&lceil;",    "\u2308"); // left ceiling = apl upstile, U+2308 ISOamsc
556
        unicodeMappings.put ("&rceil;",    "\u2309"); // right ceiling, U+2309 ISOamsc
557
        unicodeMappings.put ("&lfloor;",   "\u230a"); // left floor = apl downstile, U+230A ISOamsc
558
        unicodeMappings.put ("&rfloor;",   "\u230b"); // right floor, U+230B ISOamsc
559
        unicodeMappings.put ("&lang;",     "\u2329"); // left-pointing angle bracket = bra, U+2329 ISOtech
560
        // lang is NOT the same character as U+003C 'less than'
561
        // or U+2039 'single left-pointing angle quotation mark'
562
        unicodeMappings.put ("&rang;",     "\u232a"); // right-pointing angle bracket = ket, U+232A ISOtech
563
        // rang is NOT the same character as U+003E 'greater than'
564
        // or U+203A 'single right-pointing angle quotation mark'
565
        // Geometric Shapes
566
        unicodeMappings.put ("&loz;",      "\u25ca"); // lozenge, U+25CA ISOpub
567
        // Miscellaneous Symbols
568
        unicodeMappings.put ("&spades;",   "\u2660"); // black spade suit, U+2660 ISOpub
569
        // black here seems to mean filled as opposed to hollow
570
        unicodeMappings.put ("&clubs;",    "\u2663"); // black club suit = shamrock, U+2663 ISOpub
571
        unicodeMappings.put ("&hearts;",   "\u2665"); // black heart suit = valentine, U+2665 ISOpub
572
        unicodeMappings.put ("&diams;",    "\u2666"); // black diamond suit, U+2666 ISOpub
573
        // Special characters for HTML
574
        // Character entity set. Typical invocation:
575
        // <!ENTITY % HTMLspecial PUBLIC
576
        // "-//W3C//ENTITIES Special//EN//HTML">
577
        // %HTMLspecial;
578
        // Portions © International Organization for Standardization 1986:
579
        // Permission to copy in any form is granted for use with
580
        // conforming SGML systems and applications as defined in
581
        // ISO 8879, provided this notice is included in all copies.
582
        // Relevant ISO entity set is given unless names are newly introduced.
583
        // New names (i.e., not in ISO 8879 list) do not clash with any
584
        // existing ISO 8879 entity names. ISO 10646 character numbers
585
        // are given for each character, in hex. CDATA values are decimal
586
        // conversions of the ISO 10646 values and refer to the document
587
        // character set. Names are ISO 10646 names.
588
        // C0 Controls and Basic Latin
589
590
		// epugh  This is a weird one..  If I list it as \u0022, then it doesn't compile, because the compiler translates the Unicode escape into a double quote before it parses the string literal!
591
        unicodeMappings.put ("&quot;",     "\""); // quotation mark = APL quote, U+0022 ISOnum
592
        unicodeMappings.put ("&amp;",      "\u0026"); // ampersand, U+0026 ISOnum
593
        unicodeMappings.put ("&lt;",       "\u003c"); // less-than sign, U+003C ISOnum
594
        unicodeMappings.put ("&gt;",       "\u003e"); // greater-than sign, U+003E ISOnum
595
        // Latin Extended-A
596
        unicodeMappings.put ("&OElig;",    "\u0152"); // latin capital ligature OE, U+0152 ISOlat2
597
        unicodeMappings.put ("&oelig;",    "\u0153"); // latin small ligature oe, U+0153 ISOlat2
598
        // ligature is a misnomer, this is a separate character in some languages
599
        unicodeMappings.put ("&Scaron;",   "\u0160"); // latin capital letter S with caron, U+0160 ISOlat2
600
        unicodeMappings.put ("&scaron;",   "\u0161"); // latin small letter s with caron, U+0161 ISOlat2
601
        unicodeMappings.put ("&Yuml;",     "\u0178"); // latin capital letter Y with diaeresis, U+0178 ISOlat2
602
        // Spacing Modifier Letters
603
        unicodeMappings.put ("&circ;",     "\u02c6"); // modifier letter circumflex accent, U+02C6 ISOpub
604
        unicodeMappings.put ("&tilde;",    "\u02dc"); // small tilde, U+02DC ISOdia
605
        // General Punctuation
606
        unicodeMappings.put ("&ensp;",     "\u2002"); // en space, U+2002 ISOpub
607
        unicodeMappings.put ("&emsp;",     "\u2003"); // em space, U+2003 ISOpub
608
        unicodeMappings.put ("&thinsp;",   "\u2009"); // thin space, U+2009 ISOpub
609
        unicodeMappings.put ("&zwnj;",     "\u200c"); // zero width non-joiner, U+200C NEW RFC 2070
610
        unicodeMappings.put ("&zwj;",      "\u200d"); // zero width joiner, U+200D NEW RFC 2070
611
        unicodeMappings.put ("&lrm;",      "\u200e"); // left-to-right mark, U+200E NEW RFC 2070
612
        unicodeMappings.put ("&rlm;",      "\u200f"); // right-to-left mark, U+200F NEW RFC 2070
613
        unicodeMappings.put ("&ndash;",    "\u2013"); // en dash, U+2013 ISOpub
614
        unicodeMappings.put ("&mdash;",    "\u2014"); // em dash, U+2014 ISOpub
615
        unicodeMappings.put ("&lsquo;",    "\u2018"); // left single quotation mark, U+2018 ISOnum
616
        unicodeMappings.put ("&rsquo;",    "\u2019"); // right single quotation mark, U+2019 ISOnum
617
        unicodeMappings.put ("&sbquo;",    "\u201a"); // single low-9 quotation mark, U+201A NEW
618
        unicodeMappings.put ("&ldquo;",    "\u201c"); // left double quotation mark, U+201C ISOnum
619
        unicodeMappings.put ("&rdquo;",    "\u201d"); // right double quotation mark, U+201D ISOnum
620
        unicodeMappings.put ("&bdquo;",    "\u201e"); // double low-9 quotation mark, U+201E NEW
621
        unicodeMappings.put ("&dagger;",   "\u2020"); // dagger, U+2020 ISOpub
622
        unicodeMappings.put ("&Dagger;",   "\u2021"); // double dagger, U+2021 ISOpub
623
        unicodeMappings.put ("&permil;",   "\u2030"); // per mille sign, U+2030 ISOtech
624
        unicodeMappings.put ("&lsaquo;",   "\u2039"); // single left-pointing angle quotation mark, U+2039 ISO proposed
625
        // lsaquo is proposed but not yet ISO standardized
626
        unicodeMappings.put ("&rsaquo;",   "\u203a"); // single right-pointing angle quotation mark, U+203A ISO proposed
627
        // rsaquo is proposed but not yet ISO standardized
628
        unicodeMappings.put ("&euro;",     "\u20ac"); // euro sign, U+20AC NEW
298
    }
629
    }
299
630
300
}
631
}

Return to bug 21705