Lines 78-84
Link Here
|
78 |
public class HSSFCellUtil |
78 |
public class HSSFCellUtil |
79 |
{ |
79 |
{ |
80 |
|
80 |
|
81 |
private static HashMap unicodeMappings = new HashMap(); |
81 |
private static Map unicodeMappings = new HashMap(); |
|
|
82 |
|
83 |
/** |
84 |
* Returns the map of HTML escape sequences to their Unicode equivalents. |
85 |
* Currently protected as only the test cases need access. |
86 |
* |
87 |
*@return The map where the key is the HTML escape and the value is the Unicode character. |
88 |
*/ |
89 |
protected static Map getUnicodeMappings(){ |
90 |
return unicodeMappings; |
91 |
} |
82 |
|
92 |
|
83 |
|
93 |
|
84 |
/** |
94 |
/** |
Lines 249-255
Link Here
|
249 |
|
259 |
|
250 |
/** |
260 |
/** |
251 |
* Looks for text in the cell that should be unicode, like &alpha; and provides the |
261 |
* Looks for text in the cell that should be unicode, like &alpha; and provides the |
252 |
* unicode version of it. |
262 |
* unicode version of it. Case matters: for instance, the HTML escapes |


263 |
* &larr; and &lArr; both exist and are different. Not sure if we want to deal |
264 |
* with those or not... |
253 |
* |
265 |
* |
254 |
*@param cell The cell to check for unicode values |
266 |
*@param cell The cell to check for unicode values |
255 |
*@return translated to unicode |
267 |
*@return translated to unicode |
Lines 264-270
Link Here
|
264 |
{ |
276 |
{ |
265 |
Map.Entry entry = (Map.Entry) i.next(); |
277 |
Map.Entry entry = (Map.Entry) i.next(); |
266 |
String key = (String) entry.getKey(); |
278 |
String key = (String) entry.getKey(); |
267 |
if ( s.toLowerCase().indexOf( key ) != -1 ) |
279 |
if ( s.indexOf( key ) != -1 ) |
268 |
{ |
280 |
{ |
269 |
s = StringUtils.replace( s, key, "" + entry.getValue().toString() + "" ); |
281 |
s = StringUtils.replace( s, key, "" + entry.getValue().toString() + "" ); |
270 |
foundUnicode = true; |
282 |
foundUnicode = true; |
Lines 278-300
Link Here
|
278 |
return cell; |
290 |
return cell; |
279 |
} |
291 |
} |
280 |
|
292 |
|
281 |
|
293 |
|
282 |
static { |
294 |
static { |
283 |
unicodeMappings.put( "α", "\u03B1" ); |
295 |
// Portions © International Organization for Standardization 1986 |
284 |
unicodeMappings.put( "β", "\u03B2" ); |
296 |
// Permission to copy in any form is granted for use with |
285 |
unicodeMappings.put( "γ", "\u03B3" ); |
297 |
// conforming SGML systems and applications as defined in |
286 |
unicodeMappings.put( "δ", "\u03B4" ); |
298 |
// ISO 8879, provided this notice is included in all copies. |
287 |
unicodeMappings.put( "ε", "\u03B5" ); |
299 |
// Character entity set. Typical invocation: |
288 |
unicodeMappings.put( "ζ", "\u03B6" ); |
300 |
// <!ENTITY % HTMLlat1 PUBLIC |
289 |
unicodeMappings.put( "η", "\u03B7" ); |
301 |
// "-//W3C//ENTITIES Latin 1//EN//HTML"> |
290 |
unicodeMappings.put( "θ", "\u03B8" ); |
302 |
// %HTMLlat1; |
291 |
unicodeMappings.put( "ι", "\u03B9" ); |
303 |
unicodeMappings.put (" ", "\u00a0"); // no-break space = non-breaking space, U+00A0 ISOnum |
292 |
unicodeMappings.put( "κ", "\u03BA" ); |
304 |
unicodeMappings.put ("¡", "\u00a1"); // inverted exclamation mark, U+00A1 ISOnum |
293 |
unicodeMappings.put( "λ", "\u03BB" ); |
305 |
unicodeMappings.put ("¢", "\u00a2"); // cent sign, U+00A2 ISOnum |
294 |
unicodeMappings.put( "μ", "\u03BC" ); |
306 |
unicodeMappings.put ("£", "\u00a3"); // pound sign, U+00A3 ISOnum |
295 |
unicodeMappings.put( "ν", "\u03BD" ); |
307 |
unicodeMappings.put ("¤", "\u00a4"); // currency sign, U+00A4 ISOnum |
296 |
unicodeMappings.put( "ξ", "\u03BE" ); |
308 |
unicodeMappings.put ("¥", "\u00a5"); // yen sign = yuan sign, U+00A5 ISOnum |
297 |
unicodeMappings.put( "ο", "\u03BF" ); |
309 |
unicodeMappings.put ("¦", "\u00a6"); // broken bar = broken vertical bar, U+00A6 ISOnum |
|
|
310 |
unicodeMappings.put ("§", "\u00a7"); // section sign, U+00A7 ISOnum |
311 |
unicodeMappings.put ("¨", "\u00a8"); // diaeresis = spacing diaeresis, U+00A8 ISOdia |
312 |
unicodeMappings.put ("©", "\u00a9"); // copyright sign, U+00A9 ISOnum |
313 |
unicodeMappings.put ("ª", "\u00aa"); // feminine ordinal indicator, U+00AA ISOnum |
314 |
unicodeMappings.put ("«", "\u00ab"); // left-pointing double angle quotation mark = left pointing guillemet, U+00AB ISOnum |
315 |
unicodeMappings.put ("¬", "\u00ac"); // not sign, U+00AC ISOnum |
316 |
unicodeMappings.put ("­", "\u00ad"); // soft hyphen = discretionary hyphen, U+00AD ISOnum |
317 |
unicodeMappings.put ("®", "\u00ae"); // registered sign = registered trade mark sign, U+00AE ISOnum |
318 |
unicodeMappings.put ("¯", "\u00af"); // macron = spacing macron = overline = APL overbar, U+00AF ISOdia |
319 |
unicodeMappings.put ("°", "\u00b0"); // degree sign, U+00B0 ISOnum |
320 |
unicodeMappings.put ("±", "\u00b1"); // plus-minus sign = plus-or-minus sign, U+00B1 ISOnum |
321 |
unicodeMappings.put ("²", "\u00b2"); // superscript two = superscript digit two = squared, U+00B2 ISOnum |
322 |
unicodeMappings.put ("³", "\u00b3"); // superscript three = superscript digit three = cubed, U+00B3 ISOnum |
323 |
unicodeMappings.put ("´", "\u00b4"); // acute accent = spacing acute, U+00B4 ISOdia |
324 |
unicodeMappings.put ("µ", "\u00b5"); // micro sign, U+00B5 ISOnum |
325 |
unicodeMappings.put ("¶", "\u00b6"); // pilcrow sign = paragraph sign, U+00B6 ISOnum |
326 |
unicodeMappings.put ("·", "\u00b7"); // middle dot = Georgian comma = Greek middle dot, U+00B7 ISOnum |
327 |
unicodeMappings.put ("¸", "\u00b8"); // cedilla = spacing cedilla, U+00B8 ISOdia |
328 |
unicodeMappings.put ("¹", "\u00b9"); // superscript one = superscript digit one, U+00B9 ISOnum |
329 |
unicodeMappings.put ("º", "\u00ba"); // masculine ordinal indicator, U+00BA ISOnum |
330 |
unicodeMappings.put ("»", "\u00bb"); // right-pointing double angle quotation mark = right pointing guillemet, U+00BB ISOnum |
331 |
unicodeMappings.put ("¼", "\u00bc"); // vulgar fraction one quarter = fraction one quarter, U+00BC ISOnum |
332 |
unicodeMappings.put ("½", "\u00bd"); // vulgar fraction one half = fraction one half, U+00BD ISOnum |
333 |
unicodeMappings.put ("¾", "\u00be"); // vulgar fraction three quarters = fraction three quarters, U+00BE ISOnum |
334 |
unicodeMappings.put ("¿", "\u00bf"); // inverted question mark = turned question mark, U+00BF ISOnum |
335 |
unicodeMappings.put ("À", "\u00c0"); // latin capital letter A with grave = latin capital letter A grave, U+00C0 ISOlat1 |
336 |
unicodeMappings.put ("Á", "\u00c1"); // latin capital letter A with acute, U+00C1 ISOlat1 |
337 |
unicodeMappings.put ("Â", "\u00c2"); // latin capital letter A with circumflex, U+00C2 ISOlat1 |
338 |
unicodeMappings.put ("Ã", "\u00c3"); // latin capital letter A with tilde, U+00C3 ISOlat1 |
339 |
unicodeMappings.put ("Ä", "\u00c4"); // latin capital letter A with diaeresis, U+00C4 ISOlat1 |
340 |
unicodeMappings.put ("Å", "\u00c5"); // latin capital letter A with ring above = latin capital letter A ring, U+00C5 ISOlat1 |
341 |
unicodeMappings.put ("Æ", "\u00c6"); // latin capital letter AE = latin capital ligature AE, U+00C6 ISOlat1 |
342 |
unicodeMappings.put ("Ç", "\u00c7"); // latin capital letter C with cedilla, U+00C7 ISOlat1 |
343 |
unicodeMappings.put ("È", "\u00c8"); // latin capital letter E with grave, U+00C8 ISOlat1 |
344 |
unicodeMappings.put ("É", "\u00c9"); // latin capital letter E with acute, U+00C9 ISOlat1 |
345 |
unicodeMappings.put ("Ê", "\u00ca"); // latin capital letter E with circumflex, U+00CA ISOlat1 |
346 |
unicodeMappings.put ("Ë", "\u00cb"); // latin capital letter E with diaeresis, U+00CB ISOlat1 |
347 |
unicodeMappings.put ("Ì", "\u00cc"); // latin capital letter I with grave, U+00CC ISOlat1 |
348 |
unicodeMappings.put ("Í", "\u00cd"); // latin capital letter I with acute, U+00CD ISOlat1 |
349 |
unicodeMappings.put ("Î", "\u00ce"); // latin capital letter I with circumflex, U+00CE ISOlat1 |
350 |
unicodeMappings.put ("Ï", "\u00cf"); // latin capital letter I with diaeresis, U+00CF ISOlat1 |
351 |
unicodeMappings.put ("Ð", "\u00d0"); // latin capital letter ETH, U+00D0 ISOlat1 |
352 |
unicodeMappings.put ("Ñ", "\u00d1"); // latin capital letter N with tilde, U+00D1 ISOlat1 |
353 |
unicodeMappings.put ("Ò", "\u00d2"); // latin capital letter O with grave, U+00D2 ISOlat1 |
354 |
unicodeMappings.put ("Ó", "\u00d3"); // latin capital letter O with acute, U+00D3 ISOlat1 |
355 |
unicodeMappings.put ("Ô", "\u00d4"); // latin capital letter O with circumflex, U+00D4 ISOlat1 |
356 |
unicodeMappings.put ("Õ", "\u00d5"); // latin capital letter O with tilde, U+00D5 ISOlat1 |
357 |
unicodeMappings.put ("Ö", "\u00d6"); // latin capital letter O with diaeresis, U+00D6 ISOlat1 |
358 |
unicodeMappings.put ("×", "\u00d7"); // multiplication sign, U+00D7 ISOnum |
359 |
unicodeMappings.put ("Ø", "\u00d8"); // latin capital letter O with stroke = latin capital letter O slash, U+00D8 ISOlat1 |
360 |
unicodeMappings.put ("Ù", "\u00d9"); // latin capital letter U with grave, U+00D9 ISOlat1 |
361 |
unicodeMappings.put ("Ú", "\u00da"); // latin capital letter U with acute, U+00DA ISOlat1 |
362 |
unicodeMappings.put ("Û", "\u00db"); // latin capital letter U with circumflex, U+00DB ISOlat1 |
363 |
unicodeMappings.put ("Ü", "\u00dc"); // latin capital letter U with diaeresis, U+00DC ISOlat1 |
364 |
unicodeMappings.put ("Ý", "\u00dd"); // latin capital letter Y with acute, U+00DD ISOlat1 |
365 |
unicodeMappings.put ("Þ", "\u00de"); // latin capital letter THORN, U+00DE ISOlat1 |
366 |
unicodeMappings.put ("ß", "\u00df"); // latin small letter sharp s = ess-zed, U+00DF ISOlat1 |
367 |
unicodeMappings.put ("à", "\u00e0"); // latin small letter a with grave = latin small letter a grave, U+00E0 ISOlat1 |
368 |
unicodeMappings.put ("á", "\u00e1"); // latin small letter a with acute, U+00E1 ISOlat1 |
369 |
unicodeMappings.put ("â", "\u00e2"); // latin small letter a with circumflex, U+00E2 ISOlat1 |
370 |
unicodeMappings.put ("ã", "\u00e3"); // latin small letter a with tilde, U+00E3 ISOlat1 |
371 |
unicodeMappings.put ("ä", "\u00e4"); // latin small letter a with diaeresis, U+00E4 ISOlat1 |
372 |
unicodeMappings.put ("å", "\u00e5"); // latin small letter a with ring above = latin small letter a ring, U+00E5 ISOlat1 |
373 |
unicodeMappings.put ("æ", "\u00e6"); // latin small letter ae = latin small ligature ae, U+00E6 ISOlat1 |
374 |
unicodeMappings.put ("ç", "\u00e7"); // latin small letter c with cedilla, U+00E7 ISOlat1 |
375 |
unicodeMappings.put ("è", "\u00e8"); // latin small letter e with grave, U+00E8 ISOlat1 |
376 |
unicodeMappings.put ("é", "\u00e9"); // latin small letter e with acute, U+00E9 ISOlat1 |
377 |
unicodeMappings.put ("ê", "\u00ea"); // latin small letter e with circumflex, U+00EA ISOlat1 |
378 |
unicodeMappings.put ("ë", "\u00eb"); // latin small letter e with diaeresis, U+00EB ISOlat1 |
379 |
unicodeMappings.put ("ì", "\u00ec"); // latin small letter i with grave, U+00EC ISOlat1 |
380 |
unicodeMappings.put ("í", "\u00ed"); // latin small letter i with acute, U+00ED ISOlat1 |
381 |
unicodeMappings.put ("î", "\u00ee"); // latin small letter i with circumflex, U+00EE ISOlat1 |
382 |
unicodeMappings.put ("ï", "\u00ef"); // latin small letter i with diaeresis, U+00EF ISOlat1 |
383 |
unicodeMappings.put ("ð", "\u00f0"); // latin small letter eth, U+00F0 ISOlat1 |
384 |
unicodeMappings.put ("ñ", "\u00f1"); // latin small letter n with tilde, U+00F1 ISOlat1 |
385 |
unicodeMappings.put ("ò", "\u00f2"); // latin small letter o with grave, U+00F2 ISOlat1 |
386 |
unicodeMappings.put ("ó", "\u00f3"); // latin small letter o with acute, U+00F3 ISOlat1 |
387 |
unicodeMappings.put ("ô", "\u00f4"); // latin small letter o with circumflex, U+00F4 ISOlat1 |
388 |
unicodeMappings.put ("õ", "\u00f5"); // latin small letter o with tilde, U+00F5 ISOlat1 |
389 |
unicodeMappings.put ("ö", "\u00f6"); // latin small letter o with diaeresis, U+00F6 ISOlat1 |
390 |
unicodeMappings.put ("÷", "\u00f7"); // division sign, U+00F7 ISOnum |
391 |
unicodeMappings.put ("ø", "\u00f8"); // latin small letter o with stroke, = latin small letter o slash, U+00F8 ISOlat1 |
392 |
unicodeMappings.put ("ù", "\u00f9"); // latin small letter u with grave, U+00F9 ISOlat1 |
393 |
unicodeMappings.put ("ú", "\u00fa"); // latin small letter u with acute, U+00FA ISOlat1 |
394 |
unicodeMappings.put ("û", "\u00fb"); // latin small letter u with circumflex, U+00FB ISOlat1 |
395 |
unicodeMappings.put ("ü", "\u00fc"); // latin small letter u with diaeresis, U+00FC ISOlat1 |
396 |
unicodeMappings.put ("ý", "\u00fd"); // latin small letter y with acute, U+00FD ISOlat1 |
397 |
unicodeMappings.put ("þ", "\u00fe"); // latin small letter thorn, U+00FE ISOlat1 |
398 |
unicodeMappings.put ("ÿ", "\u00ff"); // latin small letter y with diaeresis, U+00FF ISOlat1 |
399 |
// Mathematical, Greek and Symbolic characters for HTML |
400 |
// Character entity set. Typical invocation: |
401 |
// <!ENTITY % HTMLsymbol PUBLIC |
402 |
// "-//W3C//ENTITIES Symbols//EN//HTML"> |
403 |
// %HTMLsymbol; |
404 |
// Portions © International Organization for Standardization 1986: |
405 |
// Permission to copy in any form is granted for use with |
406 |
// conforming SGML systems and applications as defined in |
407 |
// ISO 8879, provided this notice is included in all copies. |
408 |
// Relevant ISO entity set is given unless names are newly introduced. |
409 |
// New names (i.e., not in ISO 8879 list) do not clash with any |
410 |
// existing ISO 8879 entity names. ISO 10646 character numbers |
411 |
// are given for each character, in hex. CDATA values are decimal |
412 |
// conversions of the ISO 10646 values and refer to the document |
413 |
// character set. Names are ISO 10646 names. |
414 |
// Latin Extended-B |
415 |
unicodeMappings.put ("ƒ", "\u0192"); // latin small f with hook = function = florin, U+0192 ISOtech |
416 |
// Greek |
417 |
unicodeMappings.put ("Α", "\u0391"); // greek capital letter alpha, U+0391 |
418 |
unicodeMappings.put ("Β", "\u0392"); // greek capital letter beta, U+0392 |
419 |
unicodeMappings.put ("Γ", "\u0393"); // greek capital letter gamma, U+0393 ISOgrk3 |
420 |
unicodeMappings.put ("Δ", "\u0394"); // greek capital letter delta, U+0394 ISOgrk3 |
421 |
unicodeMappings.put ("Ε", "\u0395"); // greek capital letter epsilon, U+0395 |
422 |
unicodeMappings.put ("Ζ", "\u0396"); // greek capital letter zeta, U+0396 |
423 |
unicodeMappings.put ("Η", "\u0397"); // greek capital letter eta, U+0397 |
424 |
unicodeMappings.put ("Θ", "\u0398"); // greek capital letter theta, U+0398 ISOgrk3 |
425 |
unicodeMappings.put ("Ι", "\u0399"); // greek capital letter iota, U+0399 |
426 |
unicodeMappings.put ("Κ", "\u039a"); // greek capital letter kappa, U+039A |
427 |
unicodeMappings.put ("Λ", "\u039b"); // greek capital letter lambda, U+039B ISOgrk3 |
428 |
unicodeMappings.put ("Μ", "\u039c"); // greek capital letter mu, U+039C |
429 |
unicodeMappings.put ("Ν", "\u039d"); // greek capital letter nu, U+039D |
430 |
unicodeMappings.put ("Ξ", "\u039e"); // greek capital letter xi, U+039E ISOgrk3 |
431 |
unicodeMappings.put ("Ο", "\u039f"); // greek capital letter omicron, U+039F |
432 |
unicodeMappings.put ("Π", "\u03a0"); // greek capital letter pi, U+03A0 ISOgrk3 |
433 |
unicodeMappings.put ("Ρ", "\u03a1"); // greek capital letter rho, U+03A1 |
434 |
// there is no Sigmaf, and no U+03A2 character either |
435 |
unicodeMappings.put ("Σ", "\u03a3"); // greek capital letter sigma, U+03A3 ISOgrk3 |
436 |
unicodeMappings.put ("Τ", "\u03a4"); // greek capital letter tau, U+03A4 |
437 |
unicodeMappings.put ("Υ", "\u03a5"); // greek capital letter upsilon, U+03A5 ISOgrk3 |
438 |
unicodeMappings.put ("Φ", "\u03a6"); // greek capital letter phi, U+03A6 ISOgrk3 |
439 |
unicodeMappings.put ("Χ", "\u03a7"); // greek capital letter chi, U+03A7 |
440 |
unicodeMappings.put ("Ψ", "\u03a8"); // greek capital letter psi, U+03A8 ISOgrk3 |
441 |
unicodeMappings.put ("Ω", "\u03a9"); // greek capital letter omega, U+03A9 ISOgrk3 |
442 |
unicodeMappings.put ("α", "\u03b1"); // greek small letter alpha, U+03B1 ISOgrk3 |
443 |
unicodeMappings.put ("β", "\u03b2"); // greek small letter beta, U+03B2 ISOgrk3 |
444 |
unicodeMappings.put ("γ", "\u03b3"); // greek small letter gamma, U+03B3 ISOgrk3 |
445 |
unicodeMappings.put ("δ", "\u03b4"); // greek small letter delta, U+03B4 ISOgrk3 |
446 |
unicodeMappings.put ("ε", "\u03b5"); // greek small letter epsilon, U+03B5 ISOgrk3 |
447 |
unicodeMappings.put ("ζ", "\u03b6"); // greek small letter zeta, U+03B6 ISOgrk3 |
448 |
unicodeMappings.put ("η", "\u03b7"); // greek small letter eta, U+03B7 ISOgrk3 |
449 |
unicodeMappings.put ("θ", "\u03b8"); // greek small letter theta, U+03B8 ISOgrk3 |
450 |
unicodeMappings.put ("ι", "\u03b9"); // greek small letter iota, U+03B9 ISOgrk3 |
451 |
unicodeMappings.put ("κ", "\u03ba"); // greek small letter kappa, U+03BA ISOgrk3 |
452 |
unicodeMappings.put ("λ", "\u03bb"); // greek small letter lambda, U+03BB ISOgrk3 |
453 |
unicodeMappings.put ("μ", "\u03bc"); // greek small letter mu, U+03BC ISOgrk3 |
454 |
unicodeMappings.put ("ν", "\u03bd"); // greek small letter nu, U+03BD ISOgrk3 |
455 |
unicodeMappings.put ("ξ", "\u03be"); // greek small letter xi, U+03BE ISOgrk3 |
456 |
unicodeMappings.put ("ο", "\u03bf"); // greek small letter omicron, U+03BF NEW |
457 |
unicodeMappings.put ("π", "\u03c0"); // greek small letter pi, U+03C0 ISOgrk3 |
458 |
unicodeMappings.put ("ρ", "\u03c1"); // greek small letter rho, U+03C1 ISOgrk3 |
459 |
unicodeMappings.put ("ς", "\u03c2"); // greek small letter final sigma, U+03C2 ISOgrk3 |
460 |
unicodeMappings.put ("σ", "\u03c3"); // greek small letter sigma, U+03C3 ISOgrk3 |
461 |
unicodeMappings.put ("τ", "\u03c4"); // greek small letter tau, U+03C4 ISOgrk3 |
462 |
unicodeMappings.put ("υ", "\u03c5"); // greek small letter upsilon, U+03C5 ISOgrk3 |
463 |
unicodeMappings.put ("φ", "\u03c6"); // greek small letter phi, U+03C6 ISOgrk3 |
464 |
unicodeMappings.put ("χ", "\u03c7"); // greek small letter chi, U+03C7 ISOgrk3 |
465 |
unicodeMappings.put ("ψ", "\u03c8"); // greek small letter psi, U+03C8 ISOgrk3 |
466 |
unicodeMappings.put ("ω", "\u03c9"); // greek small letter omega, U+03C9 ISOgrk3 |
467 |
unicodeMappings.put ("ϑ", "\u03d1"); // greek small letter theta symbol, U+03D1 NEW |
468 |
unicodeMappings.put ("ϒ", "\u03d2"); // greek upsilon with hook symbol, U+03D2 NEW |
469 |
unicodeMappings.put ("ϖ", "\u03d6"); // greek pi symbol, U+03D6 ISOgrk3 |
470 |
// General Punctuation |
471 |
unicodeMappings.put ("•", "\u2022"); // bullet = black small circle, U+2022 ISOpub |
472 |
// bullet is NOT the same as bullet operator, U+2219 |
473 |
unicodeMappings.put ("…", "\u2026"); // horizontal ellipsis = three dot leader, U+2026 ISOpub |
474 |
unicodeMappings.put ("′", "\u2032"); // prime = minutes = feet, U+2032 ISOtech |
475 |
unicodeMappings.put ("″", "\u2033"); // double prime = seconds = inches, U+2033 ISOtech |
476 |
unicodeMappings.put ("‾", "\u203e"); // overline = spacing overscore, U+203E NEW |
477 |
unicodeMappings.put ("⁄", "\u2044"); // fraction slash, U+2044 NEW |
478 |
// Letterlike Symbols |
479 |
unicodeMappings.put ("℘", "\u2118"); // script capital P = power set = Weierstrass p, U+2118 ISOamso |
480 |
unicodeMappings.put ("ℑ", "\u2111"); // blackletter capital I = imaginary part, U+2111 ISOamso |
481 |
unicodeMappings.put ("ℜ", "\u211c"); // blackletter capital R = real part symbol, U+211C ISOamso |
482 |
unicodeMappings.put ("™", "\u2122"); // trade mark sign, U+2122 ISOnum |
483 |
unicodeMappings.put ("ℵ", "\u2135"); // alef symbol = first transfinite cardinal, U+2135 NEW |
484 |
// alef symbol is NOT the same as hebrew letter alef, |
485 |
// U+05D0 although the same glyph could be used to depict both characters |
486 |
// Arrows |
487 |
unicodeMappings.put ("←", "\u2190"); // leftwards arrow, U+2190 ISOnum |
488 |
unicodeMappings.put ("↑", "\u2191"); // upwards arrow, U+2191 ISOnum |
489 |
unicodeMappings.put ("→", "\u2192"); // rightwards arrow, U+2192 ISOnum |
490 |
unicodeMappings.put ("↓", "\u2193"); // downwards arrow, U+2193 ISOnum |
491 |
unicodeMappings.put ("↔", "\u2194"); // left right arrow, U+2194 ISOamsa |
492 |
unicodeMappings.put ("↵", "\u21b5"); // downwards arrow with corner leftwards = carriage return, U+21B5 NEW |
493 |
unicodeMappings.put ("⇐", "\u21d0"); // leftwards double arrow, U+21D0 ISOtech |
494 |
// ISO 10646 does not say that lArr is the same as the 'is implied by' arrow |
495 |
// but also does not have any other character for that function. So ? lArr can |
496 |
// be used for 'is implied by' as ISOtech suggests |
497 |
unicodeMappings.put ("⇑", "\u21d1"); // upwards double arrow, U+21D1 ISOamsa |
498 |
unicodeMappings.put ("⇒", "\u21d2"); // rightwards double arrow, U+21D2 ISOtech |
499 |
// ISO 10646 does not say this is the 'implies' character but does not have |
500 |
// another character with this function so ? |
501 |
// rArr can be used for 'implies' as ISOtech suggests |
502 |
unicodeMappings.put ("⇓", "\u21d3"); // downwards double arrow, U+21D3 ISOamsa |
503 |
unicodeMappings.put ("⇔", "\u21d4"); // left right double arrow, U+21D4 ISOamsa |
504 |
// Mathematical Operators |
505 |
unicodeMappings.put ("∀", "\u2200"); // for all, U+2200 ISOtech |
506 |
unicodeMappings.put ("∂", "\u2202"); // partial differential, U+2202 ISOtech |
507 |
unicodeMappings.put ("∃", "\u2203"); // there exists, U+2203 ISOtech |
508 |
unicodeMappings.put ("∅", "\u2205"); // empty set = null set = diameter, U+2205 ISOamso |
509 |
unicodeMappings.put ("∇", "\u2207"); // nabla = backward difference, U+2207 ISOtech |
510 |
unicodeMappings.put ("∈", "\u2208"); // element of, U+2208 ISOtech |
511 |
unicodeMappings.put ("∉", "\u2209"); // not an element of, U+2209 ISOtech |
512 |
unicodeMappings.put ("∋", "\u220b"); // contains as member, U+220B ISOtech |
513 |
// should there be a more memorable name than 'ni'? |
514 |
unicodeMappings.put ("∏", "\u220f"); // n-ary product = product sign, U+220F ISOamsb |
515 |
// prod is NOT the same character as U+03A0 'greek capital letter pi' though |
516 |
// the same glyph might be used for both |
517 |
unicodeMappings.put ("∑", "\u2211"); // n-ary sumation, U+2211 ISOamsb |
518 |
// sum is NOT the same character as U+03A3 'greek capital letter sigma' |
519 |
// though the same glyph might be used for both |
520 |
unicodeMappings.put ("−", "\u2212"); // minus sign, U+2212 ISOtech |
521 |
unicodeMappings.put ("∗", "\u2217"); // asterisk operator, U+2217 ISOtech |
522 |
unicodeMappings.put ("√", "\u221a"); // square root = radical sign, U+221A ISOtech |
523 |
unicodeMappings.put ("∝", "\u221d"); // proportional to, U+221D ISOtech |
524 |
unicodeMappings.put ("∞", "\u221e"); // infinity, U+221E ISOtech |
525 |
unicodeMappings.put ("∠", "\u2220"); // angle, U+2220 ISOamso |
526 |
unicodeMappings.put ("∧", "\u2227"); // logical and = wedge, U+2227 ISOtech |
527 |
unicodeMappings.put ("∨", "\u2228"); // logical or = vee, U+2228 ISOtech |
528 |
unicodeMappings.put ("∩", "\u2229"); // intersection = cap, U+2229 ISOtech |
529 |
unicodeMappings.put ("∪", "\u222a"); // union = cup, U+222A ISOtech |
530 |
unicodeMappings.put ("∫", "\u222b"); // integral, U+222B ISOtech |
531 |
unicodeMappings.put ("∴", "\u2234"); // therefore, U+2234 ISOtech |
532 |
unicodeMappings.put ("∼", "\u223c"); // tilde operator = varies with = similar to, U+223C ISOtech |
533 |
// tilde operator is NOT the same character as the tilde, U+007E, |
534 |
// although the same glyph might be used to represent both |
535 |
unicodeMappings.put ("≅", "\u2245"); // approximately equal to, U+2245 ISOtech |
536 |
unicodeMappings.put ("≈", "\u2248"); // almost equal to = asymptotic to, U+2248 ISOamsr |
537 |
unicodeMappings.put ("≠", "\u2260"); // not equal to, U+2260 ISOtech |
538 |
unicodeMappings.put ("≡", "\u2261"); // identical to, U+2261 ISOtech |
539 |
unicodeMappings.put ("≤", "\u2264"); // less-than or equal to, U+2264 ISOtech |
540 |
unicodeMappings.put ("≥", "\u2265"); // greater-than or equal to, U+2265 ISOtech |
541 |
unicodeMappings.put ("⊂", "\u2282"); // subset of, U+2282 ISOtech |
542 |
unicodeMappings.put ("⊃", "\u2283"); // superset of, U+2283 ISOtech |
543 |
// note that nsup, 'not a superset of, U+2285' is not covered by the Symbol |
544 |
// font encoding and is not included. Should it be, for symmetry? |
545 |
// It is in ISOamsn |
546 |
unicodeMappings.put ("⊄", "\u2284"); // not a subset of, U+2284 ISOamsn |
547 |
unicodeMappings.put ("⊆", "\u2286"); // subset of or equal to, U+2286 ISOtech |
548 |
unicodeMappings.put ("⊇", "\u2287"); // superset of or equal to, U+2287 ISOtech |
549 |
unicodeMappings.put ("⊕", "\u2295"); // circled plus = direct sum, U+2295 ISOamsb |
550 |
unicodeMappings.put ("⊗", "\u2297"); // circled times = vector product, U+2297 ISOamsb |
551 |
unicodeMappings.put ("⊥", "\u22a5"); // up tack = orthogonal to = perpendicular, U+22A5 ISOtech |
552 |
unicodeMappings.put ("⋅", "\u22c5"); // dot operator, U+22C5 ISOamsb |
553 |
// dot operator is NOT the same character as U+00B7 middle dot |
554 |
// Miscellaneous Technical |
555 |
unicodeMappings.put ("⌈", "\u2308"); // left ceiling = apl upstile, U+2308 ISOamsc |
556 |
unicodeMappings.put ("⌉", "\u2309"); // right ceiling, U+2309 ISOamsc |
557 |
unicodeMappings.put ("⌊", "\u230a"); // left floor = apl downstile, U+230A ISOamsc |
558 |
unicodeMappings.put ("⌋", "\u230b"); // right floor, U+230B ISOamsc |
559 |
unicodeMappings.put ("⟨", "\u2329"); // left-pointing angle bracket = bra, U+2329 ISOtech |
560 |
// lang is NOT the same character as U+003C 'less than' |
561 |
// or U+2039 'single left-pointing angle quotation mark' |
562 |
unicodeMappings.put ("⟩", "\u232a"); // right-pointing angle bracket = ket, U+232A ISOtech |
563 |
// rang is NOT the same character as U+003E 'greater than' |
564 |
// or U+203A 'single right-pointing angle quotation mark' |
565 |
// Geometric Shapes |
566 |
unicodeMappings.put ("◊", "\u25ca"); // lozenge, U+25CA ISOpub |
567 |
// Miscellaneous Symbols |
568 |
unicodeMappings.put ("♠", "\u2660"); // black spade suit, U+2660 ISOpub |
569 |
// black here seems to mean filled as opposed to hollow |
570 |
unicodeMappings.put ("♣", "\u2663"); // black club suit = shamrock, U+2663 ISOpub |
571 |
unicodeMappings.put ("♥", "\u2665"); // black heart suit = valentine, U+2665 ISOpub |
572 |
unicodeMappings.put ("♦", "\u2666"); // black diamond suit, U+2666 ISOpub |
573 |
// Special characters for HTML |
574 |
// Character entity set. Typical invocation: |
575 |
// <!ENTITY % HTMLspecial PUBLIC |
576 |
// "-//W3C//ENTITIES Special//EN//HTML"> |
577 |
// %HTMLspecial; |
578 |
// Portions © International Organization for Standardization 1986: |
579 |
// Permission to copy in any form is granted for use with |
580 |
// conforming SGML systems and applications as defined in |
581 |
// ISO 8879, provided this notice is included in all copies. |
582 |
// Relevant ISO entity set is given unless names are newly introduced. |
583 |
// New names (i.e., not in ISO 8879 list) do not clash with any |
584 |
// existing ISO 8879 entity names. ISO 10646 character numbers |
585 |
// are given for each character, in hex. CDATA values are decimal |
586 |
// conversions of the ISO 10646 values and refer to the document |
587 |
// character set. Names are ISO 10646 names. |
588 |
// C0 Controls and Basic Latin |
589 |
|
590 |
// epugh: This is a weird one. If I list it as \u0022, then it doesn't compile, because I think it is read as a double quote! |
591 |
unicodeMappings.put (""", "\""); // quotation mark = APL quote, U+0022 ISOnum |
592 |
unicodeMappings.put ("&", "\u0026"); // ampersand, U+0026 ISOnum |
593 |
unicodeMappings.put ("<", "\u003c"); // less-than sign, U+003C ISOnum |
594 |
unicodeMappings.put (">", "\u003e"); // greater-than sign, U+003E ISOnum |
595 |
// Latin Extended-A |
596 |
unicodeMappings.put ("Œ", "\u0152"); // latin capital ligature OE, U+0152 ISOlat2 |
597 |
unicodeMappings.put ("œ", "\u0153"); // latin small ligature oe, U+0153 ISOlat2 |
598 |
// ligature is a misnomer, this is a separate character in some languages |
599 |
unicodeMappings.put ("Š", "\u0160"); // latin capital letter S with caron, U+0160 ISOlat2 |
600 |
unicodeMappings.put ("š", "\u0161"); // latin small letter s with caron, U+0161 ISOlat2 |
601 |
unicodeMappings.put ("Ÿ", "\u0178"); // latin capital letter Y with diaeresis, U+0178 ISOlat2 |
602 |
// Spacing Modifier Letters |
603 |
unicodeMappings.put ("ˆ", "\u02c6"); // modifier letter circumflex accent, U+02C6 ISOpub |
604 |
unicodeMappings.put ("˜", "\u02dc"); // small tilde, U+02DC ISOdia |
605 |
// General Punctuation |
606 |
unicodeMappings.put (" ", "\u2002"); // en space, U+2002 ISOpub |
607 |
unicodeMappings.put (" ", "\u2003"); // em space, U+2003 ISOpub |
608 |
unicodeMappings.put (" ", "\u2009"); // thin space, U+2009 ISOpub |
609 |
unicodeMappings.put ("‌", "\u200c"); // zero width non-joiner, U+200C NEW RFC 2070 |
610 |
unicodeMappings.put ("‍", "\u200d"); // zero width joiner, U+200D NEW RFC 2070 |
611 |
unicodeMappings.put ("‎", "\u200e"); // left-to-right mark, U+200E NEW RFC 2070 |
612 |
unicodeMappings.put ("‏", "\u200f"); // right-to-left mark, U+200F NEW RFC 2070 |
613 |
unicodeMappings.put ("–", "\u2013"); // en dash, U+2013 ISOpub |
614 |
unicodeMappings.put ("—", "\u2014"); // em dash, U+2014 ISOpub |
615 |
unicodeMappings.put ("‘", "\u2018"); // left single quotation mark, U+2018 ISOnum |
616 |
unicodeMappings.put ("’", "\u2019"); // right single quotation mark, U+2019 ISOnum |
617 |
unicodeMappings.put ("‚", "\u201a"); // single low-9 quotation mark, U+201A NEW |
618 |
unicodeMappings.put ("“", "\u201c"); // left double quotation mark, U+201C ISOnum |
619 |
unicodeMappings.put ("”", "\u201d"); // right double quotation mark, U+201D ISOnum |
620 |
unicodeMappings.put ("„", "\u201e"); // double low-9 quotation mark, U+201E NEW |
621 |
unicodeMappings.put ("†", "\u2020"); // dagger, U+2020 ISOpub |
622 |
unicodeMappings.put ("‡", "\u2021"); // double dagger, U+2021 ISOpub |
623 |
unicodeMappings.put ("‰", "\u2030"); // per mille sign, U+2030 ISOtech |
624 |
unicodeMappings.put ("‹", "\u2039"); // single left-pointing angle quotation mark, U+2039 ISO proposed |
625 |
// lsaquo is proposed but not yet ISO standardized |
626 |
unicodeMappings.put ("›", "\u203a"); // single right-pointing angle quotation mark, U+203A ISO proposed |
627 |
// rsaquo is proposed but not yet ISO standardized |
628 |
unicodeMappings.put ("€", "\u20ac"); // euro sign, U+20AC NEW |
298 |
} |
629 |
} |
299 |
|
630 |
|
300 |
} |
631 |
} |