View | Details | Raw Unified | Return to issue 126230
Collapse All | Expand All

(-)a/main/starmath/source/smdetect.cxx (-53 / +83 lines)
Lines 74-80 Link Here
74
#include "document.hxx"
74
#include "document.hxx"
75
#include "eqnolefilehdr.hxx"
75
#include "eqnolefilehdr.hxx"
76
76
77
78
using namespace ::com::sun::star;
77
using namespace ::com::sun::star;
79
using namespace ::com::sun::star::uno;
78
using namespace ::com::sun::star::uno;
80
using namespace ::com::sun::star::io;
79
using namespace ::com::sun::star::io;
Lines 307-315 SmFilterDetect::~SmFilterDetect() Link Here
307
			}
306
			}
308
			else
307
			else
309
			{
308
			{
310
				//Test to see if this begins with xml and if so run it through
309
                // DesignScience Equation Editor MathType 3.0 ?
311
				//the MathML filter. There are all sorts of things wrong with
312
				//this approach, to be fixed at a better level than here
313
				SvStream *pStrm = aMedium.GetInStream();
310
				SvStream *pStrm = aMedium.GetInStream();
314
                aTypeName.Erase();
311
                aTypeName.Erase();
315
				if (pStrm && !pStrm->GetError())
312
				if (pStrm && !pStrm->GetError())
Lines 326-389 SmFilterDetect::~SmFilterDetect() Link Here
326
                    }
323
                    }
327
                    else
324
                    else
328
                    {
325
                    {
329
                        // #124636# detection should not only check for xml, but at least also for
326
                        // MathML? The SAX parser expects the 'math' root element incl.
330
                        // the math start element and the MathML URL. Additionally take their order
327
                        // the namespace URL. Neither '<?xml' prolog nor <!doctype is needed.
331
                        // into account. Also allow the case where the start element has a namespace
328
                        // If the math element has a prefix (e.g. <bla:math), the
332
                        // (e.g. <bla:math), but in that case ensure that it is in front of a possible
329
                        // prefix has to be defined in the namespace attribute
333
                        // xmlns:math namespace declaration and thus not part of that
330
                        // (e.g. xmlns:bla="http://www.w3.org/1998/Math/MathML")
334
                        const sal_uInt16 nReadSize(4095);
331
                        // #124636 is fixed too.
335
                        sal_Char aBuffer[nReadSize+1];
336
                        pStrm->Seek( STREAM_SEEK_TO_BEGIN );
332
                        pStrm->Seek( STREAM_SEEK_TO_BEGIN );
337
                        const sal_uLong nBytesRead(pStrm->Read( aBuffer, nReadSize ));
333
                        const size_t nBufSize=2048;
338
334
                        sal_uInt16 aBuffer[nBufSize]; // will be casted to an Unicode-Array below
339
                        if(nBytesRead > (5 + 1 + 34 + 5)) // xml + '>' + URL + '(<|:)math'
335
                        sal_uInt8* pByte = reinterpret_cast<sal_uInt8*>(aBuffer);
336
                        const sal_uLong nBytesRead(pStrm->Read( pByte, nBufSize * 2 ) );
337
                        const sal_uLong nUnicodeCharsRead (nBytesRead / 2 );
338
339
                        // For backwards searching an OUString is used. The conversion needs an
340
                        // encoding information. Default encoding is UTF-8, UTF-16 is possible
341
                        // (e.g. from MS "Math Input Control"), others are unlikely.
342
                        // Looking for Byte Order Mark
343
                        rtl_TextEncoding aEncoding = RTL_TEXTENCODING_UTF8;
344
                        bool bIsUnicode = false;
345
                        if (nBytesRead >= 2 && (aBuffer[0]==0xfffe || aBuffer[0]==0xfeff) )
340
                        {
346
                        {
341
                            // end string with null
347
                            aEncoding = RTL_TEXTENCODING_UNICODE;
342
                            aBuffer[nBytesRead + 1] = 0;
348
                            bIsUnicode = true;
343
349
                            if ( aBuffer[0] == 0xfffe)
344
                            // is it an XML file?
350
                            { //swap bytes to make Big Endian
345
                            const sal_Char* pXML = strstr(aBuffer, "<?xml");
351
                              for (size_t i=0; i < nUnicodeCharsRead; ++i)
346
                            bool isMathFile(false);
352
                              {
353
                                  aBuffer[i] = (aBuffer[i]>>8) | (aBuffer[i]<<8) ;
354
                              }
355
                            }
356
                        }
347
357
348
                            if(pXML)
358
                        bool isMathFile(false);
359
                        if ( nBytesRead > 56) // minimal <math xmlns="http://www.w3.org/1998/Math/MathML"></math>
360
                        {
361
                            const sal_Char* pChar = reinterpret_cast<sal_Char*>(aBuffer);
362
                            sal_Unicode* pUnicode = (sal_Unicode*) aBuffer;
363
364
                            const OUString sFragment( (bIsUnicode)
365
                                   ? OUString( pUnicode , nUnicodeCharsRead )
366
                                   : OUString( pChar, nBytesRead, aEncoding) );
367
                            const sal_Int32 nFragmentLength(sFragment.getLength());
368
                            
369
                            // look for MathML URL http://www.w3.org/1998/Math/MathML
370
                            // #i53509 A MathML URL can be value of a namespace attribute, but can be as well
371
                            // inside a doctype e.g. [<!ENTITY mathml 'http://www.w3.org/1998/Math/MathML'>]
372
                            // or inside a schema reference e.g. s:schemaLocation="http://www.w3.org/1998/Math/MathML" 
373
                            // Use a loop to get the correct one.
374
                            const OUString sURL( OUString::createFromAscii("http://www.w3.org/1998/Math/MathML"));
375
                            const sal_Int32 nURLLength = sURL.getLength();
376
                            const OUString sEQ( OUString::createFromAscii("=") );
377
                            const OUString sXMLNS( OUString::createFromAscii("xmlns") );
378
                            sal_Int32 nPosURL = -1; // for index of first character of URL
379
                            sal_Int32 nPosURLSearchStart = 0;
380
                            sal_Int32 nPosEQ = -1; // for index of equal sign
381
                            sal_Int32 nPosXMLNS = -1; // for index of first character of string "xmlns"
382
                            do
349
                            {
383
                            {
350
                                // does it have the MathML URL?
384
                                nPosURL = sFragment.indexOf(sURL,nPosURLSearchStart);
351
                                const sal_Char* pURL = strstr(aBuffer, "http://www.w3.org/1998/Math/MathML");
385
                                if( nPosURL < 0 )
352
386
                                {
353
                                // URL has to be after XML start
387
                                    break; // no MathML URL, cannot be parsed
354
                                if(pURL && pURL > pXML)
388
                                }
389
                                // need 'xmlns:prefix =' or 'xmlns =', look backwards, first for equal sign
390
                                nPosEQ = sFragment.lastIndexOf(sEQ,nPosURL);
391
                                if (nPosEQ >= 0 && nPosEQ >= nPosURLSearchStart)
355
                                {
392
                                {
356
                                    // look if we have a direct math start element
393
                                    nPosXMLNS = sFragment.lastIndexOf(sXMLNS,nPosEQ);
357
                                    sal_Char* pMathStart = strstr(aBuffer, "<math");
394
                                    if( nPosXMLNS >= nPosURLSearchStart )
358
395
                                    { // an xmlns attribute is found, but it might belong to a schema
359
                                    if(!pMathStart)
396
                                        // get prefix if present
360
                                    {
397
                                        const OUString sPrefix = (sFragment.copy(nPosXMLNS+5,nPosEQ-(nPosXMLNS+5))).trim();
361
                                        // if not, look if we have a math start element in another namespace
398
                                        // such prefix definition must start with colon (will be removed below)
362
                                        pMathStart = strstr(aBuffer, ":math");
399
                                        bool bHasPrefix( (sPrefix.isEmpty()) ? false : sPrefix.toChar() == sal_Unicode(':') );
363
400
                                        // the math element starts either with '<prefix:math' or '<math'
364
                                        if(pMathStart)
401
                                        const OUString sMathStart( (bHasPrefix)
365
                                        {
402
                                                ?   OUString::createFromAscii("<") + sPrefix.copy(1,sPrefix.getLength()-1) + OUString::createFromAscii(":math")
366
                                            // if found, this has to be in front of the possibly also existing namespace
403
                                                :   OUString::createFromAscii("<math") );
367
                                            // declaration also containing :math to be the start element
404
                                        sal_Int32 nPosMath (sFragment.lastIndexOf(sMathStart,nPosXMLNS));
368
                                            sal_Char* pNamespaceMath = strstr(aBuffer, "xmlns:math");
405
                                        if( nPosMath >= 0)
369
406
                                        {   // xmlns attribute belongs to math element
370
                                            if(pNamespaceMath && pMathStart > pNamespaceMath)
407
                                            isMathFile = true;
371
                                            {
408
                                            break;
372
                                                // invalid :math found (probably part of the namespace declaration)
373
                                                // -> this cannot be the math start element
374
                                                pMathStart = 0;
375
                                            }
376
                                        }
409
                                        }
377
                                    }
410
                                    }
378
379
                                    // MathStart has to be before the URL
380
                                    if(pMathStart && pMathStart < pURL)
381
                                    {
382
                                        isMathFile = true;
383
                                    }
384
                                }
411
                                }
412
                                // MathML URL was wrong one, look for next
413
                                nPosURLSearchStart = nPosURL + nURLLength;
385
                            }
414
                            }
386
415
                            while ( nPosURLSearchStart + nURLLength <= nFragmentLength);
416
                            
387
                            if(isMathFile)
417
                            if(isMathFile)
388
                            {
418
                            {
389
                                static const sal_Char sFltrNm_2[] = MATHML_XML;
419
                                static const sal_Char sFltrNm_2[] = MATHML_XML;

Return to issue 126230