View | Details | Raw Unified | Return to issue 126230
Collapse All | Expand All

(-)a/main/starmath/source/smdetect.cxx (-51 / +59 lines)
Lines 74-80 Link Here
74
#include "document.hxx"
74
#include "document.hxx"
75
#include "eqnolefilehdr.hxx"
75
#include "eqnolefilehdr.hxx"
76
76
77
78
using namespace ::com::sun::star;
77
using namespace ::com::sun::star;
79
using namespace ::com::sun::star::uno;
78
using namespace ::com::sun::star::uno;
80
using namespace ::com::sun::star::io;
79
using namespace ::com::sun::star::io;
Lines 307-315 SmFilterDetect::~SmFilterDetect() Link Here
307
			}
306
			}
308
			else
307
			else
309
			{
308
			{
310
				//Test to see if this begins with xml and if so run it through
309
                // DesignScience Equation Editor MathType 3.0
311
				//the MathML filter. There are all sorts of things wrong with
312
				//this approach, to be fixed at a better level than here
313
				SvStream *pStrm = aMedium.GetInStream();
310
				SvStream *pStrm = aMedium.GetInStream();
314
                aTypeName.Erase();
311
                aTypeName.Erase();
315
				if (pStrm && !pStrm->GetError())
312
				if (pStrm && !pStrm->GetError())
Lines 326-389 SmFilterDetect::~SmFilterDetect() Link Here
326
                    }
323
                    }
327
                    else
324
                    else
328
                    {
325
                    {
329
                        // #124636# detection should not only check for xml, but at least also for
326
                        // MathML. The SAX parser expects the 'math' root element incl.
330
                        // the math start element and the MathML URL. Additionally take their order
327
                        // the namespace URL. Neither '<?xml' prolog nor <!doctype is needed.
331
                        // into account. Also allow the case where the start element has a namespace
328
                        // If the math element has a prefix (e.g. <bla:math), the
332
                        // (e.g. <bla:math), but in that case ensure that it is in front of a possible
329
                        // prefix has to be defined in the namespace attribute
333
                        // xmlns:math namespace declaration and thus not part of that
330
                        // (e.g. xmlns:bla="http://www.w3.org/1998/Math/MathML")
334
                        const sal_uInt16 nReadSize(4095);
331
                        // #124636 is fixed too.
335
                        sal_Char aBuffer[nReadSize+1];
336
                        pStrm->Seek( STREAM_SEEK_TO_BEGIN );
332
                        pStrm->Seek( STREAM_SEEK_TO_BEGIN );
337
                        const sal_uLong nBytesRead(pStrm->Read( aBuffer, nReadSize ));
333
                        const size_t nBufSize=2048;
338
334
                        sal_uInt16 aBuffer[nBufSize]; // will be casted to an Unicode-Array below
339
                        if(nBytesRead > (5 + 1 + 34 + 5)) // xml + '>' + URL + '(<|:)math'
335
                        sal_uInt8* pByte = reinterpret_cast<sal_uInt8*>(aBuffer);
336
                        const sal_uLong nBytesRead(pStrm->Read( pByte, nBufSize * 2 ) );
337
                        const sal_uLong nUnicodeCharsRead (nBytesRead / 2 );
338
339
                        // For backwards searching an OUString is used. The conversion needs an
340
                        // encoding information. Default encoding is UTF-8, UTF-16 is possible
341
                        // (e.g. from MS "Math Input Control"), others are unlikely.
342
                        // Looking for Byte Order Mark
343
                        rtl_TextEncoding eEncoding = RTL_TEXTENCODING_UTF8;
344
                        bool bIsUnicode = false;
345
                        if (nBytesRead >= 2 && (aBuffer[0]==0xfffe || aBuffer[0]==0xfeff) )
340
                        {
346
                        {
341
                            // end string with null
347
                            eEncoding = RTL_TEXTENCODING_UNICODE;
342
                            aBuffer[nBytesRead + 1] = 0;
348
                            bIsUnicode = true;
343
349
                        }
344
                            // is it a xml file? 
345
                            const sal_Char* pXML = strstr(aBuffer, "<?xml");
346
                            bool isMathFile(false);
347
350
348
                            if(pXML)
351
                        bool isMathFile(false);
352
                        if ( nBytesRead > 56) // minimal length of 'math' element incl. namespace URL in UTF-8
353
                        {
354
                            const sal_Char* pChar = reinterpret_cast<sal_Char*>(aBuffer);
355
                            sal_Unicode* pUnicode = (sal_Unicode*) aBuffer;
356
357
                            OUString sFragment( (bIsUnicode)  ?
358
                                     OUString( pUnicode , nUnicodeCharsRead )
359
                                   : OUString( pChar, nBytesRead, eEncoding) );
360
361
                            // look for MathML URL
362
                            OUString sURL( OUString::createFromAscii("http://www.w3.org/1998/Math/MathML"));
363
                            sal_Int32 nPosURL;
364
                            nPosURL = sFragment.indexOf(sURL);
365
                            if (nPosURL >= 0)
349
                            {
366
                            {
350
                                // does it have the MathML URL?
367
                                // The URL has to be value of an xmlns attribute
351
                                const sal_Char* pURL = strstr(aBuffer, "http://www.w3.org/1998/Math/MathML");
368
                                sal_Int32 nPosEQ;
352
369
                                nPosEQ = sFragment.lastIndexOf(OUString::createFromAscii("="),nPosURL);
353
                                // URL has to be after XML start
370
                                if (nPosEQ >= 0)
354
                                if(pURL && pURL > pXML)
355
                                {
371
                                {
356
                                    // look if we have a direct math start element
372
                                    OUString sXMLNS( OUString::createFromAscii("xmlns"));
357
                                    sal_Char* pMathStart = strstr(aBuffer, "<math");
373
                                    sal_Int32 nPosXMLNS;
358
374
                                    nPosXMLNS = sFragment.lastIndexOf(sXMLNS,nPosEQ);
359
                                    if(!pMathStart)
375
                                    if(nPosXMLNS >= 0)
360
                                    {
376
                                    {
361
                                        // if not, look if we have a math start element in another namespace
377
                                        // a prefix might be defined
362
                                        pMathStart = strstr(aBuffer, ":math");
378
                                        OUString sPrefix = (sFragment.copy(nPosXMLNS+5,nPosEQ-(nPosXMLNS+5))).trim();
363
379
                                        // such prefix definition must start with colon (will be removed below)
364
                                        if(pMathStart)
380
                                        bool bHasPrefix( (sPrefix.isEmpty()) ? false : sPrefix.toChar() == sal_Unicode(':') );
381
382
                                        // now look for math element start '<prefix:math' or '<math'
383
                                        OUString sMathStart( (bHasPrefix) ?
384
                                                    OUString::createFromAscii("<") + sPrefix.copy(1,sPrefix.getLength()-1) + OUString::createFromAscii(":math")
385
                                                :   OUString::createFromAscii("<math") );
386
                                        sal_Int32 nPosMath;
387
                                        nPosMath = sFragment.lastIndexOf(sMathStart,nPosXMLNS);
388
                                        if( nPosMath >= 0)
365
                                        {
389
                                        {
366
                                            // if found, this has to be in front of the evtl. also existing namespace
390
                                            isMathFile = true;
367
                                            // declaration also containing :math to be the start element
368
                                            sal_Char* pNamespaceMath = strstr(aBuffer, "xmlns:math");
369
370
                                            if(pNamespaceMath && pMathStart > pNamespaceMath)
371
                                            {
372
                                                // invalid :math found (probably part of the namespace declaration)
373
                                                // -> this cannot be the math start element
374
                                                pMathStart = 0;
375
                                            }
376
                                        }
391
                                        }
377
                                    }
392
                                    }
378
379
                                    // MathStart has to be before the URL
380
                                    if(pMathStart && pMathStart < pURL)
381
                                    {
382
                                        isMathFile = true;
383
                                    }
384
                                }
393
                                }
385
                            }
394
                            }
386
387
                            if(isMathFile)
395
                            if(isMathFile)
388
                            {
396
                            {
389
                                static const sal_Char sFltrNm_2[] = MATHML_XML;
397
                                static const sal_Char sFltrNm_2[] = MATHML_XML;

Return to issue 126230