--- a/main/starmath/source/smdetect.cxx +++ a/main/starmath/source/smdetect.cxx @@ -74,7 +74,6 @@ #include "document.hxx" #include "eqnolefilehdr.hxx" - using namespace ::com::sun::star; using namespace ::com::sun::star::uno; using namespace ::com::sun::star::io; @@ -307,9 +306,7 @@ SmFilterDetect::~SmFilterDetect() } else { - //Test to see if this begins with xml and if so run it through - //the MathML filter. There are all sorts of things wrong with - //this approach, to be fixed at a better level than here + // DesignScience Equation Editor MathType 3.0 ? SvStream *pStrm = aMedium.GetInStream(); aTypeName.Erase(); if (pStrm && !pStrm->GetError()) @@ -326,64 +323,97 @@ SmFilterDetect::~SmFilterDetect() } else { - // #124636# detection should not only check for xml, but at least also for - // the math start element and the MathML URL. Additionally take their order - // into account. Also allow the case where the start element has a namespace - // (e.g. Seek( STREAM_SEEK_TO_BEGIN ); - const sal_uLong nBytesRead(pStrm->Read( aBuffer, nReadSize )); - - if(nBytesRead > (5 + 1 + 34 + 5)) // xml + '>' + URL + '(<|:)math' + const size_t nBufSize=2048; + sal_uInt16 aBuffer[nBufSize]; // will be casted to an Unicode-Array below + sal_uInt8* pByte = reinterpret_cast(aBuffer); + const sal_uLong nBytesRead(pStrm->Read( pByte, nBufSize * 2 ) ); + const sal_uLong nUnicodeCharsRead (nBytesRead / 2 ); + + // For backwards searching an OUString is used. The conversion needs an + // encoding information. Default encoding is UTF-8, UTF-16 is possible + // (e.g. from MS "Math Input Control"), others are unlikely. + // Looking for Byte Order Mark + rtl_TextEncoding aEncoding = RTL_TEXTENCODING_UTF8; + bool bIsUnicode = false; + if (nBytesRead >= 2 && (aBuffer[0]==0xfffe || aBuffer[0]==0xfeff) ) { - // end string with null - aBuffer[nBytesRead + 1] = 0; - - // is it a xml file? - const sal_Char* pXML = strstr(aBuffer, ">8) | (aBuffer[i]<<8) ; + } + } + } - if(pXML) + bool isMathFile(false); + if ( nBytesRead > 56) // minimal + { + const sal_Char* pChar = reinterpret_cast(aBuffer); + sal_Unicode* pUnicode = (sal_Unicode*) aBuffer; + + const OUString sFragment( (bIsUnicode) + ? OUString( pUnicode , nUnicodeCharsRead ) + : OUString( pChar, nBytesRead, aEncoding) ); + const sal_Int32 nFragmentLength(sFragment.getLength()); + + // look for MathML URL http://www.w3.org/1998/Math/MathML + // #i53509 A MathML URL can be value of a namespace attribute, but can be as well + // inside a doctype e.g. [] + // or inside a schema reference e.g. s:schemaLocation="http://www.w3.org/1998/Math/MathML" + // Use a loop to get the correct one. + const OUString sURL( OUString::createFromAscii("http://www.w3.org/1998/Math/MathML")); + const sal_Int32 nURLLength = sURL.getLength(); + const OUString sEQ( OUString::createFromAscii("=") ); + const OUString sXMLNS( OUString::createFromAscii("xmlns") ); + sal_Int32 nPosURL = -1; // for index of first character of URL + sal_Int32 nPosURLSearchStart = 0; + sal_Int32 nPosEQ = -1; // for index of equal sign + sal_Int32 nPosXMLNS = -1; // for index of first character of string "xmlns" + do { - // does it have the MathML URL? - const sal_Char* pURL = strstr(aBuffer, "http://www.w3.org/1998/Math/MathML"); - - // URL has to be after XML start - if(pURL && pURL > pXML) + nPosURL = sFragment.indexOf(sURL,nPosURLSearchStart); + if( nPosURL < 0 ) + { + break; // no MathML URL, cannot be parsed + } + // need 'xmlns:prefix =' or 'xmlns =', look backwards, first for equal sign + nPosEQ = sFragment.lastIndexOf(sEQ,nPosURL); + if (nPosEQ >= 0 && nPosEQ >= nPosURLSearchStart) { - // look if we have a direct math start element - sal_Char* pMathStart = strstr(aBuffer, " pNamespaceMath) - { - // invalid :math found (probably part of the namespace declaration) - // -> this cannot be the math start element - pMathStart = 0; - } + nPosXMLNS = sFragment.lastIndexOf(sXMLNS,nPosEQ); + if( nPosXMLNS >= nPosURLSearchStart ) + { // an xmlns attribute is found, but it might belong to a schema + // get prefix if present + const OUString sPrefix = (sFragment.copy(nPosXMLNS+5,nPosEQ-(nPosXMLNS+5))).trim(); + // such prefix definition must start with colon (will be removed below) + bool bHasPrefix( (sPrefix.isEmpty()) ? false : sPrefix.toChar() == sal_Unicode(':') ); + // the math element starts either with '= 0) + { // xmlns attribute belongs to math element + isMathFile = true; + break; } } - - // MathStart has to be before the URL - if(pMathStart && pMathStart < pURL) - { - isMathFile = true; - } } + // MathML URL was wrong one, look for next + nPosURLSearchStart = nPosURL + nURLLength; } - + while ( nPosURLSearchStart + nURLLength <= nFragmentLength); + if(isMathFile) { static const sal_Char sFltrNm_2[] = MATHML_XML;