Closed Bug 184001 Opened 22 years ago Closed 22 years ago

saving to a file produces invalid xhtml (inserts DTD in a weird spot)

Categories

(Core :: XML, defect, P1)

defect

Tracking

()

RESOLVED FIXED
mozilla1.3beta

People

(Reporter: brant, Assigned: hjtoi-bugzilla)

References

()

Details

(Keywords: testcase, Whiteboard: document.internalSubset wrong)

Attachments

(2 files)

User-Agent:       Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.3a) Gecko/20021205
Build Identifier: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.3a) Gecko/20021205

If I save to a file, Mozilla's xhtml11.dtd is appended to the beginning, but the
comment strings are missing.  This makes the page invalid.

Reproducible: Always

Steps to Reproduce:
1. Open about:mozilla
2. Click File > Save.
3. Save it wherever you want.
Actual Results:  
The following is appended:

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd" * The contents of this file are
subject to the Mozilla Public
  * License Version 1.1 (the "License"); you may not use this file
  * except in compliance with the License. You may obtain a copy of
  * the License at http://www.mozilla.org/MPL/
  * 
  * Software distributed under the License is distributed on an "AS
  * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
  * implied. See the License for the specific language governing
  * rights and limitations under the License.
  * 
  * The Original Code is mozilla.org code.
  * 
  * The Initial Developer of the Original Code is Netscape
  * Communications Corporation.  Portions created by Netscape are 
  * Copyright (C) 2000 Netscape Communications Corporation.  All
  * Rights Reserved.
  * 
  * Contributor(s):
   


  * Predefined HTML entities to be loaded when parsing XHTML documents.
  * The contents match mozilla/htmlparser/src/nsHTMLEntityList.h,
  * except that Navigator entity extensions are not included.
 

 ISO 8859-1 entities 
<!ENTITY nbsp "&#160;">
<!ENTITY iexcl "&#161;">
<!ENTITY cent "&#162;">
<!ENTITY pound "&#163;">
<!ENTITY curren "&#164;">
<!ENTITY yen "&#165;">
<!ENTITY brvbar "&#166;">
<!ENTITY sect "&#167;">
<!ENTITY uml "&#168;">
<!ENTITY copy "&#169;">
<!ENTITY ordf "&#170;">
<!ENTITY laquo "&#171;">
<!ENTITY not "&#172;">
<!ENTITY shy "&#173;">
<!ENTITY reg "&#174;">
<!ENTITY macr "&#175;">
<!ENTITY deg "&#176;">
<!ENTITY plusmn "&#177;">
<!ENTITY sup2 "&#178;">
<!ENTITY sup3 "&#179;">
<!ENTITY acute "&#180;">
<!ENTITY micro "&#181;">
<!ENTITY para "&#182;">
<!ENTITY middot "&#183;">
<!ENTITY cedil "&#184;">
<!ENTITY sup1 "&#185;">
<!ENTITY ordm "&#186;">
<!ENTITY raquo "&#187;">
<!ENTITY frac14 "&#188;">
<!ENTITY frac12 "&#189;">
<!ENTITY frac34 "&#190;">
<!ENTITY iquest "&#191;">
<!ENTITY Agrave "&#192;">
<!ENTITY Aacute "&#193;">
<!ENTITY Acirc "&#194;">
<!ENTITY Atilde "&#195;">
<!ENTITY Auml "&#196;">
<!ENTITY Aring "&#197;">
<!ENTITY AElig "&#198;">
<!ENTITY Ccedil "&#199;">
<!ENTITY Egrave "&#200;">
<!ENTITY Eacute "&#201;">
<!ENTITY Ecirc "&#202;">
<!ENTITY Euml "&#203;">
<!ENTITY Igrave "&#204;">
<!ENTITY Iacute "&#205;">
<!ENTITY Icirc "&#206;">
<!ENTITY Iuml "&#207;">
<!ENTITY ETH "&#208;">
<!ENTITY Ntilde "&#209;">
<!ENTITY Ograve "&#210;">
<!ENTITY Oacute "&#211;">
<!ENTITY Ocirc "&#212;">
<!ENTITY Otilde "&#213;">
<!ENTITY Ouml "&#214;">
<!ENTITY times "&#215;">
<!ENTITY Oslash "&#216;">
<!ENTITY Ugrave "&#217;">
<!ENTITY Uacute "&#218;">
<!ENTITY Ucirc "&#219;">
<!ENTITY Uuml "&#220;">
<!ENTITY Yacute "&#221;">
<!ENTITY THORN "&#222;">
<!ENTITY szlig "&#223;">
<!ENTITY agrave "&#224;">
<!ENTITY aacute "&#225;">
<!ENTITY acirc "&#226;">
<!ENTITY atilde "&#227;">
<!ENTITY auml "&#228;">
<!ENTITY aring "&#229;">
<!ENTITY aelig "&#230;">
<!ENTITY ccedil "&#231;">
<!ENTITY egrave "&#232;">
<!ENTITY eacute "&#233;">
<!ENTITY ecirc "&#234;">
<!ENTITY euml "&#235;">
<!ENTITY igrave "&#236;">
<!ENTITY iacute "&#237;">
<!ENTITY icirc "&#238;">
<!ENTITY iuml "&#239;">
<!ENTITY eth "&#240;">
<!ENTITY ntilde "&#241;">
<!ENTITY ograve "&#242;">
<!ENTITY oacute "&#243;">
<!ENTITY ocirc "&#244;">
<!ENTITY otilde "&#245;">
<!ENTITY ouml "&#246;">
<!ENTITY divide "&#247;">
<!ENTITY oslash "&#248;">
<!ENTITY ugrave "&#249;">
<!ENTITY uacute "&#250;">
<!ENTITY ucirc "&#251;">
<!ENTITY uuml "&#252;">
<!ENTITY yacute "&#253;">
<!ENTITY thorn "&#254;">
<!ENTITY yuml "&#255;">

 Mathematical symbols and Greek letters 
<!ENTITY fnof "&#402;">
<!ENTITY Alpha "&#913;">
<!ENTITY Beta "&#914;">
<!ENTITY Gamma "&#915;">
<!ENTITY Delta "&#916;">
<!ENTITY Epsilon "&#917;">
<!ENTITY Zeta "&#918;">
<!ENTITY Eta "&#919;">
<!ENTITY Theta "&#920;">
<!ENTITY Iota "&#921;">
<!ENTITY Kappa "&#922;">
<!ENTITY Lambda "&#923;">
<!ENTITY Mu "&#924;">
<!ENTITY Nu "&#925;">
<!ENTITY Xi "&#926;">
<!ENTITY Omicron "&#927;">
<!ENTITY Pi "&#928;">
<!ENTITY Rho "&#929;">
<!ENTITY Sigma "&#931;">
<!ENTITY Tau "&#932;">
<!ENTITY Upsilon "&#933;">
<!ENTITY Phi "&#934;">
<!ENTITY Chi "&#935;">
<!ENTITY Psi "&#936;">
<!ENTITY Omega "&#937;">
<!ENTITY alpha "&#x03B1;">
<!ENTITY beta "&#946;">
<!ENTITY gamma "&#947;">
<!ENTITY delta "&#948;">
<!ENTITY epsilon "&#949;">
<!ENTITY zeta "&#950;">
<!ENTITY eta "&#951;">
<!ENTITY theta "&#952;">
<!ENTITY iota "&#953;">
<!ENTITY kappa "&#954;">
<!ENTITY lambda "&#955;">
<!ENTITY mu "&#956;">
<!ENTITY nu "&#957;">
<!ENTITY xi "&#958;">
<!ENTITY omicron "&#959;">
<!ENTITY pi "&#960;">
<!ENTITY rho "&#961;">
<!ENTITY sigmaf "&#962;">
<!ENTITY sigma "&#963;">
<!ENTITY tau "&#964;">
<!ENTITY upsilon "&#965;">
<!ENTITY phi "&#966;">
<!ENTITY chi "&#967;">
<!ENTITY psi "&#968;">
<!ENTITY omega "&#969;">
<!ENTITY thetasym "&#977;">
<!ENTITY upsih "&#978;">
<!ENTITY piv "&#982;">
<!ENTITY bull "&#8226;">
<!ENTITY hellip "&#8230;">
<!ENTITY prime "&#8242;">
<!ENTITY Prime "&#8243;">
<!ENTITY oline "&#8254;">
<!ENTITY frasl "&#8260;">
<!ENTITY weierp "&#8472;">
<!ENTITY image "&#8465;">
<!ENTITY real "&#8476;">
<!ENTITY trade "&#8482;">
<!ENTITY alefsym "&#8501;">
<!ENTITY larr "&#8592;">
<!ENTITY uarr "&#8593;">
<!ENTITY rarr "&#8594;">
<!ENTITY darr "&#8595;">
<!ENTITY harr "&#8596;">
<!ENTITY crarr "&#8629;">
<!ENTITY lArr "&#8656;">
<!ENTITY uArr "&#8657;">
<!ENTITY rArr "&#8658;">
<!ENTITY dArr "&#8659;">
<!ENTITY hArr "&#8660;">
<!ENTITY forall "&#8704;">
<!ENTITY part "&#8706;">
<!ENTITY exist "&#8707;">
<!ENTITY empty "&#8709;">
<!ENTITY nabla "&#8711;">
<!ENTITY isin "&#8712;">
<!ENTITY notin "&#8713;">
<!ENTITY ni "&#8715;">
<!ENTITY prod "&#8719;">
<!ENTITY sum "&#8721;">
<!ENTITY minus "&#8722;">
<!ENTITY lowast "&#8727;">
<!ENTITY radic "&#8730;">
<!ENTITY prop "&#8733;">
<!ENTITY infin "&#8734;">
<!ENTITY ang "&#8736;">
<!ENTITY and "&#8743;">
<!ENTITY or "&#8744;">
<!ENTITY cap "&#8745;">
<!ENTITY cup "&#8746;">
<!ENTITY int "&#8747;">
<!ENTITY there4 "&#8756;">
<!ENTITY sim "&#8764;">
<!ENTITY cong "&#8773;">
<!ENTITY asymp "&#8776;">
<!ENTITY ne "&#8800;">
<!ENTITY equiv "&#8801;">
<!ENTITY le "&#8804;">
<!ENTITY ge "&#8805;">
<!ENTITY sub "&#8834;">
<!ENTITY sup "&#8835;">
<!ENTITY nsub "&#8836;">
<!ENTITY sube "&#8838;">
<!ENTITY supe "&#8839;">
<!ENTITY oplus "&#8853;">
<!ENTITY otimes "&#8855;">
<!ENTITY perp "&#8869;">
<!ENTITY sdot "&#8901;">
<!ENTITY lceil "&#8968;">
<!ENTITY rceil "&#8969;">
<!ENTITY lfloor "&#8970;">
<!ENTITY rfloor "&#8971;">
<!ENTITY lang "&#9001;">
<!ENTITY rang "&#9002;">
<!ENTITY loz "&#9674;">
<!ENTITY spades "&#9824;">
<!ENTITY clubs "&#9827;">
<!ENTITY hearts "&#9829;">
<!ENTITY diams "&#9830;">

 Markup-significant and internationalization characters 
<!ENTITY quot "&#34;">
<!ENTITY amp "&#38;">
<!ENTITY lt "&#60;">
<!ENTITY gt "&#62;">
<!ENTITY OElig "&#338;">
<!ENTITY oelig "&#339;">
<!ENTITY Scaron "&#352;">
<!ENTITY scaron "&#353;">
<!ENTITY Yuml "&#376;">
<!ENTITY circ "&#710;">
<!ENTITY tilde "&#732;">
<!ENTITY ensp "&#8194;">
<!ENTITY emsp "&#8195;">
<!ENTITY thinsp "&#8201;">
<!ENTITY zwnj "&#8204;">
<!ENTITY zwj "&#8205;">
<!ENTITY lrm "&#8206;">
<!ENTITY rlm "&#8207;">
<!ENTITY ndash "&#8211;">
<!ENTITY mdash "&#8212;">
<!ENTITY lsquo "&#8216;">
<!ENTITY rsquo "&#8217;">
<!ENTITY sbquo "&#8218;">
<!ENTITY ldquo "&#8220;">
<!ENTITY rdquo "&#8221;">
<!ENTITY bdquo "&#8222;">
<!ENTITY dagger "&#8224;">
<!ENTITY Dagger "&#8225;">
<!ENTITY permil "&#8240;">
<!ENTITY lsaquo "&#8249;">
<!ENTITY rsaquo "&#8250;">
<!ENTITY euro "&#8364;">>

Expected Results:  
It should append the file correctly.

It looks like Mozilla is trying to include the DTD in place of the SYSTEM
reference.  However, it is being inserted in the wrong place.  Also, since there
is already a reference to the SYSTEM DTD, isn't the inclusion of the
Mozilla-specific variant somewhat superfluous.

Someone tells me that this may already be filed, but I could not find it.

Thinking this might be a global problem with application/xhtml+xml XHTML 1.1
files, I tried saving a file of that type and the problem did not occur. 
Perhaps it is limited to about: URLs.
to XML.  Happens on Linux too.
Assignee: dougt → heikki
Component: Networking → XML
OS: Windows XP → All
QA Contact: benc → rakeshmishra
Hardware: PC → All
Summary: saving to a file produces invalid xhtml → saving to a file produces invalid xhtml (inserts DTD in a weird spot)
Uh, that looks really weird & bad. I'll try to get to this ASAP.
Status: NEW → ASSIGNED
Priority: -- → P1
Target Milestone: --- → mozilla1.3beta
Additionally, you must save it as Web Page - Complete.  The HTML Only option
doesn't have a problem.  Additionally this happens with any XHTML file.  Use
<http://www.cherokeescouting.org/OtherUnits/Troop545IIN/brant.xhtml> for an
example.  I feel that this is severe enought to be a blocker.  If a user can't
save a file completely and be able to open it, they will be dissatisfied.  Also,
Web developers will have yet another reason to not use XHTML 1.1.
2003011508
Flags: blocking1.3b?
Keywords: clean-report
This is pretty badly broken. Heikki, can you get onto this for 1.3beta? That's
Tuesday or Wednesday...
Flags: blocking1.3b? → blocking1.3b+
I will soon be uploading some tests to try some diagnoses:
184001-1.xhtml:                XHTML Extension
184001-1-saved.xhtml:          Broken

184001-2.xml:                  XML Extension
184001-2-saved.xml:            Broken
Conclusion: Extension and therefore MIME type don't make a difference.

184001-3.xhtml:                No SYSTEM Doctype location provided
184001-3.xhtml:                Broken
Conclusion: None.  Mozilla apparently needs the SYSTEM Doctype (Or it might be
required by spec, not sure).

184001-4.xhtml:                No XML Prolog
184001-4-saved.xhtml:          Broken
Conclusion: XML Prolog is not necessary for reproduction.

184001-4-saved-nodownloadmanager-noprogress.xhtml: Broken
Conclusion: Download method makes no difference.

Hopefully these tests are helpful.  Build used for testing was 2003012508 WinXP SP1.

My guess as to what is happening:
Mozilla is not a validating XML parser so it does not read external DTDs. 
Because XHTML defines entities, Mozilla has its own DTD containing those
entities.  Because other browsers may not be validating parsers either, Mozilla
tries to include its DTD when the page is saved.  However, it does not put the
square brackets around the whole mess as the XML spec requires for internal subsets.

Other ideas I haven't tried due to time:
standalone="yes" in prolog since this should tell the parser that it doesn't
need external resources to render correctly and shouldn't need to include the DTD.

Other locations for external DTD.

Send page by e-mail.
Keywords: testcase
Attached file bug 184001 testcases
The real problem is that document.internalSubset somehow gets the external
DTD(s) appended to it. That it happens to do it in a way that makes it invalid
is just an added insult. But since it is in the DOM the serializer will
naturally serialize it - there is no bug in the serializer.

You can test this by loading the about page, and then typing in the URLbar:

javascript:alert(document.doctype.internalSubset);

URL: about:
Whiteboard: document.internalSubset wrong
Attached patch Proposed fixSplinter Review
This patch fixes several issues:
- this bug
- makes one function name self explaining
- check return value from one function that could fail
- remove unneeded variable
- make things more efficient by using smaller doctype buffer
- make some lines shorter to fit 80 col displays
Cathleen, could you run startup & open new window performance tests with this patch?
I should also mention that there is one decision that I needed to make
concerning the internal subset. Given an internal subset like this:

<!ENTITY % regionDTD SYSTEM "chrome://global-region/locale/region.dtd" >
%brandDTD;

what will end up in DOM is this:

<!ENTITY % regionDTD SYSTEM "chrome://global-region/locale/region.dtd" >

i.e. %brandDTD; disappeared - there is only a newline in its place.

We debated with harishd as to what to do here. The DOM spec leaves this wide
open for the implementors (basically anything goes). The reason we chose this
approach was that we automatically expand all entities, so if brandDTD defined
some entity that was used in this document, the entity will be expanded and
there is no longer any need for the external file.

If you think we should change our approach, let us know. The possible other
approaches I see are:

* Insert the %brandDTD% there anyway, even though it is no longer needed.
* Make it into a comment: <!-- %brandDTD; --> (we have some precedents about
this in Save As Complete...)
* Remove all references to external entities from the internal subset.
Attachment #113120 - Flags: superreview?(jst)
Attachment #113120 - Flags: review?(harishd)
Comment on attachment 113120 [details] [diff] [review]
Proposed fix

sr=jst
Attachment #113120 - Flags: superreview?(jst) → superreview+
**** PAGELOAD PERFORMANCE ****

win2k, 2.4 GHz, 1GB
---------------------
baseline:  313 ms
w/ patch:  316 ms

win2k, 650MHz, 512MB
---------------------
baseline:  632 ms
w/ patch:  631 ms

win98, 200MHz, 64MB
---------------------
baseline:  4687 ms
w/ patch:  4653 ms

Hard to say, possible pageload perf win on slow machine, but again it could also
be within the range of up/down swing for a test run.
Comment on attachment 113120 [details] [diff] [review]
Proposed fix

>-  mDoctypeText.SetCapacity(20480);
>+  // setting mDoctypeText's capacity to be 1K ( just a guesstimate! ).
>+  mDoctypeText.SetCapacity(1024);
>   return NS_OK;
> }

I'm not how the size reduction is going to benifit mozilla when loading pages
with large DOCTYPES. Let's hope that 1K is the optimal size.

>-  NS_NewUTF8ConverterStream(getter_AddRefs(uniIn), in, 1024);
>+  rv = NS_NewUTF8ConverterStream(getter_AddRefs(uniIn), in, 1024);
>+
>+  if (NS_FAILED(rv)) {
>+    return result;
>+  }

This message will get lost inside expat!
Attachment #113120 - Flags: review?(harishd) → review+
Cathleen: I don't think this change will improve html page load performance,
however, it should improve startup performance because the changes are xml related.
**** STARTUP PERFORMANCE ****

win2k, 2.4 GHz, 1GB
---------------------
baseline (ms):  1652 1386 1496  996  965 1199 1886 1855 1715     (lowest:   965)
w/ patch (ms):  1683 1246 1699 1558 1886 1761 1402 1277 1058     (lowest:  1058)

win2k, 650MHz, 512MB
---------------------
baseline (ms):  2257 2687 2620 1960 2261 2701 2161 2750 2390     (lowest:  1960)
w/ patch (ms):  2636 2098 2669 2070 2273 2756 3137 1877 2267     (lowest:  1877)
 
 win98, 200MHz, 64MB
---------------------
baseline (ms):  12790 12050 12580 11990 12850 12290 12080 12380  (lowest: 11990)
w/ patch (ms):  12620 12410 12210 12060 12140 12100 12170 12130  (lowest: 12060)

Also, hard to say here.  These numbers are in milliseconds, and they are all
very close.
This patch has review, super-review and the perf tests don't seem to suggest any
regressions. Is this ready to land?
Comment on attachment 113120 [details] [diff] [review]
Proposed fix

We used to copy external DTDs into the internal subset when the external DTD
was specified on the DOCTYPE line. All XHTML documents having DOCTYPE, and most
XUL files do this.

The change is to make us observe when we are parsing external DTD and not copy
into internal subset. (Because of this I was able to make our internal subset
member smaller.) This makes us correct, and in theory should improve startup
performance quite a bit. Unfortunately tests don't show that, but they don't
show a sure slowdown either.
Attachment #113120 - Flags: approval1.3b?
Comment on attachment 113120 [details] [diff] [review]
Proposed fix

a=asa (on behalf of drivers) for checkin to 1.3beta.
Attachment #113120 - Flags: approval1.3b? → approval1.3b+
Fixed.

I did some additional tests, and found why we didn't see startup win - there are
only 5 XUL files that benefit from this during startup. I checked all pref
panels and most dialogs etc. I could think of and at most the doctype string
length was 266 so 1K is more than enough for XUL. For documents on the web it
might be less than optimal, but we can tweak it as we see more XML content.
Status: ASSIGNED → RESOLVED
Closed: 22 years ago
Resolution: --- → FIXED
You need to log in before you can comment on or make changes to this bug.

Attachment

General

Creator:
Created:
Updated:
Size: