Source code

Revision control

Copy as Markdown

Other Tools

<!-- EDITOR NOTES -*- mode: Text; fill-column: 100 -*-
!
! Adding a new element involves editing the following sections:
! - section for the element itself
! - descriptions of the element's categories
! - images/content-venn.svg
! - syntax, if it's void or otherwise special
! - parser, if it's not phrasing-level
! - rendering
! - obsolete section
! - element, attribute, content model, and interface indexes
! - adding it to the section with ARIA mappings
!
!-->
<!--
!-->
<!--START complete--><!--START dev-html-->
<!DOCTYPE html>
<!--SET FINGERPRINT=<span title="fingerprinting vector" class="fingerprint"><img src="images/fingerprint.png" alt="(This is a fingerprinting vector.)" width=46 height=64></span>-->
<html lang="en-GB-x-hixie" class="big">
<head>
<title>HTML Standard</title>
<script>
// Captured as early as possible; init() subtracts it from the current time
// to report how long the page took to load.
var loadTimer = new Date();
// Build the revision label from the version-control keyword string: drop the
// first 11 characters (the "$Revision: " prefix), prepend "r", then cut the
// final two characters (the closing " $").
var current_revision = (function (keyword) {
  var label = "r" + keyword.substr(11);
  return label.substr(0, label.length - 2);
})("$Revision: 1 $");
var last_known_revision = current_revision;
// Builds a DocumentFragment from any number of arguments. Each argument is
// appended in order: arrays are expanded recursively into a nested fragment,
// strings become text nodes, and anything else is appended as-is.
function F( /* varargs... */) {
  var result = document.createDocumentFragment();
  var total = arguments.length;
  var position = 0;
  while (position < total) {
    var item = arguments[position];
    var child;
    if (item instanceof Array)
      child = F.apply(this, item);
    else if (typeof item == 'string')
      child = document.createTextNode(item);
    else
      child = item;
    result.appendChild(child);
    position += 1;
  }
  return result;
}
// Creates an element called `name`. If the second argument is present and is
// not a string, a Node, or an Array, it is treated as a map of attributes:
//   - boolean values set an empty-valued attribute when true, nothing when false
//   - function values are assigned as properties on the element
//   - everything else goes through setAttribute()
// All remaining arguments are appended as children: arrays are expanded
// through F(), strings become text nodes, other values are appended as-is.
function E(name, /* optional */ attributes /*, varargs... */) {
  var node = document.createElement(name);
  var hasAttributeMap = (arguments.length > 1) &&
      !((typeof attributes == 'string') ||
        (attributes instanceof Node) ||
        (attributes instanceof Array));
  if (hasAttributeMap) {
    for (var key in attributes) {
      var value = attributes[key];
      switch (typeof value) {
        case 'boolean':
          if (value)
            node.setAttribute(key, '');
          break;
        case 'function':
          node[key] = value;
          break;
        default:
          node.setAttribute(key, value);
      }
    }
  }
  // Children start after the attribute map when there is one.
  var position = hasAttributeMap ? 2 : 1;
  while (position < arguments.length) {
    var child = arguments[position];
    if (child instanceof Array)
      node.appendChild(F.apply(this, child));
    else if (typeof child == 'string')
      node.appendChild(document.createTextNode(child));
    else
      node.appendChild(child);
    position += 1;
  }
  return node;
}
// Looks up a named setting, checking the page's query string first and then
// document.cookie.
//
// Query string: a bare parameter equal to `name` (no "=value") yields "1";
// a "name=value" parameter yields the decoded value.
// Cookies: a "name=value" cookie yields the decoded value.
// Returns null when the name is found in neither place.
//
// Each entry is split at its FIRST "=" only, so values that themselves contain
// "=" are returned whole rather than truncated. Entries with no "=" never match
// as key/value pairs, so a valueless cookie no longer produces the string
// "undefined".
function getCookie(name) {
  // Percent-decode as UTF-8. Malformed or legacy (%uXXXX / Latin-1) escapes make
  // decodeURIComponent throw, so fall back to unescape() for those.
  function decode(raw) {
    try {
      return decodeURIComponent(raw);
    } catch (e) {
      return unescape(raw);
    }
  }
  // Scans `entries` in order. `bareValue` is what a bare (valueless) match
  // returns, or null when bare entries are not meaningful for this source.
  function find(entries, bareValue) {
    for (var index = 0; index < entries.length; index++) {
      var entry = entries[index];
      if (bareValue !== null && entry == name)
        return bareValue;
      var cut = entry.indexOf("=");
      if (cut < 0)
        continue;
      if (entry.substr(0, cut) == name)
        return decode(entry.substr(cut + 1));
    }
    return null;
  }
  var fromQuery = find(location.search.substr(1).split("&"), "1");
  if (fromQuery !== null)
    return fromQuery;
  return find(document.cookie.split("; "), null);
}
// The alert box currently attached to the page (falsy when there is none), and
// the id of the pending timer that will fade it out or remove it.
var currentAlert;
var currentAlertTimeout;
// Shows an on-page alert containing `s`, optionally followed by a link.
//
//   s    - message content; passed through F(), so it may be a string, a node,
//          or an array of those.
//   href - optional URL; when truthy, a link whose text is the URL itself is
//          appended after the message.
//
// The alert box is created on first use and reused afterwards. It starts to
// close itself after 10 seconds; moving the mouse over it restarts that timer.
function showAlert(s, href) {
  if (!currentAlert) {
    currentAlert = document.createElement('div');
    currentAlert.id = 'alert';
    // Expose the box as a live region so assistive technology announces the
    // message; the content is filled in only after the box is in the document.
    currentAlert.setAttribute('role', 'alert');
    var x = document.createElement('button');
    x.type = 'button';
    x.textContent = '\u2573';
    // The glyph alone carries no meaning for screen readers; name the control.
    x.setAttribute('aria-label', 'Close');
    x.onclick = closeAlert2;
    currentAlert.appendChild(x);
    currentAlert.appendChild(document.createElement('span'));
    currentAlert.onmousemove = function () {
      clearTimeout(currentAlertTimeout);
      currentAlert.className = '';
      currentAlertTimeout = setTimeout(closeAlert, 10000);
    };
    document.body.appendChild(currentAlert);
  } else {
    // Reusing an existing box: cancel any pending close and undo a fade-out.
    clearTimeout(currentAlertTimeout);
    currentAlert.className = '';
  }
  // The last child is the message <span>; replace whatever it held before.
  currentAlert.lastChild.textContent = '';
  currentAlert.lastChild.appendChild(F(s));
  if (href) {
    var link = document.createElement('a');
    link.href = href;
    link.textContent = href;
    currentAlert.lastChild.appendChild(F(' ', link));
  }
  currentAlertTimeout = setTimeout(closeAlert, 10000);
}
// Begins dismissing the alert: cancels any pending timer, marks the box with
// the 'closed' class, and schedules its actual removal via closeAlert2() three
// seconds later. Only the timer is cancelled when no alert is showing.
function closeAlert() {
  clearTimeout(currentAlertTimeout);
  if (!currentAlert)
    return;
  currentAlert.className = 'closed';
  currentAlertTimeout = setTimeout(closeAlert2, 3000);
}
// Removes the alert immediately: cancels any pending timer, detaches the box
// from its parent, and clears currentAlert so the next showAlert() builds a
// fresh one. Only the timer is cancelled when no alert is showing.
function closeAlert2() {
  clearTimeout(currentAlertTimeout);
  if (!currentAlert)
    return;
  var container = currentAlert.parentNode;
  container.removeChild(currentAlert);
  currentAlert = null;
}
// Keyboard dismissal: key code 27 (Escape) removes a showing alert at once;
// any other key starts the normal fade-out.
window.addEventListener('keydown', function (event) {
  if (event.keyCode != 27) {
    closeAlert();
    return;
  }
  if (currentAlert)
    closeAlert2();
}, false);
// Scrolling the page also starts the fade-out.
window.addEventListener('scroll', function () {
  closeAlert();
}, false);
// Loads one of the spec's helper scripts by appending a <script> element to
// <body>. `script` is a file name, resolved against the current-work
// directory on www.whatwg.org (protocol-relative URL).
function load(script) {
  var tag = document.createElement('script');
  var url = '//www.whatwg.org/specs/web-apps/current-work/' + script;
  tag.setAttribute('src', url);
  document.body.appendChild(tag);
}
// Set to 1 as soon as init() runs; the slow-load hint timer checks it.
var startedInit = 0;
// Page onload handler. Loads the helper scripts unless the page was opened
// with exactly "?slow-browser" as its query string. toc.js is loaded only when
// the root element's class is "big" or "big split index". When
// getCookie('profile') is '1', the elapsed load time is appended to the first
// <h2> on the page.
function init() {
  startedInit = 1;
  if (location.search == '?slow-browser')
    return;
  load('reviewer.js');
  var rootClass = document.documentElement.className;
  if (rootClass == "big" || rootClass == "big split index")
    load('toc.js');
  var remaining = ['updater.js', 'dfn.js', 'status.js'];
  for (var position = 0; position < remaining.length; position += 1)
    load(remaining[position]);
  if (getCookie('profile') == '1') {
    var heading = document.getElementsByTagName('h2')[0];
    heading.textContent += '; load: ' + (new Date() - loadTimer) + 'ms';
  }
}
// Only when the root element has no class at all: if init() still has not
// started six seconds after this script ran, suggest the multipage copy.
if (document.documentElement.className == "") {
  setTimeout(function () {
    if (startedInit)
      return;
    showAlert("Too slow? Try reading the multipage copy of the spec instead:", "http://whatwg.org/html");
  }, 6000);
}
// Intercepts a keypress whose `which` is 114 (lowercase "r") while the meta key
// is held, and asks for confirmation; the key's default action is cancelled
// when the user declines.
window.addEventListener('keypress', function (event) {
  var isMetaR = (event.which == 114) && (event.metaKey);
  if (!isMetaR)
    return;
  var confirmed = confirm('Are you sure you want to reload this page?');
  if (!confirmed)
    event.preventDefault();
}, false);
</script>
<link rel="stylesheet" href="//www.whatwg.org/style/specification">
<link rel="icon" href="//www.whatwg.org/images/icon">
<style>
/* Editorial markers. */
.proposal { border: blue solid; padding: 1em; }
.bad, .bad *:not(.XXX) { color: gray; border-color: gray; background: transparent; }
/* Update-status box: hidden unless given the "relevant" class, then pinned top right. */
#updatesStatus { display: none; z-index: 10; }
#updatesStatus.relevant { display: block; position: fixed; right: 1em; top: 1em; padding: 0.5em; font: bold small sans-serif; min-width: 25em; width: 30%; max-width: 40em; height: auto; border: ridge 4px gray; background: #EEEEEE; color: black; }
div.head .logo { width: 11em; margin-bottom: 20em; }
/* Configuration panel on the right edge; the plain "gray" background is the fallback for rgba(). */
#configUI { position: absolute; z-index: 20; top: auto; right: 0; width: 11em; padding: 0 0.5em 0 0.5em; font-size: small; background: gray; background: rgba(32,32,32,0.9); color: white; border-radius: 1em 0 0 1em; -moz-border-radius: 1em 0 0 1em; }
#configUI p { margin: 0.75em 0; padding: 0.3em; }
#configUI p label { display: block; }
#configUI #updateUI, #configUI .loginUI { text-align: center; }
#configUI input[type=button] { display: block; margin: auto; }
#configUI :link, #configUI :visited { color: white; }
#configUI :link:hover, #configUI :visited:hover { background: transparent; }
/* Alert box built by showAlert(). The opacity transition animates the fade-out that
   closeAlert() triggers by adding the "closed" class; the unprefixed property is
   declared after the -webkit- one so the fade also runs outside WebKit. */
#alert { position: fixed; top: 20%; left: 20%; right: 20%; font-size: 2em; padding: 0.5em; z-index: 40; background: gray; background: rgba(32,32,32,0.9); color: white; border-radius: 1em; -moz-border-radius: 1em; -webkit-transition: opacity 1s linear; transition: opacity 1s linear; }
#alert.closed { opacity: 0; }
#alert button { position: absolute; top: -1em; right: 2em; border-radius: 1em 1em 0 0; border: none; line-height: 0.9; color: white; background: rgb(64,64,64); font-size: 0.6em; font-weight: 900; cursor: pointer; }
#alert :link, #alert :visited { color: white; }
#alert :link:hover, #alert :visited:hover { background: transparent; }
/* The configuration panel is interactive only; leave it out of printouts. */
@media print { #configUI { display: none; } }
/* RFC 2119 keywords: small caps with a yellow glow, plus a generated marker at the
   left edge (U+2605 for the "must" class, U+2606 for "should"). */
.rfc2119 { font-variant: small-caps; text-shadow: 0 0 0.5em yellow; position: static; }
.rfc2119::after { position: absolute; left: 0; width: 25px; text-align: center; color: yellow; text-shadow: 0.075em 0.075em 0.2em black; }
.rfc2119.m\ust::after { content: '\2605'; }
.rfc2119.s\hould::after { content: '\2606'; }
/* Hide anything carrying the hidden attribute. */
[hidden] { display: none; }
.fingerprint { float: right; }
/* "Applies" tables: stacked header cells, centred body cells, highlighted "yes" cells. */
.applies thead th > * { display: block; }
.applies thead code { display: block; }
.applies td { text-align: center; }
.applies .yes { background: yellow; }
/* Borderless, right-aligned matrix tables. */
.matrix, .matrix td { border: hidden; text-align: right; }
.matrix { margin-left: 2em; }
/* In vertical summary tables, hide the bottom border of the cell that follows a
   first-column cell spanning two rows. */
.vertical-summary-table tr > th[rowspan="2"]:first-child + th,
.vertical-summary-table tr > td[rowspan="2"]:first-child + td { border-bottom: hidden; }
/* Dice example table. */
.dice-example { border-collapse: collapse; border-style: hidden solid solid hidden; border-width: thin; margin-left: 3em; }
.dice-example caption { width: 30em; font-size: smaller; font-style: italic; padding: 0.75em 0; text-align: left; }
.dice-example td, .dice-example th { border: solid thin; width: 1.35em; height: 1.05em; text-align: center; padding: 0; }
td.eg { border-width: thin; text-align: center; }
/* First table example, set in the Essays1743 face declared by this sheet's
   @font-face rules. */
#table-example-1 { border: solid thin; border-collapse: collapse; margin-left: 3em; }
#table-example-1 * { font-family: "Essays1743", serif; line-height: 1.01em; }
#table-example-1 caption { padding-bottom: 0.5em; }
#table-example-1 thead, #table-example-1 tbody { border: none; }
#table-example-1 th, #table-example-1 td { border: solid thin; }
#table-example-1 th { font-weight: normal; }
#table-example-1 td { border-style: none solid; vertical-align: top; }
#table-example-1 th { padding: 0.5em; vertical-align: middle; text-align: center; }
#table-example-1 tbody tr:first-child td { padding-top: 0.5em; }
#table-example-1 tbody tr:last-child td { padding-bottom: 1.5em; }
#table-example-1 tbody td:first-child { padding-left: 2.5em; padding-right: 0; width: 9em; }
#table-example-1 tbody td:first-child::after { content: leader(". "); }
#table-example-1 tbody td { padding-left: 2em; padding-right: 2em; }
/* Column widths: second column, then every later column, then columns after the fourth. */
#table-example-1 tbody td:first-child + td { width: 10em; }
#table-example-1 tbody td:first-child + td ~ td { width: 2.5em; }
#table-example-1 tbody td:first-child + td + td + td ~ td { width: 1.25em; }
/* Financial-statement style table examples; variants e1 and e2 adjust rules and indents. */
.apple-table-examples { border: none; border-collapse: separate; border-spacing: 1.5em 0em; width: 40em; margin-left: 3em; }
.apple-table-examples * { font-family: "Times", serif; }
.apple-table-examples td, .apple-table-examples th { border: none; white-space: nowrap; padding-top: 0; padding-bottom: 0; }
.apple-table-examples tbody th:first-child { border-left: none; width: 100%; }
.apple-table-examples thead th:first-child ~ th { font-size: smaller; font-weight: bolder; border-bottom: solid 2px; text-align: center; }
.apple-table-examples tbody th::after, .apple-table-examples tfoot th::after { content: leader(". ") }
.apple-table-examples tbody th, .apple-table-examples tfoot th { font: inherit; text-align: left; }
.apple-table-examples td { text-align: right; vertical-align: top; }
.apple-table-examples.e1 tbody tr:last-child td { border-bottom: solid 1px; }
.apple-table-examples.e1 tbody + tbody tr:last-child td { border-bottom: double 3px; }
.apple-table-examples.e2 th[scope=row] { padding-left: 1em; }
.apple-table-examples sup { line-height: 0; }
/* Keep the first three columns of these tables on one line each. */
.three-column-nowrap tr > td:first-child,
.three-column-nowrap tr > td:first-child + td,
.three-column-nowrap tr > td:first-child + td + td { white-space: nowrap; }
.details-example img { vertical-align: top; }
/* Base64 table: small, unwrapped text flowed into five 6em-wide columns.
   Each multi-column property is given in prefixed and unprefixed form with
   the same value. */
#base64-table {
  white-space: nowrap;
  font-size: 0.6em;
  -moz-column-width: 6em;
  -webkit-column-width: 6em;
  column-width: 6em;
  -moz-column-count: 5;
  -webkit-column-count: 5;
  column-count: 5;
  -moz-column-gap: 1em;
  -webkit-column-gap: 1em;
  column-gap: 1em;
}
#base64-table thead {
  display: none;
}
#base64-table * {
  border: none;
}
#base64-table tbody td:first-child:after {
  content: ':';
}
#base64-table tbody td:last-child {
  text-align: right;
}

/* Named character references table: small, unwrapped text in 30em-wide columns. */
#named-character-references-table {
  white-space: nowrap;
  font-size: 0.6em;
  -moz-column-width: 30em;
  -webkit-column-width: 30em;
  column-width: 30em;
  -moz-column-gap: 1em;
  -webkit-column-gap: 1em;
  column-gap: 1em;
}
#named-character-references-table > table > tbody > tr > td:first-child + td,
#named-character-references-table > table > tbody > tr > td:last-child {
  text-align: center;
}
/* Hovering the last cell pops its <span> out as a large boxed preview. */
#named-character-references-table > table > tbody > tr > td:last-child:hover > span {
  position: absolute;
  top: auto;
  left: auto;
  margin-left: 0.5em;
  line-height: 1.2;
  font-size: 5em;
  border: outset;
  padding: 0.25em 0.5em;
  background: white;
  width: 1.25em;
  height: auto;
  text-align: center;
}
/* This one row's first cell is set at half size. */
#named-character-references-table > table > tbody > tr#entity-CounterClockwiseContourIntegral > td:first-child {
  font-size: 0.5em;
}
.glyph.control {
  color: red;
}
/* The Essays1743 family, referenced by the #table-example-1 rules in this sheet.
   Four faces are declared under one family name so that bold and italic text
   select a real font file: regular, bold, italic, and bold italic. */
@font-face {
font-family: 'Essays1743';
src: url('//www.whatwg.org/specs/web-apps/current-work/fonts/Essays1743.ttf');
}
/* Bold face. */
@font-face {
font-family: 'Essays1743';
font-weight: bold;
src: url('//www.whatwg.org/specs/web-apps/current-work/fonts/Essays1743-Bold.ttf');
}
/* Italic face. */
@font-face {
font-family: 'Essays1743';
font-style: italic;
src: url('//www.whatwg.org/specs/web-apps/current-work/fonts/Essays1743-Italic.ttf');
}
/* Bold italic face. */
@font-face {
font-family: 'Essays1743';
font-style: italic;
font-weight: bold;
src: url('//www.whatwg.org/specs/web-apps/current-work/fonts/Essays1743-BoldItalic.ttf');
}
</style>
<link rel="stylesheet" href="status.css">
</head>
<body onload="init()">
<header class="head with-buttons" id="head">
<p><a href="//www.whatwg.org/" class="logo"><img width="101" height="101" alt="WHATWG" src="/images/logo"></a></p>
<hgroup>
<h1 class="allcaps">HTML</h1>
<h2 class="no-num no-toc">Living Standard &mdash; Last Updated <span class="pubdate">[DATE: 01 Jan 1901]</span></h2>
</hgroup>
<div>
<div>
<a href="//whatwg.org/html"><span><strong>Multipage Version</strong> <code>whatwg.org/html</code></span></a>
<a href="//whatwg.org/c"><span><strong>One-Page Version</strong> <code>whatwg.org/c</code></span></a>
<a href="//whatwg.org/pdf"><span><strong>PDF Version</strong> <code>whatwg.org/pdf</code></span></a>
<a href="http://developers.whatwg.org/"><span><strong>Developer Version</strong> <code>developers.whatwg.org</code></span></a>
</div>
<div>
<a class="misc" href="//whatwg.org/faq"><span><strong>FAQ</strong> <code>whatwg.org/faq</code></span></a>
<a class="misc" href="http://validator.whatwg.org/"><span><strong>Validators</strong> <code>validator.whatwg.org</code></span></a>
</div>
<div>
<a class="comms" href="//www.whatwg.org/mailing-list"><span><strong>Join our Mailing List</strong> <code>whatwg@whatwg.org</code></span></a>
<a class="comms" href="http://wiki.whatwg.org/wiki/IRC"><span><strong>Join us on IRC</strong> <code>#whatwg on Freenode</code></span></a>
<a class="comms" href="http://forums.whatwg.org/"><span><strong>Join our Forums</strong> <code>forums.whatwg.org</code></span></a>
</div>
<div>
<!--<a class="changes" href="http://svn.whatwg.org/webapps"><span><strong>SVN Repository</strong> <code>svn.whatwg.org/webapps</code></span></a>-->
<a class="changes" href="http://html5.org/tools/web-apps-tracker"><span><strong>Change Log</strong> <code>html5.org's tracker</code></span></a>
<a class="changes" href="http://twitter.com/WHATWG"><span><strong>Twitter Updates</strong> <code>@WHATWG</code></span></a>
</div>
<div>
<a class="feedback" href="//www.whatwg.org/newbug"><span><strong>File a Bug</strong> <code>whatwg.org/newbug</code></span></a>
<a class="feedback" href="http://ian.hixie.ch/+"><span><strong>E-mail the Editor</strong> <code>ian@hixie.ch</code></span></a>
</div>
</div>
</header>
<hr>
<div id="configUI"></div>
<h2 class="no-num no-toc" id="contents">Table of contents</h2>
<!--toc-->
<hr>
<!--
<pre class="idl">
interface Screen { }; // CSSOM
interface URL { }; // URL API
interface Blob { }; // File API
interface File : Blob { }; // File API
interface FileList { }; // File API
interface WebGLRenderingContext { }; // WebGL
interface XMLDocument { }; // DOM
interface HTMLCollection { }; // DOM
interface DOMTokenList { }; // DOM
interface DOMSettableTokenList { attribute any value; }; // DOM
interface SVGMatrix { }; // SVG
// fake interfaces that map to JS object types:
interface ArrayBuffer { };
interface Int8Array { };
interface Uint8Array { };
interface Uint8ClampedArray { };
interface Int16Array { };
interface Uint16Array { };
interface Int32Array { };
interface Uint32Array { };
interface Float32Array { };
interface Float64Array { };
</pre>
-->
<h2 id="introduction">Introduction</h2>
<div class="nodev">
<h3 id="abstract">Where does this specification fit?</h3>
<p>This specification defines a big part of the Web platform, in lots of detail. Its place in the
Web platform specification stack relative to other specifications can be best summed up as
follows:</p>
<p><img src="images/abstract.png" width="398" height="359" alt="It consists of everything else, above such core technologies as HTTP, URI/IRIs, DOM, XML, Unicode, and ECMAScript; below presentation-layer technologies like CSS and the NPAPI; and to the side of technologies like Geolocation, SVG, MathML, and XHR."></p>
</div>
<h3 id="is-this-html5?">Is this HTML5?</h3>
<!-- NON-NORMATIVE SECTION -->
<p>In short: Yes.</p>
<p>At greater length: The term "HTML5" is widely used as a buzzword to refer to modern Web
technologies, many of which (though by no means all) are developed at the WHATWG. This document is
one such; others are available from <a href="http://www.whatwg.org/specs/">the WHATWG
specification index</a>.</p>
<p class="note">Although we have asked them to stop doing so, the W3C also republishes some parts
of this specification as separate documents. There are numerous differences between this
specification and the W3C forks; some minor, some major. Unfortunately these are not currently
accurately documented anywhere, so there is no way to know which are intentional and which are
not.</p>
<h3>Background</h3>
<!-- NON-NORMATIVE SECTION -->
<p>HTML is the World Wide Web's core markup language. Originally, HTML was primarily designed as a
language for semantically describing scientific documents. Its general design, however, has
enabled it to be adapted, over the subsequent years, to describe a number of other types of
documents and even applications.</p>
<h3>Audience</h3>
<!-- NON-NORMATIVE SECTION -->
<p>This specification is intended for authors of documents and scripts that use the features
defined in this specification<span class="nodev">, implementors of tools that operate on pages that
use the features defined in this specification, and individuals wishing to establish the
correctness of documents or implementations with respect to the requirements of this
specification</span>.</p>
<p>This document is probably not suited to readers who do not already have at least a passing
familiarity with Web technologies, as in places it sacrifices clarity for precision, and brevity
for completeness. More approachable tutorials and authoring guides can provide a gentler
introduction to the topic.</p>
<p>In particular, familiarity with the basics of DOM is necessary for a complete understanding of
some of the more technical parts of this specification. An understanding of Web IDL, HTTP, XML,
Unicode, character encodings, JavaScript, and CSS will also be helpful in places but is not
essential.</p>
<h3>Scope</h3>
<!-- NON-NORMATIVE SECTION -->
<p>This specification is limited to providing a semantic-level markup language and associated
semantic-level scripting APIs for authoring accessible pages on the Web ranging from static
documents to dynamic applications.</p>
<p>The scope of this specification does not include providing mechanisms for media-specific
customization of presentation (although default rendering rules for Web browsers are included at
the end of this specification, and several mechanisms for hooking into CSS are provided as part of
the language).</p>
<p>The scope of this specification is not to describe an entire operating system. In particular,
hardware configuration software, image manipulation tools, and applications that users would be
expected to use with high-end workstations on a daily basis are out of scope. In terms of
applications, this specification is targeted specifically at applications that would be expected
to be used by users on an occasional basis, or regularly but from disparate locations, with low
CPU requirements. Examples of such applications include online purchasing systems, searching
systems, games (especially multiplayer online games), public telephone books or address books,
communications software (e-mail clients, instant messaging clients, discussion software), document
editing software, etc.</p>
<h3>History</h3>
<!-- NON-NORMATIVE SECTION -->
<p>For its first five years (1990-1995), HTML went through a number of revisions and experienced a
number of extensions, primarily hosted first at CERN, and then at the IETF.</p>
<p>With the creation of the W3C, HTML's development changed venue again. A first abortive attempt
at extending HTML in 1995 known as HTML 3.0 then made way for a more pragmatic approach known as
HTML 3.2, which was completed in 1997. HTML4 quickly followed later that same year.</p>
<p>The following year, the W3C membership decided to stop evolving HTML and instead begin work on
an XML-based equivalent, called XHTML. <!-- http://www.w3.org/MarkUp/future/#summary --> This
effort started with a reformulation of HTML4 in XML, known as XHTML 1.0, which added no new
features except the new serialisation, and which was completed in 2000. After XHTML 1.0, the W3C's
focus turned to making it easier for other working groups to extend XHTML, under the banner of
XHTML Modularization. In parallel with this, the W3C also worked on a new language that was not
compatible with the earlier HTML and XHTML languages, calling it XHTML2.</p>
<p>Around the time that HTML's evolution was stopped in 1998, parts of the API for HTML developed
by browser vendors were specified and published under the name DOM Level 1 (in 1998) and DOM Level
2 Core and DOM Level 2 HTML (starting in 2000 and culminating in 2003). These efforts then petered
out, with some DOM Level 3 specifications published in 2004 but the working group being closed
before all the Level 3 drafts were completed.</p>
<p>In 2003, the publication of XForms, a technology which was positioned as the next generation of
Web forms, sparked a renewed interest in evolving HTML itself, rather than finding replacements
for it. This interest was born from the realization that XML's deployment as a Web technology was
limited to entirely new technologies (like RSS and later Atom), rather than as a replacement for
existing deployed technologies (like HTML).</p>
<p>A proof of concept to show that it was possible to extend HTML4's forms to provide many of the
features that XForms 1.0 introduced, without requiring browsers to implement rendering engines
that were incompatible with existing HTML Web pages, was the first result of this renewed
interest. At this early stage, while the draft was already publicly available, and input was
already being solicited from all sources, the specification was only under Opera Software's
copyright.</p>
<p>The idea that HTML's evolution should be reopened was tested at a W3C workshop in 2004, where
some of the principles that underlie the HTML5 work (described below), as well as the
aforementioned early draft proposal covering just forms-related features, were presented to the
W3C jointly by Mozilla and Opera. The proposal was rejected on the grounds that it
conflicted with the previously chosen direction for the Web's evolution; the W3C staff and
membership voted to continue developing XML-based replacements instead.</p>
<p>Shortly thereafter, Apple, Mozilla, and Opera jointly announced their intent to continue
working on the effort under the umbrella of a new venue called the WHATWG. A public mailing list
was created, and the draft was moved to the WHATWG site. The copyright was subsequently amended to
be jointly owned by all three vendors, and to allow reuse of the specification.</p>
<p>The WHATWG was based on several core principles, in particular that technologies need to be
backwards compatible, that specifications and implementations need to match even if this means
changing the specification rather than the implementations, and that specifications need to be
detailed enough that implementations can achieve complete interoperability without
reverse-engineering each other.</p>
<p>The last requirement in particular required that the scope of the HTML5 specification include
what had previously been specified in three separate documents: HTML4, XHTML1, and DOM2 HTML. It
also meant including significantly more detail than had previously been considered the norm.</p>
<p>In 2006, the W3C indicated an interest in participating in the development of HTML5 after all,
and in 2007 formed a working group chartered to work with the WHATWG on the development of the
HTML5 specification. Apple, Mozilla, and Opera allowed the W3C to publish the specification under
the W3C copyright, while keeping a version with the less restrictive license on the WHATWG
site.</p>
<p>For a number of years, both groups then worked together. In 2011, however, the groups came to
the conclusion that they had different goals: the W3C wanted to publish a "finished" version of
"HTML5", while the WHATWG wanted to continue working on a Living Standard for HTML, continuously
maintaining the specification rather than freezing it in a state with known problems, and adding
new features as needed to evolve the platform.</p>
<p>Since then, the WHATWG has been working on this specification (amongst others), and the W3C has
been copying fixes made by the WHATWG into their fork of the document, as well as making other
changes, some intentional and some not, with no documentation listing or explaining the
differences.</p>
<h3>Design notes</h3>
<!-- NON-NORMATIVE SECTION -->
<p>It must be admitted that many aspects of HTML appear at first glance to be nonsensical and
inconsistent.</p>
<p>HTML, its supporting DOM APIs, as well as many of its supporting technologies, have been
developed over a period of several decades by a wide array of people with different priorities
who, in many cases, did not know of each other's existence.</p>
<p>Features have thus arisen from many sources, and have not always been designed in especially
consistent ways. Furthermore, because of the unique characteristics of the Web, implementation
bugs have often become de facto, and now de jure, standards, as content is often unintentionally
written in ways that rely on them before they can be fixed.</p>
<p>Despite all this, efforts have been made to adhere to certain design goals. These are described
in the next few subsections.</p>
<h4>Serializability of script execution</h4>
<!-- NON-NORMATIVE SECTION -->
<p>To avoid exposing Web authors to the complexities of multithreading, the HTML and DOM APIs are
designed such that no script can ever detect the simultaneous execution of other scripts. Even
with <span data-x="Worker">workers</span>, the intent is that the behavior of implementations can
be thought of as completely serializing the execution of all scripts in all <span data-x="browsing
context">browsing contexts</span>.</p>
<p class="note">The <code
data-x="dom-navigator-yieldForStorageUpdates">navigator.yieldForStorageUpdates()</code> method, in
this model, is equivalent to allowing other scripts to run while the calling script is
blocked.</p>
<h4>Compliance with other specifications</h4>
<!-- NON-NORMATIVE SECTION -->
<p>This specification interacts with and relies on a wide variety of other specifications. In
certain circumstances, unfortunately, conflicting needs have led to this specification violating
the requirements of these other specifications. Whenever this has occurred, the transgressions
have each been noted as a "<dfn>willful violation</dfn>", and the reason for the violation has
been noted.</p>
<h4>Extensibility</h4>
<!-- NON-NORMATIVE SECTION -->
<p>HTML has a wide array of extensibility mechanisms that can be used for adding semantics in a
safe manner:</p>
<ul>
<li><p>Authors can use the <code data-x="attr-class">class</code> attribute to extend elements,
effectively creating their own elements, while using the most applicable existing "real" HTML
element, so that browsers and other tools that don't know of the extension can still support it
somewhat well. This is the tack used by microformats, for example.</p></li>
<li><p>Authors can include data for inline client-side scripts or server-side site-wide scripts
to process using the <code data-x="attr-data-*">data-*=""</code> attributes. These are guaranteed
to never be touched by browsers, and allow scripts to include data on HTML elements that scripts
can then look for and process.</p></li>
<li><p>Authors can use the <code data-x="meta">&lt;meta name="" content=""></code> mechanism to
include page-wide metadata by registering <span data-x="concept-meta-extensions">extensions to
the predefined set of metadata names</span>.</p></li>
<li><p>Authors can use the <code data-x="attr-hyperlink-rel">rel=""</code> mechanism to annotate
links with specific meanings by registering <span data-x="concept-rel-extensions">extensions to
the predefined set of link types</span>. This is also used by microformats.</p></li>
<li><p>Authors can embed raw data using the <code data-x="script">&lt;script type=""></code>
mechanism with a custom type, for further handling by inline or server-side scripts.</p></li>
<li><p>Authors can create <span data-x="plugin">plugins</span> and invoke them using the
<code>embed</code> element. This is how Flash works.</p></li>
<li><p>Authors can extend APIs using the JavaScript prototyping mechanism. This is widely used by
script libraries, for instance.</p></li>
<li><p>Authors can use the microdata feature (the <code
data-x="attr-itemscope">itemscope=""</code> and <code data-x="attr-itemprop">itemprop=""</code>
attributes) to embed nested name-value pairs of data to be shared with other applications and
sites.</p></li>
</ul>
<h3>HTML vs XHTML</h3>
<!-- NON-NORMATIVE SECTION -->
<p>This specification defines an abstract language for describing documents and applications, and
some APIs for interacting with in-memory representations of resources that use this language.</p>
<p>The in-memory representation is known as "DOM HTML", or "the DOM" for short.</p>
<p>There are various concrete syntaxes that can be used to transmit resources that use this
abstract language, two of which are defined in this specification.</p>
<p>The first such concrete syntax is the HTML syntax. This is the format suggested for most
authors. It is compatible with most legacy Web browsers. If a document is transmitted with the
<code>text/html</code> <span>MIME type</span>, then it will be processed as an HTML document by
Web browsers. This specification defines the latest HTML syntax, known simply as "HTML".</p>
<p>The second concrete syntax is the XHTML syntax, which is an application of XML. When a document
is transmitted with an <span>XML MIME type</span>, such as <code>application/xhtml+xml</code>,
then it is treated as an XML document by Web browsers, to be parsed by an XML processor. Authors
are reminded that the processing for XML and HTML differs; in particular, even minor syntax errors
will prevent a document labeled as XML from being rendered fully, whereas they would be ignored in
the HTML syntax. This specification defines the latest XHTML syntax, known simply as "XHTML".</p>
<p>The DOM, the HTML syntax, and the XHTML syntax cannot all represent the same content. For
example, namespaces cannot be represented using the HTML syntax, but they are supported in the DOM
and in the XHTML syntax. Similarly, documents that use the <code>noscript</code> feature can be
represented using the HTML syntax, but cannot be represented with the DOM or in the XHTML syntax.
Comments that contain the string "<code data-x="">--&gt;</code>" can only be represented in the
DOM, not in the HTML and XHTML syntaxes.</p>
<h3>Structure of this specification</h3>
<!-- NON-NORMATIVE SECTION -->
<p>This specification is divided into the following major sections:</p>
<dl>
<dt><a href="#introduction">Introduction</a></dt>
<dd>Non-normative materials providing a context for the HTML standard.</dd>
<dt><a href="#infrastructure">Common infrastructure</a></dt>
<dd>The conformance classes, algorithms, definitions, and the common underpinnings of the rest of
the specification.</dd>
<dt><a href="#dom">Semantics, structure, and APIs of HTML documents</a></dt>
<dd>Documents are built from elements. These elements form a tree using the DOM. This section
defines the features of this DOM, as well as introducing the features common to all elements, and
the concepts used in defining elements.</dd>
<dt><a href="#semantics">The elements of HTML</a></dt>
<dd>Each element has a predefined meaning, which is explained in this section. Rules for authors
on how to use the element<span class="nodev">, along with user agent requirements for how to
handle each element,</span> are also given. This includes large signature features of HTML such
as video playback and subtitles, form controls and form submission, and a 2D graphics API known
as the HTML canvas.</dd>
<dt><a href="#microdata">Microdata</a></dt>
<dd>This specification introduces a mechanism for adding machine-readable annotations to
documents, so that tools can extract trees of name-value pairs from the document. This section
describes this mechanism<span class="nodev"> and some algorithms that can be used to convert HTML
documents into other formats</span>. This section also defines some sample Microdata vocabularies
for contact information, calendar events, and licensing works.</dd>
<dt><a href="#editing">User interaction</a></dt>
<dd>HTML documents can provide a number of mechanisms for users to interact with and modify
content, which are described in this section, such as how focus works, and drag-and-drop.</dd>
<dt><a href="#browsers">Loading Web pages</a></dt>
<dd>HTML documents do not exist in a vacuum &mdash; this section defines many of the features
that affect environments that deal with multiple pages, such as Web browsers and offline
caching of Web applications.</dd>
<dt><a href="#webappapis">Web application APIs</a></dt>
<dd>This section introduces basic features for scripting of applications in HTML.</dd>
<dt><a href="#workers">Web workers</a></dt>
<dd>This section defines an API for background threads in JavaScript.</dd>
<dt><a href="#comms">The communication APIs</a></dt>
<dd>This section describes some mechanisms that applications written in HTML can use to
communicate with other applications from different domains running on the same client. It also
introduces a server-push event stream mechanism known as Server Sent Events or
<code>EventSource</code>, and a two-way full-duplex socket protocol for scripts known as Web
Sockets.
</dd>
<dt><a href="#webstorage">Web storage</a></dt>
<dd>This section defines a client-side storage mechanism based on name-value pairs.</dd>
<dt><a href="#syntax">The HTML syntax</a></dt>
<dt><a href="#xhtml">The XHTML syntax</a></dt>
<dd>All of these features would be for naught if they couldn't be represented in a serialized
form and sent to other people, and so these sections define the syntaxes of HTML and XHTML<span
class="nodev">, along with rules for how to parse content using those syntaxes</span>.</dd>
<dt><a href="#rendering">Rendering</a></dt>
<dd>This section defines the default rendering rules for Web browsers.</dd>
</dl>
<p>There are also some appendices, listing <a href="#obsolete">obsolete features</a> and <a
href="#iana">IANA considerations</a>, and several indices.</p>
<h4>How to read this specification</h4>
<p>This specification should be read like all other specifications. First, it should be read
cover-to-cover, multiple times. Then, it should be read backwards at least once. Then it should be
read by picking random sections from the contents list and following all the cross-references.</p>
<p>As described in the conformance requirements section below, this specification describes
conformance criteria for a variety of conformance classes. In particular, there are conformance
requirements that apply to <em>producers</em>, for example authors and the documents they create,
and there are conformance requirements that apply to <em>consumers</em>, for example Web browsers.
They can be distinguished by what they are requiring: a requirement on a producer states what is
allowed, while a requirement on a consumer states how software is to act.</p>
<div class="example">
<p>For example, "the <code data-x="">foo</code> attribute's value must be a <span>valid
integer</span>" is a requirement on producers, as it lays out the allowed values; in contrast,
the requirement "the <code data-x="">foo</code> attribute's value must be parsed using the
<span>rules for parsing integers</span>" is a requirement on consumers, as it describes how to
process the content.</p>
</div>
<p><strong>Requirements on producers have no bearing whatsoever on consumers.</strong></p>
<div class="example">
<p>Continuing the above example, a requirement stating that a particular attribute's value is
constrained to being a <span>valid integer</span> emphatically does <em>not</em> imply anything
about the requirements on consumers. It might be that the consumers are in fact required to treat
the attribute as an opaque string, completely unaffected by whether the value conforms to the
requirements or not. It might be (as in the previous example) that the consumers are required to
parse the value using specific rules that define how invalid (non-numeric in this case) values
are to be processed.</p>
</div>
<h4>Typographic conventions</h4>
<p>This is a definition, requirement, or explanation.</p>
<p class="note">This is a note.</p>
<p class="example">This is an example.</p>
<p class="&#x0058;&#x0058;&#x0058;">This is an open issue.</p>
<p class="warning">This is a warning.</p>
<pre class="idl extract">interface <dfn data-x="">Example</dfn> {
// this is an IDL definition
};</pre>
<dl class="domintro">
<dt><var data-x="">variable</var> = <var data-x="">object</var> . <code data-x="">method</code>( [ <var data-x="">optionalArgument</var> ] )</dt>
<dd>
<p>This is a note to authors describing the usage of an interface.</p>
</dd>
</dl>
<pre class="css">/* this is a CSS fragment */</pre>
<p>The defining instance of a term is marked up like <dfn data-x="x-this">this</dfn>. Uses of that
term are marked up like <span data-x="x-this">this</span> or like <i data-x="x-this">this</i>.</p>
<p>The defining instance of an element, attribute, or API is marked up like <dfn
data-x="x-that"><code>this</code></dfn>. References to that element, attribute, or API are marked
up like <code data-x="x-that">this</code>.</p>
<p>Other code fragments are marked up <code data-x="">like this</code>.</p>
<p>Variables are marked up like <var data-x="">this</var>.</p>
<p>In an algorithm, steps in <span data-x="synchronous section">synchronous sections</span> are
marked with &#x231B;.</p>
<p>In some cases, requirements are given in the form of lists with conditions and corresponding
requirements. In such cases, the requirements that apply to a condition are always the first set
of requirements that follow the condition, even in the case of there being multiple sets of
conditions for those requirements. Such cases are presented as follows:</p>
<dl class="switch">
<dt>This is a condition
<dt>This is another condition
<dd>This is the requirement that applies to the conditions above.
<dt>This is a third condition
<dd>This is the requirement that applies to the third condition.
</dl>
<h3 id="fingerprint">Privacy concerns</h3>
<!-- NON-NORMATIVE SECTION -->
<p>Some features of HTML trade user convenience for a measure of user privacy.</p>
<p>In general, due to the Internet's architecture, a user can be distinguished from another by the
user's IP address. IP addresses do not perfectly match to a user; as a user moves from device to
device, or from network to network, their IP address will change; similarly, NAT routing, proxy
servers, and shared computers enable packets that appear to all come from a single IP address to
actually map to multiple users. Technologies such as onion routing can be used to further
anonymise requests so that requests from a single user at one node on the Internet appear to come
from many disparate parts of the network.</p>
<p>However, the IP address used for a user's requests is not the only mechanism by which a user's
requests could be related to each other. Cookies, for example, are designed specifically to enable
this, and are the basis of most of the Web's session features that enable you to log into a site
with which you have an account.</p>
<p>There are other mechanisms that are more subtle. Certain characteristics of a user's system can
be used to distinguish groups of users from each other; by collecting enough such information, an
individual user's browser's "digital fingerprint" can be computed, which can be as good as, if not
better than, an IP address in ascertaining which requests are from the same user.</p>
<p>Grouping requests in this manner, especially across multiple sites, can be used for both benign
(and even arguably positive) purposes, as well as for malevolent purposes. An example of a
reasonably benign purpose would be determining whether a particular person seems to prefer sites
with dog illustrations as opposed to sites with cat illustrations (based on how often they visit
the sites in question) and then automatically using the preferred illustrations on subsequent
visits to participating sites. Malevolent purposes, however, could include governments combining
information such as the person's home address (determined from the addresses they use when getting
driving directions on one site) with their apparent political affiliations (determined by
examining the forum sites that they participate in) to determine whether the person should be
prevented from voting in an election.</p>
<p>Since the malevolent purposes can be remarkably evil, user agent implementors are encouraged to
consider how to provide their users with tools to minimise leaking information that could be used
to fingerprint a user.</p>
<p>Unfortunately, as the first paragraph in this section implies, sometimes there is great benefit
to be derived from exposing the very information that can also be used for fingerprinting
purposes, so it's not as easy as simply blocking all possible leaks. For instance, the ability to
log into a site to post under a specific identity requires that the user's requests be
identifiable as all being from the same user, more or less by definition. More subtly, though,
information such as how wide text is, which is necessary for many effects that involve drawing
text onto a canvas (e.g. any effect that involves drawing a border around the text) also leaks
information that can be used to group a user's requests. (In this case, by potentially exposing,
via a brute force search, which fonts a user has installed, information which can vary
considerably from user to user.)</p>
<p>Features in this specification which can be <dfn data-x="fingerprinting vector">used to
fingerprint the user</dfn> are marked as this paragraph is.
<!--INSERT FINGERPRINT-->
</p>
<p>Other features in the platform can be used for the same purpose, though, including, but not
limited to:</p>
<ul>
<li>The exact list of which features a user agent supports.</li>
<li>The maximum allowed stack depth for recursion in script.</li>
<li>Features that describe the user's environment, like Media Queries and the <code>Screen</code>
object. <a href="#refsMQ">[MQ]</a> <a href="#refsCSSOMVIEW">[CSSOMVIEW]</a></li>
<li>The user's time zone.</li>
</ul>
<h3>A quick introduction to HTML</h3>
<!-- NON-NORMATIVE SECTION -->
<p>A basic HTML document looks like this:</p>
<pre id="intro-early-example">&lt;!DOCTYPE html>
&lt;html>
 &lt;head>
  &lt;title>Sample page&lt;/title>
 &lt;/head>
 &lt;body>
  &lt;h1>Sample page&lt;/h1>
  &lt;p>This is a &lt;a href="demo.html">simple&lt;/a> sample.&lt;/p>
  &lt;!-- this is a comment -->
 &lt;/body>
&lt;/html></pre>
<p>HTML documents consist of a tree of elements and text. Each element is denoted in the source by
a <span data-x="syntax-start-tag">start tag</span>, such as "<code data-x="">&lt;body></code>", and
an <span data-x="syntax-end-tag">end tag</span>, such as "<code data-x="">&lt;/body></code>".
(Certain start tags and end tags can in certain cases be <span
data-x="syntax-tag-omission">omitted</span> and are implied by other tags.)</p>
<p>Tags have to be nested such that elements are all completely within each other, without
overlapping:</p>
<pre class="bad">&lt;p>This is &lt;em>very &lt;strong>wrong&lt;/em>!&lt;/strong>&lt;/p></pre>
<pre>&lt;p>This &lt;em>is &lt;strong>correct&lt;/strong>.&lt;/em>&lt;/p></pre>
<p>This specification defines a set of elements that can be used in HTML, along with rules about
the ways in which the elements can be nested.</p>
<p>Elements can have attributes, which control how the elements work. In the example below, there
is a <span>hyperlink</span>, formed using the <code>a</code> element and its <code
data-x="attr-hyperlink-href">href</code> attribute:</p>
<pre>&lt;a href="demo.html">simple&lt;/a></pre>
<p><span data-x="syntax-attributes">Attributes</span> are placed inside the start tag, and consist
of a <span data-x="syntax-attribute-name">name</span> and a <span
data-x="syntax-attribute-value">value</span>, separated by an "<code data-x="">=</code>" character.
The attribute value can remain <a href="#unquoted">unquoted</a> if it doesn't contain <span
data-x="space character">space characters</span> or any of <code data-x="">"</code> <code
data-x="">'</code> <code data-x="">`</code> <code data-x="">=</code> <code data-x="">&lt;</code> or
<code data-x="">&gt;</code>. Otherwise, it has to be quoted using either single or double quotes.
The value, along with the "<code data-x="">=</code>" character, can be omitted altogether if the
value is the empty string.</p>
<pre>&lt;!-- empty attributes -->
&lt;input name=address disabled>
&lt;input name=address disabled="">
&lt;!-- attributes with a value -->
&lt;input name=address maxlength=200>
&lt;input name=address maxlength='200'>
&lt;input name=address maxlength="200"></pre>
<p>HTML user agents (e.g. Web browsers) then <i>parse</i> this markup, turning it into a DOM
(Document Object Model) tree. A DOM tree is an in-memory representation of a document.</p>
<p>DOM trees contain several kinds of nodes, in particular a <code>DocumentType</code> node,
<code>Element</code> nodes, <code>Text</code> nodes, <code>Comment</code> nodes, and in some cases
<code>ProcessingInstruction</code> nodes.</p>
<p>The <a href="#intro-early-example">markup snippet at the top of this section</a> would be
turned into the following DOM tree:</p>
<ul class="domTree"><li class="t10">DOCTYPE: <code data-x="">html</code></li><li class="t1"><code>html</code><ul><li class="t1"><code>head</code><ul><li class="t3"><code>#text</code>: <span data-x="">&#x23CE;&#x2423;&#x2423;</span></li><li class="t1"><code>title</code><ul><li class="t3"><code>#text</code>: <span data-x="">Sample page</span></li></ul></li><li class="t3"><code>#text</code>: <span data-x="">&#x23CE;&#x2423;</span></li></ul></li><li class="t3"><code>#text</code>: <span data-x="">&#x23CE;&#x2423;</span></li><li class="t1"><code>body</code><ul><li class="t3"><code>#text</code>: <span data-x="">&#x23CE;&#x2423;&#x2423;</span></li><li class="t1"><code>h1</code><ul><li class="t3"><code>#text</code>: <span data-x="">Sample page</span></li></ul></li><li class="t3"><code>#text</code>: <span data-x="">&#x23CE;&#x2423;&#x2423;</span></li><li class="t1"><code>p</code><ul><li class="t3"><code>#text</code>: <span data-x="">This is a <!--grammar-check-override--></span></li><li class="t1"><code>a</code> <span data-x="" class="t2"><code class="attribute name">href</code>="<code class="attribute value">demo.html</code>"</span><ul><li class="t3"><code>#text</code>: <span data-x="">simple</span></li></ul></li><li class="t3"><code>#text</code>: <span data-x=""> sample.</span></li></ul></li><li class="t3"><code>#text</code>: <span data-x="">&#x23CE;&#x2423;&#x2423;</span></li><li class="t8"><code>#comment</code>: <span data-x=""> this is a comment </span></li><li class="t3"><code>#text</code>: <span data-x="">&#x23CE;&#x2423;&#x23CE;</span></li></ul></li></ul></li></ul>
<p>The <span>root element</span> of this tree is the <code>html</code> element, which is the
element always found at the root of HTML documents. It contains two elements, <code>head</code>
and <code>body</code>, as well as a <code>Text</code> node between them.</p>
<p>There are many more <code>Text</code> nodes in the DOM tree than one would initially expect,
because the source contains a number of spaces (represented here by "&#x2423;") and line breaks
("&#x23CE;") that all end up as <code>Text</code> nodes in the DOM. However, for historical
reasons not all of the spaces and line breaks in the original markup appear in the DOM. In
particular, all the whitespace before the <code>head</code> start tag ends up being dropped silently,
and all the whitespace after the <code>body</code> end tag ends up placed at the end of the
<code>body</code>.</p>
<p>The <code>head</code> element contains a <code>title</code> element, which itself contains a
<code>Text</code> node with the text "Sample page". Similarly, the <code>body</code> element
contains an <code>h1</code> element, a <code>p</code> element, and a comment.</p>
<hr>
<p>This DOM tree can be manipulated from scripts in the page. Scripts (typically in JavaScript)
are small programs that can be embedded using the <code>script</code> element or using <span>event
handler content attributes</span>. For example, here is a form with a script that sets the value
of the form's <code>output</code> element to say "Hello World":</p>
<pre>&lt;<span>form</span> <span data-x="attr-form-name">name</span>="main">
Result: &lt;<span>output</span> <span data-x="attr-fe-name">name</span>="result">&lt;/output>
&lt;<span>script</span>>
<span data-x="Document">document</span>.<span data-x="dom-document-forms">forms</span>.main.<span data-x="dom-form-elements">elements</span>.result.<span data-x="dom-output-value">value</span> = 'Hello World';
&lt;/script>
&lt;/form></pre>
<p>Each element in the DOM tree is represented by an object, and these objects have APIs so that
they can be manipulated. For instance, a link (e.g. the <code>a</code> element in the tree above)
can have its "<code data-x="attr-hyperlink-href">href</code>" attribute changed in several
ways:</p>
<pre>var a = <span data-x="Document">document</span>.<span data-x="dom-document-links">links</span>[0]; // obtain the first link in the document
a.<span data-x="dom-url-href">href</span> = 'sample.html'; // change the destination URL of the link
a.<span data-x="dom-url-protocol">protocol</span> = 'https'; // change just the scheme part of the URL
a.setAttribute('href', 'http://example.com/'); // change the content attribute directly</pre>
<p>Since DOM trees are used as the way to represent HTML documents when they are processed and
presented by implementations (especially interactive implementations like Web browsers), this
specification is mostly phrased in terms of DOM trees, instead of the markup described above.</p>
<hr>
<p>HTML documents represent a media-independent description of interactive content. HTML documents
might be rendered to a screen, or through a speech synthesiser, or on a braille display. To
influence exactly how such rendering takes place, authors can use a styling language such as
CSS.</p>
<p>In the following example, the page has been made yellow-on-blue using CSS.</p>
<pre>&lt;!DOCTYPE html>
&lt;html>
&lt;head>
&lt;title>Sample styled page&lt;/title>
&lt;style>
body { background: navy; color: yellow; }
&lt;/style>
&lt;/head>
&lt;body>
&lt;h1>Sample styled page&lt;/h1>
&lt;p>This page is just a demo.&lt;/p>
&lt;/body>
&lt;/html></pre>
<p>For more details on how to use HTML, authors are encouraged to consult tutorials and guides.
Some of the examples included in this specification might also be of use, but the novice author is
cautioned that this specification, by necessity, defines the language with a level of detail that
might be difficult to understand at first.</p>
<!--ADD-TOPIC:Security-->
<h4>Writing secure applications with HTML</h4>
<!-- NON-NORMATIVE SECTION -->
<p>When HTML is used to create interactive sites, care needs to be taken to avoid introducing
vulnerabilities through which attackers can compromise the integrity of the site itself or of the
site's users.</p>
<p>A comprehensive study of this matter is beyond the scope of this document, and authors are
strongly encouraged to study the matter in more detail. However, this section attempts to provide
a quick introduction to some common pitfalls in HTML application development.</p>
<p>The security model of the Web is based on the concept of "origins", and correspondingly many of
the potential attacks on the Web involve cross-origin actions. <a
href="#refsORIGIN">[ORIGIN]</a></p>
<dl>
<dt>Not validating user input</dt>
<dt>Cross-site scripting (XSS)</dt>
<dt>SQL injection</dt>
<dd>
<p>When accepting untrusted input, e.g. user-generated content such as text comments, values in
URL parameters, messages from third-party sites, etc, it is imperative that the data be
validated before use, and properly escaped when displayed. Failing to do this can allow a
hostile user to perform a variety of attacks, ranging from the potentially benign, such as
providing bogus user information like a negative age, to the serious, such as running scripts
every time a user looks at a page that includes the information, potentially propagating the
attack in the process, to the catastrophic, such as deleting all data in the server.</p>
<p>When writing filters to validate user input, it is imperative that filters always be
whitelist-based, allowing known-safe constructs and disallowing all other input. Blacklist-based
filters that disallow known-bad inputs and allow everything else are not secure, as not
everything that is bad is yet known (for example, because it might be invented in the
future).</p>
<div class="example">
<p>For example, suppose a page looked at its URL's query string to determine what to display,
and the site then redirected the user to that page to display a message, as in:</p>
<pre>&lt;ul>
&lt;li>&lt;a href="message.cgi?say=Hello">Say Hello&lt;/a>
&lt;li>&lt;a href="message.cgi?say=Welcome">Say Welcome&lt;/a>
&lt;li>&lt;a href="message.cgi?say=Kittens">Say Kittens&lt;/a>
&lt;/ul></pre>
<p>If the message was just displayed to the user without escaping, a hostile attacker could
then craft a URL that contained a script element:</p>
<pre>http://example.com/message.cgi?say=%3Cscript%3Ealert%28%27Oh%20no%21%27%29%3C/script%3E</pre>
<p>If the attacker then convinced a victim user to visit this page, a script of the attacker's
choosing would run on the page. Such a script could do any number of hostile actions, limited
only by what the site offers: if the site is an e-commerce shop, for instance, such a script
could cause the user to unknowingly make arbitrarily many unwanted purchases.</p>
<p>This is called a cross-site scripting attack.</p>
</div>
<p>There are many constructs that can be used to try to trick a site into executing code. Here
are some that authors are encouraged to consider when writing whitelist filters:</p>
<ul>
<li>When allowing harmless-seeming elements like <code>img</code>, it is important to whitelist
any provided attributes as well. If one allowed all attributes then an attacker could, for
instance, use the <code data-x="handler-onload">onload</code> attribute to run arbitrary
script.</li>
<li>When allowing URLs to be provided (e.g. for links), the scheme of each URL also needs to be
explicitly whitelisted, as there are many schemes that can be abused. The most prominent
example is "<code data-x="javascript-protocol">javascript:</code>", but user agents can
implement (and indeed, have historically implemented) others.</li> <!-- IE had vbscript:,
Netscape had livescript:, etc. -->
<li>Allowing a <code>base</code> element to be inserted means any <code>script</code> elements
in the page with relative links can be hijacked, and similarly that any form submissions can
get redirected to a hostile site.</li>
</ul>
</dd>
<dt>Cross-site request forgery (CSRF)</dt>
<dd>
<p>If a site allows a user to make form submissions with user-specific side-effects, for example
posting messages on a forum under the user's name, making purchases, or applying for a passport,
it is important to verify that the request was made by the user intentionally, rather than by
another site tricking the user into making the request unknowingly.</p>
<p>This problem exists because HTML forms can be submitted to other origins.</p>
<p>Sites can prevent such attacks by populating forms with user-specific hidden tokens, or by
checking <code data-x="http-origin">Origin</code> headers on all requests.</p>
</dd>
<dt>Clickjacking</dt>
<dd>
<p>A page that provides users with an interface to perform actions that the user might not wish
to perform needs to be designed so as to avoid the possibility that users can be tricked into
activating the interface.</p>
<p>One way that a user could be so tricked is if a hostile site places the victim site in a
small <code>iframe</code> and then convinces the user to click, for instance by having the user
play a reaction game. Once the user is playing the game, the hostile site can quickly position
the iframe under the mouse cursor just as the user is about to click, thus tricking the user
into clicking the victim site's interface.</p>
<p>To avoid this, sites that do not expect to be used in frames are encouraged to only enable
their interface if they detect that they are not in a frame (e.g. by comparing the <code
data-x="dom-window">window</code> object to the value of the <code data-x="dom-top">top</code>
attribute).</p>
</dd>
</dl>
<!--REMOVE-TOPIC:Security-->
<h4>Common pitfalls to avoid when using the scripting APIs</h4>
<!-- NON-NORMATIVE SECTION -->
<p>Scripts in HTML have "run-to-completion" semantics, meaning that the browser will generally run
the script uninterrupted before doing anything else, such as firing further events or continuing
to parse the document.</p>
<p>On the other hand, parsing of HTML files happens asynchronously and incrementally, meaning that
the parser can pause at any point to let scripts run. This is generally a good thing, but it does
mean that authors need to be careful to avoid hooking event handlers after the events could have
possibly fired.</p>
<p>There are two techniques for doing this reliably: use <span>event handler content
attributes</span>, or create the element and add the event handlers in the same script. The latter
is safe because, as mentioned earlier, scripts are run to completion before further events can
fire.</p>
<div class="example">
<p>One way this could manifest itself is with <code>img</code> elements and the <code
data-x="event-load">load</code> event. The event could fire as soon as the element has been
parsed, especially if the image has already been cached (which is common).</p>
<p>Here, the author uses the <code data-x="handler-onload">onload</code> handler on an
<code>img</code> element to catch the <code data-x="event-load">load</code> event:</p>
<pre>&lt;img src="games.png" alt="Games" onload="gamesLogoHasLoaded(event)"></pre>
<p>If the element is being added by script, then so long as the event handlers are added in the
same script, the event will still not be missed:</p>
<pre>&lt;script>
var img = new Image();
img.src = 'games.png';
img.alt = 'Games';
img.onload = gamesLogoHasLoaded;
// img.addEventListener('load', gamesLogoHasLoaded, false); // would work also
&lt;/script></pre>
<p>However, if the author first created the <code>img</code> element and then in a separate
script added the event listeners, there's a chance that the <code data-x="event-load">load</code>
event would be fired in between, leading it to be missed:</p>
<pre class="bad">&lt;!-- Do not use this style, it has a race condition! -->
&lt;img id="games" src="games.png" alt="Games">
&lt;!-- the 'load' event might fire here while the parser is taking a
break, in which case you will not see it! -->
&lt;script>
var img = document.getElementById('games');
img.onload = gamesLogoHasLoaded; // might never fire!
&lt;/script></pre>
</div>
<h4>How to catch mistakes when writing HTML: validators and conformance checkers</h4>
<!-- NON-NORMATIVE SECTION -->
<p>Authors are encouraged to make use of conformance checkers (also known as <i>validators</i>) to
catch common mistakes. The WHATWG maintains a list of such tools at: <a
href="https://validator.whatwg.org/">https://validator.whatwg.org/</a></p>
<h3>Conformance requirements for authors</h3>
<!-- NON-NORMATIVE SECTION -->
<p>Unlike previous versions of the HTML specification, this specification defines in some detail
the required processing for invalid documents as well as valid documents.</p> <!-- This has led to
some questioning the purpose of conformance criteria: if there is no ambiguity in how something
will be processed, why disallow it? -->
<p>However, even though the processing of invalid content is in most cases well-defined,
conformance requirements for documents are still important: in practice, interoperability (the
situation in which all implementations process particular content in a reliable and identical or
equivalent way) is not the only goal of document conformance requirements. This section details
some of the more common reasons for still distinguishing between a conforming document and one
with errors.</p>
<h4>Presentational markup</h4>
<!-- NON-NORMATIVE SECTION -->
<p>The majority of presentational features from previous versions of HTML are no longer allowed.
Presentational markup in general has been found to have a number of problems:</p>
<dl>
<dt>The use of presentational elements leads to poorer accessibility</dt>
<dd>
<p>While it is possible to use presentational markup in a way that provides users of assistive
technologies (ATs) with an acceptable experience (e.g. using ARIA), doing so is significantly
more difficult than doing so when using semantically-appropriate markup. Furthermore, even using
such techniques doesn't help make pages accessible for non-AT non-graphical users, such as users
of text-mode browsers.</p>
<p>Using media-independent markup, on the other hand, provides an easy way for documents to be
authored in such a way that they work for more users (e.g. text browsers).</p>
</dd>
<dt>Higher cost of maintenance</dt>
<dd>
<p>It is significantly easier to maintain a site written in such a way that the markup is
style-independent. For example, changing the colour of a site that uses
<code>&lt;font&nbsp;color=""></code> throughout requires changes across the entire site, whereas
a similar change to a site based on CSS can be done by changing a single file.</p>
</dd>
<dt>Larger document sizes</dt>
<dd>
<p>Presentational markup tends to be much more redundant, and thus results in larger document
sizes.</p>
</dd>
</dl>
<p>For those reasons, presentational markup has been removed from HTML in this version. This
change should not come as a surprise; HTML4 deprecated presentational markup many years ago and
provided a mode (HTML4 Transitional) to help authors move away from presentational markup; later,
XHTML 1.1 went further and obsoleted those features altogether.</p>
<p>The only remaining presentational markup features in HTML are the <code
data-x="attr-style">style</code> attribute and the <code>style</code> element. Use of the <code
data-x="attr-style">style</code> attribute is somewhat discouraged in production environments, but
it can be useful for rapid prototyping (where its rules can be directly moved into a separate
style sheet later) and for providing specific styles in unusual cases where a separate style sheet
would be inconvenient. Similarly, the <code>style</code> element can be useful in syndication or
for page-specific styles, but in general an external style sheet is likely to be more convenient
when the styles apply to multiple pages.</p>
<p>It is also worth noting that some elements that were previously presentational have been
redefined in this specification to be media-independent: <code>b</code>, <code>i</code>,
<code>hr</code>, <code>s</code>, <code>small</code>, and <code>u</code>.</p>
<h4>Syntax errors</h4>
<!-- NON-NORMATIVE SECTION -->
<p>The syntax of HTML is constrained to avoid a wide variety of problems.</p>
<dl>
<dt>Unintuitive error-handling behavior</dt>
<dd>
<p>Certain invalid syntax constructs, when parsed, result in DOM trees that are highly
unintuitive.</p>
<div class="example">
<p>For example, the following markup fragment results in a DOM with an <code>hr</code> element
that is an <em>earlier</em> sibling of the corresponding <code>table</code> element:</p>
<pre class="bad">&lt;table>&lt;hr>...</pre>
</div>
</dd>
<dt>Errors with optional error recovery</dt>
<dd>
<p>To allow user agents to be used in controlled environments without having to implement the
more bizarre and convoluted error handling rules, user agents are permitted to fail whenever
encountering a <span>parse error</span>.</p>
</dd>
<dt>Errors where the error-handling behavior is not compatible with streaming user agents</dt>
<dd>
<p>Some error-handling behavior, such as the behavior for the <code
data-x="">&lt;table>&lt;hr>...</code> example mentioned above, is incompatible with streaming
user agents (user agents that process HTML files in one pass, without storing state). To avoid
interoperability problems with such user agents, any syntax resulting in such behavior is
considered invalid.</p>
</dd>
<dt>Errors that can result in infoset coercion</dt>
<dd>
<p>When a user agent based on XML is connected to an HTML parser, it is possible that certain
invariants that XML enforces, such as comments never containing two consecutive hyphens, will be
violated by an HTML file. Handling this can require that the parser coerce the HTML DOM into an
XML-compatible infoset. Most syntax constructs that require such handling are considered
invalid.</p>
</dd>
<dt>Errors that result in disproportionally poor performance</dt>
<dd>
<p>Certain syntax constructs can result in disproportionally poor performance. To discourage the
use of such constructs, they are typically made non-conforming.</p>
<div class="example">
<p>For example, the following markup results in poor performance, since all the unclosed
<code>i</code> elements have to be reconstructed in each paragraph, resulting in progressively
more elements in each paragraph:</p>
<pre class="bad">&lt;p>&lt;i>He dreamt.
&lt;p>&lt;i>He dreamt that he ate breakfast.
&lt;p>&lt;i>Then lunch.
&lt;p>&lt;i>And finally dinner.</pre>
<p>The resulting DOM for this fragment would be:</p>
<ul class="domTree"><li class="t1"><code>p</code><ul><li class="t1"><code>i</code><ul><li class="t3"><code>#text</code>: <span data-x="">He dreamt.</span></li></ul></li></ul></li><li class="t1"><code>p</code><ul><li class="t1"><code>i</code><ul><li class="t1"><code>i</code><ul><li class="t3"><code>#text</code>: <span data-x="">He dreamt that he ate breakfast.</span></li></ul></li></ul></li></ul></li><li class="t1"><code>p</code><ul><li class="t1"><code>i</code><ul><li class="t1"><code>i</code><ul><li class="t1"><code>i</code><ul><li class="t3"><code>#text</code>: <span data-x="">Then lunch.</span></li></ul></li></ul></li></ul></li></ul></li><li class="t1"><code>p</code><ul><li class="t1"><code>i</code><ul><li class="t1"><code>i</code><ul><li class="t1"><code>i</code><ul><li class="t1"><code>i</code><ul><li class="t3"><code>#text</code>: <span data-x="">And finally dinner.</span></li></ul></li></ul></li></ul></li></ul></li></ul></li></ul>
</div>
</dd>
<dt>Errors involving fragile syntax constructs</dt>
<dd>
<p>There are syntax constructs that, for historical reasons, are relatively fragile. To help
reduce the number of users who accidentally run into such problems, they are made
non-conforming.</p>
<div class="example">
<p>For example, the parsing of certain named character references in attributes happens even
with the closing semicolon being omitted. It is safe to include an ampersand followed by
letters that do not form a named character reference, but if the letters are changed to a
string that <em>does</em> form a named character reference, they will be interpreted as that
character instead.</p>
<p>In this fragment, the attribute's value is "<code data-x="">?bill&amp;ted</code>":</p>
<pre class="bad">&lt;a href="?bill&amp;ted">Bill and Ted&lt;/a></pre>
<p>In the following fragment, however, the attribute's value is actually "<code
data-x="">?art&copy;</code>", <em>not</em> the intended "<code data-x="">?art&amp;copy</code>",
because even without the final semicolon, "<code data-x="">&amp;copy</code>" is handled the same
as "<code data-x="">&amp;copy;</code>" and thus gets interpreted as "<code
data-x="">&copy;</code>":</p>
<pre class="bad">&lt;a href="?art&amp;copy">Art and Copy&lt;/a></pre>
<p>To avoid this problem, all named character references are required to end with a semicolon,
and uses of named character references without a semicolon are flagged as errors.</p>
<p>Thus, the correct way to express the above cases is as
follows:</p>
<pre>&lt;a href="?bill&amp;ted">Bill and Ted&lt;/a> &lt;!-- &amp;ted is ok, since it's not a named character reference --></pre>
<pre>&lt;a href="?art&amp;amp;copy">Art and Copy&lt;/a> &lt;!-- the &amp; has to be escaped, since &amp;copy <em>is</em> a named character reference --></pre>
</div>
</dd>
<dt>Errors involving known interoperability problems in legacy user agents</dt>
<dd>
<p>Certain syntax constructs are known to cause especially subtle or serious problems in legacy
user agents, and are therefore marked as non-conforming to help authors avoid them.</p>
<div class="example">
<p>For example, this is why the U+0060 GRAVE ACCENT character (`) is not allowed in unquoted
attributes. In certain legacy user agents, <!-- namely IE --> it is sometimes treated as a
quote character.</p>
</div>
<div class="example">
<p>Another example of this is the DOCTYPE, which is required to trigger <span>no-quirks
mode</span>, because the behavior of legacy user agents in <span>quirks mode</span> is often
largely undocumented.</p>
</div>
</dd>
<!--ADD-TOPIC:Security-->
<dt>Errors that risk exposing authors to security attacks</dt>
<dd>
<p>Certain restrictions exist purely to avoid known security problems.</p>
<div class="example">
<p>For example, the restriction on using UTF-7 exists purely to avoid authors falling prey to a
known cross-site-scripting attack using UTF-7. <a href="#refsUTF7">[UTF7]</a></p>
</div>
</dd>
<!--REMOVE-TOPIC:Security-->
<dt>Cases where the author's intent is unclear</dt>
<dd>
<p>Markup where the author's intent is very unclear is often made non-conforming. Correcting
these errors early makes later maintenance easier.</p>
<div class="example">
<p>For example, it is unclear whether the author intended the following to be an
<code>h1</code> heading or an <code>h2</code> heading:</p>
<pre class="bad">&lt;h1>Contact details&lt;/h2></pre>
</div>
</dd>
<dt>Cases that are likely to be typos</dt>
<dd>
<p>When an author makes a simple typo, it is helpful if the error can be caught early, as this can
save the author a lot of debugging time. This specification therefore usually considers it an
error to use element names, attribute names, and so forth, that do not match the names defined
in this specification.</p>
<div class="example">
<p>For example, if the author typed <code>&lt;capton></code> instead of
<code>&lt;caption></code>, this would be flagged as an error and the author could correct the
typo immediately.</p>
</div>
</dd>
<dt>Errors that could interfere with new syntax in the future</dt>
<dd>
<p>In order to allow the language syntax to be extended in the future, certain otherwise
harmless features are disallowed.</p>
<div class="example">
<p>For example, "attributes" in end tags are ignored currently, but they are invalid, so that a
future change to the language can make use of that syntax feature without conflicting with
already-deployed (and valid!) content.</p>
</div>
</dd>
</dl>
<p>Some authors find it helpful to be in the practice of always quoting all attributes and always
including all optional tags, preferring the consistency derived from such custom over the minor
benefits of terseness afforded by making use of the flexibility of the HTML syntax. To aid such
authors, conformance checkers can provide modes of operation wherein such conventions are
enforced.</p>
<h4>Restrictions on content models and on attribute values</h4>
<!-- NON-NORMATIVE SECTION -->
<p>Beyond the syntax of the language, this specification also places restrictions on how elements
and attributes can be specified. These restrictions are present for similar reasons:</p>
<dl>
<dt>Errors involving content with dubious semantics</dt>
<dd>
<p>To avoid misuse of elements with defined meanings, content models are defined that restrict
how elements can be nested when such nestings would be of dubious value.</p>
<p class="example">For example, this specification disallows nesting a <code>section</code>
element inside a <code>kbd</code> element, since it is highly unlikely for an author to indicate
that an entire section should be keyed in.</p>
</dd>
<dt>Errors that involve a conflict in expressed semantics</dt>
<dd>
<p>Similarly, to draw the author's attention to mistakes in the use of elements, clear
contradictions in the semantics expressed are also considered conformance errors.</p>
<div class="example">
<p>In the fragments below, for example, the semantics are nonsensical: a separator cannot
simultaneously be a cell, nor can a radio button be a progress bar.</p>
<pre class="bad">&lt;hr role="cell"></pre>
<pre class="bad">&lt;input type=radio role=progressbar></pre>
</div>
<p class="example">Another example is the restrictions on the content models of the
<code>ul</code> element, which only allows <code>li</code> element children. Lists by definition
consist just of zero or more list items, so if a <code>ul</code> element contains something
other than an <code>li</code> element, it's not clear what was meant.</p>
</dd>
<dt>Cases where the default styles are likely to lead to confusion</dt>
<dd>
<p>Certain elements have default styles or behaviors that make certain combinations likely to
lead to confusion. Where these have equivalent alternatives without this problem, the confusing
combinations are disallowed.</p>
<p class="example">For example, <code>div</code> elements are rendered as block boxes, and
<code>span</code> elements as inline boxes. Putting a block box in an inline box is
unnecessarily confusing; since either nesting just <code>div</code> elements, or nesting just
<code>span</code> elements, or nesting <code>span</code> elements inside <code>div</code>
elements all serve the same purpose as nesting a <code>div</code> element in a <code>span</code>
element, but only the latter involves a block box in an inline box, the latter combination is
disallowed.</p>
<p class="example">Another example would be the way <span>interactive content</span> cannot be
nested. For example, a <code>button</code> element cannot contain a <code>textarea</code>
element. This is because the default behavior of such nesting interactive elements would be
highly confusing to users. Instead of nesting these elements, they can be placed side by
side.</p>
</dd>
<dt>Errors that indicate a likely misunderstanding of the specification</dt>
<dd>
<p>Sometimes, something is disallowed because allowing it would likely cause author
confusion.</p>
<p class="example">For example, setting the <code data-x="attr-fe-disabled">disabled</code>
attribute to the value "<code data-x="">false</code>" is disallowed, because despite the
appearance of meaning that the element is enabled, it in fact means that the element is
<em>disabled</em> (what matters for implementations is the presence of the attribute, not its
value).</p>
</dd>
<dt>Errors involving limits that have been imposed merely to simplify the language</dt>
<dd>
<p>Some conformance errors simplify the language that authors need to learn.</p>
<p class="example">For example, the <code>area</code> element's <code
data-x="attr-area-shape">shape</code> attribute, despite accepting both <code
data-x="attr-area-shape-keyword-circ">circ</code> and <code
data-x="attr-area-shape-keyword-circle">circle</code> values in practice as synonyms, disallows
the use of the <code data-x="attr-area-shape-keyword-circ">circ</code> value, so as to simplify
tutorials and other learning aids. There would be no benefit to allowing both, but it would
cause extra confusion when teaching the language.</p>
</dd>
<dt>Errors that involve peculiarities of the parser</dt>
<dd>
<p>Certain elements are parsed in somewhat eccentric ways (typically for historical reasons),
and their content model restrictions are intended to avoid exposing the author to these
issues.</p>
<div class="example">
<p>For example, a <code>form</code> element isn't allowed inside <span>phrasing content</span>,
because when parsed as HTML, a <code>form</code> element's start tag will imply a
<code>p</code> element's end tag. Thus, the following markup results in two <span
data-x="paragraph">paragraphs</span>, not one:</p>
<pre>&lt;p>Welcome. &lt;form>&lt;label>Name:&lt;/label> &lt;input>&lt;/form></pre>
<p>It is parsed exactly like the following:</p>
<pre>&lt;p>Welcome. &lt;/p>&lt;form>&lt;label>Name:&lt;/label> &lt;input>&lt;/form></pre>
</div>
</dd>
<dt>Errors that would likely result in scripts failing in hard-to-debug ways</dt>
<dd>
<p>Some errors are intended to help prevent script problems that would be hard to debug.</p>
<p class="example">This is why, for instance, it is non-conforming to have two <code
data-x="attr-id">id</code> attributes with the same value. Duplicate IDs lead to the wrong
element being selected, with sometimes disastrous effects whose cause is hard to determine.</p>
</dd>
<dt>Errors that waste authoring time</dt>
<dd>
<p>Some constructs are disallowed because historically they have been the cause of a lot of
wasted authoring time, and by encouraging authors to avoid making them, authors can save time in
future efforts.</p>
<p class="example">For example, a <code>script</code> element's <code
data-x="attr-script-src">src</code> attribute causes the element's contents to be ignored.
However, this isn't obvious, especially if the element's contents appear to be executable script
&mdash; which can lead to authors spending a lot of time trying to debug the inline script
without realizing that it is not executing. To reduce this problem, this specification makes it
non-conforming to have executable script in a <code>script</code> element when the <code
data-x="attr-script-src">src</code> attribute is present. This means that authors who are
validating their documents are less likely to waste time with this kind of mistake.</p>
</dd>
<dt>Errors that involve areas that affect authors migrating to and from XHTML</dt>
<dd>
<p>Some authors like to write files that can be interpreted as both XML and HTML with similar
results. Though this practice is discouraged in general due to the myriad of subtle
complications involved (especially when involving scripting, styling, or any kind of automated
serialisation), this specification has a few restrictions intended to at least somewhat mitigate
the difficulties. This makes it easier for authors to use this as a transitionary step when
migrating between HTML and XHTML.</p>
<p class="example">For example, there are somewhat complicated rules surrounding the <code
data-x="attr-lang">lang</code> and <code data-x="attr-xml-lang">xml:lang</code> attributes
intended to keep the two synchronized.</p>
<p class="example">Another example would be the restrictions on the values of <code
data-x="">xmlns</code> attributes in the HTML serialisation, which are intended to ensure that
elements in conforming documents end up in the same namespaces whether processed as HTML or
XML.</p>
</dd>
<dt>Errors that involve areas reserved for future expansion</dt>
<dd>
<p>As with the restrictions on the syntax intended to allow for new syntax in future revisions
of the language, some restrictions on the content models of elements and values of attributes
are intended to allow for future expansion of the HTML vocabulary.</p>
<p class="example">For example, limiting the values of the <code
data-x="attr-hyperlink-target">target</code> attribute that start with a U+005F LOW LINE
character (_) to only specific predefined values allows new predefined values to be introduced
at a future time without conflicting with author-defined values.</p>
</dd>
<dt>Errors that indicate a misuse of other specifications</dt>
<dd>
<p>Certain restrictions are intended to support the restrictions made by other
specifications.</p>
<p class="example">For example, requiring that attributes that take media queries use only
<em>valid</em> media queries reinforces the importance of following the conformance rules of
that specification.</p>
</dd>
</dl>
<h3>Suggested reading</h3>
<!-- NON-NORMATIVE SECTION -->
<p>The following documents might be of interest to readers of this specification.</p>
<dl>
<dt><cite>Character Model for the World Wide Web 1.0: Fundamentals</cite> <a href="#refsCHARMOD">[CHARMOD]</a></dt>
<dd><blockquote><p>This Architectural Specification provides authors of specifications, software
developers, and content developers with a common reference for interoperable text manipulation on
the World Wide Web, building on the Universal Character Set, defined jointly by the Unicode
Standard and ISO/IEC 10646. Topics addressed include use of the terms 'character', 'encoding' and
'string', a reference processing model, choice and identification of character encodings,
character escaping, and string indexing.</p></blockquote></dd>
<dt><cite>Unicode Security Considerations</cite> <a href="#refsUTR36">[UTR36]</a></dt>
<dd><blockquote><p>Because Unicode contains such a large number of characters and incorporates
the varied writing systems of the world, incorrect usage can expose programs or systems to
possible security attacks. This is especially important as more and more products are
internationalized. This document describes some of the security considerations that programmers,
system analysts, standards developers, and users should take into account, and provides specific
recommendations to reduce the risk of problems.</p></blockquote></dd>
<dt><cite>Web Content Accessibility Guidelines (WCAG) 2.0</cite> <a href="#refsWCAG">[WCAG]</a></dt>
<dd><blockquote><p>Web Content Accessibility Guidelines (WCAG) 2.0 covers a wide range of
recommendations for making Web content more accessible. Following these guidelines will make
content accessible to a wider range of people with disabilities, including blindness and low
vision, deafness and hearing loss, learning disabilities, cognitive limitations, limited
movement, speech disabilities, photosensitivity and combinations of these. Following these
guidelines will also often make your Web content more usable to users in
general.</p></blockquote></dd>
<dt class="nodev"><cite>Authoring Tool Accessibility Guidelines (ATAG) 2.0</cite> <a href="#refsATAG">[ATAG]</a></dt>
<dd class="nodev"><blockquote><p>This specification provides guidelines for designing Web content
authoring tools that are more accessible for people with disabilities. An authoring tool that
conforms to these guidelines will promote accessibility by providing an accessible user interface
to authors with disabilities as well as by enabling, supporting, and promoting the production of
accessible Web content by all authors.</p></blockquote></dd>
<dt class="nodev"><cite>User Agent Accessibility Guidelines (UAAG) 2.0</cite> <a href="#refsUAAG">[UAAG]</a></dt>
<dd class="nodev"><blockquote><p>This document provides guidelines for designing user agents that
lower barriers to Web accessibility for people with disabilities. User agents include browsers
and other types of software that retrieve and render Web content. A user agent that conforms to
these guidelines will promote accessibility through its own user interface and through other
internal facilities, including its ability to communicate with other technologies (especially
assistive technologies). Furthermore, all users, not just users with disabilities, should find
conforming user agents to be more usable.</p></blockquote></dd>
</dl>
<h2 id="infrastructure">Common infrastructure</h2>
<h3>Terminology</h3>
<p>This specification refers to both HTML and XML attributes and IDL attributes, often in the same
context. When it is not clear which is being referred to, they are referred to as <dfn
data-x="">content attributes</dfn> for HTML and XML attributes, and <dfn data-x="">IDL
attributes</dfn> for those defined on IDL interfaces. Similarly, the term "properties" is used for
both JavaScript object properties and CSS properties. When these are ambiguous they are qualified
as <dfn data-x="">object properties</dfn> and <dfn data-x="">CSS properties</dfn> respectively.</p>
<p>Generally, when the specification states that a feature applies to <span>the HTML syntax</span>
or <span>the XHTML syntax</span>, it also includes the other. When a feature specifically only
applies to one of the two languages, it is called out by explicitly stating that it does not apply
to the other format, as in "for HTML, ... (this does not apply to XHTML)".</p>
<p>This specification uses the term <dfn data-x="">document</dfn> to refer to any use of HTML,
ranging from short static documents to long essays or reports with rich multimedia, as well as to
fully-fledged interactive applications. The term is used to refer both to <code>Document</code>
objects and their descendant DOM trees, and to serialised byte streams using the <span data-x="the
HTML syntax">HTML syntax</span> or <span data-x="the XHTML syntax">XHTML syntax</span>, depending
on context.</p>
<p>In the context of the DOM structures, the terms <span data-x="HTML documents">HTML
document</span> and <span data-x="XML documents">XML document</span> are used as defined in the DOM
specification, and refer specifically to two different modes that <code>Document</code> objects
can find themselves in. <a href="#refsDOM">[DOM]</a> (Such uses are always hyperlinked to their
definition.)</p>
<p>In the context of byte streams, the term HTML document refers to resources labeled as
<code>text/html</code>, and the term XML document refers to resources labeled with an <span>XML
MIME type</span>.</p>
<p>The term <dfn>XHTML document</dfn> is used to refer to both <code>Document</code>s in the <span
data-x="XML documents">XML document</span> mode that contain element nodes in the <span>HTML
namespace</span>, and byte streams labeled with an <span>XML MIME type</span> that contain
elements from the <span>HTML namespace</span>, depending on context.</p>
<hr>
<p>For simplicity, terms such as <dfn data-x="">shown</dfn>, <dfn data-x="">displayed</dfn>, and
<dfn data-x="">visible</dfn> might sometimes be used when referring to the way a document is
rendered to the user. These terms are not meant to imply a visual medium; they must be considered
to apply to other media in equivalent ways.</p>
<div class="nodev">
<p>When an algorithm B says to return to another algorithm A, it implies that A called B. Upon
returning to A, the implementation must continue from where it left off in calling B.</p>
</div>
<!-- should find somewhere more appropriate to put this -->
<p>The term "transparent black" refers to the colour with red, green, blue, and alpha channels all
set to zero.</p>
<h4>Resources</h4>
<p>The specification uses the term <dfn data-x="">supported</dfn> when referring to whether a user
agent has an implementation capable of decoding the semantics of an external resource. A format or
type is said to be <i>supported</i> if the implementation can process an external resource of that
format or type without critical aspects of the resource being ignored. Whether a specific resource
is <i>supported</i> can depend on what features of the resource's format are in use.</p>
<p class="example">For example, a PNG image would be considered to be in a supported format if its
pixel data could be decoded and rendered, even if, unbeknownst to the implementation, the image
also contained animation data.</p>
<p class="example">An MPEG-4 video file would not be considered to be in a supported format if the
compression format used was not supported, even if the implementation could determine the
dimensions of the movie from the file's metadata.</p>
<p>What some specifications, in particular the HTTP specification, refer to as a
<i>representation</i> is referred to in this specification as a <dfn data-x="">resource</dfn>. <a
href="#refsHTTP">[HTTP]</a></p>
<p>The term <dfn>MIME type</dfn> is used to refer to what is sometimes called an <i>Internet media
type</i> in protocol literature. The term <i>media type</i> in this specification is used to refer
to the type of media intended for presentation, as used by the CSS specifications. <a
href="#refsRFC2046">[RFC2046]</a> <a href="#refsMQ">[MQ]</a></p>
<p>A string is a <dfn>valid MIME type</dfn> if it matches the <code data-x="">media-type</code>
rule defined in section 3.7 "Media Types" of RFC 2616. In particular, a <span>valid MIME
type</span> may include MIME type parameters. <a href="#refsHTTP">[HTTP]</a></p>
<p>A string is a <dfn>valid MIME type with no parameters</dfn> if it matches the <code
data-x="">media-type</code> rule defined in section 3.7 "Media Types" of RFC 2616, but does not
contain any U+003B SEMICOLON characters (;). In other words, it consists only of a type and
subtype, with no MIME type parameters. <a href="#refsHTTP">[HTTP]</a></p>
<p>The term <dfn>HTML MIME type</dfn> is used to refer to the <span>MIME type</span>
<code>text/html</code>.</p>
<p>A resource's <dfn>critical subresources</dfn> are those that the resource needs to have
available to be correctly processed. Which resources are considered critical or not is defined by
the specification that defines the resource's format.</p>
<p>The term <dfn data-x="data protocol"><code data-x="">data:</code> URL</dfn> refers to <span
data-x="URL">URLs</span> that use the <code data-x="">data:</code> scheme. <a
href="#refsRFC2397">[RFC2397]</a></p>
<h4>XML</h4>
<p id="html-namespace">To ease migration from HTML to XHTML, UAs conforming to this specification
will place elements in HTML in the <code>http://www.w3.org/1999/xhtml</code> namespace, at least
for the purposes of the DOM and CSS. The term "<dfn>HTML elements</dfn>", when used in this
specification, refers to any element in that namespace, and thus refers to both HTML and XHTML
elements.</p>
<p>Except where otherwise stated, all elements defined or mentioned in this specification are in
the <span>HTML namespace</span> ("<code>http://www.w3.org/1999/xhtml</code>"), and all attributes
defined or mentioned in this specification have no namespace.</p>
<p>The term <dfn>element type</dfn> is used to refer to the set of elements that have a given
local name and namespace. For example, <code>button</code> elements are elements with the element
type <code>button</code>, meaning they have the local name "<code data-x="">button</code>" and
(implicitly as defined above) the <span>HTML namespace</span>.</p>
<p>Attribute names are said to be <dfn>XML-compatible</dfn> if they match the <a
href="http://www.w3.org/TR/xml/#NT-Name"><code data-x="">Name</code></a> production defined in XML
and they contain no U+003A COLON characters (:). <a href="#refsXML">[XML]</a></p>
<p>The term <dfn>XML MIME type</dfn> is used to refer to the <span data-x="MIME type">MIME
types</span> <code data-x="">text/xml</code>, <code data-x="">application/xml</code>, and any
<span>MIME type</span> whose subtype ends with the four characters "<code data-x="">+xml</code>".
<a href="#refsRFC3023">[RFC3023]</a></p>
<h4>DOM trees</h4>
<p>The <dfn>root element of a <code>Document</code> object</dfn> is that <code>Document</code>'s
first element child, if any. If it does not have one then the <code>Document</code> has no root
element.</p>
<p>The term <dfn>root element</dfn>, when not referring to a <code>Document</code> object's root
element, means the furthest ancestor element node of whatever node is being discussed, or the node
itself if it has no ancestors. When the node is a part of the document, then the node's <span>root
element</span> is indeed the document's root element; however, if the node is not currently part
of the document tree, the root element will be an orphaned node.</p>
<p>When an element's <span>root element</span> is the <span>root element of a
<code>Document</code> object</span>, it is said to be <dfn>in a <code>Document</code></dfn>. An
element is said to have been <dfn data-x="insert an element into a document">inserted into a
document</dfn> when its <span>root element</span> changes and is now the document's <span>root
element</span>. Analogously, an element is said to have been <dfn data-x="remove an element from a
document">removed from a document</dfn> when its <span>root element</span> changes from being the
document's <span>root element</span> to being another element.</p>
<p>A node's <dfn>home subtree</dfn> is the subtree rooted at that node's <span>root
element</span>. When a node is <span>in a <code>Document</code></span>, its <span>home
subtree</span> is that <code>Document</code>'s tree.</p>
<p>The <code>Document</code> of a <code>Node</code> (such as an element) is the
<code>Document</code> that the <code>Node</code>'s <code
data-x="dom-Node-ownerDocument">ownerDocument</code> IDL attribute returns. When a
<code>Node</code> is <span>in a <code>Document</code></span> then that <code>Document</code> is
always the <code>Node</code>'s <code>Document</code>, and the <code>Node</code>'s <code
data-x="dom-Node-ownerDocument">ownerDocument</code> IDL attribute thus always returns that
<code>Document</code>.</p>
<p>The <code>Document</code> of a content attribute is the <code>Document</code> of the
attribute's element.</p>
<p>The term <dfn>tree order</dfn> means a pre-order, depth-first traversal of DOM nodes involved
(through the <code data-x="dom-Node-parentNode">parentNode</code>/<code
data-x="dom-Node-childNodes">childNodes</code> relationship).</p>
<p>When it is stated that some element or attribute is <dfn data-x="ignore">ignored</dfn>, or
treated as some other value, or handled as if it was something else, this refers only to the
processing of the node after it is in the DOM. <span class="nodev">A user agent must not mutate the
DOM in such situations.</span></p>
<p>A content attribute is said to <dfn data-x="">change</dfn> value only if its new value is
different than its previous value; setting an attribute to a value it already has does not change
it.</p>
<p>The term <dfn data-x="">empty</dfn>, when used of an attribute value, <code>Text</code> node, or
string, means that the length of the text is zero (i.e. not even containing spaces or <span>control
characters</span>).</p>
<h4>Scripting</h4>
<p>The construction "a <code>Foo</code> object", where <code>Foo</code> is actually an interface,
is sometimes used instead of the more accurate "an object implementing the interface
<code>Foo</code>".</p>
<p>An IDL attribute is said to be <dfn data-x="">getting</dfn> when its value is being retrieved
(e.g. by author script), and is said to be <dfn data-x="">setting</dfn> when a new value is
assigned to it.</p>
<p>If a DOM object is said to be <dfn>live</dfn>, then the attributes and methods on that object
<span class="nodev">must</span> operate on the actual underlying data, not a snapshot of the
data.</p>
<p>In the context of events, the terms <i>fire</i> and <i>dispatch</i> are used as defined in the
DOM specification: <dfn data-x="concept-event-fire">firing</dfn> an event means to create and <span
data-x="concept-event-dispatch">dispatch</span> it, and <dfn
data-x="concept-event-dispatch">dispatching</dfn> an event means to follow the steps that propagate
the event through the tree. The term <dfn data-x="concept-events-trusted">trusted event</dfn> is
used to refer to events whose <code data-x="dom-event-isTrusted">isTrusted</code> attribute is
initialised to true. <a href="#refsDOM">[DOM]</a></p>
<h4>Plugins</h4>
<p>The term <dfn>plugin</dfn> refers to a user-agent defined set of content handlers used by the
user agent that can take part in the user agent's rendering of a <code>Document</code> object, but
that neither act as <span data-x="child browsing context">child browsing contexts</span> of the
<code>Document</code> nor introduce any <code>Node</code> objects to the <code>Document</code>'s
DOM.</p>
<p>Typically such content handlers are provided by third parties, though a user agent can also
designate built-in content handlers as plugins.</p>
<div class="nodev">
<p>A user agent must not consider the types <code>text/plain</code> and
<code>application/octet-stream</code> as having a registered <span>plugin</span>.</p> <!-- because
of the way <object> elements handles those types, if nothing else (it also doesn't make any sense
to have a plugin registered for those types, of course) -->
</div>
<p class="example">One example of a plugin would be a PDF viewer that is instantiated in a
<span>browsing context</span> when the user navigates to a PDF file. This would count as a plugin
regardless of whether the party that implemented the PDF viewer component was the same as that
which implemented the user agent itself. However, a PDF viewer application that launches separately
from the user agent (as opposed to using the same interface) is not a plugin by this
definition.</p>
<p class="note">This specification does not define a mechanism for interacting with plugins, as it
is expected to be user-agent- and platform-specific. Some UAs might opt to support a plugin
mechanism such as the Netscape Plugin API; others might use remote content converters or have
built-in support for certain types. Indeed, this specification doesn't require user agents to
support plugins at all. <a href="#refsNPAPI">[NPAPI]</a></p>
<p>A plugin can be <dfn data-x="concept-plugin-secure">secured</dfn> if it honors the semantics of
the <code data-x="attr-iframe-sandbox">sandbox</code> attribute.</p>
<p class="example">For example, a secured plugin would prevent its contents from creating pop-up
windows when the plugin is instantiated inside a sandboxed <code>iframe</code>.</p>
<div class="nodev">
<p class="warning">Browsers should take extreme care when interacting with external content
intended for <span data-x="plugin">plugins</span>. When third-party software is run with the same
privileges as the user agent itself, vulnerabilities in the third-party software become as
dangerous as those in the user agent.</p>
<p>Since different users having different sets of <span data-x="plugin">plugins</span> provides a
fingerprinting vector that increases the chances of users being uniquely identified, user agents
are encouraged to support the exact same set of <span data-x="plugin">plugins</span> for each
user.
<!--INSERT FINGERPRINT-->
</p>
</div>
<h4 id="encoding-terminology">Character encodings</h4>
<p>A <dfn data-x="encoding">character encoding</dfn>, or just <i>encoding</i> where that is not
ambiguous, is a defined way to convert between byte streams and Unicode strings, as defined in the
WHATWG Encoding standard. An <span>encoding</span> has an <dfn>encoding name</dfn> and one or more
<dfn data-x="encoding label">encoding labels</dfn>, referred to as the encoding's <i>name</i> and
<i>labels</i> in the Encoding standard. <a href="#refsENCODING">[ENCODING]</a></p>
<p>An <dfn>ASCII-compatible character encoding</dfn> is a single-byte or variable-length
<span>encoding</span> in which the bytes 0x09, 0x0A, 0x0C, 0x0D, 0x20 - 0x22, 0x26, 0x27, 0x2C -
0x3F, 0x41 - 0x5A, and 0x61 - 0x7A<!-- is that list ok? do any character sets we want to support
do things outside that range? -->, ignoring bytes that are the second and later bytes of multibyte
sequences, all correspond to single-byte sequences that map to the same Unicode characters as
those bytes in Windows-1252<!--ANSI_X3.4-1968 (US-ASCII)-->. <a href="#refsENCODING">[ENCODING]</a></p>
<p class="note">This includes such encodings as Shift_JIS, HZ-GB-2312, and variants of ISO-2022,
even though it is possible in these encodings for bytes like 0x70 to be part of longer sequences
that are unrelated to their interpretation as ASCII. It excludes UTF-16 variants, as well as
obsolete legacy encodings such as UTF-7, GSM03.38, and EBCDIC variants.</p>
<!--
We'll have to change that if anyone comes up with a way to have a document that is valid as two
different encodings at once, with different <meta charset> elements applying in each case.
-->
<p>The term <dfn>a UTF-16 encoding</dfn> refers to any variant of UTF-16: UTF-16LE or UTF-16BE,
regardless of the presence or absence of a BOM. <a href="#refsENCODING">[ENCODING]</a></p>
<p>The term <dfn>code unit</dfn> is used as defined in the Web IDL specification: a 16 bit
unsigned integer, the smallest atomic component of a <code>DOMString</code>. (This is a narrower
definition than the one used in Unicode, and is not the same as a <i>code point</i>.) <a
href="#refsWEBIDL">[WEBIDL]</a></p>
<p>The term <dfn>Unicode code point</dfn> means a <i data-x="">Unicode scalar value</i> where
possible, and an isolated surrogate code point when not. When a conformance requirement is defined
in terms of characters or Unicode code points, a pair of <span data-x="code unit">code units</span>
consisting of a high surrogate followed by a low surrogate must be treated as the single code
point represented by the surrogate pair, but isolated surrogates must each be treated as the
single code point with the value of the surrogate. <a href="#refsUNICODE">[UNICODE]</a></p>
<p>In this specification, the term <dfn>character</dfn>, when not qualified as <em>Unicode</em>
character, is synonymous with the term <span>Unicode code point</span>.</p>
<p>The term <dfn>Unicode character</dfn> is used to mean a <i data-x="">Unicode scalar value</i>
(i.e. any Unicode code point that is not a surrogate code point). <a
href="#refsUNICODE">[UNICODE]</a></p>
<p>The <dfn>code-unit length</dfn> of a string is the number of <span data-x="code unit">code
units</span> in that string.</p>
<p class="note">This complexity results from the historical decision to define the DOM API in
terms of 16 bit (UTF-16) <span data-x="code unit">code units</span>, rather than in terms of <span
data-x="Unicode character">Unicode characters</span>.</p>
<div class="nodev">
<h3>Conformance requirements</h3>
<p>All diagrams, examples, and notes in this specification are non-normative, as are all sections
explicitly marked non-normative. Everything else in this specification is normative.</p>
<p>The key words "MUST", "MUST NOT", <!--"REQUIRED",--> <!--"SHALL", "SHALL NOT",--> "SHOULD", "SHOULD
NOT", <!--"RECOMMENDED", "NOT RECOMMENDED",--> "MAY", and "OPTIONAL" in the normative parts of
this document are to be interpreted as described in RFC2119. The key word "OPTIONALLY" in the
normative parts of this document is to be interpreted with the same normative meaning as "MAY" and
"OPTIONAL". For readability, these words do not appear in all uppercase letters in this
specification. <a href="#refsRFC2119">[RFC2119]</a></p>
<p>Requirements phrased in the imperative as part of algorithms (such as "strip any leading space
characters" or "return false and abort these steps") are to be interpreted with the meaning of the
key word ("must", "should", "may", etc) used in introducing the algorithm.</p>
<div class="example">
<p>For example, were the spec to say:</p>
<pre>To eat an orange, the user must:
1. Peel the orange.
2. Separate each slice of the orange.
3. Eat the orange slices.</pre>
<p>...it would be equivalent to the following:</p>
<pre>To eat an orange:
1. The user must peel the orange.
2. The user must separate each slice of the orange.
3. The user must eat the orange slices.</pre>
<p>Here the key word is "must".</p>
<p>The former (imperative) style is generally preferred in this specification for stylistic
reasons.</p>
</div>
<p>Conformance requirements phrased as algorithms or specific steps may be implemented in any
manner, so long as the end result is equivalent. (In particular, the algorithms defined in this
specification are intended to be easy to follow, and not intended to be performant.)</p>
</div>
<div class="nodev">
<h4>Conformance classes</h4>
<p>This specification describes the conformance criteria for <span class="nodev">user agents
(relevant to implementors) and</span> documents<span class="nodev"> (relevant to authors and
authoring tool implementors)</span>.</p>
<p><dfn>Conforming documents</dfn> are those that comply with all the conformance criteria for
documents. For readability, some of these conformance requirements are phrased as conformance
requirements on authors; such requirements are implicitly requirements on documents: by
definition, all documents are assumed to have had an author. (In some cases, that author may
itself be a user agent &mdash; such user agents are subject to additional rules, as explained
below.)</p>
<p class="example">For example, if a requirement states that "authors must not use the <code
data-x="">foobar</code> element", it would imply that documents are not allowed to contain elements
named <code data-x="">foobar</code>.</p>
<p class="note impl">There is no implied relationship between document conformance requirements
and implementation conformance requirements. User agents are not free to handle non-conformant
documents as they please; the processing model described in this specification applies to
implementations regardless of the conformity of the input documents.</p>
<p>User agents fall into several (overlapping) categories with different conformance
requirements.</p>
<dl>
<dt id="interactive">Web browsers and other interactive user agents</dt>
<dd>
<p>Web browsers that support <span>the XHTML syntax</span> must process elements and attributes
from the <span>HTML namespace</span> found in XML documents as described in this specification,
so that users can interact with them, unless the semantics of those elements have been
overridden by other specifications.</p>
<p class="example">A conforming XHTML processor would, upon finding an XHTML <code>script</code>
element in an XML document, execute the script contained in that element. However, if the
element is found within a transformation expressed in XSLT (assuming the user agent also
supports XSLT), then the processor would instead treat the <code>script</code> element as an
opaque element that forms part of the transform.</p>
<p>Web browsers that support <span>the HTML syntax</span> must process documents labeled with an
<span>HTML MIME type</span> as described in this specification, so that users can interact with
them.</p>
<p>User agents that support scripting must also be conforming implementations of the IDL
fragments in this specification, as described in the Web IDL specification. <a
href="#refsWEBIDL">[WEBIDL]</a></p>
<p class="note">Unless explicitly stated, specifications that override the semantics of HTML
elements do not override the requirements on DOM objects representing those elements. For
example, the <code>script</code> element in the example above would still implement the
<code>HTMLScriptElement</code> interface.</p>
</dd>
<dt id="non-interactive">Non-interactive presentation user agents</dt>
<dd>
<p>User agents that process HTML and XHTML documents purely to render non-interactive versions
of them must comply with the same conformance criteria as Web browsers, except that they are
exempt from requirements regarding user interaction.</p>
<p class="note">Typical examples of non-interactive presentation user agents are printers
(static UAs) and overhead displays (dynamic UAs). It is expected that most static
non-interactive presentation user agents will also opt to <a href="#non-scripted">lack scripting
support</a>.</p>
<p class="example">A non-interactive but dynamic presentation UA would still execute scripts,
allowing forms to be dynamically submitted, and so forth. However, since the concept of "focus"
is irrelevant when the user cannot interact with the document, the UA would not need to support
any of the focus-related DOM APIs.</p>
</dd>
<dt id="renderingUA">Visual user agents that support the suggested default rendering</dt>
<dd>
<p>User agents, whether interactive or not, may be designated (possibly as a user option) as
supporting the suggested default rendering defined by this specification.</p>
<p>This is not required. In particular, even user agents that do implement the suggested default
rendering are encouraged to offer settings that override this default to improve the experience
for the user, e.g. changing the colour contrast, using different focus styles, or otherwise
making the experience more accessible and usable to the user.</p>
<p>User agents that are designated as supporting the suggested default rendering must, while so
designated, implement the rules in <a href="#rendering">the rendering section</a> that that
section defines as the behavior that user agents are <em>expected</em> to implement.</p>
</dd>
<dt id="non-scripted">User agents with no scripting support</dt>
<dd>
<p>Implementations that do not support scripting (or which have their scripting features
disabled entirely) are exempt from supporting the events and DOM interfaces mentioned in this
specification. For the parts of this specification that are defined in terms of an events model
or in terms of the DOM, such user agents must still act as if events and the DOM were
supported.</p>
<p class="note">Scripting can form an integral part of an application. Web browsers that do not
support scripting, or that have scripting disabled, might be unable to fully convey the author's
intent.</p>
</dd>
<dt>Conformance checkers</dt>
<dd id="conformance-checkers">
<p>Conformance checkers must verify that a document conforms to the applicable conformance
criteria described in this specification. Automated conformance checkers are exempt from
detecting errors that require interpretation of the author's intent (for example, while a
document is non-conforming if the content of a <code>blockquote</code> element is not a quote,
conformance checkers running without the input of human judgement do not have to check that
<code>blockquote</code> elements only contain quoted material).</p>
<p>Conformance checkers must check that the input document conforms when parsed without a
<span>browsing context</span> (meaning that no scripts are run, and that the parser's
<span>scripting flag</span> is disabled), and should also check that the input document conforms
when parsed with a <span>browsing context</span> in which scripts execute, and that the scripts
never cause non-conforming states to occur other than transiently during script execution
itself. (This is only a "SHOULD" and not a "MUST" requirement because it has been proven to be
impossible. <a href="#refsCOMPUTABLE">[COMPUTABLE]</a>)</p>
<p>The term "HTML validator" can be used to refer to a conformance checker that itself conforms
to the applicable requirements of this specification.</p>
<div class="note">
<p>XML DTDs cannot express all the conformance requirements of this specification. Therefore, a
validating XML processor and a DTD cannot constitute a conformance checker. Also, since neither
of the two authoring formats defined in this specification is an application of SGML, a
validating SGML system cannot constitute a conformance checker either.</p>
<p>To put it another way, there are three types of conformance criteria:</p>
<ol>
<li>Criteria that can be expressed in a DTD.</li>
<li>Criteria that cannot be expressed by a DTD, but can still be checked by a machine.</li>
<li>Criteria that can only be checked by a human.</li>
</ol>
<p>A conformance checker must check for the first two. A simple DTD-based validator only checks
for the first class of errors and is therefore not a conforming conformance checker according
to this specification.</p>
</div>
</dd>
<dt>Data mining tools</dt>
<dd id="data-mining">
<p>Applications and tools that process HTML and XHTML documents for reasons other than to either
render the documents or check them for conformance should act in accordance with the semantics
of the documents that they process.</p>
<p class="example">A tool that generates <span data-x="outline">document outlines</span> but
increases the nesting level for each paragraph and does not increase the nesting level for each
section would not be conforming.</p>
</dd>
<dt id="editors">Authoring tools and markup generators</dt>
<dd>
<p>Authoring tools and markup generators must generate <span>conforming documents</span>.
Conformance criteria that apply to authors also apply to authoring tools, where appropriate.</p>
<p>Authoring tools are exempt from the strict requirements of using elements only for their
specified purpose, but only to the extent that authoring tools are not yet able to determine
author intent. However, authoring tools must not automatically misuse elements or encourage
their users to do so.</p>
<p class="example">For example, it is not conforming to use an <code>address</code> element for
arbitrary contact information; that element can only be used for marking up contact information
for the author of the document or section. However, since an authoring tool is likely unable to
determine the difference, an authoring tool is exempt from that requirement. This does not mean,
though, that authoring tools can use <code>address</code> elements for any block of italic text
(for instance); it just means that the authoring tool doesn't have to verify that when the user
uses a tool for inserting contact information for a section, that the user really is doing that
and not inserting something else instead.</p>
<p class="note">In terms of conformance checking, an editor has to output documents that conform
to the same extent that a conformance checker will verify.</p>
<p>When an authoring tool is used to edit a non-conforming document, it may preserve the
conformance errors in sections of the document that were not edited during the editing session
(i.e. an editing tool is allowed to round-trip erroneous content). However, an authoring tool
must not claim that the output is conformant if errors have been so preserved.</p>
<p>Authoring tools are expected to come in two broad varieties: tools that work from structure
or semantic data, and tools that work on a What-You-See-Is-What-You-Get media-specific editing
basis (WYSIWYG).</p>
<p>The former is the preferred mechanism for tools that author HTML, since the structure in the
source information can be used to make informed choices regarding which HTML elements and
attributes are most appropriate.</p>
<p>However, WYSIWYG tools are legitimate. WYSIWYG tools should use elements they know are
appropriate, and should not use elements that they do not know to be appropriate. This might in
certain extreme cases mean limiting the use of flow elements to just a few elements, like
<code>div</code>, <code>b</code>, <code>i</code>, and <code>span</code> and making liberal use
of the <code data-x="attr-style">style</code> attribute.</p>
<p>All authoring tools, whether WYSIWYG or not, should make a best effort attempt at enabling
users to create well-structured, semantically rich, media-independent content.</p>
</dd>
</dl>
<p id="hardwareLimitations">User agents may impose implementation-specific limits on otherwise
unconstrained inputs, e.g. to prevent denial of service attacks, to guard against running out of
memory, or to work around platform-specific limitations.
<!--INSERT FINGERPRINT-->
</p>
<p>For compatibility with existing content and prior specifications, this specification describes
two authoring formats: one based on XML (referred to as <span>the XHTML syntax</span>), and one
using a <a href="#writing">custom format</a> inspired by SGML (referred to as <span>the HTML
syntax</span>). Implementations must support at least one of these two formats, although
supporting both is encouraged.</p>
<p>Some conformance requirements are phrased as requirements on elements, attributes, methods or
objects. Such requirements fall into two categories: those describing content model restrictions,
and those describing implementation behavior. Those in the former category are requirements on
documents and authoring tools. Those in the latter category are requirements on user agents.
Similarly, some conformance requirements are phrased as requirements on authors; such requirements
are to be interpreted as conformance requirements on the documents that authors produce. (In other
words, this specification does not distinguish between conformance criteria on authors and
conformance criteria on documents.)</p>
</div>
<div class="nodev">
<h4>Dependencies</h4>
<p>This specification relies on several other underlying specifications.</p>
<dl>
<dt>Unicode and Encoding</dt>
<dd>
<p>The Unicode character set is used to represent textual data, and the WHATWG Encoding standard
defines requirements around <span data-x="encoding">character encodings</span>. <a
href="#refsUNICODE">[UNICODE]</a></p>
<p class="note">This specification <a href="#encoding-terminology">introduces terminology</a>
based on the terms defined in those specifications, as described earlier.</p>
<p>The following terms are used as defined in the WHATWG Encoding standard: <a
href="#refsENCODING">[ENCODING]</a></p>
<ul class="brief">
<li><dfn>Getting an encoding</dfn>
<li>The <dfn>encoder</dfn> and <dfn>decoder</dfn> algorithms for various encodings, including
the <dfn>UTF-8 encoder</dfn> and <dfn>UTF-8 decoder</dfn>
<li>The generic <dfn>decode</dfn> algorithm which takes a byte stream and an encoding and
returns a character stream
<li>The <dfn>UTF-8 decode</dfn> algorithm which takes a byte stream and returns a character
stream, additionally stripping one leading UTF-8 Byte Order Mark (BOM), if any
</ul>
<p class="note">The <span>UTF-8 decoder</span> is distinct from the <i>UTF-8 decode
algorithm</i>. The latter first strips a Byte Order Mark (BOM), if any, and then invokes the
former.</p>
<p>For readability, character encodings are sometimes referenced in this specification with a
case that differs from the canonical case given in the WHATWG Encoding standard. (For example,
"UTF-16LE" instead of "utf-16le".)</p>
</dd>
<dt>XML</dt>
<dd>
<p>Implementations that support <span>the XHTML syntax</span> must support some version of XML,
as well as its corresponding namespaces specification, because that syntax uses an XML
serialisation with namespaces. <a href="#refsXML">[XML]</a> <a href="#refsXMLNS">[XMLNS]</a></p>
</dd>
<dt>URLs</dt>
<dd>
<p>The following terms are defined in the WHATWG URL standard: <a href="#refsURL">[URL]</a></p>
<ul class="brief">
<li><dfn>URL</dfn>
<li><dfn>Absolute URL</dfn>
<li><dfn>Relative URL</dfn>
<li><dfn data-x="concept-url-scheme-relative">Relative schemes</dfn>
<li>The <dfn>URL parser</dfn>
<li><dfn>Parsed URL</dfn>
<li>The <dfn data-x="concept-url-scheme">scheme</dfn> component of a <span>parsed URL</span>
<li>The <dfn data-x="concept-url-scheme-data">scheme data</dfn> component of a <span>parsed URL</span>
<li>The <dfn data-x="concept-url-username">username</dfn> component of a <span>parsed URL</span>
<li>The <dfn data-x="concept-url-password">password</dfn> component of a <span>parsed URL</span>
<li>The <dfn data-x="concept-url-host">host</dfn> component of a <span>parsed URL</span>
<li>The <dfn data-x="concept-url-port">port</dfn> component of a <span>parsed URL</span>
<li>The <dfn data-x="concept-url-path">path</dfn> component of a <span>parsed URL</span>
<li>The <dfn data-x="concept-url-query">query</dfn> component of a <span>parsed URL</span>
<li>The <dfn data-x="concept-url-fragment">fragment</dfn> component of a <span>parsed URL</span>
<li><dfn data-x="concept-url-parse-error">Parse errors</dfn> from the <span>URL parser</span>
<li>The <dfn data-x="concept-url-serializer">URL serializer</dfn>
<li><dfn>Default encode set</dfn>
<li><dfn>Percent encode</dfn>
<li><dfn>UTF-8 percent encode</dfn>
<li><dfn>Percent decode</dfn>
<li><dfn>Decoder error</dfn>
<li>The <dfn>domain label to ASCII</dfn> algorithm</li>
<li>The <dfn>domain label to Unicode</dfn> algorithm</li>
<li><dfn><code>URLUtils</code></dfn> interface
<li><dfn><code>URLUtilsReadOnly</code></dfn> interface
<li><dfn data-x="dom-url-href"><code>href</code> attribute</dfn>
<li><dfn data-x="dom-url-protocol"><code>protocol</code> attribute</dfn>
<li>The <dfn data-x="concept-uu-get-the-base">get the base</dfn> hook for <code>URLUtils</code>
<li>The <dfn data-x="concept-uu-update">update steps</dfn> hook for <code>URLUtils</code>
<li>The <dfn data-x="concept-uu-set-the-input">set the input</dfn> algorithm for <code>URLUtils</code>
<li>The <dfn data-x="concept-uu-query-encoding">query encoding</dfn> of a <code>URLUtils</code> object
<li>The <dfn data-x="concept-uu-input">input</dfn> of a <code>URLUtils</code> object
<li>The <dfn data-x="concept-uu-url">url</dfn> of a <code>URLUtils</code> object
</ul>
</dd>
<dt>Cookies</dt>
<dd>
<p>The following terms are defined in the Cookie specification: <a
href="#refsCOOKIES">[COOKIES]</a></p>
<ul class="brief">
<li><dfn>cookie-string</dfn>
<li><dfn>receives a set-cookie-string</dfn>
</ul>
</dd>
<dt>Fetch</dt>
<dd>
<p>The following terms are defined in the WHATWG Fetch specification: <a href="#refsFETCH">[FETCH]</a></p>
<ul class="brief">
<li><dfn>cross-origin request</dfn>
<li><dfn>cross-origin request status</dfn>
<li><dfn>custom request headers</dfn>
<li><dfn>simple cross-origin request</dfn>
<li><dfn>redirect steps</dfn>
<li><dfn>omit credentials flag</dfn>
<li><dfn>resource sharing check</dfn>
</ul>
<p class="note">This specification does not yet use the "fetch" algorithm from the WHATWG Fetch
specification. It will be updated to do so in due course.</p>
</dd>
<!--TOPIC:DOM APIs-->
<dt>Web IDL</dt>
<dd>
<p>The IDL fragments in this specification must be interpreted as required for conforming IDL
fragments, as described in the Web IDL specification. <a href="#refsWEBIDL">[WEBIDL]</a></p>
<p>The terms <dfn>supported property indices</dfn>, <dfn>determine the value of an indexed
property</dfn>, <dfn>support named properties</dfn>, <dfn>supported property names</dfn>,
<dfn>unenumerable</dfn>, <dfn>determine the value of a named property</dfn>, <dfn>platform array
objects</dfn>, and <dfn data-x="dfn-read-only-array">read only</dfn> (when applied to arrays)
are used as defined in the Web IDL specification. The algorithm to <dfn>convert a DOMString to a
sequence of Unicode characters</dfn> is similarly that defined in the Web IDL specification.</p>
<p>When this specification requires a user agent to <dfn>create a <code>Date</code> object</dfn>
representing a particular time (which could be the special value Not-a-Number), the milliseconds
component of that time, if any, must be truncated to an integer, and the time value of the newly
created <code>Date</code> object must represent the resulting truncated time.</p>
<p class="example">For instance, given the time 23045 millionths of a second after 00:00 UTC on
January 1st 2000, i.e. the time 2000-01-01T00:00:00.023045Z, then the <code>Date</code> object
created representing that time would represent the same time as that created representing the
time 2000-01-01T00:00:00.023Z, 45 millionths earlier. If the given time is NaN, then the result
is a <code>Date</code> object that represents a time value NaN (indicating that the object does
not represent a specific instant of time).</p>
</dd>
<dt>JavaScript</dt>
<dd>
<p>Some parts of the language described by this specification only support JavaScript as the
underlying scripting language. <a href="#refsECMA262">[ECMA262]</a></p>
<p class="note">The term "JavaScript" is used to refer to ECMA262, rather than the official term
ECMAScript, since the term JavaScript is more widely known. Similarly, the <span>MIME
type</span> used to refer to JavaScript in this specification is <code
data-x="">text/javascript</code>, since that is the most commonly used type, <span data-x="willful
violation">despite it being an officially obsoleted type</span> according to RFC 4329. <a
href="#refsRFC4329">[RFC4329]</a></p>
<p>The term <dfn>JavaScript global environment</dfn> refers to the <i data-x="">global
environment</i> concept defined in the ECMAScript specification.</p>
<p>The ECMAScript <dfn data-x="js-SyntaxError"><code>SyntaxError</code></dfn> exception is also
defined in the ECMAScript specification. <a href="#refsECMA262">[ECMA262]</a></p>
<p>The <dfn>ArrayBuffer</dfn> and related object types and underlying concepts from the
ECMAScript Specification are used for several features in this specification. <a
href="#refsECMA262">[ECMA262]</a></p>
<p>The following helper IDL is used for referring to <code>ArrayBuffer</code>-related types:</p>
<pre class="idl">typedef (<dfn>Int8Array</dfn> or <dfn>Uint8Array</dfn> or <dfn>Uint8ClampedArray</dfn> or
<dfn>Int16Array</dfn> or <dfn>Uint16Array</dfn> or
<dfn>Int32Array</dfn> or <dfn>Uint32Array</dfn> or
<dfn>Float32Array</dfn> or <dfn>Float64Array</dfn> or
<dfn>DataView</dfn>) <dfn>ArrayBufferView</dfn>;</pre>
<p class="note">In particular, the <code>Uint8ClampedArray</code> type is used by some <span
data-x="ImageData">2D canvas APIs</span>, and the <a href="#network"><code>WebSocket</code>
API</a> uses <code>ArrayBuffer</code> objects for handling binary frames.</p>
</dd>
<dt>DOM</dt>
<dd>
<p>The Document Object Model (DOM) is a representation &mdash; a model &mdash; of a document and
its content. The DOM is not just an API; the conformance criteria of HTML implementations are
defined, in this specification, in terms of operations on the DOM. <a
href="#refsDOM">[DOM]</a></p>
<p>Implementations must support DOM and the events defined in DOM Events, because this
specification is defined in terms of the DOM, and some of the features are defined as extensions
to the DOM interfaces. <a href="#refsDOM">[DOM]</a> <a href="#refsDOMEVENTS">[DOMEVENTS]</a></p>
<p>In particular, the following features are defined in the DOM specification: <a
href="#refsDOM">[DOM]</a></p> <!-- aka DOM Core or DOMCORE -->
<ul class="brief">
<li><dfn><code>Attr</code></dfn> interface</li>
<li><dfn><code>Comment</code></dfn> interface</li>
<li><dfn><code>DOMImplementation</code></dfn> interface</li>
<li><dfn data-x="DOM Document"><code>Document</code></dfn> interface</li>
<li><dfn><code>XMLDocument</code></dfn> interface</li>
<li><dfn><code>DocumentFragment</code></dfn> interface</li>
<li><dfn><code>DocumentType</code></dfn> interface</li>
<li><dfn><code>DOMException</code></dfn> interface</li>
<li><dfn><code>ChildNode</code></dfn> interface</li>
<li><dfn><code>Element</code></dfn> interface</li>
<li><dfn><code>Node</code></dfn> interface</li>
<li><dfn><code>NodeList</code></dfn> interface</li>
<li><dfn><code>ProcessingInstruction</code></dfn> interface</li>
<li><dfn><code>Text</code></dfn> interface</li>
<li><dfn><code>HTMLCollection</code></dfn> interface</li>
<li><dfn data-x="dom-HTMLCollection-item"><code>item()</code></dfn> method</li>
<li>The terms <dfn>collections</dfn> and <dfn>represented by the collection</dfn></li>
<li><dfn><code>DOMTokenList</code></dfn> interface</li>
<li><dfn><code>DOMSettableTokenList</code></dfn> interface</li>
<li><dfn data-x="dom-DOMImplementation-createDocument"><code>createDocument()</code></dfn> method</li>
<li><dfn data-x="dom-DOMImplementation-createHTMLDocument"><code>createHTMLDocument()</code></dfn> method</li>
<li><dfn data-x="dom-Document-createElement"><code>createElement()</code></dfn> method</li>
<li><dfn data-x="dom-Document-createElementNS"><code>createElementNS()</code></dfn> method</li>
<li><dfn data-x="dom-Document-getElementById"><code>getElementById()</code></dfn> method</li>
<li><dfn data-x="dom-Node-insertBefore"><code>insertBefore()</code></dfn> method</li>
<li><dfn data-x="dom-Node-ownerDocument"><code>ownerDocument</code></dfn> attribute</li>
<li><dfn data-x="dom-Node-childNodes"><code>childNodes</code></dfn> attribute</li>
<li><dfn data-x="dom-Node-localName"><code>localName</code></dfn> attribute</li>
<li><dfn data-x="dom-Node-parentNode"><code>parentNode</code></dfn> attribute</li>
<li><dfn data-x="dom-Node-namespaceURI"><code>namespaceURI</code></dfn> attribute</li>
<li><dfn data-x="dom-Element-tagName"><code>tagName</code></dfn> attribute</li>
<li><dfn data-x="dom-Element-id"><code>id</code></dfn> attribute</li>
<li><dfn><code>textContent</code></dfn> attribute</li>
<li>The <dfn data-x="concept-node-insert">insert</dfn>, <dfn data-x="concept-node-append">append</dfn>, <dfn data-x="concept-node-remove">remove</dfn>, <dfn data-x="concept-node-replace">replace</dfn>, and <dfn data-x="concept-node-adopt">adopt</dfn> algorithms for nodes</li>
<li>The <dfn>nodes are inserted</dfn> and <dfn>nodes are removed</dfn> concepts</li>
<li>An element's <dfn data-x="concept-node-adopt-ext">adopting steps</dfn></li>
<li>The <dfn>attribute list</dfn> concept.</li>
<li>The <dfn data-x="concept-cd-data">data</dfn> of a text node.</li>
<li><dfn><code>Event</code></dfn> interface</li>
<li><dfn><code>EventTarget</code></dfn> interface</li>
<li><dfn><code>EventInit</code></dfn> dictionary type</li>
<li><dfn data-x="dom-Event-target"><code>target</code></dfn> attribute</li>
<li><dfn data-x="dom-Event-isTrusted"><code>isTrusted</code></dfn> attribute</li>
<li>The <dfn data-x="concept-event-type">type</dfn> of an event</li>
<li>The concept of an <dfn data-x=concept-event-listener>event listener</dfn> and the <span data-x=concept-event-listener>event listeners</span> associated with an <code>EventTarget</code></li>
<li>The concept of a <dfn>target override</dfn></li>
<li>The concept of a regular <dfn>event parent</dfn> and a <dfn>cross-boundary event parent</dfn></li> <!-- see bug 18780 -->
<li>The <dfn data-x="document's character encoding">encoding</dfn> (herein the <i>character encoding</i>) and <dfn data-x="concept-document-content-type">content type</dfn> of a <code>Document</code></li>
<li>The distinction between <dfn>XML documents</dfn> and <dfn>HTML documents</dfn></li>
<li>The terms <dfn>quirks mode</dfn>, <dfn>limited-quirks mode</dfn>, and <dfn>no-quirks mode</dfn></li>
<li>The algorithm to <dfn data-x="concept-node-clone">clone</dfn> a <code>Node</code>, and the concept of <dfn data-x="concept-node-clone-ext">cloning steps</dfn> used by that algorithm</li>
<li>The concept of <dfn>base URL change steps</dfn> and the definition of what happens when an element is <dfn>affected by a base URL change</dfn></li>
<li>The concept of an element's <dfn data-x="concept-id">unique identifier (ID)</dfn></li>
<li>The concept of a DOM <dfn data-x="concept-range">range</dfn>, and the terms <dfn data-x="concept-range-start">start</dfn>, <dfn data-x="concept-range-end">end</dfn>, and <dfn data-x="concept-range-bp">boundary point</dfn> as applied to ranges.</li>
<li><dfn><code>MutationObserver</code></dfn> interface</li>
<li>The <dfn data-x="concept-mo-invoke">invoke <code>MutationObserver</code> objects</dfn> algorithm</li>
<li><dfn>Promise</dfn> interface</li>
<li>The <dfn data-x="concept-resolver">resolver</dfn> concept</li>
<li>The <dfn data-x="concept-resolver-fulfill">fulfill</dfn> and <dfn data-x="concept-resolver-reject">reject</dfn> algorithms</li>
</ul>
<p>The term <dfn>throw</dfn> in this specification is used as defined in the DOM specification.
The following <code>DOMException</code> types are defined in the DOM specification: <a
href="#refsDOM">[DOM]</a></p>
<ol class="brief">
<li value="1"><dfn><code>IndexSizeError</code></dfn></li>
<li value="3"><dfn><code>HierarchyRequestError</code></dfn></li>
<li value="4"><dfn><code>WrongDocumentError</code></dfn></li>
<li value="5"><dfn><code>InvalidCharacterError</code></dfn></li>
<li value="7"><dfn><code>NoModificationAllowedError</code></dfn></li>
<li value="8"><dfn><code>NotFoundError</code></dfn></li>
<li value="9"><dfn><code>NotSupportedError</code></dfn></li>
<li value="11"><dfn><code>InvalidStateError</code></dfn></li>
<li value="12"><dfn><code>SyntaxError</code></dfn></li>
<li value="13"><dfn><code>InvalidModificationError</code></dfn></li>
<li value="14"><dfn><code>NamespaceError</code></dfn></li>
<li value="15"><dfn><code>InvalidAccessError</code></dfn></li>
<li value="18"><dfn><code>SecurityError</code></dfn></li>
<li value="19"><dfn><code>NetworkError</code></dfn></li>
<li value="20"><dfn><code>AbortError</code></dfn></li>
<li value="21"><dfn><code>URLMismatchError</code></dfn></li>
<li value="22"><dfn><code>QuotaExceededError</code></dfn></li>
<li value="23"><dfn><code>TimeoutError</code></dfn></li>
<li value="24"><dfn><code>InvalidNodeTypeError</code></dfn></li>
<li value="25"><dfn><code>DataCloneError</code></dfn></li>
</ol>
<p class="example">For example, to <i>throw a <code>TimeoutError</code> exception</i>, a user
agent would construct a <code>DOMException</code> object whose type was the string "<code
data-x="">TimeoutError</code>" (and whose code was the number 23, for legacy reasons) and
actually throw that object as an exception.</p>
<p>The following features are defined in the DOM Events specification: <a
href="#refsDOMEVENTS">[DOMEVENTS]</a></p>
<ul class="brief">
<li><dfn><code>MouseEvent</code></dfn> interface</li>
<li><dfn><code>MouseEventInit</code></dfn> dictionary type</li>
<li>The <dfn><code>FocusEvent</code></dfn> interface and its <dfn data-x="dom-FocusEvent-relatedTarget"><code>relatedTarget</code></dfn> attribute</li>
<li>The <dfn><code>UIEvent</code></dfn> interface's <dfn data-x="dom-UIEvent-detail"><code>detail</code></dfn> attribute</li>
<li><dfn data-x="event-click"><code>click</code></dfn> event</li>
<li><dfn data-x="event-dblclick"><code>dblclick</code></dfn> event</li>
<li><dfn data-x="event-mousedown"><code>mousedown</code></dfn> event</li>
<li><dfn data-x="event-mouseenter"><code>mouseenter</code></dfn> event</li>
<li><dfn data-x="event-mouseleave"><code>mouseleave</code></dfn> event</li>
<li><dfn data-x="event-mousemove"><code>mousemove</code></dfn> event</li>
<li><dfn data-x="event-mouseout"><code>mouseout</code></dfn> event</li>
<li><dfn data-x="event-mouseover"><code>mouseover</code></dfn> event</li>
<li><dfn data-x="event-mouseup"><code>mouseup</code></dfn> event</li>
<li><dfn data-x="event-mousewheel"><code>mousewheel</code></dfn> event</li>
<li><dfn data-x="event-keydown"><code>keydown</code></dfn> event</li>
<li><dfn data-x="event-keyup"><code>keyup</code></dfn> event</li>
<li><dfn data-x="event-keypress"><code>keypress</code></dfn> event</li>
</ul>
<p>The following features are defined in the Touch Events specification: <a
href="#refsTOUCH">[TOUCH]</a></p>
<ul class="brief">
<li><dfn><code>Touch</code></dfn> interface</li>
<li><dfn>Touch point</dfn> concept</li>
</ul>
<p>This specification sometimes uses the term <dfn data-x="">name</dfn> to refer to the event's
<code data-x="dom-event-type">type</code>; as in, "an event named <code data-x="">click</code>"
or "if the event name is <code data-x="">keypress</code>". The terms "name" and "type" for
events are synonymous.</p>
<p>The following features are defined in the DOM Parsing and Serialisation specification: <a
href="#refsDOMPARSING">[DOMPARSING]</a></p>
<ul class="brief">
<li><dfn data-x="dom-innerHTML"><code>innerHTML</code></dfn></li>
<li><dfn data-x="dom-outerHTML"><code>outerHTML</code></dfn></li>
</ul>
<p class="note">User agents are also encouraged to implement the features described in the
<cite>HTML Editing APIs</cite> and <cite><code>UndoManager</code> and DOM Transaction</cite>
specifications.
<a href="#refsEDITING">[EDITING]</a>
<a href="#refsUNDO">[UNDO]</a>
</p>
<p>The following parts of the Fullscreen specification are referenced from this specification,
in part to define the rendering of <code>dialog</code> elements, and also to define how the
Fullscreen API interacts with the sandboxing features in HTML: <a
href="#refsFULLSCREEN">[FULLSCREEN]</a></p>
<ul class="brief">
<li>The <dfn>top layer</dfn> concept</li>
<li><dfn data-x="dom-element-requestFullscreen"><code>requestFullscreen()</code></dfn></li>
<li>The <dfn>fullscreen enabled flag</dfn></li>
<li>The <dfn>fully exit fullscreen</dfn> algorithm</li>
</ul>
</dd>
<dt>File API</dt>
<dd>
<p>This specification uses the following features defined in the File API specification: <a
href="#refsFILEAPI">[FILEAPI]</a></p>
<ul class="brief">
<li><dfn><code>Blob</code></dfn></li>
<li><dfn><code>File</code></dfn></li>
<li><dfn><code>FileList</code></dfn></li>
<li><dfn data-x="dom-Blob-close"><code>Blob.close()</code></dfn></li>
<li><dfn data-x="dom-Blob-type"><code>Blob.type</code></dfn></li>
<li>The concept of <dfn data-x="file-error-read">read errors</dfn></li>
</ul>
</dd>
<dt>XMLHttpRequest</dt>
<dd>
<p>This specification references the XMLHttpRequest specification to describe how the two
specifications interact and to use its <code>ProgressEvent</code> features. The following
features and terms are defined in the XMLHttpRequest specification: <a
href="#refsXHR">[XHR]</a></p>
<ul class="brief">
<li><dfn><code>XMLHttpRequest</code></dfn>
<li><dfn><code>ProgressEvent</code></dfn>
<li><dfn data-x="fire a progress event">Fire a progress event named <var data-x="">e</var></dfn>
</ul>
</dd>
<!--TOPIC:HTML-->
<dt>Media Queries</dt>
<dd>
<p>Implementations must support the Media Queries language. <a href="#refsMQ">[MQ]</a></p>
</dd>
<dt>CSS modules</dt>
<dd>
<p>While support for CSS as a whole is not required of implementations of this specification
(though it is encouraged, at least for Web browsers), some features are defined in terms of
specific CSS requirements.</p>
<p>In particular, some features require that a string be <dfn>parsed as a CSS &lt;color&gt;
value</dfn>. When parsing a CSS value, user agents are required by the CSS specifications to
apply some error handling rules. These apply to this specification also. <a
href="#refsCSSCOLOR">[CSSCOLOR]</a> <a href="#refsCSS">[CSS]</a></p>
<p class="example">For example, user agents are required to close all open constructs upon
finding the end of a style sheet unexpectedly. Thus, when parsing the string "<code
data-x="">rgb(0,0,0</code>" (with a missing close-parenthesis) for a colour value, the close
parenthesis is implied by this error handling rule, and a value is obtained (the colour 'black').
However, the similar construct "<code data-x="">rgb(0,0,</code>" (with both a missing parenthesis
and a missing "blue" value) cannot be parsed, as closing the open construct does not result in a
viable value.</p>
<p>The term <dfn>CSS element reference identifier</dfn> is used as defined in the <cite>CSS
Image Values and Replaced Content</cite> specification to define the API that declares
identifiers for use with the CSS 'element()' function. <a
href="#refsCSSIMAGES">[CSSIMAGES]</a></p>
<p>Similarly, the term <dfn>provides a paint source</dfn> is used as defined in the <cite>CSS
Image Values and Replaced Content</cite> specification to define the interaction of certain HTML
elements with the CSS 'element()' function. <a href="#refsCSSIMAGES">[CSSIMAGES]</a></p>
<p>The term <dfn>default object size</dfn> is also defined in the <cite>CSS Image Values and
Replaced Content</cite> specification. <a href="#refsCSSIMAGES">[CSSIMAGES]</a></p>
<p>Implementations that support scripting must support the CSS Object Model. The following
features and terms are defined in the CSSOM specifications: <a href="#refsCSSOM">[CSSOM]</a> <a
href="#refsCSSOMVIEW">[CSSOMVIEW]</a></p>
<ul class="brief">
<li><dfn><code>Screen</code></dfn></li>
<li><dfn><code>LinkStyle</code></dfn></li>
<li><dfn><code>CSSStyleDeclaration</code></dfn></li>
<li><dfn data-x="dom-CSSStyleDeclaration-cssText"><code>cssText</code></dfn> attribute of <code>CSSStyleDeclaration</code></li>
<li><dfn><code>StyleSheet</code></dfn></li>
<li>The terms <dfn>create a CSS style sheet</dfn>, <dfn>remove a CSS style sheet</dfn>, and <dfn>associated CSS style sheet</dfn></li>
<li><dfn data-x="CSS style sheet">CSS style sheets</dfn> and their properties:
<dfn data-x="concept-css-style-sheet-type">type</dfn>,
<dfn data-x="concept-css-style-sheet-location">location</dfn>,
<dfn data-x="concept-css-style-sheet-parent-CSS-style-sheet">parent CSS style sheet</dfn>,
<dfn data-x="concept-css-style-sheet-owner-node">owner node</dfn>,
<dfn data-x="concept-css-style-sheet-owner-CSS-rule">owner CSS rule</dfn>,
<dfn data-x="concept-css-style-sheet-media">media</dfn>,
<dfn data-x="concept-css-style-sheet-title">title</dfn>,
<dfn data-x="concept-css-style-sheet-alternate-flag">alternate flag</dfn>,
<dfn data-x="concept-css-style-sheet-disabled-flag">disabled flag</dfn>,
<dfn data-x="concept-css-style-sheet-CSS-rules">CSS rules</dfn>,
<dfn data-x="concept-css-style-sheet-origin-clean-flag">origin-clean flag</dfn>
</li>
<li><dfn>Alternative style sheet sets</dfn> and the <dfn>preferred style sheet set</dfn></li>
<li><dfn>Serializing a CSS value</dfn></li>
<li><dfn>Scroll an element into view</dfn></li>
<li><dfn>Scroll to the beginning of the document</dfn></li>
<li>The <dfn data-x="event-resize"><code>resize</code></dfn> event</li>
<li>The <dfn data-x="event-scroll"><code>scroll</code></dfn> event</li>
</ul>
<p>The term <dfn>environment encoding</dfn> is defined in the <cite>CSS Syntax</cite>
specification. <a href="#refsCSSSYNTAX">[CSSSYNTAX]</a></p>
<p>The term <dfn>CSS styling attribute</dfn> is defined in the <cite>CSS Style Attributes</cite>
specification. <a href="#refsCSSATTR">[CSSATTR]</a></p>
<p>The <code>CanvasRenderingContext2D</code> object's use of fonts depends on the features
described in the CSS <cite>Fonts</cite> and <cite>Font Load Events</cite> specifications, including in particular
<dfn><code>FontLoader</code></dfn>. <a href="#refsCSSFONTS">[CSSFONTS]</a> <a
href="#refsCSSFONTLOAD">[CSSFONTLOAD]</a></p>
</dd>
<!--TOPIC:Canvas-->
<dt>SVG</dt>
<dd>
<p>The following interface is defined in the SVG specification: <a href="#refsSVG">[SVG]</a></p>
<ul class="brief">
<li><dfn><code>SVGMatrix</code></dfn>
</ul>
<!-- mention that the parser supports it? -->
</dd>
<dt>WebGL</dt>
<dd>
<p>The following interface is defined in the WebGL specification: <a
href="#refsWEBGL">[WEBGL]</a></p>
<ul class="brief">
<li><dfn><code>WebGLRenderingContext</code></dfn>
</ul>
</dd>
<!--TOPIC:HTML-->
<!-- mention that the parser supports mathml? -->
<!--TOPIC:Video Text Tracks-->
<dt>WebVTT</dt>
<dd>
<p>Implementations may support <dfn>WebVTT</dfn> as a text track format for subtitles, captions,
chapter titles, metadata, etc, for media resources. <a href="#refsWEBVTT">[WEBVTT]</a></p>
<p>The following terms, used in this specification, are defined in the WebVTT specification:</p>
<ul class="brief">
<li><dfn>WebVTT file</dfn>
<li><dfn>WebVTT file using cue text</dfn>
<li><dfn>WebVTT file using chapter title text</dfn>
<li><dfn>WebVTT file using only nested cues</dfn>
<li><dfn>WebVTT parser</dfn>
<li>The <dfn>rules for updating the display of WebVTT text tracks</dfn>
<li>The <dfn>rules for interpreting WebVTT cue text</dfn>
<li>The WebVTT <dfn>text track cue writing direction</dfn>
</ul>
</dd>
<!--TOPIC:WebSocket API-->
<dt>The WebSocket protocol</dt>
<dd>
<p>The following terms are defined in the WebSocket protocol specification: <a
href="#refsWSP">[WSP]</a></p>
<ul class="brief">
<li><dfn>establish a WebSocket connection</dfn>
<li><dfn>the WebSocket connection is established</dfn>
<li><dfn>validate the server's response</dfn>
<li><dfn>extensions in use</dfn>
<li><dfn>subprotocol in use</dfn>
<li><dfn>headers to send appropriate cookies</dfn>
<li><dfn>cookies set during the server's opening handshake</dfn>
<li><dfn>a WebSocket message has been received</dfn>
<li><dfn>send a WebSocket Message</dfn>
<li><dfn>fail the WebSocket connection</dfn>
<li><dfn>close the WebSocket connection</dfn>
<li><dfn>start the WebSocket closing handshake</dfn>
<li><dfn>the WebSocket closing handshake is started</dfn>
<li><dfn>the WebSocket connection is closed</dfn> (possibly <i data-x="">cleanly</i>)
<li><dfn>the WebSocket connection close code</dfn>
<li><dfn>the WebSocket connection close reason</dfn>
</ul>
</dd>
<!--TOPIC:HTML-->
<dt>ARIA</dt>
<dd>
<p>The term <dfn>strong native semantics</dfn> is used as defined in the ARIA specification.
The term <dfn>default implicit ARIA semantics</dfn> has the same meaning as the term <i>implicit
WAI-ARIA semantics</i> as used in the ARIA specification. <a href="#refsARIA">[ARIA]</a></p>
<p>The <dfn data-x="attr-aria-role"><code>role</code></dfn> and <code data-x="">aria-*</code>
attributes are defined in the ARIA specification. <a href="#refsARIA">[ARIA]</a></p>
</dd>
</dl>
<p>This specification does not <em>require</em> support of any particular network protocol, style
sheet language, scripting language, or any of the DOM specifications beyond those required in the
list above. However, the language described by this specification is biased towards CSS as the
styling language, JavaScript as the scripting language, and HTTP as the network protocol, and
several features assume that those languages and protocols are in use.</p>
<p>A user agent that implements the HTTP protocol must implement the Web Origin Concept
specification and the HTTP State Management Mechanism specification (Cookies) as well. <a
href="#refsHTTP">[HTTP]</a> <a href="#refsORIGIN">[ORIGIN]</a> <a
href="#refsCOOKIES">[COOKIES]</a></p>
<p class="note">This specification might have certain additional requirements on character
encodings, image formats, audio formats, and video formats in the respective sections.</p>
</div>
</div>
<h4>Extensibility</h4>
<p>Vendor-specific proprietary user agent extensions to this specification are strongly
discouraged. Documents must not use such extensions, as doing so reduces interoperability and
fragments the user base, allowing only users of specific user agents to access the content in
question.</p>
<div class="nodev">
<p>If such extensions are nonetheless needed, e.g. for experimental purposes, then vendors are
strongly urged to use one of the following extension mechanisms:</p>
<ul>
<li><p>For markup-level features that can be limited to the XML serialisation and need not be
supported in the HTML serialisation, vendors should use the namespace mechanism to define custom
namespaces in which the non-standard elements and attributes are supported.</p>
<li>
<p>For markup-level features that are intended for use with <span>the HTML syntax</span>,
extensions should be limited to new attributes of the form "<code data-x="">x-<var
data-x="">vendor</var>-<var data-x="">feature</var></code>", where <var data-x="">vendor</var> is a
short string that identifies the vendor responsible for the extension, and <var
data-x="">feature</var> is the name of the feature. New element names should not be created.
Using attributes for such extensions exclusively allows extensions from multiple vendors to
co-exist on the same element, which would not be possible with elements. Using the "<code
data-x="">x-<var data-x="">vendor</var>-<var data-x="">feature</var></code>" form allows extensions
to be made without risk of conflicting with future additions to the specification.</p>
<div class="example">
<p>For instance, a browser named "FerretBrowser" could use "ferret" as a vendor prefix, while a
browser named "Mellblom Browser" could use "mb". If both of these browsers invented extensions
that turned elements into scratch-and-sniff areas, an author experimenting with these features
could write:</p>
<pre>&lt;p>This smells of lemons!
&lt;span x-ferret-smellovision x-ferret-smellcode="LEM01"
x-mb-outputsmell x-mb-smell="lemon juice">&lt;/span>&lt;/p></pre>
</div>
</li>
</ul>
<p>Attribute names beginning with the two characters "<code data-x="">x-</code>" are reserved for
user agent use and are guaranteed to never be formally added to the HTML language. For
flexibility, attribute names containing underscores (the U+005F LOW LINE character) are also
reserved for experimental purposes and are guaranteed to never be formally added to the HTML
language.</p>
<p class="note">Pages that use such attributes are by definition non-conforming.</p>
<p>For DOM extensions, e.g. new methods and IDL attributes, the new members should be prefixed by
vendor-specific strings to prevent clashes with future versions of this specification.</p>
<p>For events, experimental event types should be prefixed with vendor-specific strings.</p>
<div class="example">
<p>For example, if a user agent called "Pleas<!--e h-->old" were to add an event to indicate when
the user is going up in an elevator, it could use the prefix "<code data-x="">pleasold</code>" and
thus name the event "<code data-x="">pleasoldgoingup</code>", possibly with an event handler
attribute named "<code data-x="">onpleasoldgoingup</code>".</p>
</div>
<p>All extensions must be defined so that the use of extensions neither contradicts nor causes the
non-conformance of functionality defined in the specification.</p> <!-- thanks to QA Framework -->
<div class="example">
<p>For example, while strongly discouraged from doing so, an implementation "Foo Browser" could
add a new IDL attribute "<code data-x="">fooTypeTime</code>" to a control's DOM interface that
returned the time it took the user to select the current value of a control (say). On the other
hand, defining a new control that appears in a form's <code
data-x="dom-form-elements">elements</code> array would be in violation of the above requirement,
as it would violate the definition of <code data-x="dom-form-elements">elements</code> given in
this specification.</p>
</div>
<p>When adding new <span data-x="reflect">reflecting</span> IDL attributes corresponding to content
attributes of the form "<code data-x="">x-<var data-x="">vendor</var>-<var
data-x="">feature</var></code>", the IDL attribute should be named "<code data-x=""><var
data-x="">vendor</var><var data-x="">Feature</var></code>" (i.e. the "<code data-x="">x</code>" is
dropped from the IDL attribute's name).</p>
</div>
<hr>
<p>When vendor-neutral extensions to this specification are needed, either this specification can
be updated accordingly, or an extension specification can be written that overrides the
requirements in this specification. When someone applying this specification to their activities
decides that they will recognise the requirements of such an extension specification, it becomes
an <dfn data-x="other applicable specifications">applicable specification</dfn> for the purposes of
conformance requirements in this specification.</p>
<p class="note">Someone could write a specification that defines any arbitrary byte stream as
conforming, and then claim that their random junk is conforming. However, that does not mean that
their random junk actually is conforming for everyone's purposes: if someone else decides that
that specification does not apply to their work, then they can quite legitimately say that the
aforementioned random junk is just that, junk, and not conforming at all. As far as conformance
goes, what matters in a particular community is what that community <em>agrees</em> is
applicable.</p>
<div class="nodev">
<hr>
<p>User agents must treat elements and attributes that they do not understand as semantically
neutral; leaving them in the DOM (for DOM processors), and styling them according to CSS (for CSS
processors), but not inferring any meaning from them.</p>
<!--ADD-TOPIC:Security-->
<p>When support for a feature is disabled (e.g. as an emergency measure to mitigate a security
problem, or to aid in development, or for performance reasons), user agents must act as if they
had no support for the feature whatsoever, and as if the feature was not mentioned in this
specification. For example, if a particular feature is accessed via an attribute in a Web IDL
interface, the attribute itself would be omitted from the objects that implement that interface
&mdash; leaving the attribute on the object but making it return null or throw an exception is
insufficient.</p>
<!--REMOVE-TOPIC:Security-->
</div>
<div class="nodev">
<h4>Interactions with XPath and XSLT</h4>
<p id="xpath-1.0-processors">Implementations of XPath 1.0 that operate on <span>HTML
documents</span> parsed or created in the manners described in this specification (e.g. as part of
the <code data-x="">document.evaluate()</code> API) must act as if the following edit was applied
to the XPath 1.0 specification.</p>
<p>First, remove this paragraph:</p>
<blockquote>
<p>A <a href="http://www.w3.org/TR/REC-xml-names#NT-QName">QName</a> in the node test is expanded
using the namespace declarations from the expression context. This is the same way expansion is
done for element type names in start and end-tags except that the default namespace declared with
<code data-x="">xmlns</code> is not used: if the <a
href="http://www.w3.org/TR/REC-xml-names#NT-QName">QName</a> does not have a prefix, then the
namespace URI is null (this is the same way attribute names are expanded). It is an error if the
<a href="http://www.w3.org/TR/REC-xml-names#NT-QName">QName</a> has a prefix for which there is
no namespace declaration in the expression context.</p>
</blockquote>
<p>Then, insert in its place the following:</p>
<blockquote>
<p>A QName in the node test is expanded into an expanded-name using the namespace declarations
from the expression context. If the QName has a prefix, then there must be a<!-- added 2009-10-27
- http://www.w3.org/Bugs/Public/show_bug.cgi?id=8062 --> namespace declaration for this prefix in
the expression context, and the corresponding<!-- typo fixed 2009-10-27 -
http://www.w3.org/Bugs/Public/show_bug.cgi?id=8063 --> namespace URI is the one that is
associated with this prefix. It is an error if the QName has a prefix for which there is no
namespace declaration in the expression context. </p>
<p>If the QName has no prefix and the principal node type of the axis is element, then the
default element namespace is used. Otherwise if the QName has no prefix, the namespace URI is
null. The default element namespace is a member of the context for the XPath expression. The
value of the default element namespace when executing an XPath expression through the DOM3 XPath
API is determined in the following way:</p>
<ol>
<li>If the context node is from an HTML DOM, the default element namespace is
"http://www.w3.org/1999/xhtml".</li>
<li>Otherwise, the default element namespace URI is null.</li>
</ol>
<p class="note">This is equivalent to adding the default element namespace feature of XPath 2.0
to XPath 1.0, and using the HTML namespace as the default element namespace for HTML documents.
It is motivated by the desire to have implementations be compatible with legacy HTML content
while still supporting the changes that this specification introduces to HTML regarding the
namespace used for HTML elements, and by the desire to use XPath 1.0 rather than XPath 2.0.</p>
</blockquote>
<p class="note">This change is a <span>willful violation</span> of the XPath 1.0 specification,
motivated by the desire to have implementations be compatible with legacy content while still
supporting the changes that this specification introduces to HTML regarding which namespace is
used for HTML elements. <a href="#refsXPATH10">[XPATH10]</a></p> <!-- note: version matters for
this ref -->
<hr>
<p id="dom-based-xslt-1.0-processors">XSLT 1.0 processors outputting to a DOM when the output
method is "html" (either explicitly or via the defaulting rule in XSLT 1.0) are affected as
follows:</p>
<p>If the transformation program outputs an element in no namespace, the processor must, prior to
constructing the corresponding DOM element node, change the namespace of the element to the
<span>HTML namespace</span>, <span data-x="converted to ASCII lowercase">ASCII-lowercase</span> the
element's local name, and <span data-x="converted to ASCII lowercase">ASCII-lowercase</span> the
names of any non-namespaced attributes on the element.</p>
<p class="note">This requirement is a <span>willful violation</span> of the XSLT 1.0
specification, required because this specification changes the namespaces and case-sensitivity
rules of HTML in a manner that would otherwise be incompatible with DOM-based XSLT
transformations. (Processors that serialise the output are unaffected.) <a
href="#refsXSLT10">[XSLT10]</a></p> <!-- note: version matters for this ref -->
<hr>
<p>This specification does not specify precisely how XSLT processing interacts with the <span>HTML
parser</span> infrastructure (for example, whether an XSLT processor acts as if it puts any
elements into a <span>stack of open elements</span>). However, XSLT processors must <span>stop
parsing</span> if they successfully complete, and must set the <span>current document
readiness</span> first to "<code data-x="">interactive</code>"<!-- this synchronously fires an
event --> and then to "<code data-x="">complete</code>"<!-- this also synchronously fires an event
--> if they are aborted.</p>
<hr>
<p>This specification does not specify how XSLT interacts with the <span
data-x="navigate">navigation</span> algorithm, how it fits in with the <span>event loop</span>, nor
how error pages are to be handled (e.g. whether XSLT errors are to replace an incremental XSLT
output, or are rendered inline, etc).</p>
<p class="note">There are also additional non-normative comments regarding the interaction of XSLT
and HTML <a href="#scriptTagXSLT">in the <code>script</code> element section</a>, and of
XSLT, XPath, and HTML <a href="#template-XSLT-XPath">in the <code>template</code> element
section</a>.</p>
</div>
<h3>Case-sensitivity and string comparison</h3>
<p>Comparing two strings in a <dfn>case-sensitive</dfn> manner means comparing them exactly, code
point for code point.</p>
<p>Comparing two strings in an <dfn>ASCII case-insensitive</dfn> manner means comparing them
exactly, code point for code point, except that the characters in the range U+0041 to U+005A (i.e.
LATIN CAPITAL LETTER A to LATIN CAPITAL LETTER Z) and the corresponding characters in the range
U+0061 to U+007A (i.e. LATIN SMALL LETTER A to LATIN SMALL LETTER Z) are considered to also
match.</p>
<p>Comparing two strings in a <dfn>compatibility caseless</dfn> manner means using the Unicode
<i>compatibility caseless match</i> operation to compare the two strings, with no language-specific tailorings. <a
href="#refsUNICODE">[UNICODE]</a></p>
<p>Except where otherwise stated, string comparisons must be performed in a
<span>case-sensitive</span> manner.</p>
<div class="nodev">
<p><dfn data-x="converted to ASCII uppercase">Converting a string to ASCII uppercase</dfn> means
replacing all characters in the range U+0061 to U+007A (i.e. LATIN SMALL LETTER A to LATIN SMALL
LETTER Z) with the corresponding characters in the range U+0041 to U+005A (i.e. LATIN CAPITAL
LETTER A to LATIN CAPITAL LETTER Z).</p>
<p><dfn data-x="converted to ASCII lowercase">Converting a string to ASCII lowercase</dfn> means
replacing all characters in the range U+0041 to U+005A (i.e. LATIN CAPITAL LETTER A to LATIN
CAPITAL LETTER Z) with the corresponding characters in the range U+0061 to U+007A (i.e. LATIN
SMALL LETTER A to LATIN SMALL LETTER Z).</p>
</div>
<p>A string <var data-x="">pattern</var> is a <dfn>prefix match</dfn> for a string <var
data-x="">s</var> when <var data-x="">pattern</var> is not longer than <var data-x="">s</var> and
truncating <var data-x="">s</var> to <var data-x="">pattern</var>'s length leaves the two strings as
matches of each other.</p>
<h3>Common microsyntaxes</h3>
<p>There are various places in HTML that accept particular data types, such as dates or numbers.
This section describes what the conformance criteria for content in those formats is, and how to
parse them.</p>
<div class="nodev">
<p class="note">Implementors are strongly urged to carefully examine any third-party libraries
they might consider using to implement the parsing of syntaxes described below. For example, date
libraries are likely to implement error handling behavior that differs from what is required in
this specification, since error-handling behavior is often not defined in specifications that
describe date syntaxes similar to those used in this specification, and thus implementations tend
to vary greatly in how they handle errors.</p>
</div>
<div class="nodev">
<h4>Common parser idioms</h4>
</div>
<p>The <dfn data-x="space character">space characters</dfn>, for the purposes of this
specification, are U+0020 SPACE, U+0009 CHARACTER TABULATION (tab), U+000A LINE FEED (LF), U+000C
FORM FEED (FF), and U+000D CARRIAGE RETURN (CR).</p>
<p>The <dfn data-x="White_Space">White_Space characters</dfn> are those that have the Unicode
property "White_Space" in the Unicode <code data-x="">PropList.txt</code> data file. <a
href="#refsUNICODE">[UNICODE]</a></p>
<p class="note">This should not be confused with the "White_Space" value (abbreviated "WS") of the
"Bidi_Class" property in the <code data-x="">UnicodeData.txt</code> data file.</p>
<p>The <dfn>control characters</dfn> are those whose Unicode "General_Category" property has the
value "Cc" in the Unicode <code data-x="">UnicodeData.txt</code> data file. <a
href="#refsUNICODE">[UNICODE]</a></p>
<p>The <dfn>uppercase ASCII letters</dfn> are the characters in the range U+0041 LATIN CAPITAL
LETTER A to U+005A LATIN CAPITAL LETTER Z.</p>
<p>The <dfn>lowercase ASCII letters</dfn> are the characters in the range U+0061 LATIN SMALL
LETTER A to U+007A LATIN SMALL LETTER Z.</p>
<p>The <dfn>ASCII digits</dfn> are the characters in the range U+0030 DIGIT ZERO (0) to U+0039
DIGIT NINE (9).</p>
<p>The <dfn>alphanumeric ASCII characters</dfn> are those that are either <span>uppercase ASCII
letters</span>, <span>lowercase ASCII letters</span>, or <span>ASCII digits</span>.</p>
<p>The <dfn>ASCII hex digits</dfn> are the characters in the ranges U+0030 DIGIT ZERO (0) to
U+0039 DIGIT NINE (9), U+0041 LATIN CAPITAL LETTER A to U+0046 LATIN CAPITAL LETTER F, and U+0061
LATIN SMALL LETTER A to U+0066 LATIN SMALL LETTER F.</p>
<p>The <dfn>uppercase ASCII hex digits</dfn> are the characters in the ranges U+0030 DIGIT ZERO (0) to
U+0039 DIGIT NINE (9) and U+0041 LATIN CAPITAL LETTER A to U+0046 LATIN CAPITAL LETTER F only.</p>
<p>The <dfn>lowercase ASCII hex digits</dfn> are the characters in the ranges U+0030 DIGIT ZERO
(0) to U+0039 DIGIT NINE (9) and U+0061 LATIN SMALL LETTER A to U+0066 LATIN SMALL LETTER F
only.</p>
<div class="nodev">
<p>Some of the micro-parsers described below follow the pattern of having an <var
data-x="">input</var> variable that holds the string being parsed, and having a <var
data-x="">position</var> variable pointing at the next character to parse in <var
data-x="">input</var>.</p>
<p>For parsers based on this pattern, a step that requires the user agent to <dfn>collect a
sequence of characters</dfn> means that the following algorithm must be run, with <var
data-x="">characters</var> being the set of characters that can be collected:</p>
<ol>
<li><p>Let <var data-x="">input</var> and <var data-x="">position</var> be the same variables as
those of the same name in the algorithm that invoked these steps.</p></li>
<li><p>Let <var data-x="">result</var> be the empty string.</p></li>
<li><p>While <var data-x="">position</var> doesn't point past the end of <var data-x="">input</var>
and the character at <var data-x="">position</var> is one of the <var data-x="">characters</var>,
append that character to the end of <var data-x="">result</var> and advance <var
data-x="">position</var> to the next character in <var data-x="">input</var>.</p></li>
<li><p>Return <var data-x="">result</var>.</p></li>
</ol>
<p>The step <dfn>skip whitespace</dfn> means that the user agent must <span>collect a sequence of
characters</span> that are <span data-x="space character">space characters</span>. The step
<dfn>skip White_Space characters</dfn> means that the user agent must <span>collect a sequence of
characters</span> that are <span>White_Space</span> characters. In both cases, the collected
characters are not used. <a href="#refsUNICODE">[UNICODE]</a></p>
<p>When a user agent is to <dfn>strip line breaks</dfn> from a string, the user agent must remove
any U+000A LINE FEED (LF) and U+000D CARRIAGE RETURN (CR) characters from that string.</p>
<p>When a user agent is to <dfn>strip leading and trailing whitespace</dfn> from a string, the
user agent must remove all <span data-x="space character">space characters</span> that are at the
start or end of the string.</p>
<p>When a user agent is to <dfn>strip and collapse whitespace</dfn> in a string, it must replace
any sequence of one or more consecutive <span data-x="space character">space characters</span> in
that string with a single U+0020 SPACE character, and then <span>strip leading and trailing
whitespace</span> from that string.</p>
<p>When a user agent has to <dfn>strictly split a string</dfn> on a particular delimiter character
<var data-x="">delimiter</var>, it must use the following algorithm:</p>
<ol>
<li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
<li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
pointing at the start of the string.</p></li>
<li><p>Let <var data-x="">tokens</var> be an ordered list of tokens, initially empty.</p></li>
<li><p>While <var data-x="">position</var> is not past the end of <var data-x="">input</var>:</p>
<ol>
<li><p><span>Collect a sequence of characters</span> that are not the <var
data-x="">delimiter</var> character.</p></li>
<li><p>Append the string collected in the previous step to <var data-x="">tokens</var>.</p></li>
<li><p>Advance <var data-x="">position</var> to the next character in <var
data-x="">input</var>.</p></li> <!-- skips past the delimiter -->
</ol>
</li>
<li><p>Return <var data-x="">tokens</var>.</p></li>
</ol>
<p class="note">For the special cases of splitting a string <span data-x="split a string on
spaces">on spaces</span> and <span data-x="split a string on commas">on commas</span>, this
algorithm does not apply (those algorithms also perform <span data-x="strip leading and trailing
whitespace">whitespace trimming</span>).</p>
</div>
<h4>Boolean attributes</h4>
<p>A number of attributes are <dfn data-x="boolean attribute">boolean attributes</dfn>. The
presence of a boolean attribute on an element represents the true value, and the absence of the
attribute represents the false value.</p>
<p>If the attribute is present, its value must either be the empty string or a value that is an
<span>ASCII case-insensitive</span> match for the attribute's canonical name, with no leading or
trailing whitespace.</p>
<p class="note">The values "true" and "false" are not allowed on boolean attributes. To represent
a false value, the attribute has to be omitted altogether.</p>
<div class="example">
<p>Here is an example of a checkbox that is checked and disabled. The <code
data-x="attr-input-checked">checked</code> and <code data-x="attr-fe-disabled">disabled</code>
attributes are the boolean attributes.</p>
<pre>&lt;label>&lt;input type=checkbox checked name=cheese disabled> Cheese&lt;/label></pre>
<p>This could be equivalently written as this:</p>
<pre>&lt;label>&lt;input type=checkbox checked=checked name=cheese disabled=disabled> Cheese&lt;/label></pre>
<p>You can also mix styles; the following is still equivalent:</p>
<pre>&lt;label>&lt;input type='checkbox' checked name=cheese disabled=""> Cheese&lt;/label></pre>
</div>
<h4>Keywords and enumerated attributes</h4>
<p>Some attributes are defined as taking one of a finite set of keywords. Such attributes are
called <dfn data-x="enumerated attribute">enumerated attributes</dfn>. The keywords are each
defined to map to a particular <em>state</em> (several keywords might map to the same state, in
which case some of the keywords are synonyms of each other; additionally, some of the keywords can
be said to be non-conforming, and are only in the specification for historical reasons). In
addition, two default states can be given. The first is the <i>invalid value default</i>, the
second is the <i>missing value default</i>.</p>
<p>If an enumerated attribute is specified, the attribute's value must be an <span>ASCII
case-insensitive</span> match for one of the given keywords that are not said to be
non-conforming, with no leading or trailing whitespace.</p>
<p>When the attribute is specified, if its value is an <span>ASCII case-insensitive</span> match
for one of the given keywords then that keyword's state is the state that the attribute
represents. If the attribute value matches none of the given keywords, but the attribute has an
<i>invalid value default</i>, then the attribute represents that state. Otherwise, if the
attribute value matches none of the keywords but there is a <i>missing value default</i> state
defined, then <em>that</em> is the state represented by the attribute. Otherwise, there is no
default, and invalid values mean that there is no state represented.</p>
<p>When the attribute is <em>not</em> specified, if there is a <i>missing value default</i> state
defined, then that is the state represented by the (missing) attribute. Otherwise, the absence of
the attribute means that there is no state represented.</p>
<p class="note">The empty string can be a valid keyword.</p>
<h4>Numbers</h4>
<h5>Signed integers</h5>
<p>A string is a <dfn>valid integer</dfn> if it consists of one or more <span>ASCII digits</span>,
optionally prefixed with a U+002D HYPHEN-MINUS character (-).</p>
<p>A <span>valid integer</span> without a U+002D HYPHEN-MINUS (-) prefix represents the number
that is represented in base ten by that string of digits. A <span>valid integer</span>
<em>with</em> a U+002D HYPHEN-MINUS (-) prefix represents the number represented in base ten by
the string of digits that follows the U+002D HYPHEN-MINUS, subtracted from zero.</p>
<div class="nodev">
<p>The <dfn>rules for parsing integers</dfn> are as given in the following algorithm. When
invoked, the steps must be followed in the order given, aborting at the first step that returns a
value. This algorithm will return either an integer or an error.</p>
<ol>
<li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
<li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
pointing at the start of the string.</p></li>
<li><p>Let <var data-x="">sign</var> have the value "positive".</p></li>
<li><p><span>Skip whitespace</span>.</p></li>
<li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return an
error.</p></li>
<li>
<p>If the character indicated by <var data-x="">position</var> (the first character) is a U+002D
HYPHEN-MINUS character (-):</p>
<ol>
<li>Let <var data-x="">sign</var> be "negative".</li>
<li>Advance <var data-x="">position</var> to the next character.</li>
<li>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return an
error.</li>
</ol>
<p>Otherwise, if the character indicated by <var data-x="">position</var> (the first character)
is a U+002B PLUS SIGN character (+):</p>
<ol>
<li>Advance <var data-x="">position</var> to the next character. (The "<code data-x="">+</code>"
is ignored, but it is not conforming.)</li>
<li>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return an
error.</li>
</ol>
</li>
<li><p>If the character indicated by <var data-x="">position</var> is not an <span data-x="ASCII
digits">ASCII digit</span>, then return an error.</p></li>
<!-- Ok. At this point we know we have a number. It might have
trailing garbage which we'll ignore, but it's a number, and we
won't return an error. -->
<li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>, and
interpret the resulting sequence as a base-ten integer. Let <var data-x="">value</var> be that
integer.</p></li>
<li><p>If <var data-x="">sign</var> is "positive", return <var
data-x="">value</var>, otherwise return the result of subtracting
<var data-x="">value</var> from zero.</p></li>
</ol>
</div>
<h5>Non-negative integers</h5>
<p>A string is a <dfn>valid non-negative integer</dfn> if it consists of one or more <span>ASCII
digits</span>.</p>
<p>A <span>valid non-negative integer</span> represents the number that is represented in base ten
by that string of digits.</p>
<div class="nodev">
<p>The <dfn>rules for parsing non-negative integers</dfn> are as given in the following algorithm.
When invoked, the steps must be followed in the order given, aborting at the first step that
returns a value. This algorithm will return either zero, a positive integer, or an error.</p>
<ol>
<li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
<li><p>Let <var data-x="">value</var> be the result of parsing <var data-x="">input</var> using the
<span>rules for parsing integers</span>.</p></li>
<li><p>If <var data-x="">value</var> is an error, return an error.</p></li>
<li><p>If <var data-x="">value</var> is less than zero, return an error.</p></li>
<li><p>Return <var data-x="">value</var>.</p></li>
</ol>
<!-- Implications: A leading + is ignored. A leading - is ignored if the value is zero. -->
</div>
<h5>Floating-point numbers</h5>
<p>A string is a <dfn>valid floating-point number</dfn> if it consists of:</p>
<ol class="brief">
<li>Optionally, a U+002D HYPHEN-MINUS character (-).</li>
<li>One or both of the following, in the given order:
<ol>
<li>A series of one or more <span>ASCII digits</span>.</li>
<li>
<ol>
<li>A single U+002E FULL STOP character (.).</li>
<li>A series of one or more <span>ASCII digits</span>.</li>
</ol>
</li>
</ol>
</li>
<li>Optionally:
<ol>
<li>Either a U+0065 LATIN SMALL LETTER E character (e) or a U+0045 LATIN CAPITAL LETTER E
character (E).</li>
<li>Optionally, a U+002D HYPHEN-MINUS character (-) or U+002B PLUS SIGN character (+).</li>
<li>A series of one or more <span>ASCII digits</span>.</li>
</ol>
</li>
</ol>
<p>A <span>valid floating-point number</span> represents the number obtained by multiplying the
significand by ten raised to the power of the exponent, where the significand is the first number,
interpreted as base ten (including the decimal point and the number after the decimal point, if
any, and interpreting the significand as a negative number if the whole string starts with a
U+002D HYPHEN-MINUS character (-) and the number is not zero), and where the exponent is the
number after the E, if any (interpreted as a negative number if there is a U+002D HYPHEN-MINUS
character (-) between the E and the number and the number is not zero, or else ignoring a U+002B
PLUS SIGN character (+) between the E and the number if there is one). If there is no E, then the
exponent is treated as zero.</p>
<p class="note">The Infinity and Not-a-Number (NaN) values are not <span data-x="valid
floating-point number">valid floating-point numbers</span>.</p>
<div class="nodev">
<p>The <dfn data-x="best representation of the number as a floating-point number">best
representation of the number <var data-x="">n</var> as a floating-point number</dfn> is the string
obtained from applying the JavaScript operator ToString to <var data-x="">n</var>. The JavaScript
operator ToString is not uniquely determined. When there are multiple possible strings that could
be obtained from the JavaScript operator ToString for a particular value, the user agent must
always return the same string for that value (though it may differ from the value used by other
user agents).</p>
<p>The <dfn>rules for parsing floating-point number values</dfn> are as given in the following
algorithm. This algorithm must be aborted at the first step that returns something. This algorithm
will return either a number or an error.</p>
<ol>
<li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
<li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
pointing at the start of the string.</p></li>
<li><p>Let <var data-x="">value</var> have the value 1.</li>
<li><p>Let <var data-x="">divisor</var> have the value 1.</p></li>
<li><p>Let <var data-x="">exponent</var> have the value 1.</p></li>
<li><p><span>Skip whitespace</span>.</p></li>
<li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return an
error.</p></li>
<li>
<p>If the character indicated by <var data-x="">position</var> is a U+002D HYPHEN-MINUS character
(-):</p>
<ol>
<li>Change <var data-x="">value</var> and <var data-x="">divisor</var> to &#x2212;1.</li>
<li>Advance <var data-x="">position</var> to the next character.</li>
<li>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return an
error.</li>
</ol>
<p>Otherwise, if the character indicated by <var data-x="">position</var> (the first character)
is a U+002B PLUS SIGN character (+):</p>
<ol>
<li>Advance <var data-x="">position</var> to the next character. (The "<code data-x="">+</code>"
is ignored, but it is not conforming.)</li>
<li>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return an
error.</li>
</ol>
</li>
<li><p>If the character indicated by <var data-x="">position</var> is a U+002E FULL STOP (.), and
that is not the last character in <var data-x="">input</var>, and the character after the
character indicated by <var data-x="">position</var> is an <span data-x="ASCII digits">ASCII
digit</span>, then set <var data-x="">value</var> to zero and jump to the step labeled
<i>fraction</i>.</p> <!-- we have to check there's a number so that ".e1" fails to parse but ".0"
does not -->
<li><p>If the character indicated by <var data-x="">position</var> is not an <span data-x="ASCII
digits">ASCII digit</span>, then return an error.</p></li>
<li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>, and
interpret the resulting sequence as a base-ten integer. Multiply <var data-x="">value</var> by
that integer.</p></li>
<li>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, jump to the
step labeled <i>conversion</i>.</li>
<li><p><i>Fraction</i>: If the character indicated by <var data-x="">position</var> is a U+002E
FULL STOP (.), run these substeps:</p>
<ol>
<li><p>Advance <var data-x="">position</var> to the next character.</p></li>
<li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, or if the
character indicated by <var data-x="">position</var> is not an <span data-x="ASCII digits">ASCII
digit</span>, U+0065 LATIN SMALL LETTER E (e), or U+0045 LATIN CAPITAL LETTER E (E), then jump
to the step labeled <i>conversion</i>.</li>
<li><p>If the character indicated by <var data-x="">position</var> is a U+0065 LATIN SMALL
LETTER E character (e) or a U+0045 LATIN CAPITAL LETTER E character (E), skip the remainder of
these substeps.</p>
<li><p><i>Fraction loop</i>: Multiply <var data-x="">divisor</var> by ten.</p></li>
<li>Add the value of the character indicated by <var data-x="">position</var>, interpreted as a
base-ten digit (0..9) and divided by <var data-x="">divisor</var>, to <var
data-x="">value</var>.</li>
<li><p>Advance <var data-x="">position</var> to the next character.</p></li>
<li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, then jump
to the step labeled <i>conversion</i>.</li>
<li><p>If the character indicated by <var data-x="">position</var> is an <span data-x="ASCII
digits">ASCII digit</span>, jump back to the step labeled <i>fraction loop</i> in these
substeps.</p></li>
</ol>
</li>
<li><p>If the character indicated by <var data-x="">position</var> is a U+0065 LATIN SMALL LETTER
E character (e) or a U+0045 LATIN CAPITAL LETTER E character (E), run these substeps:</p>
<ol>
<li><p>Advance <var data-x="">position</var> to the next character.</p></li>
<li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, then jump
to the step labeled <i>conversion</i>.</li>
<li>
<p>If the character indicated by <var data-x="">position</var> is a U+002D HYPHEN-MINUS
character (-):</p>
<ol>
<li>Change <var data-x="">exponent</var> to &#x2212;1.</li>
<li>Advance <var data-x="">position</var> to the next character.</li>
<li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, then
jump to the step labeled <i>conversion</i>.</li>
</ol>
<p>Otherwise, if the character indicated by <var data-x="">position</var> is a U+002B PLUS SIGN
character (+):</p>
<ol>
<li>Advance <var data-x="">position</var> to the next character.</li>
<li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, then
jump to the step labeled <i>conversion</i>.</li>
</ol>
</li>
<li><p>If the character indicated by <var data-x="">position</var> is not an <span data-x="ASCII
digits">ASCII digit</span>, then jump to the step labeled <i>conversion</i>.</li>
<li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>, and
interpret the resulting sequence as a base-ten integer. Multiply <var data-x="">exponent</var>
by that integer.</p></li>
<li><p>Multiply <var data-x="">value</var> by ten raised to the <var data-x="">exponent</var>th
power.</p></li>
</ol>
</li>
<li><p><i>Conversion</i>: Let <var data-x="">S</var> be the set of finite IEEE 754
double-precision floating-point values except &#x2212;0, but with two special values added: 2<sup
data-x="">1024</sup> and &#x2212;2<sup data-x="">1024</sup>.</p></li>
<li><p>Let <var data-x="">rounded-value</var> be the number in <var data-x="">S</var> that is
closest to <var data-x="">value</var>, selecting the number with an even significand if there are
two equally close values. (The two special values 2<sup data-x="">1024</sup> and &#x2212;2<sup
data-x="">1024</sup> are considered to have even significands for this purpose.)</p></li>
<li><p>If <var data-x="">rounded-value</var> is 2<sup data-x="">1024</sup> or &#x2212;2<sup
data-x="">1024</sup>, return an error.</p></li>
<li><p>Return <var data-x="">rounded-value</var>.</p></li>
</ol>
</div>
<div class="nodev">
<h5 id="percentages-and-dimensions">Percentages and lengths</h5>
<p>The <dfn>rules for parsing dimension values</dfn> are as given in the following algorithm. When
invoked, the steps must be followed in the order given, aborting at the first step that returns a
value. This algorithm will return either a number greater than or equal to 1.0, or an error; if a
number is returned, then it is further categorised as either a percentage or a length.</p>
<ol>
<li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
<li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
pointing at the start of the string.</p></li>
<li><p><span>Skip whitespace</span>.</p></li>
<li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return an
error.</p></li>
<li><p>If the character indicated by <var data-x="">position</var> is a U+002B PLUS SIGN character
(+), advance <var data-x="">position</var> to the next character.</li>
<li><p><span>Collect a sequence of characters</span> that are U+0030 DIGIT ZERO (0) characters,
and discard them.</p></li>
<li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return an
error.</p></li>
<li><p>If the character indicated by <var data-x="">position</var> is not one of U+0031 DIGIT ONE
(1) to U+0039 DIGIT NINE (9), then return an error.</p></li>
<!-- Ok. At this point we know we have a number. It might have trailing garbage which we'll
ignore, but it's a number, and we won't return an error. -->
<li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>, and
interpret the resulting sequence as a base-ten integer. Let <var data-x="">value</var> be that
number.</li>
<li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return <var
data-x="">value</var> as a length.</p></li>
<li>
<p>If the character indicated by <var data-x="">position</var> is a U+002E FULL STOP character
(.):</p>
<ol>
<li><p>Advance <var data-x="">position</var> to the next character.</p></li>
<li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, or if the
character indicated by <var data-x="">position</var> is not an <span data-x="ASCII digits">ASCII
digit</span>, then return <var data-x="">value</var> as a length.</li>
<li><p>Let <var data-x="">divisor</var> have the value 1.</p></li>
<li><p><i>Fraction loop</i>: Multiply <var data-x="">divisor</var> by ten.</p></li>
<li>Add the value of the character indicated by <var data-x="">position</var>, interpreted as a
base-ten digit (0..9) and divided by <var data-x="">divisor</var>, to <var
data-x="">value</var>.</li>
<li><p>Advance <var data-x="">position</var> to the next character.</p></li>
<li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, then
return <var data-x="">value</var> as a length.</li>
<li><p>If the character indicated by <var data-x="">position</var> is an <span data-x="ASCII
digits">ASCII digit</span>, return to the step labeled <i>fraction loop</i> in these
substeps.</p></li>
</ol>
</li>
<li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, return <var
data-x="">value</var> as a length.</p></li>
<li><p>If the character indicated by <var data-x="">position</var> is a U+0025 PERCENT SIGN
character (%), return <var data-x="">value</var> as a percentage.</p></li>
<li><p>Return <var data-x="">value</var> as a length.</p></li>
</ol>
</div>
<h5>Lists of integers</h5>
<p>A <dfn>valid list of integers</dfn> is a number of <span data-x="valid integer">valid
integers</span> separated by U+002C COMMA characters, with no other characters (e.g. no <span
data-x="space character">space characters</span>). In addition, there might be restrictions on the
number of integers that can be given, or on the range of values allowed.</p>
<div class="nodev">
<p>The <dfn>rules for parsing a list of integers</dfn> are as follows:</p>
<ol>
<li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
<li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
pointing at the start of the string.</p></li>
<li><p>Let <var data-x="">numbers</var> be an initially empty list of integers. This list will be
the result of this algorithm.</p></li>
<li><p>If there is a character in the string <var data-x="">input</var> at position <var
data-x="">position</var>, and it is either a U+0020 SPACE, U+002C COMMA, or U+003B SEMICOLON
character, then advance <var data-x="">position</var> to the next character in <var
data-x="">input</var>, or to beyond the end of the string if there are no more
characters.</p></li>
<li><p>If <var data-x="">position</var> points to beyond the end of <var data-x="">input</var>,
return <var data-x="">numbers</var> and abort.</p></li>
<li><p>If the character in the string <var data-x="">input</var> at position <var
data-x="">position</var> is a U+0020 SPACE, U+002C COMMA, or U+003B SEMICOLON character, then
return to step 4.</p></li>
<li><p>Let <var data-x="">negated</var> be false.</p></li>
<li><p>Let <var data-x="">value</var> be 0.</p></li>
<li><p>Let <var data-x="">started</var> be false. This variable is set to true when the parser
sees a number or a U+002D HYPHEN-MINUS character (-).</p></li>
<li><p>Let <var data-x="">got number</var> be false. This variable is set to true when the parser
sees a number.</p></li>
<li><p>Let <var data-x="">finished</var> be false. This variable is set to true to switch the parser
into a mode where it ignores characters until the next separator.</p></li>
<li><p>Let <var data-x="">bogus</var> be false.</p></li>
<li><p><i>Parser</i>: If the character in the string <var data-x="">input</var> at position <var
data-x="">position</var> is:</p>
<dl class="switch">
<dt>A U+002D HYPHEN-MINUS character</dt>
<dd>
<p>Follow these substeps:</p>
<ol>
<li>If <var data-x="">got number</var> is true, let <var data-x="">finished</var> be true.</li>
<li>If <var data-x="">finished</var> is true, skip to the next step in the overall set of
steps.</li>
<li>If <var data-x="">started</var> is true, let <var data-x="">negated</var> be false.</li>
<li>Otherwise, if <var data-x="">started</var> is false and if <var data-x="">bogus</var> is
false, let <var data-x="">negated</var> be true.</li>
<li>Let <var data-x="">started</var> be true.</li>
</ol>
</dd>
<dt>An <span data-x="ASCII digits">ASCII digit</span></dt>
<dd>
<p>Follow these substeps:</p>
<ol>
<li>If <var data-x="">finished</var> is true, skip to the next step in the overall set of
steps.</li>
<li>Multiply <var data-x="">value</var> by ten.</li>
<li>Add the value of the digit, interpreted in base ten, to <var data-x="">value</var>.</li>
<li>Let <var data-x="">started</var> be true.</li>
<li>Let <var data-x="">got number</var> be true.</li>
</ol>
</dd>
<dt>A U+0020 SPACE character</dt>
<dt>A U+002C COMMA character</dt>
<dt>A U+003B SEMICOLON character</dt>
<dd>
<p>Follow these substeps:</p>
<ol>
<li>If <var data-x="">got number</var> is false, return the <var data-x="">numbers</var> list
and abort. This happens if an entry in the list has no digits, as in "<code
data-x="">1,2,x,4</code>".</li>
<li>If <var data-x="">negated</var> is true, then negate <var data-x="">value</var>.</li>
<li>Append <var data-x="">value</var> to the <var data-x="">numbers</var> list.</li>
<li>Jump to step 4 in the overall set of steps.</li>
</ol>
</dd>
<!-- <dt>A U+002E FULL STOP character</dt> -->
<dt>A character in the range U+0001 to U+001F, <!-- space --> U+0021 to U+002B, <!-- comma --> U+002D to U+002F, <!-- digits --> U+003A, <!-- semicolon --> U+003C to U+0040, <!-- A-Z --> U+005B to U+0060, <!-- a-z --> U+007B to U+007F
(i.e. any other non-alphabetic ASCII character)</dt>
<!--
IE6 on Wine treats the following characters like this also: U+1-U+1f, U+21-U+2b, U+2d-U+2f, U+3a,
U+3c-U+40, U+5b-U+60, U+7b-U+82, U+84-U+89, U+8b, U+8d, U+8f-U+99, U+9b, U+9d, U+a0-U+bf, U+d7,
U+f7, U+1f6-U+1f9, U+218-U+24f, U+2a9-U+385, U+387, U+38b, U+38d, U+3a2, U+3cf, U+3d7-U+3d9, U+3db,
U+3dd, U+3df, U+3e1, U+3f4-U+400, U+40d, U+450, U+45d, U+482-U+48f, U+4c5-U+4c6, U+4c9-U+4ca,
U+4cd-U+4cf, U+4ec-U+4ed, U+4f6-U+4f7, U+4fa-U+530, U+557-U+560, U+588-U+5cf, U+5eb-U+5ef,
U+5f3-U+620, U+63b-U+640, U+64b-U+670, U+6b8-U+6b9, U+6bf, U+6cf, U+6d4, U+6d6-U+904, U+93a-U+957,
U+962-U+984, U+98d-U+98e, U+991-U+992, U+9a9, U+9b1, U+9b3-U+9b5, U+9ba-U+9db, U+9de, U+9e2-U+9ef,
U+9f2-U+a04, U+a0b-U+a0e, U+a11-U+a12, U+a29, U+a31, U+a34, U+a37, U+a3a-U+a58, U+a5d, U+a5f-U+a84,
U+a8c, U+a8e, U+a92, U+aa9, U+ab1, U+ab4, U+aba-U+adf, U+ae1-U+b04, U+b0d-U+b0e, U+b11-U+b12,
U+b29, U+b31, U+b34-U+b35, U+b3a-U+b5b, U+b5e, U+b62-U+b84, U+b8b-U+b8d, U+b91, U+b96-U+b98, U+b9b,
U+b9d, U+ba0-U+ba2, U+ba5-U+ba7, U+bab-U+bad, U+bb6, U+bba-U+c04, U+c0d, U+c11, U+c29, U+c34,
U+c3a-U+c5f, U+c62-U+c84, U+c8d, U+c91, U+ca9, U+cb4, U+cba-U+cdd, U+cdf, U+ce2-U+d04, U+d0d,
U+d11, U+d29, U+d3a-U+d5f, U+d62-U+e00, U+e2f, U+e31, U+e34-U+e3f, U+e46-U+e80, U+e83, U+e85-U+e86,
U+e89, U+e8b-U+e8c, U+e8e-U+e93, U+e98, U+ea0, U+ea4, U+ea6, U+ea8-U+ea9, U+eac, U+eaf-U+edb,
U+ede-U+109f, U+10c6-U+10cf, U+10f7-U+10ff, U+115a-U+115e, U+11a3-U+11a7, U+11fa-U+1dff,
U+1e9b-U+1e9f, U+1efa-U+1eff, U+1f16-U+1f17, U+1f1e-U+1f1f, U+1f46-U+1f47, U+1f4e-U+1f4f, U+1f58,
U+1f5a, U+1f5c, U+1f5e, U+1f7e-U+1f7f, U+1fb5, U+1fbd-U+1fc1, U+1fc5, U+1fcd-U+1fcf, U+1fd4-U+1fd5,
U+1fdc-U+1fdf, U+1fed-U+1ff1, U+1ff5, U+1ffd-U+249b, U+24ea-U+3004, U+3006-U+3040, U+3095-U+309a,
U+309f-U+30a0, U+30fb, U+30ff-U+3104, U+312d-U+3130, U+318f-U+4dff, U+9fa6-U+abff, U+d7a4-U+d7ff,
U+e000-U+f8ff, U+fa2e-U+faff, U+fb07-U+fb12, U+fb18-U+fb1e, U+fb37, U+fb3d, U+fb3f, U+fb42, U+fb45,
U+fbb2-U+fbd2, U+fbe9, U+fce1, U+fd3e-U+fd4f, U+fd90-U+fd91, U+fdc8-U+fdef, U+fdfc-U+fe7f,
U+fefd-U+ff20, U+ff3b-U+ff40, U+ff5b-U+ff65, U+ffa0, U+ffbf-U+ffc1, U+ffc8-U+ffc9, U+ffd0-U+ffd1,
U+ffd8-U+ffd9, U+ffdd-U+ffff
IE7 on Win2003 treats the following characters like this also instead: U+1-U+1f, U+21-U+2b,
U+2d-U+2f, U+3a, U+3c-U+40, U+5b-U+60, U+7b-U+82, U+84-U+89, U+8b, U+8d, U+8f-U+99, U+9b, U+9d,
U+a0-U+a9, U+ab-U+b4, U+b6-U+b9, U+bb-U+bf, U+d7, U+f7, U+220-U+221, U+234-U+24f, U+2ae-U+2af,
U+2b9-U+2ba, U+2c2-U+2df, U+2e5-U+2ed, U+2ef-U+344, U+346-U+379, U+37b-U+385, U+387, U+38b, U+38d,
U+3a2, U+3cf, U+3d8-U+3d9, U+3f4-U+3ff, U+482-U+48b, U+4c5-U+4c6, U+4c9-U+4ca, U+4cd-U+4cf,
U+4f6-U+4f7, U+4fa-U+530, U+557-U+558, U+55a-U+560, U+588-U+5cf, U+5eb-U+5ef, U+5f3-U+620,
U+63b-U+640, U+656-U+66f, U+6d4, U+6dd-U+6e0, U+6e9-U+6ec, U+6ee-U+6f9, U+6fd-U+70f, U+72d-U+72f,
U+740-U+77f, U+7b1-U+900, U+904, U+93a-U+93c, U+94d - U+94f, U+951-U+957, U+964-U+980, U+984,
U+98d-U+98e, U+991-U+992, U+9a9, U+9b1, U+9b3-U+9b5, U+9ba-U+9bd, U+9c5-U+9c6, U+9c9-U+9ca,
U+9cd-U+9d6, U+9d8-U+9db, U+9de, U+9e4-U+9ef, U+9f2-U+a01, U+a03-U+a04, U+a0b-U+a0e, U+a11-U+a12,
U+a29, U+a31, U+a34, U+a37, U+a3a-U+a3d, U+a43-U+a46, U+a49-U+a4a, U+a4d-U+a58, U+a5d, U+a5f-U+a6f,
U+a75-U+a80, U+a84, U+a8c, U+a8e, U+a92, U+aa9, U+ab1, U+ab4, U+aba-U+abc, U+ac6, U+aca,
U+acd-U+acf, U+ad1-U+adf, U+ae1-U+b00, U+b04, U+b0d-U+b0e, U+b11-U+b12, U+b29, U+b31, U+b34-U+b35,
U+b3a-U+b3c, U+b44-U+b46, U+b49 - U+b4a, U+b4d-U+b55, U+b58-U+b5b, U+b5e, U+b62-U+b81, U+b84,
U+b8b-U+b8d, U+b91, U+b96-U+b98, U+b9b, U+b9d, U+ba0 - U+ba2, U+ba5-U+ba7, U+bab-U+bad, U+bb6,
U+bba-U+bbd, U+bc3-U+bc5, U+bc9, U+bcd-U+bd6, U+bd8-U+c00, U+c04, U+c0d, U+c11, U+c29, U+c34,
U+c3a-U+c3d, U+c45, U+c49, U+c4d-U+c54, U+c57-U+c5f, U+c62-U+c81, U+c84, U+c8d, U+c91, U+ca9,
U+cb4, U+cba-U+cbd, U+cc5, U+cc9, U+ccd-U+cd4, U+cd7-U+cdd, U+cdf, U+ce2-U+d01, U+d04, U+d0d,
U+d11, U+d29, U+d3a-U+d3d, U+d44-U+d45, U+d49, U+d4d-U+d56, U+d58-U+d5f, U+d62-U+d81, U+d84,
U+d97-U+d99, U+db2, U+dbc, U+dbe - U+dbf, U+dc7-U+dce, U+dd5, U+dd7, U+de0-U+df1, U+df4-U+e00,
U+e3b-U+e3f, U+e4f-U+e80, U+e83, U+e85-U+e86, U+e89, U+e8b-U+e8c, U+e8e-U+e93, U+e98, U+ea0, U+ea4,
U+ea6, U+ea8-U+ea9, U+eac, U+eba, U+ebe-U+ebf, U+ec5-U+ecc, U+ece-U+edb, U+ede-U+eff, U+f01-U+f3f,
U+f48, U+f6b-U+f70, U+f82-U+f87, U+f8c-U+f8f, U+f98, U+fbd-U+fff, U+1022, U+1028, U+102b,
U+1033-U+1035, U+1037, U+1039-U+104f, U+105a-U+109f, U+10c6-U+10cf, U+10f7-U+10ff, U+115a - U+115e,
U+11a3-U+11a7, U+11fa-U+11ff, U+1207, U+1247, U+1249, U+124e-U+124f, U+1257, U+1259, U+125e-U+125f,
U+1287, U+1289, U+128e-U+128f, U+12af, U+12b1, U+12b6-U+12b7, U+12bf, U+12c1, U+12c6-U+12c7,
U+12cf, U+12d7, U+12ef, U+130f, U+1311, U+1316-U+1317, U+131f, U+1347, U+135b-U+139f,
U+13f5-U+1400, U+166d-U+166e, U+1677-U+1680, U+169b - U+169f, U+16eb-U+177f, U+17c9-U+181f, U+1843,
U+1878-U+187f, U+18aa-U+1dff, U+1e9c-U+1e9f, U+1efa-U+1eff, U+1f16-U+1f17, U+1f1e-U+1f1f,
U+1f46-U+1f47, U+1f4e-U+1f4f, U+1f58, U+1f5a, U+1f5c, U+1f5e, U+1f7e-U+1f7f, U+1fb5, U+1fbd,
U+1fbf-U+1fc1, U+1fc5, U+1fcd-U+1fcf, U+1fd4-U+1fd5, U+1fdc-U+1fdf, U+1fed-U+1ff1, U+1ff5,
U+1ffd-U+207e, U+2080-U+2101, U+2103-U+2106, U+2108-U+2109, U+2114, U+2116-U+2118, U+211e-U+2123,
U+2125, U+2127, U+2129, U+212e, U+2132, U+213a-U+215f, U+2184-U+3005, U+3008-U+3020, U+302a-U+3037,
U+303b-U+3104, U+312d-U+3130, U+318f - U+319f, U+31b8-U+33ff, U+4db6-U+4dff, U+9fa6-U+9fff,
U+a48d-U+abff, U+d7a4-U+d7ff, U+e000-U+f8ff, U+fa2e-U+faff, U+fb07-U+fb12, U+fb18-U+fb1c, U+fb1e,
U+fb29, U+fb37, U+fb3d, U+fb3f, U+fb42, U+fb45, U+fbb2-U+fbd2, U+fd3e-U+fd4f, U+fd90-U+fd91,
U+fdc8-U+fdef, U+fdfc-U+fe6f, U+fe73, U+fe75, U+fefd-U+ff20, U+ff3b-U+ff40, U+ff5b-U+ff9f,
U+ffbf-U+ffc1, U+ffc8-U+ffc9, U+ffd0-U+ffd1, U+ffd8-U+ffd9, U+ffdd-U+ffff
-->
<dd>
<p>Follow these substeps:</p>
<ol>
<li>If <var data-x="">got number</var> is true, let <var data-x="">finished</var> be true.</li>
<li>If <var data-x="">finished</var> is true, skip to the next step in the overall set of
steps.</li>
<li>Let <var data-x="">negated</var> be false.</li>
</ol>
</dd>
<dt>Any other character</dt>
<!-- alphabetic a-z A-Z, and non-ASCII -->
<dd>
<p>Follow these substeps:</p>
<ol>
<li>If <var data-x="">finished</var> is true, skip to the next step in the overall set of
steps.</li>
<li>Let <var data-x="">negated</var> be false.</li>
<li>Let <var data-x="">bogus</var> be true.</li>
<li>If <var data-x="">started</var> is true, then return the <var data-x="">numbers</var> list,
and abort. (The value in <var data-x="">value</var> is not appended to the list first; it is
dropped.)</li>
</ol>
</dd>
</dl>
</li>
<li><p>Advance <var data-x="">position</var> to the next character in <var data-x="">input</var>,
or to beyond the end of the string if there are no more characters.</p></li>
<li><p>If <var data-x="">position</var> points to a character (and not to beyond the end of <var
data-x="">input</var>), jump to the big <i>Parser</i> step above.</p></li>
<li><p>If <var data-x="">negated</var> is true, then negate <var data-x="">value</var>.</p></li>
<li><p>If <var data-x="">got number</var> is true, then append <var data-x="">value</var> to the
<var data-x="">numbers</var> list.</p></li>
<li><p>Return the <var data-x="">numbers</var> list and abort.</p></li>
</ol>
</div>
<div class="nodev">
<h5>Lists of dimensions</h5>
<!-- no definition of a type since no conforming feature uses this syntax (it's only used in
cols="" and rows="" on <frameset> elements) -->
<p>The <dfn>rules for parsing a list of dimensions</dfn> are as follows. These rules return a list
of zero or more pairs consisting of a number and a unit, the unit being one of <i>percentage</i>,
<i>relative</i>, and <i>absolute</i>.</p>
<ol>
<li><p>Let <var data-x="">raw input</var> be the string being parsed.</p></li>
<li><p>If the last character in <var data-x="">raw input</var> is a U+002C COMMA character (,),
then remove that character from <var data-x="">raw input</var>.</p></li>
<li><p><span data-x="split a string on commas">Split the string <var data-x="">raw input</var> on
commas</span>. Let <var data-x="">raw tokens</var> be the resulting list of tokens.</p></li>
<li><p>Let <var data-x="">result</var> be an empty list of number/unit pairs.</p></li>
<li>
<p>For each token in <var data-x="">raw tokens</var>, run the following substeps:</p>
<ol>
<li><p>Let <var data-x="">input</var> be the token.</p></li>
<li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>,
initially pointing at the start of the string.</p></li>
<li><p>Let <var data-x="">value</var> be the number 0.</p></li>
<li><p>Let <var data-x="">unit</var> be <i>absolute</i>.</p></li>
<li><p>If <var data-x="">position</var> is past the end of <var data-x="">input</var>, set <var
data-x="">unit</var> to <i>relative</i> and jump to the last substep.</p></li>
<li><p>If the character at <var data-x="">position</var> is an <span data-x="ASCII
digits">ASCII digit</span>, <span>collect a sequence of characters</span> that are <span>ASCII
digits</span>, interpret the resulting sequence as an integer in base ten, and increment <var
data-x="">value</var> by that integer.</p></li>
<li>
<p>If the character at <var data-x="">position</var> is a U+002E FULL STOP character (.), run
these substeps:</p>
<ol>
<li><p><span>Collect a sequence of characters</span> consisting of <span data-x="space
character">space characters</span> and <span>ASCII digits</span>. Let <var data-x="">s</var>
be the resulting sequence.</p></li>
<li><p>Remove all <span data-x="space character">space characters</span> in <var
data-x="">s</var>.</p></li>
<li>
<p>If <var data-x="">s</var> is not the empty string, run these subsubsteps:</p>
<ol>
<li><p>Let <var data-x="">length</var> be the number of characters in <var
data-x="">s</var> (after the spaces were removed).</p></li>
<li><p>Let <var data-x="">fraction</var> be the result of interpreting <var
data-x="">s</var> as a base-ten integer, and then dividing that number by <span
data-x="">10<sup data-x=""><var data-x="">length</var></sup></span>.</p></li>
<li><p>Increment <var data-x="">value</var> by <var data-x="">fraction</var>.</p></li>
</ol>
</li>
</ol>
</li>
<li><p><span>Skip whitespace</span>.</p></li>
<li>
<p>If the character at <var data-x="">position</var> is a U+0025 PERCENT SIGN character (%),
then set <var data-x="">unit</var> to <i>percentage</i>.</p>
<p>Otherwise, if the character at <var data-x="">position</var> is a U+002A ASTERISK character
(*), then set <var data-x="">unit</var> to <i>relative</i>.</p>
</li>
<!-- the remaining characters in /input/ are ignored -->
<li><p>Add an entry to <var data-x="">result</var> consisting of the number given by <var
data-x="">value</var> and the unit given by <var data-x="">unit</var>.</p></li>
</ol>
</li>
<li><p>Return the list <var data-x="">result</var>.</p></li>
</ol>
</div>
<h4>Dates and times</h4>
<p>In the algorithms below, the <dfn>number of days in month <var data-x="">month</var> of year
<var data-x="">year</var></dfn> is: <em>31</em> if <var data-x="">month</var> is 1, 3, 5, 7, 8,
10, or 12; <em>30</em> if <var data-x="">month</var> is 4, 6, 9, or 11; <em>29</em> if <var
data-x="">month</var> is 2 and <var data-x="">year</var> is a number divisible by 400, or if <var
data-x="">year</var> is a number divisible by 4 but not by 100; and <em>28</em> otherwise. This
takes into account leap years in the Gregorian calendar. <a
href="#refsGREGORIAN">[GREGORIAN]</a></p>
<p>When <span>ASCII digits</span> are used in the date and time syntaxes defined in this section,
they express numbers in base ten.</p>
<div class="nodev">
<p class="note">While the formats described here are intended to be subsets of the corresponding
ISO8601 formats, this specification defines parsing rules in much more detail than ISO8601.
Implementors are therefore encouraged to carefully examine any date parsing libraries before using
them to implement the parsing rules described below; ISO8601 libraries might not parse dates and
times in exactly the same manner. <a href="#refsISO8601">[ISO8601]</a></p>
</div>
<p>Where this specification refers to the <dfn>proleptic Gregorian calendar</dfn>, it means the
modern Gregorian calendar, extrapolated backwards to year 1. A date in the <span>proleptic
Gregorian calendar</span>, sometimes explicitly referred to as a <dfn>proleptic-Gregorian
date</dfn>, is one that is described using that calendar even if that calendar was not in use at
the time (or place) in question. <a href="#refsGREGORIAN">[GREGORIAN]</a></p>
<p class="note">The use of the Gregorian calendar as the wire format in this specification is an
arbitrary choice resulting from the cultural biases of those involved in the decision. See also
the section discussing <a href="#input-author-notes">date, time, and number formats</a> in forms
<span class="nodev">(for authors), <a href="#input-impl-notes">implementation notes regarding
localization of form controls</a>,</span> and the <code>time</code> element.</p>
<h5>Months</h5>
<p>A <dfn data-x="concept-month">month</dfn> consists of a specific <span>proleptic-Gregorian
date</span> with no time-zone information and no date information beyond a year and a month. <a
href="#refsGREGORIAN">[GREGORIAN]</a></p>
<p>A string is a <dfn>valid month string</dfn> representing a year <var data-x="">year</var> and
month <var data-x="">month</var> if it consists of the following components in the given order:</p>
<ol>
<li>Four or more <span>ASCII digits</span>, representing <var data-x="">year</var>, where <var
data-x="">year</var>&nbsp;&gt;&nbsp;0</li>
<li>A U+002D HYPHEN-MINUS character (-)</li>
<li>Two <span>ASCII digits</span>, representing the month <var data-x="">month</var>, in the range
1&nbsp;&le;&nbsp;<var data-x="">month</var>&nbsp;&le;&nbsp;12</li>
</ol>
<div class="nodev">
<p>The rules to <dfn>parse a month string</dfn> are as follows. This will return either a year and
month, or nothing. If at any point the algorithm says that it "fails", this means that it is
aborted at that point and returns nothing.</p>
<ol>
<li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
<li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
pointing at the start of the string.</p></li>
<li><p><span>Parse a month component</span> to obtain <var data-x="">year</var> and <var
data-x="">month</var>. If this returns nothing, then fail.</p></li>
<li><p>If <var data-x="">position</var> is <em>not</em> beyond the
end of <var data-x="">input</var>, then fail.</p></li>
<li><p>Return <var data-x="">year</var> and <var data-x="">month</var>.</p></li>
</ol>
<p>The rules to <dfn>parse a month component</dfn>, given an <var data-x="">input</var> string and
a <var data-x="">position</var>, are as follows. This will return either a year and a month, or
nothing. If at any point the algorithm says that it "fails", this means that it is aborted at that
point and returns nothing.</p>
<ol>
<li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>. If the
collected sequence is not at least four characters long, then fail. Otherwise, interpret the
resulting sequence as a base-ten integer. Let that number be the <var
data-x="">year</var>.</p></li>
<li><p>If <var data-x="">year</var> is not a number greater than zero, then fail.</p></li>
<li><p>If <var data-x="">position</var> is beyond the end of <var data-x="">input</var> or if the
character at <var data-x="">position</var> is not a U+002D HYPHEN-MINUS character, then fail.
Otherwise, move <var data-x="">position</var> forwards one character.</p></li>
<li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>. If the
collected sequence is not exactly two characters long, then fail. Otherwise, interpret the
resulting sequence as a base-ten integer. Let that number be the <var
data-x="">month</var>.</p></li>
<li><p>If <var data-x="">month</var> is not a number in the range 1&nbsp;&le;&nbsp;<var
data-x="">month</var>&nbsp;&le;&nbsp;12, then fail.</p></li>
<li><p>Return <var data-x="">year</var> and <var data-x="">month</var>.</p></li>
</ol>
</div>
<h5>Dates</h5>
<p>A <dfn data-x="concept-date">date</dfn> consists of a specific <span>proleptic-Gregorian
date</span> with no time-zone information, consisting of a year, a month, and a day. <a
href="#refsGREGORIAN">[GREGORIAN]</a></p>
<p>A string is a <dfn>valid date string</dfn> representing a year <var data-x="">year</var>, month
<var data-x="">month</var>, and day <var data-x="">day</var> if it consists of the following
components in the given order:</p>
<ol>
<li>A <span>valid month string</span>, representing <var data-x="">year</var> and <var
data-x="">month</var></li>
<li>A U+002D HYPHEN-MINUS character (-)</li>
<li>Two <span>ASCII digits</span>, representing <var data-x="">day</var>, in the range
1&nbsp;&le;&nbsp;<var data-x="">day</var>&nbsp;&le;&nbsp;<var data-x="">maxday</var> where <var
data-x="">maxday</var> is the <span data-x="number of days in month month of year year">number of
days in the month <var data-x="">month</var> and year <var data-x="">year</var></span></li>
</ol>
<div class="nodev">
<p>The rules to <dfn>parse a date string</dfn> are as follows. This will return either a date, or
nothing. If at any point the algorithm says that it "fails", this means that it is aborted at that
point and returns nothing.</p>
<ol>
<li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
<li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
pointing at the start of the string.</p></li>
<li><p><span>Parse a date component</span> to obtain <var data-x="">year</var>, <var
data-x="">month</var>, and <var data-x="">day</var>. If this returns nothing, then fail.</p></li>
<li><p>If <var data-x="">position</var> is <em>not</em> beyond the end of <var
data-x="">input</var>, then fail.</p></li>
<li><p>Let <var data-x="">date</var> be the date with year <var data-x="">year</var>, month <var
data-x="">month</var>, and day <var data-x="">day</var>.</p></li>
<li><p>Return <var data-x="">date</var>.</p></li>
</ol>
<p>The rules to <dfn>parse a date component</dfn>, given an <var data-x="">input</var> string and a
<var data-x="">position</var>, are as follows. This will return either a year, a month, and a day,
or nothing. If at any point the algorithm says that it "fails", this means that it is aborted at
that point and returns nothing.</p>
<ol>
<li><p><span>Parse a month component</span> to obtain <var data-x="">year</var> and <var
data-x="">month</var>. If this returns nothing, then fail.</p></li>
<li><p>Let <var data-x="">maxday</var> be the <span>number of days in month <var
data-x="">month</var> of year <var data-x="">year</var></span>.</p></li>
<li><p>If <var data-x="">position</var> is beyond the end of <var data-x="">input</var> or if the
character at <var data-x="">position</var> is not a U+002D HYPHEN-MINUS character, then fail.
Otherwise, move <var data-x="">position</var> forwards one character.</p></li>
<li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>. If the
collected sequence is not exactly two characters long, then fail. Otherwise, interpret the
resulting sequence as a base-ten integer. Let that number be the <var
data-x="">day</var>.</p></li>
<li><p>If <var data-x="">day</var> is not a number in the range 1&nbsp;&le;&nbsp;<var
data-x="">day</var>&nbsp;&le;&nbsp;<var data-x="">maxday</var>, then fail.</p></li>
<li><p>Return <var data-x="">year</var>, <var data-x="">month</var>, and <var
data-x="">day</var>.</p></li>
</ol>
</div>
<h5>Yearless dates</h5>
<p>A <dfn data-x="concept-yearless-date">yearless date</dfn> consists of a Gregorian month and a
day within that month, but with no associated year. <a href="#refsGREGORIAN">[GREGORIAN]</a></p>
<p>A string is a <dfn>valid yearless date string</dfn> representing a month <var
data-x="">month</var> and a day <var data-x="">day</var> if it consists of the following components
in the given order:</p>
<ol>
<li>Optionally, two U+002D HYPHEN-MINUS characters (-)</li>
<li>Two <span>ASCII digits</span>, representing the month <var data-x="">month</var>, in the range
1&nbsp;&le;&nbsp;<var data-x="">month</var>&nbsp;&le;&nbsp;12</li>
<li>A U+002D HYPHEN-MINUS character (-)</li>
<li>Two <span>ASCII digits</span>, representing <var data-x="">day</var>, in the range
1&nbsp;&le;&nbsp;<var data-x="">day</var>&nbsp;&le;&nbsp;<var data-x="">maxday</var> where <var
data-x="">maxday</var> is the <span data-x="number of days in month month of year year">number of
days</span> in the month <var data-x="">month</var> and any arbitrary leap year (e.g. 4 or
2000)</li>
</ol>
<p class="note">In other words, if the <var data-x="">month</var> is "<code data-x="">02</code>",
meaning February, then the day can be 29, as if the year was a leap year.</p>
<div class="nodev">
<p>The rules to <dfn>parse a yearless date string</dfn> are as follows. This will return either a
month and a day, or nothing. If at any point the algorithm says that it "fails", this means that
it is aborted at that point and returns nothing.</p>
<ol>
<li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
<li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
pointing at the start of the string.</p></li>
<li><p><span>Parse a yearless date component</span> to obtain <var data-x="">month</var> and <var
data-x="">day</var>. If this returns nothing, then fail.</p></li>
<li><p>If <var data-x="">position</var> is <em>not</em> beyond the end of <var
data-x="">input</var>, then fail.</p></li>
<li><p>Return <var data-x="">month</var> and <var data-x="">day</var>.</p></li>
</ol>
<p>The rules to <dfn>parse a yearless date component</dfn>, given an <var data-x="">input</var>
string and a <var data-x="">position</var>, are as follows. This will return either a month and a
day, or nothing. If at any point the algorithm says that it "fails", this means that it is aborted
at that point and returns nothing.</p>
<ol>
<li><p><span>Collect a sequence of characters</span> that are U+002D HYPHEN-MINUS characters (-).
If the collected sequence is not exactly zero or two characters long, then fail.</p></li>
<li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>. If the
collected sequence is not exactly two characters long, then fail. Otherwise, interpret the
resulting sequence as a base-ten integer. Let that number be the <var
data-x="">month</var>.</p></li>
<li><p>If <var data-x="">month</var> is not a number in the range 1&nbsp;&le;&nbsp;<var
data-x="">month</var>&nbsp;&le;&nbsp;12, then fail.</p></li>
<li><p>Let <var data-x="">maxday</var> be the <span data-x="number of days in month month of year
year">number of days</span> in month <var data-x="">month</var> of any arbitrary leap year (e.g. 4
or 2000).</p></li>
<li><p>If <var data-x="">position</var> is beyond the end of <var data-x="">input</var> or if the
character at <var data-x="">position</var> is not a U+002D HYPHEN-MINUS character, then fail.
Otherwise, move <var data-x="">position</var> forwards one character.</p></li>
<li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>. If the
collected sequence is not exactly two characters long, then fail. Otherwise, interpret the
resulting sequence as a base-ten integer. Let that number be the <var
data-x="">day</var>.</p></li>
<li><p>If <var data-x="">day</var> is not a number in the range 1&nbsp;&le;&nbsp;<var
data-x="">day</var>&nbsp;&le;&nbsp;<var data-x="">maxday</var>, then fail.</p></li>
<li><p>Return <var data-x="">month</var> and <var data-x="">day</var>.</p></li>
</ol>
</div>
<h5>Times</h5>
<p>A <dfn data-x="concept-time">time</dfn> consists of a specific time with no time-zone
information, consisting of an hour, a minute, a second, and a fraction of a second.</p>
<p>A string is a <dfn>valid time string</dfn> representing an hour <var data-x="">hour</var>, a
minute <var data-x="">minute</var>, and a second <var data-x="">second</var> if it consists of the
following components in the given order:</p>
<ol>
<li>Two <span>ASCII digits</span>, representing <var data-x="">hour</var>, in the range
0&nbsp;&le;&nbsp;<var data-x="">hour</var>&nbsp;&le;&nbsp;23</li>
<li>A U+003A COLON character (:)</li>
<li>Two <span>ASCII digits</span>, representing <var data-x="">minute</var>, in the range
0&nbsp;&le;&nbsp;<var data-x="">minute</var>&nbsp;&le;&nbsp;59</li>
<li>If <var data-x="">second</var> is non-zero, or optionally if <var data-x="">second</var> is
zero:
<ol>
<li>A U+003A COLON character (:)</li>
<li>Two <span>ASCII digits</span>, representing the integer part <var data-x="">s</var> of <var
data-x="">second</var>, in the range 0&nbsp;&le;&nbsp;<var data-x="">s</var>&nbsp;&le;&nbsp;59</li>
<li>If <var data-x="">second</var> is not an integer, or optionally if <var
data-x="">second</var> is an integer:
<ol>
<li>A U+002E FULL STOP character (.)</li>
<li>One, two, or three <span>ASCII digits</span>, representing the fractional part of <var
data-x="">second</var></li>
</ol>
</li>
</ol>
</li>
</ol>
<p class="note">The <var data-x="">second</var> component cannot be 60 or 61; leap seconds cannot
be represented.</p>
<div class="nodev">
<p>The rules to <dfn>parse a time string</dfn> are as follows. This will return either a time, or
nothing. If at any point the algorithm says that it "fails", this means that it is aborted at that
point and returns nothing.</p>
<ol>
<li><p>Let <var data-x="">input</var> be the string being parsed.</p></li>
<li><p>Let <var data-x="">position</var> be a pointer into <var data-x="">input</var>, initially
pointing at the start of the string.</p></li>
<li><p><span>Parse a time component</span> to obtain <var data-x="">hour</var>, <var
data-x="">minute</var>, and <var data-x="">second</var>. If this returns nothing, then fail.</p></li>
<li><p>If <var data-x="">position</var> is <em>not</em> beyond the end of <var
data-x="">input</var>, then fail.</p></li>
<li><p>Let <var data-x="">time</var> be the time with hour <var data-x="">hour</var>, minute <var
data-x="">minute</var>, and second <var data-x="">second</var>.</p></li>
<li><p>Return <var data-x="">time</var>.</p></li>
</ol>
<p>The rules to <dfn>parse a time component</dfn>, given an <var data-x="">input</var> string and a
<var data-x="">position</var>, are as follows. This will return either an hour, a minute, and a
second, or nothing. If at any point the algorithm says that it "fails", this means that it is
aborted at that point and returns nothing.</p>
<ol>
<li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>. If the
collected sequence is not exactly two characters long, then fail. Otherwise, interpret the
resulting sequence as a base-ten integer. Let that number be the <var
data-x="">hour</var>.</p></li>
<li><p>If <var data-x="">hour</var> is not a number in the range 0&nbsp;&le;&nbsp;<var
data-x="">hour</var>&nbsp;&le;&nbsp;23, then fail.</p></li>
<li><p>If <var data-x="">position</var> is beyond the end of <var data-x="">input</var> or if the
character at <var data-x="">position</var> is not a U+003A COLON character, then fail. Otherwise,
move <var data-x="">position</var> forwards one character.</p></li>
<li><p><span>Collect a sequence of characters</span> that are <span>ASCII digits</span>. If the
collected sequence is not exactly two characters long, then fail. Otherwise, interpret the
resulting sequence as a base-ten integer. Let that number be the <var
data-x="">minute</var>.</p></li>
<li><p>If <var data-x="">minute</var> is not a number in the range 0&nbsp;&le;&nbsp;<var
data-x="">minute</var>&nbsp;&le;&nbsp;59, then fail.</p></li>
<li><p>Let <var data-x="">second</var> be a string with the value "0".</p></li>
<li>
<p>If <var data-x="">position</var> is not beyond the end of <var data-x="">input</var> and the
character at <var data-x="">position</var> is a U+003A COLON, then run these substeps:</p>
<ol>
<li><p>Advance <var data-x="">position</var> to the next character in <var
data-x="">input</var>.</p></li>
<li><p>If <var data-x="">position</var> is beyond the end of <var data-x="">input</var>, or at
the last character in <var data-x="">input</var>, or if the next <em>two</em> characters in <var
data-x="">input</var> starting at <var data-x="">position</var> are not both <span>ASCII
digits</span>, then fail.</p></li>
<li><p><span>Collect a sequence of characters</span> that are either <span>ASCII digits</span>
or U+002E FULL STOP characters. If the collected sequence is three characters long, or if it is
more than three characters long and the third character is not a U+002E FULL STOP character,
or if it has more than one U+002E FULL STOP character, then fail. Otherwise, let the collected
string be <var data-x="">second</var> instead of its previous value.</p></li>
</ol>
</li>
<li><p>Interpret <var data-x="">second</var> as a base-ten number (possibly with a fractional
part). Let <var data-x="">second</var> be that number instead of the string version.</p></li>
<li><p>If <var data-x="">second</var> is not a number in the range 0&nbsp;&le;&nbsp;<var
data-x="">second</var>&nbsp;&lt;&nbsp;60, then fail.</p></li>
<li><p>Return <var data-x="">hour</var>, <var data-x="">minute</var>, and <var
data-x="">second</var>.</p></li>
</ol>
</div>
<h5>Local dates and times</h5>
<p>A <dfn data-x="concept-datetime-local">local date and time</dfn> consists of a specific
<span>proleptic-Gregorian date</span>, consisting of a year, a month, and a day, and a time,
consisting of an hour, a minute, a second, and a fraction of a second, but expressed without a
time zone. <a href="#refsGREGORIAN">[GREGORIAN]</a></p>
<p>A string is a <dfn>valid local date and time string</dfn> representing a date and time if it
consists of the following components in the given order:</p>
<ol>
<li>A <span>valid date string</span> representing the date</li>
<li>A U+0054 LATIN CAPITAL LETTER T character (T) or a U+0020 SPACE character</li>
<li>A <span>valid time string</span> representing the time</li>
</ol>
<p>A string is a <dfn>valid normalised local date and time string</dfn> representing a date and
time if it consists of the following components in the given order:</p>
<ol>
<li>A <span>valid date string</span> representing the date</li>
<li>A U+0054 LATIN CAPITAL LETTER T character (T)</li>
<li>A <span>valid time string</span> representing the time, expressed as the shortest possible
string for the given time (e.g. omitting the seconds component entirely if the given time is zero
seconds past the minute)</li>
</ol>
<div class="nodev">
<p>The rules to <dfn>parse a local date and time string</dfn> are as follows. This will return
either a date and time, or nothing. If at any point the algorithm says that it "fails", this means