HTML5 parsing and anti-xml

You can use the Validator.nu HTML Parser with anti-xml by subclassing anti-xml's SAXParser and overriding fromInputSource, as shown below.

import com.codecommit.antixml.{NodeSeqSAXHandler, SAXParser}
import org.xml.sax.InputSource
import nu.validator.htmlparser.sax.HtmlParser
import nu.validator.htmlparser.common.XmlViolationPolicy

/**
 * A `SAXParser` that overrides `fromInputSource` so that the input is read by
 * the Validator.nu `HtmlParser`, with the SAX events collected by a
 * `NodeSeqSAXHandler`.
 */
class HTMLParser extends SAXParser {
  /**
   * Parses `source` with a freshly created `HtmlParser`.
   *
   * @param source the SAX input source to read
   * @return the first item of the handler's accumulated result
   */
  override def fromInputSource(source: InputSource) = {
    val html5 = new HtmlParser
    val sink = new NodeSeqSAXHandler

    // NOTE(review): ALLOW presumably lets names that are not legal XML names
    // pass through unchanged; confirm against the Validator.nu docs.
    html5.setNamePolicy(XmlViolationPolicy.ALLOW)
    html5.setContentHandler(sink)
    html5.parse(source)

    // Only the first node the handler collected is returned.
    val parsed = sink.result
    parsed.head
  }
}