HTML5 parsing and anti-xml
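You can use the Validator.nu HTML Parser with anti-xml by subclassing anti-xml's SAXParser and sending the HTML parser's SAX events to a NodeSeqSAXHandler: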
import com.codecommit.antixml.{NodeSeqSAXHandler, SAXParser}
import org.xml.sax.InputSource
import nu.validator.htmlparser.sax.HtmlParser
import nu.validator.htmlparser.common.XmlViolationPolicy
/**
 * An anti-xml `SAXParser` whose input is read by the Validator.nu
 * `HtmlParser`, with the resulting SAX events collected by a
 * `NodeSeqSAXHandler`.
 */
class HTMLParser extends SAXParser {

  /**
   * Parses `source` with the Validator.nu HTML parser.
   *
   * @param source the SAX input source to read the document from
   * @return the first item of the node sequence built by the handler
   *         (presumably the document's root element -- confirm against
   *         `NodeSeqSAXHandler`)
   */
  override def fromInputSource(source: InputSource) = {
    val htmlReader = new HtmlParser
    val nodeCollector = new NodeSeqSAXHandler

    // Every SAX content event emitted by the HTML parser goes to the collector.
    htmlReader.setContentHandler(nodeCollector)
    // NOTE(review): ALLOW presumably passes through names that are not valid
    // XML names instead of altering or rejecting them -- confirm against the
    // Validator.nu documentation.
    htmlReader.setNamePolicy(XmlViolationPolicy.ALLOW)

    htmlReader.parse(source)
    nodeCollector.result.head
  }
}