HTML5 parsing and anti-xml
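You can use the Validator.nu HTML Parser with anti-xml by subclassing anti-xml's `SAXParser` and routing the HTML parser's SAX events into a `NodeSeqSAXHandler`: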
import com.codecommit.antixml.{NodeSeqSAXHandler, SAXParser}
import org.xml.sax.InputSource
import nu.validator.htmlparser.sax.HtmlParser
import nu.validator.htmlparser.common.XmlViolationPolicy

/** An anti-xml `SAXParser` that reads its input with the Validator.nu
  * `HtmlParser` instead of a regular XML reader.
  */
class HTMLParser extends SAXParser {

  /** Parses `source` with a fresh `HtmlParser`, collecting the SAX events in a
    * `NodeSeqSAXHandler`.
    *
    * @param source the SAX input source to read the document from
    * @return the first node of the handler's result
    */
  override def fromInputSource(source: InputSource) = {
    val nodeCollector = new NodeSeqSAXHandler
    val htmlReader = new HtmlParser

    // Wire the collector in as the SAX content handler before parsing starts.
    htmlReader.setContentHandler(nodeCollector)
    // NOTE(review): ALLOW presumably lets names that are not valid XML names
    // pass through rather than being altered or rejected — confirm against the
    // Validator.nu documentation.
    htmlReader.setNamePolicy(XmlViolationPolicy.ALLOW)

    htmlReader.parse(source)

    // The handler accumulates a node sequence; only its first node is returned.
    nodeCollector.result.head
  }
}