@@ -788,14 +788,6 @@ impl HostLanguage for &HTMLHost {
788788 None // TODO
789789 }
790790
791- // “The current language can be set using either the @lang
792- // or @xml:lang attributes. When the @lang attribute and
793- // the @xml:lang attribute are specified on the same element,
794- // the @xml:lang attribute takes precedence. When both @lang
795- // and @xml:lang are specified on the same element, they MUST
796- // have the same value. Further details related to setting the
797- // current language can be found in section 3.3 Specifying the
798- // Language for a Literal.
799791 fn default_language ( & self ) -> Option < LanguageIdentifier > { // Host-level default language for this host.
800792 None // This host reports no default language; the method always returns `None`.
801793 }
@@ -807,6 +799,18 @@ impl HostLanguage for &HTMLHost {
807799 // NB: note that the "additional initial context" is currently empty.
808800}
809801
802+ struct XHTMLHost { } // Field-less host type; presumably the XHTML counterpart of `HTMLHost` (confirm against callers).
803+
804+ impl HostLanguage for & XHTMLHost { // Implemented for references, mirroring `impl HostLanguage for &HTMLHost`.
805+ fn default_language ( & self ) -> Option < LanguageIdentifier > { // Host-level default language.
806+ None // TODO: stub; no default language is provided for XHTML yet.
807+ }
808+
809+ fn default_vocabulary ( & self ) -> Option < oxrdf:: NamedNode > { // Host-level default vocabulary IRI.
810+ None // TODO: stub; no default vocabulary is provided for XHTML yet.
811+ }
812+ }
813+
810814fn emit_processor ( pg : & mut Graph , pg_type : PGType , msg : & str ) {
811815 let warning_subj: oxrdf:: NamedOrBlankNode = oxrdf:: BlankNode :: default ( ) . into ( ) ;
812816 let pg_type: oxrdf:: NamedNodeRef = pg_type. into ( ) ;
@@ -852,7 +856,7 @@ impl<'o, 'p> RDFaProcessor<'o, 'p> {
852856 S :: Child ( element, base_ctx, parent_span) => {
853857 let _span = parent_span. entered ( ) ;
854858
855- let new_ctx = Rc :: new ( self . process_element ( & base_ctx, element, & host) ?) ;
859+ let new_ctx = Rc :: new ( self . process_element ( & base_ctx, & html , element, & host) ?) ;
856860 if element. has_children ( ) {
857861 stack. push ( S :: OutputList (
858862 new_ctx. parent_subject . clone ( ) ,
@@ -912,6 +916,7 @@ impl<'o, 'p> RDFaProcessor<'o, 'p> {
912916 fn process_element (
913917 & mut self ,
914918 eval_context : & EvaluationContext ,
919+ html : & scraper:: Html ,
915920 element : scraper:: ElementRef ,
916921 host : impl HostLanguage ,
917922 ) -> Result < EvaluationContext , Error > {
@@ -1023,11 +1028,31 @@ impl<'o, 'p> RDFaProcessor<'o, 'p> {
10231028 // 3.
10241029 // “Next, the current element is examined for IRI mappings and these are added to the local list of IRI mappings.
10251030 // Note that an IRI mapping will simply overwrite any current mapping in the list that has the same name;
1031+ //
1032+ // [HTML-RDFA]
1033+ // “Extracting URI Mappings declared via @xmlns: while operating from within a DOM Level 2 based RDFa processor
1034+ // can be achieved using the following algorithm:
1035+ // “While processing each DOM2 [Element] as described in [rdfa-core], Section 7.5: Sequence, Step #2:
1036+ // “1. For each [Attr] in the [Node.attributes] list that has a [namespace prefix] value of @xmlns,
1037+ // create an [IRI mapping] by storing the [local name] as the value to be mapped, and the
1038+ // [Node.nodeValue] as the value to map.
1039+ // (Note: this is not done because html5ever/scraper never reports namespace prefixes…)
1040+ // “2. For each [Attr] in the [Node.attributes] list that has a [namespace prefix] value of null
1041+ // and a [local name] that starts with @xmlns:, create an [IRI mapping] by storing the [local name]
1042+ // part with the @xmlns: characters removed as the value to be mapped, and the [Node.nodeValue] as
1043+ // the value to map.
1044+ // (Note: this is what is implemented below…)
10261045 let xmlns_prefixes = el
10271046 . attrs
10281047 . iter ( )
1029- . filter ( |( qn, _) | qn. prefix . as_deref ( ) == Some ( "xmlns" ) )
1030- . map ( |( qn, val) | ( qn. local . as_ref ( ) , val. as_ref ( ) ) )
1048+ . filter_map ( |( qn, value) | -> Option < _ > {
1049+ if qn. prefix . is_none ( ) {
1050+ let prefix = qn. local . strip_prefix ( "xmlns:" ) ?;
1051+ Some ( ( prefix, value) )
1052+ } else {
1053+ None
1054+ }
1055+ } )
10311056 . collect :: < Vec < _ > > ( ) ;
10321057
10331058 let prefixes = el
@@ -1069,6 +1094,16 @@ impl<'o, 'p> RDFaProcessor<'o, 'p> {
10691094 // 4. Language
10701095 // “The current element is also parsed for any language information,
10711096 // and if present, current language is set accordingly;
1097+ //
1098+ // [HTML-RDFA] 3.1
1099+ // “The current language can be set using either the @lang
1100+ // or @xml:lang attributes. When the @lang attribute and
1101+ // the @xml:lang attribute are specified on the same element,
1102+ // the @xml:lang attribute takes precedence. When both @lang
1103+ // and @xml:lang are specified on the same element, they MUST
1104+ // have the same value. Further details related to setting the
1105+ // current language can be found in section 3.3 Specifying the
1106+ // Language for a Literal.
10721107 if let Some ( lang) = el. attr ( "xml:lang" ) . or ( el. attr ( "lang" ) ) {
10731108 if lang. is_empty ( ) {
10741109 local. current_language = None ;
@@ -1621,6 +1656,40 @@ impl<'o, 'p> RDFaProcessor<'o, 'p> {
16211656 // the element itself, and giving it a datatype of XMLLiteral in the vocabulary
16221657 // http://www.w3.org/1999/02/22-rdf-syntax-ns#. The format of the resulting
16231658 // serialized content is as defined in Exclusive XML Canonicalization Version 1.0 [XML-EXC-C14N].
1659+ //
1660+ // [HTML-RDFA]
1661+ // “When generating literals of type XMLLiteral, the processor MUST ensure that the
1662+ // output XMLLiteral is a namespace well-formed XML fragment. A namespace well-formed XML
1663+ // fragment has the following properties:
1664+ // “- The XML fragment, when placed inside of a single root element, MUST validate as well-formed
1665+ // XML. The normative language that describes a well-formed XML document is specified in
1666+ // Section 2.1 "Well-Formed XML Documents" of the XML specification.
1667+ // “- The XML fragment, when placed inside of a single root element, MUST retain all active
1668+ // namespace information. The currently active attributes declared using @xmlns and @xmlns:
1669+ // that are stored in the RDFa processor's current evaluation context in the IRI mappings
1670+ // MUST be preserved in the generated XMLLiteral. The PREFIX value for @xmlns:PREFIX MUST
1671+ // be entirely transformed into lower-case characters when preserving the value in the
1672+ // XMLLiteral. All active namespaces declared via @xmlns, @xmlns:, and @prefix MUST be
1673+ // placed in each top-level element in the generated XMLLiteral, taking care to not overwrite
1674+ // pre-existing namespace values.
1675+ // (TODO: the above is not yet implemented, since I can't figure out how to work with
1676+ // the scraper API effectively here...)
1677+ /*
1678+ let mut output = String::new();
1679+ for child in element.children() {
1680+ if let Some(el) = child.value().as_element() {
1681+ let mut el = el.clone();
1682+ for (prefix, iri) in local.iri_mappings.mappings() {
1683+ let name = html5ever::QualName::new(
1684+ None,
1685+ html5ever::ns!(xmlns),
1686+ html5ever::LocalName::from(prefix.as_str()),
1687+ );
1688+ el.attrs.push((name, iri.to_string().into()));
1689+ }
1690+ } else {
1691+ }
1692+ } */
16241693 serialized = element. inner_html ( ) ;
16251694 oxrdf:: LiteralRef :: new_typed_literal ( & serialized, datatype) . into ( )
16261695 // TODO: incorrect; the serialized markup still needs to be canonicalized (Exclusive XML Canonicalization, c14n)
0 commit comments