// Copyright 2014-2017 The html5ever Project Developers. // Copyright Michael Howell and others. // // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your // option. This file may not be copied, modified, or distributed // except according to those terms. #![allow(missing_docs, dead_code)] //! A simple reference-counted DOM. //! //! This is sufficient as a static parse tree, but don't build a //! web browser using it. :) //! //! A DOM is a [tree structure] with ordered children that can be represented in an XML-like //! format. For example, the following graph //! //! ```text //! div //! +- "text node" //! +- span //! ``` //! in HTML would be serialized as //! //! ```html //! <div>text node<span></span></div> //! ``` //! //! See the [document object model article on wikipedia][dom wiki] for more information. //! //! This implementation stores the information associated with each node once, and then hands out //! refs to children. The nodes themselves are reference-counted to avoid copying - you can create //! a new ref and then a node will outlive the document. Nodes own their children, but only have //! weak references to their parents. //! //! [tree structure]: https://en.wikipedia.org/wiki/Tree_(data_structure) //! [dom wiki]: https://en.wikipedia.org/wiki/Document_Object_Model use std::borrow::Cow; use std::cell::{Cell, RefCell}; use std::collections::{HashSet, VecDeque}; use std::default::Default; use std::fmt; use std::io; use std::mem; use std::rc::{Rc, Weak}; use html5ever::tendril::StrTendril; use html5ever::interface::tree_builder; use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; use html5ever::serialize::TraversalScope; use html5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode}; use html5ever::serialize::{Serialize, Serializer}; use html5ever::Attribute; use html5ever::ExpandedName; use html5ever::QualName; /// The different kinds of nodes in the DOM. #[derive(Debug)] pub enum NodeData { /// The `Document` itself - the root node of a HTML document. Document, /// A `DOCTYPE` with name, public id, and system id. See /// [document type declaration on wikipedia][dtd wiki]. /// /// [dtd wiki]: https://en.wikipedia.org/wiki/Document_type_declaration Doctype { name: StrTendril, public_id: StrTendril, system_id: StrTendril, }, /// A text node. Text { contents: RefCell<StrTendril> }, /// A comment. Comment { contents: StrTendril }, /// An element with attributes. Element { name: QualName, attrs: RefCell<Vec<Attribute>>, /// For HTML \<template\> elements, the [template contents]. /// /// [template contents]: https://html.spec.whatwg.org/multipage/#template-contents template_contents: RefCell<Option<Handle>>, /// Whether the node is a [HTML integration point]. /// /// [HTML integration point]: https://html.spec.whatwg.org/multipage/#html-integration-point mathml_annotation_xml_integration_point: bool, }, /// A Processing instruction. ProcessingInstruction { target: StrTendril, contents: StrTendril, }, } /// A DOM node. pub struct Node { /// Parent node. pub parent: Cell<Option<WeakHandle>>, /// Child nodes of this node. pub children: RefCell<Vec<Handle>>, /// Represents this node's data. pub data: NodeData, } impl Node { /// Create a new node from its contents pub fn new(data: NodeData) -> Rc<Self> { Rc::new(Node { data, parent: Cell::new(None), children: RefCell::new(Vec::new()), }) } } impl Drop for Node { fn drop(&mut self) { let mut nodes = mem::take(&mut *self.children.borrow_mut()); while let Some(node) = nodes.pop() { let children = mem::take(&mut *node.children.borrow_mut()); nodes.extend(children.into_iter()); if let NodeData::Element { ref template_contents, .. } = node.data { if let Some(template_contents) = template_contents.borrow_mut().take() { nodes.push(template_contents); } } } } } impl fmt::Debug for Node { fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { fmt.debug_struct("Node") .field("data", &self.data) .field("children", &self.children) .finish() } } /// Reference to a DOM node. pub type Handle = Rc<Node>; /// Weak reference to a DOM node, used for parent pointers. pub type WeakHandle = Weak<Node>; /// Append a parentless node to another nodes' children fn append(new_parent: &Handle, child: Handle) { let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent))); // Invariant: child cannot have existing parent assert!(previous_parent.is_none()); new_parent.children.borrow_mut().push(child); } /// If the node has a parent, get it and this node's position in its children fn get_parent_and_index(target: &Handle) -> Option<(Handle, usize)> { if let Some(weak) = target.parent.take() { let parent = weak.upgrade().expect("dangling weak pointer"); target.parent.set(Some(weak)); let i = match parent .children .borrow() .iter() .enumerate() .find(|&(_, child)| Rc::ptr_eq(child, target)) { Some((i, _)) => i, None => panic!("have parent but couldn't find in parent's children!"), }; Some((parent, i)) } else { None } } fn append_to_existing_text(prev: &Handle, text: &str) -> bool { match prev.data { NodeData::Text { ref contents } => { contents.borrow_mut().push_slice(text); true } _ => false, } } fn remove_from_parent(target: &Handle) { if let Some((parent, i)) = get_parent_and_index(target) { parent.children.borrow_mut().remove(i); target.parent.set(None); } } /// The DOM itself; the result of parsing. pub struct RcDom { /// The `Document` itself. pub document: Handle, /// Errors that occurred during parsing. pub errors: Vec<Cow<'static, str>>, /// The document's quirks mode. pub quirks_mode: QuirksMode, } impl TreeSink for RcDom { type Output = Self; fn finish(self) -> Self { self } type Handle = Handle; fn parse_error(&mut self, msg: Cow<'static, str>) { self.errors.push(msg); } fn get_document(&mut self) -> Handle { self.document.clone() } fn get_template_contents(&mut self, target: &Handle) -> Handle { if let NodeData::Element { ref template_contents, .. } = target.data { template_contents .borrow() .as_ref() .expect("not a template element!") .clone() } else { panic!("not a template element!") } } fn set_quirks_mode(&mut self, mode: QuirksMode) { self.quirks_mode = mode; } fn same_node(&self, x: &Handle, y: &Handle) -> bool { Rc::ptr_eq(x, y) } fn elem_name<'a>(&self, target: &'a Handle) -> ExpandedName<'a> { return match target.data { NodeData::Element { ref name, .. } => name.expanded(), _ => panic!("not an element!"), }; } fn create_element( &mut self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags, ) -> Handle { Node::new(NodeData::Element { name, attrs: RefCell::new(attrs), template_contents: RefCell::new(if flags.template { Some(Node::new(NodeData::Document)) } else { None }), mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point, }) } fn create_comment(&mut self, text: StrTendril) -> Handle { Node::new(NodeData::Comment { contents: text }) } fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Handle { Node::new(NodeData::ProcessingInstruction { target, contents: data, }) } fn append(&mut self, parent: &Handle, child: NodeOrText<Handle>) { // Append to an existing Text node if we have one. if let NodeOrText::AppendText(ref text) = child { if let Some(h) = parent.children.borrow().last() { if append_to_existing_text(h, text) { return; } } } append( parent, match child { NodeOrText::AppendText(text) => Node::new(NodeData::Text { contents: RefCell::new(text), }), NodeOrText::AppendNode(node) => node, }, ); } fn append_before_sibling(&mut self, sibling: &Handle, child: NodeOrText<Handle>) { let (parent, i) = get_parent_and_index(sibling) .expect("append_before_sibling called on node without parent"); let child = match (child, i) { // No previous node. (NodeOrText::AppendText(text), 0) => Node::new(NodeData::Text { contents: RefCell::new(text), }), // Look for a text node before the insertion point. (NodeOrText::AppendText(text), i) => { let children = parent.children.borrow(); let prev = &children[i - 1]; if append_to_existing_text(prev, &text) { return; } Node::new(NodeData::Text { contents: RefCell::new(text), }) } // The tree builder promises we won't have a text node after // the insertion point. // Any other kind of node. (NodeOrText::AppendNode(node), _) => node, }; remove_from_parent(&child); child.parent.set(Some(Rc::downgrade(&parent))); parent.children.borrow_mut().insert(i, child); } fn append_based_on_parent_node( &mut self, element: &Self::Handle, prev_element: &Self::Handle, child: NodeOrText<Self::Handle>, ) { let parent = element.parent.take(); let has_parent = parent.is_some(); element.parent.set(parent); if has_parent { self.append_before_sibling(element, child); } else { self.append(prev_element, child); } } fn append_doctype_to_document( &mut self, name: StrTendril, public_id: StrTendril, system_id: StrTendril, ) { append( &self.document, Node::new(NodeData::Doctype { name, public_id, system_id, }), ); } fn add_attrs_if_missing(&mut self, target: &Handle, attrs: Vec<Attribute>) { let mut existing = if let NodeData::Element { ref attrs, .. } = target.data { attrs.borrow_mut() } else { panic!("not an element") }; let existing_names = existing .iter() .map(|e| e.name.clone()) .collect::<HashSet<_>>(); existing.extend( attrs .into_iter() .filter(|attr| !existing_names.contains(&attr.name)), ); } fn remove_from_parent(&mut self, target: &Handle) { remove_from_parent(target); } fn reparent_children(&mut self, node: &Handle, new_parent: &Handle) { let mut children = node.children.borrow_mut(); let mut new_children = new_parent.children.borrow_mut(); for child in children.iter() { let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent))); assert!(Rc::ptr_eq( node, &previous_parent.unwrap().upgrade().expect("dangling weak") )) } new_children.extend(mem::take(&mut *children)); } fn is_mathml_annotation_xml_integration_point(&self, target: &Handle) -> bool { if let NodeData::Element { mathml_annotation_xml_integration_point, .. } = target.data { mathml_annotation_xml_integration_point } else { panic!("not an element!") } } } impl Default for RcDom { fn default() -> RcDom { RcDom { document: Node::new(NodeData::Document), errors: vec![], quirks_mode: tree_builder::NoQuirks, } } } enum SerializeOp { Open(Handle), Close(QualName), } pub struct SerializableHandle(Handle); impl From<Handle> for SerializableHandle { fn from(h: Handle) -> SerializableHandle { SerializableHandle(h) } } impl Serialize for SerializableHandle { fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()> where S: Serializer, { let mut ops = VecDeque::new(); match traversal_scope { IncludeNode => ops.push_back(SerializeOp::Open(self.0.clone())), ChildrenOnly(_) => ops.extend( self.0 .children .borrow() .iter() .map(|h| SerializeOp::Open(h.clone())), ), } while let Some(op) = ops.pop_front() { match op { SerializeOp::Open(handle) => match handle.data { NodeData::Element { ref name, ref attrs, .. } => { serializer.start_elem( name.clone(), attrs.borrow().iter().map(|at| (&at.name, &at.value[..])), )?; ops.reserve(1 + handle.children.borrow().len()); ops.push_front(SerializeOp::Close(name.clone())); for child in handle.children.borrow().iter().rev() { ops.push_front(SerializeOp::Open(child.clone())); } } NodeData::Doctype { ref name, .. } => serializer.write_doctype(name)?, NodeData::Text { ref contents } => serializer.write_text(&contents.borrow())?, NodeData::Comment { ref contents } => serializer.write_comment(contents)?, NodeData::ProcessingInstruction { ref target, ref contents, } => serializer.write_processing_instruction(target, contents)?, NodeData::Document => panic!("Can't serialize Document node itself"), }, SerializeOp::Close(name) => { serializer.end_elem(name)?; } } } Ok(()) } }