Merge pull request #4688 from BookStackApp/include-parser

New include tag parser
2024-11-23 11:22:33 +01:00 · 2023-11-27 21:54:18 +00:00 · 2023-11-27 21:54:18 +00:00 · 1011d61713
commit 1011d61713
parent 22a9cf1e48 652d5417bf
10 changed files with 669 additions and 134 deletions
--- a/app/Entities/Tools/PageContent.php
+++ b/app/Entities/Tools/PageContent.php
@ -11,6 +11,7 @@ use BookStack\Uploads\ImageRepo;
 use BookStack\Uploads\ImageService;
 use BookStack\Util\HtmlContentFilter;
 use BookStack\Util\HtmlDocument;
+use Closure;
 use DOMElement;
 use DOMNode;
 use DOMNodeList;
@ -275,21 +276,65 @@ class PageContent
     */
    public function render(bool $blankIncludes = false): string
    {
-        $content = $this->page->html ?? '';
+        $html = $this->page->html ?? '';
+
+        if (empty($html)) {
+            return $html;
+        }
+
+        $doc = new HtmlDocument($html);
+        $contentProvider = $this->getContentProviderClosure($blankIncludes);
+        $parser = new PageIncludeParser($doc, $contentProvider);
+
+        $nodesAdded = 1;
+        for ($includeDepth = 0; $includeDepth < 3 && $nodesAdded !== 0; $includeDepth++) {
+            $nodesAdded = $parser->parse();
+        }
+
+        if ($includeDepth > 1) {
+            $idMap = [];
+            $changeMap = [];
+            $this->updateIdsRecursively($doc->getBody(), 0, $idMap, $changeMap);
+        }

        if (!config('app.allow_content_scripts')) {
-            $content = HtmlContentFilter::removeScripts($content);
+            HtmlContentFilter::removeScriptsFromDocument($doc);
        }

-        if ($blankIncludes) {
-            $content = $this->blankPageIncludes($content);
-        } else {
-            for ($includeDepth = 0; $includeDepth < 3; $includeDepth++) {
-                $content = $this->parsePageIncludes($content);
+        return $doc->getBodyInnerHtml();
+    }
+
+    /**
+     * Get the closure used to fetch content for page includes.
+     */
+    protected function getContentProviderClosure(bool $blankIncludes): Closure
+    {
+        $contextPage = $this->page;
+
+        return function (PageIncludeTag $tag) use ($blankIncludes, $contextPage): PageIncludeContent {
+            if ($blankIncludes) {
+                return PageIncludeContent::fromHtmlAndTag('', $tag);
            }
-        }

-        return $content;
+            $matchedPage = Page::visible()->find($tag->getPageId());
+            $content = PageIncludeContent::fromHtmlAndTag($matchedPage->html ?? '', $tag);
+
+            if (Theme::hasListeners(ThemeEvents::PAGE_INCLUDE_PARSE)) {
+                $themeReplacement = Theme::dispatch(
+                    ThemeEvents::PAGE_INCLUDE_PARSE,
+                    $tag->tagContent,
+                    $content->toHtml(),
+                    clone $contextPage,
+                    $matchedPage ? (clone $matchedPage) : null,
+                );
+
+                if ($themeReplacement !== null) {
+                    $content = PageIncludeContent::fromInlineHtml(strval($themeReplacement));
+                }
+            }
+
+            return $content;
+        };
    }

    /**
@ -337,83 +382,4 @@ class PageContent

        return $tree->toArray();
    }
-
-    /**
-     * Remove any page include tags within the given HTML.
-     */
-    protected function blankPageIncludes(string $html): string
-    {
-        return preg_replace("/{{@\s?([0-9].*?)}}/", '', $html);
-    }
-
-    /**
-     * Parse any include tags "{{@<page_id>#section}}" to be part of the page.
-     */
-    protected function parsePageIncludes(string $html): string
-    {
-        $matches = [];
-        preg_match_all("/{{@\s?([0-9].*?)}}/", $html, $matches);
-
-        foreach ($matches[1] as $index => $includeId) {
-            $fullMatch = $matches[0][$index];
-            $splitInclude = explode('#', $includeId, 2);
-
-            // Get page id from reference
-            $pageId = intval($splitInclude[0]);
-            if (is_nan($pageId)) {
-                continue;
-            }
-
-            // Find page to use, and default replacement to empty string for non-matches.
-            /** @var ?Page $matchedPage */
-            $matchedPage = Page::visible()->find($pageId);
-            $replacement = '';
-
-            if ($matchedPage && count($splitInclude) === 1) {
-                // If we only have page id, just insert all page html and continue.
-                $replacement = $matchedPage->html;
-            } elseif ($matchedPage && count($splitInclude) > 1) {
-                // Otherwise, if our include tag defines a section, load that specific content
-                $innerContent = $this->fetchSectionOfPage($matchedPage, $splitInclude[1]);
-                $replacement = trim($innerContent);
-            }
-
-            $themeReplacement = Theme::dispatch(
-                ThemeEvents::PAGE_INCLUDE_PARSE,
-                $includeId,
-                $replacement,
-                clone $this->page,
-                $matchedPage ? (clone $matchedPage) : null,
-            );
-
-            // Perform the content replacement
-            $html = str_replace($fullMatch, $themeReplacement ?? $replacement, $html);
-        }
-
-        return $html;
-    }
-
-    /**
-     * Fetch the content from a specific section of the given page.
-     */
-    protected function fetchSectionOfPage(Page $page, string $sectionId): string
-    {
-        $topLevelTags = ['table', 'ul', 'ol', 'pre'];
-        $doc = new HtmlDocument($page->html);
-
-        // Search included content for the id given and blank out if not exists.
-        $matchingElem = $doc->getElementById($sectionId);
-        if ($matchingElem === null) {
-            return '';
-        }
-
-        // Otherwise replace the content with the found content
-        // Checks if the top-level wrapper should be included by matching on tag types
-        $isTopLevel = in_array(strtolower($matchingElem->nodeName), $topLevelTags);
-        if ($isTopLevel) {
-            return $doc->getNodeOuterHtml($matchingElem);
-        }
-
-        return $doc->getNodeInnerHtml($matchingElem);
-    }
 }
--- a/app/Entities/Tools/PageIncludeContent.php
+++ b/app/Entities/Tools/PageIncludeContent.php
@ -0,0 +1,85 @@
+<?php
+
+namespace BookStack\Entities\Tools;
+
+use BookStack\Util\HtmlDocument;
+use DOMNode;
+
+/**
+ * The content resolved for a single page include tag.
+ * Holds the DOM nodes to insert, along with whether those nodes
+ * should be treated as inline content or as block-level content.
+ */
+class PageIncludeContent
+{
+    /**
+     * Element names which are included as a whole (wrapper element included)
+     * when targeted by a section ID, rather than just their children.
+     */
+    protected static array $topLevelTags = ['table', 'ul', 'ol', 'pre'];
+
+    /**
+     * @param DOMNode[] $contents
+     * @param bool $isInline
+     */
+    public function __construct(
+        protected array $contents,
+        protected bool $isInline,
+    ) {
+    }
+
+    /**
+     * Build include content from the HTML of a page, narrowed down to the
+     * section referenced by the given tag where one is specified.
+     * Empty HTML or an unmatched section ID produce empty inline content.
+     */
+    public static function fromHtmlAndTag(string $html, PageIncludeTag $tag): self
+    {
+        // Compare against the empty string directly since empty() would also
+        // discard the valid content string '0'.
+        if ($html === '') {
+            return new self([], true);
+        }
+
+        $doc = new HtmlDocument($html);
+
+        // A section ID of '0' is a usable element ID, so only the empty
+        // string is considered as "no section specified".
+        $sectionId = $tag->getSectionId();
+        if ($sectionId === '') {
+            $contents = [...$doc->getBodyChildren()];
+            return new self($contents, false);
+        }
+
+        $section = $doc->getElementById($sectionId);
+        if (!$section) {
+            return new self([], true);
+        }
+
+        // Top-level tag types are included with their wrapper as block content,
+        // anything else contributes only its children as inline content.
+        $isTopLevel = in_array(strtolower($section->nodeName), static::$topLevelTags, true);
+        $contents = $isTopLevel ? [$section] : [...$section->childNodes];
+        return new self($contents, !$isTopLevel);
+    }
+
+    /**
+     * Build inline include content from the given HTML string,
+     * using all body children of the parsed HTML.
+     */
+    public static function fromInlineHtml(string $html): self
+    {
+        // As above, '0' is valid content so only a truly empty string is skipped.
+        if ($html === '') {
+            return new self([], true);
+        }
+
+        $doc = new HtmlDocument($html);
+
+        return new self([...$doc->getBodyChildren()], true);
+    }
+
+    /**
+     * Check if this content is to be treated as inline content.
+     */
+    public function isInline(): bool
+    {
+        return $this->isInline;
+    }
+
+    /**
+     * Check if this content holds no nodes.
+     */
+    public function isEmpty(): bool
+    {
+        return empty($this->contents);
+    }
+
+    /**
+     * @return DOMNode[]
+     */
+    public function toDomNodes(): array
+    {
+        return $this->contents;
+    }
+
+    /**
+     * Serialize the held nodes to a HTML string, in order,
+     * each via the document that owns the node.
+     */
+    public function toHtml(): string
+    {
+        $html = '';
+
+        foreach ($this->contents as $content) {
+            $html .= $content->ownerDocument->saveHTML($content);
+        }
+
+        return $html;
+    }
+}
--- a/app/Entities/Tools/PageIncludeParser.php
+++ b/app/Entities/Tools/PageIncludeParser.php
@ -0,0 +1,220 @@
+<?php
+
+namespace BookStack\Entities\Tools;
+
+use BookStack\Util\HtmlDocument;
+use Closure;
+use DOMDocument;
+use DOMElement;
+use DOMNode;
+use DOMText;
+
+/**
+ * Parses include tags ("{{@<page_id>#section}}") found within the text of a
+ * HTML document, replacing them with DOM content supplied by a provider closure.
+ */
+class PageIncludeParser
+{
+    protected static string $includeTagRegex = "/{{@\s?([0-9].*?)}}/";
+
+    /**
+     * Elements to clean up and remove if left empty after a parsing operation.
+     * @var DOMElement[]
+     */
+    protected array $toCleanup = [];
+
+    /**
+     * @param Closure(PageIncludeTag $tag): PageIncludeContent $pageContentForId
+     */
+    public function __construct(
+        protected HtmlDocument $doc,
+        protected Closure $pageContentForId,
+    ) {
+    }
+
+    /**
+     * Parse out the include tags.
+     * Returns the count of new content DOM nodes added to the document.
+     */
+    public function parse(): int
+    {
+        $nodesAdded = 0;
+        $tags = $this->locateAndIsolateIncludeTags();
+
+        foreach ($tags as $tag) {
+            /** @var PageIncludeContent $content */
+            $content = $this->pageContentForId->call($this, $tag);
+
+            // Block content is moved out of any paragraph the tag sits within:
+            // A direct parent paragraph is split around the tag, while for a
+            // more distant paragraph ancestor the tag is moved to sit beside it.
+            if (!$content->isInline()) {
+                $parentP = $this->getParentParagraph($tag->domNode);
+                $isWithinParentP = $parentP === $tag->domNode->parentNode;
+                if ($parentP && $isWithinParentP) {
+                    $this->splitNodeAtChildNode($tag->domNode->parentNode, $tag->domNode);
+                } elseif ($parentP) {
+                    $this->moveTagNodeToBesideParent($tag, $parentP);
+                }
+            }
+
+            $replacementNodes = $content->toDomNodes();
+            $nodesAdded += count($replacementNodes);
+            $this->replaceNodeWithNodes($tag->domNode, $replacementNodes);
+        }
+
+        $this->cleanup();
+
+        return $nodesAdded;
+    }
+
+    /**
+     * Locate include tags within the given document, isolating them to their
+     * own nodes in the DOM for future targeted manipulation.
+     * @return PageIncludeTag[]
+     */
+    protected function locateAndIsolateIncludeTags(): array
+    {
+        $includeHosts = $this->doc->queryXPath("//*[text()[contains(., '{{@')]]");
+        $includeTags = [];
+
+        /** @var DOMNode $node */
+        foreach ($includeHosts as $node) {
+            // Iterate over a snapshot of the children since splitting a text node
+            // inserts new siblings into this same parent during the loop.
+            /** @var DOMNode $childNode */
+            foreach ([...$node->childNodes] as $childNode) {
+                if ($childNode->nodeName === '#text') {
+                    array_push($includeTags, ...$this->splitTextNodesAtTags($childNode));
+                }
+            }
+        }
+
+        return $includeTags;
+    }
+
+    /**
+     * Takes a text DOMNode and splits its text content at include tags
+     * into multiple text nodes within the original parent.
+     * Returns found PageIncludeTag references.
+     * @return PageIncludeTag[]
+     */
+    protected function splitTextNodesAtTags(DOMNode $textNode): array
+    {
+        $includeTags = [];
+        $text = $textNode->textContent;
+        preg_match_all(static::$includeTagRegex, $text, $matches, PREG_OFFSET_CAPTURE);
+
+        // Offsets from PREG_OFFSET_CAPTURE are byte offsets, hence the use of
+        // the byte-wise substr()/strlen() functions below.
+        $currentOffset = 0;
+        foreach ($matches[0] as $index => $fullTagMatch) {
+            $tagOuterContent = $fullTagMatch[0];
+            $tagInnerContent = $matches[1][$index][0];
+            $tagStartOffset = $fullTagMatch[1];
+
+            // Keep any text found between the previous tag (or start) and this tag.
+            if ($currentOffset < $tagStartOffset) {
+                $previousText = substr($text, $currentOffset, $tagStartOffset - $currentOffset);
+                $textNode->parentNode->insertBefore(new DOMText($previousText), $textNode);
+            }
+
+            $node = $textNode->parentNode->insertBefore(new DOMText($tagOuterContent), $textNode);
+            $includeTags[] = new PageIncludeTag($tagInnerContent, $node);
+            $currentOffset = $tagStartOffset + strlen($tagOuterContent);
+        }
+
+        // The original node is left holding only the text after the last tag.
+        if ($currentOffset > 0) {
+            $textNode->textContent = substr($text, $currentOffset);
+        }
+
+        return $includeTags;
+    }
+
+    /**
+     * Replace the given node with all those in $replacements
+     * @param DOMNode[] $replacements
+     */
+    protected function replaceNodeWithNodes(DOMNode $toReplace, array $replacements): void
+    {
+        /** @var DOMDocument $targetDoc */
+        $targetDoc = $toReplace->ownerDocument;
+
+        foreach ($replacements as $replacement) {
+            // Nodes from another document need importing before insertion.
+            if ($replacement->ownerDocument !== $targetDoc) {
+                $replacement = $targetDoc->importNode($replacement, true);
+            }
+
+            $toReplace->parentNode->insertBefore($replacement, $toReplace);
+        }
+
+        $toReplace->parentNode->removeChild($toReplace);
+    }
+
+    /**
+     * Move a tag node to become a sibling of the given parent.
+     * Will attempt to guess a position based upon the tag content within the parent.
+     */
+    protected function moveTagNodeToBesideParent(PageIncludeTag $tag, DOMNode $parent): void
+    {
+        // Place before the parent if the tag sits in the first half of the
+        // parent's text, otherwise place after.
+        $parentText = $parent->textContent;
+        $tagPos = strpos($parentText, $tag->tagContent);
+        $before = $tagPos < (strlen($parentText) / 2);
+        $this->toCleanup[] = $tag->domNode->parentNode;
+
+        if ($before) {
+            $parent->parentNode->insertBefore($tag->domNode, $parent);
+        } else {
+            $parent->parentNode->insertBefore($tag->domNode, $parent->nextSibling);
+        }
+    }
+
+    /**
+     * Splits the given $parentNode at the location of the $domNode within it.
+     * Attempts replicate the original $parentNode, moving some of their parent
+     * children in where needed, before adding the $domNode between.
+     */
+    protected function splitNodeAtChildNode(DOMElement $parentNode, DOMNode $domNode): void
+    {
+        $children = [...$parentNode->childNodes];
+        $splitPos = array_search($domNode, $children, true);
+        if ($splitPos === false) {
+            $splitPos = count($children) - 1;
+        }
+
+        // Shallow clone to hold the children before the split point.
+        // The ID is removed from the clone to avoid a duplicate ID in the document.
+        $parentClone = $parentNode->cloneNode();
+        $parentNode->parentNode->insertBefore($parentClone, $parentNode);
+        $parentClone->removeAttribute('id');
+
+        /** @var DOMNode $child */
+        for ($i = 0; $i < $splitPos; $i++) {
+            $child = $children[$i];
+            $parentClone->appendChild($child);
+        }
+
+        $parentNode->parentNode->insertBefore($domNode, $parentNode);
+
+        // Either side of the split may now be empty, so both are checked on cleanup.
+        $this->toCleanup[] = $parentNode;
+        $this->toCleanup[] = $parentClone;
+    }
+
+    /**
+     * Get the parent paragraph of the given node, if existing.
+     */
+    protected function getParentParagraph(DOMNode $parent): ?DOMNode
+    {
+        do {
+            if (strtolower($parent->nodeName) === 'p') {
+                return $parent;
+            }
+
+            $parent = $parent->parentNode;
+        } while ($parent !== null);
+
+        return null;
+    }
+
+    /**
+     * Cleanup after a parse operation.
+     * Removes stranded elements we may have left during the parse.
+     */
+    protected function cleanup(): void
+    {
+        foreach ($this->toCleanup as $element) {
+            // Normalizing first means elements holding only empty text
+            // nodes are seen as having no children.
+            $element->normalize();
+            // Walk upwards, removing each ancestor that is itself left empty.
+            while ($element->parentNode && !$element->hasChildNodes()) {
+                $parent = $element->parentNode;
+                $parent->removeChild($element);
+                $element = $parent;
+            }
+        }
+    }
+}
--- a/app/Entities/Tools/PageIncludeTag.php
+++ b/app/Entities/Tools/PageIncludeTag.php
@ -0,0 +1,30 @@
+<?php
+
+namespace BookStack\Entities\Tools;
+
+use DOMNode;
+
+/**
+ * A single include tag located within a document, pairing the
+ * reference text of the tag with the DOM node which holds it.
+ */
+class PageIncludeTag
+{
+    public function __construct(
+        public string $tagContent,
+        public DOMNode $domNode,
+    ) {
+    }
+
+    /**
+     * Get the ID of the page referenced by this tag, read from the
+     * part of the tag content before any '#' character.
+     */
+    public function getPageId(): int
+    {
+        [$pageReference] = explode('#', $this->tagContent, 2);
+
+        return intval(trim($pageReference));
+    }
+
+    /**
+     * Get the ID of the section referenced by this tag, read from the
+     * part of the tag content after the first '#' character.
+     * Provides an empty string where the tag does not reference a section.
+     */
+    public function getSectionId(): string
+    {
+        $segments = explode('#', $this->tagContent, 2);
+        $sectionReference = $segments[1] ?? '';
+
+        return trim($sectionReference);
+    }
+}
--- a/app/Theming/CustomHtmlHeadContentProvider.php
+++ b/app/Theming/CustomHtmlHeadContentProvider.php
@ -50,7 +50,7 @@ class CustomHtmlHeadContentProvider
        $hash = md5($content);

        return $this->cache->remember('custom-head-export:' . $hash, 86400, function () use ($content) {
-            return HtmlContentFilter::removeScripts($content);
+            return HtmlContentFilter::removeScriptsFromHtmlString($content);
        });
    }

--- a/app/Theming/ThemeEvents.php
+++ b/app/Theming/ThemeEvents.php
@ -2,8 +2,6 @@

 namespace BookStack\Theming;

-use BookStack\Entities\Models\Page;
-
 /**
 * The ThemeEvents used within BookStack.
 *
@ -93,8 +91,8 @@ class ThemeEvents
     *
     * @param string $tagReference
     * @param string $replacementHTML
-     * @param Page   $currentPage
-     * @param ?Page  $referencedPage
+     * @param \BookStack\Entities\Models\Page   $currentPage
+     * @param ?\BookStack\Entities\Models\Page  $referencedPage
     */
    const PAGE_INCLUDE_PARSE = 'page_include_parse';

--- a/app/Theming/ThemeService.php
+++ b/app/Theming/ThemeService.php
@ -48,6 +48,14 @@ class ThemeService
        return null;
    }

+    /**
+     * Determine whether at least one listener has been
+     * registered against the given event name.
+     */
+    public function hasListeners(string $event): bool
+    {
+        $registered = $this->listeners[$event] ?? [];
+
+        return count($registered) !== 0;
+    }
+
    /**
     * Register a new custom artisan command to be available.
     */
--- a/app/Util/HtmlContentFilter.php
+++ b/app/Util/HtmlContentFilter.php
@ -9,16 +9,10 @@ use DOMNodeList;
 class HtmlContentFilter
 {
    /**
-     * Remove all the script elements from the given HTML.
+     * Remove all the script elements from the given HTML document.
     */
-    public static function removeScripts(string $html): string
+    public static function removeScriptsFromDocument(HtmlDocument $doc)
    {
-        if (empty($html)) {
-            return $html;
-        }
-
-        $doc = new HtmlDocument($html);
-
        // Remove standard script tags
        $scriptElems = $doc->queryXPath('//script');
        static::removeNodes($scriptElems);
@ -53,6 +47,19 @@ class HtmlContentFilter
        // Remove 'on*' attributes
        $onAttributes = $doc->queryXPath('//@*[starts-with(name(), \'on\')]');
        static::removeAttributes($onAttributes);
+    }
+
+    /**
+     * Remove scripts from the given HTML string.
+     */
+    public static function removeScriptsFromHtmlString(string $html): string
+    {
+        if (empty($html)) {
+            return $html;
+        }
+
+        $doc = new HtmlDocument($html);
+        static::removeScriptsFromDocument($doc);

        return $doc->getBodyInnerHtml();
    }
--- a/tests/Entity/PageContentTest.php
+++ b/tests/Entity/PageContentTest.php
@ -8,7 +8,7 @@ use Tests\TestCase;

 class PageContentTest extends TestCase
 {
-    protected $base64Jpeg = '/9j/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkICQkKDA8MCgsOCwkJDRENDg8QEBEQCgwSExIQEw8QEBD/yQALCAABAAEBAREA/8wABgAQEAX/2gAIAQEAAD8A0s8g/9k=';
+    protected string $base64Jpeg = '/9j/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkICQkKDA8MCgsOCwkJDRENDg8QEBEQCgwSExIQEw8QEBD/yQALCAABAAEBAREA/8wABgAQEAX/2gAIAQEAAD8A0s8g/9k=';

    public function test_page_includes()
    {
@ -57,38 +57,6 @@ class PageContentTest extends TestCase
        $this->assertEquals('', $page->text);
    }

-    public function test_page_includes_do_not_break_tables()
-    {
-        $page = $this->entities->page();
-        $secondPage = $this->entities->page();
-
-        $content = '<table id="table"><tbody><tr><td>test</td></tr></tbody></table>';
-        $secondPage->html = $content;
-        $secondPage->save();
-
-        $page->html = "{{@{$secondPage->id}#table}}";
-        $page->save();
-
-        $pageResp = $this->asEditor()->get($page->getUrl());
-        $pageResp->assertSee($content, false);
-    }
-
-    public function test_page_includes_do_not_break_code()
-    {
-        $page = $this->entities->page();
-        $secondPage = $this->entities->page();
-
-        $content = '<pre id="bkmrk-code"><code>var cat = null;</code></pre>';
-        $secondPage->html = $content;
-        $secondPage->save();
-
-        $page->html = "{{@{$secondPage->id}#bkmrk-code}}";
-        $page->save();
-
-        $pageResp = $this->asEditor()->get($page->getUrl());
-        $pageResp->assertSee($content, false);
-    }
-
    public function test_page_includes_rendered_on_book_export()
    {
        $page = $this->entities->page();
@ -120,6 +88,19 @@ class PageContentTest extends TestCase
        $this->withHtml($pageResp)->assertElementNotContains('#bkmrk-test', 'Hello Barry Hello Barry Hello Barry Hello Barry Hello Barry ' . $tag);
    }

+    public function test_page_includes_to_nonexisting_pages_does_not_error()
+    {
+        $page = $this->entities->page();
+        $missingId = Page::query()->max('id') + 1;
+        $tag = "{{@{$missingId}}}";
+        $page->html = '<p id="bkmrk-test">Hello Barry ' . $tag . '</p>';
+        $page->save();
+
+        $pageResp = $this->asEditor()->get($page->getUrl());
+        $pageResp->assertOk();
+        $pageResp->assertSee('Hello Barry');
+    }
+
    public function test_page_content_scripts_removed_by_default()
    {
        $this->asEditor();
--- a/tests/Unit/PageIncludeParserTest.php
+++ b/tests/Unit/PageIncludeParserTest.php
@ -0,0 +1,240 @@
+<?php
+
+namespace Tests\Unit;
+
+use BookStack\Entities\Tools\PageIncludeContent;
+use BookStack\Entities\Tools\PageIncludeParser;
+use BookStack\Entities\Tools\PageIncludeTag;
+use BookStack\Util\HtmlDocument;
+use Tests\TestCase;
+
+/**
+ * Tests for the PageIncludeParser, each checking the resulting document HTML
+ * after parsing a document containing include tags against fixed page content.
+ */
+class PageIncludeParserTest extends TestCase
+{
+    public function test_simple_inline_text()
+    {
+        $this->runParserTest(
+            '<p>{{@45#content}}</p>',
+            ['45' => '<p id="content">Testing</p>'],
+            '<p>Testing</p>',
+        );
+    }
+
+    public function test_simple_inline_text_with_existing_siblings()
+    {
+        $this->runParserTest(
+            '<p>{{@45#content}} <strong>Hi</strong>there!</p>',
+            ['45' => '<p id="content">Testing</p>'],
+            '<p>Testing <strong>Hi</strong>there!</p>',
+        );
+    }
+
+    public function test_simple_inline_text_within_other_text()
+    {
+        $this->runParserTest(
+            '<p>Hello {{@45#content}}there!</p>',
+            ['45' => '<p id="content">Testing</p>'],
+            '<p>Hello Testingthere!</p>',
+        );
+    }
+
+    public function test_complex_inline_text_within_other_text()
+    {
+        $this->runParserTest(
+            '<p>Hello {{@45#content}}there!</p>',
+            ['45' => '<p id="content"><strong>Testing</strong> with<em>some</em><i>extra</i>tags</p>'],
+            '<p>Hello <strong>Testing</strong> with<em>some</em><i>extra</i>tagsthere!</p>',
+        );
+    }
+
+    public function test_block_content_types()
+    {
+        // One input per element type that is included along with its wrapper element.
+        $inputs = [
+            '<table id="content"><td>Text</td></table>',
+            '<ul id="content"><li>Item A</li></ul>',
+            '<ol id="content"><li>Item A</li></ol>',
+            '<pre id="content">Code</pre>',
+        ];
+
+        foreach ($inputs as $input) {
+            $this->runParserTest(
+                '<p>A{{@45#content}}B</p>',
+                ['45' => $input],
+                '<p>A</p>' . $input . '<p>B</p>',
+            );
+        }
+    }
+
+    public function test_block_content_nested_origin_gets_placed_before()
+    {
+        $this->runParserTest(
+            '<p><strong>A {{@45#content}} there!</strong></p>',
+            ['45' => '<pre id="content">Testing</pre>'],
+            '<pre id="content">Testing</pre><p><strong>A  there!</strong></p>',
+        );
+    }
+
+    public function test_block_content_nested_origin_gets_placed_after()
+    {
+        $this->runParserTest(
+            '<p><strong>Some really good {{@45#content}} there!</strong></p>',
+            ['45' => '<pre id="content">Testing</pre>'],
+            '<p><strong>Some really good  there!</strong></p><pre id="content">Testing</pre>',
+        );
+    }
+
+    public function test_block_content_in_shallow_origin_gets_split()
+    {
+        $this->runParserTest(
+            '<p>Some really good {{@45#content}} there!</p>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<p>Some really good </p><pre id="content">doggos</pre><p> there!</p>',
+        );
+    }
+
+    public function test_block_content_in_shallow_origin_split_does_not_duplicate_id()
+    {
+        $this->runParserTest(
+            '<p id="test" title="Hi">Some really good {{@45#content}} there!</p>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<p title="Hi">Some really good </p><pre id="content">doggos</pre><p id="test" title="Hi"> there!</p>',
+        );
+    }
+
+    public function test_block_content_in_shallow_origin_does_not_leave_empty_nodes()
+    {
+        $this->runParserTest(
+            '<p>{{@45#content}}</p>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<pre id="content">doggos</pre>',
+        );
+    }
+
+    public function test_block_content_in_allowable_parent_element()
+    {
+        $this->runParserTest(
+            '<div>{{@45#content}}</div>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<div><pre id="content">doggos</pre></div>',
+        );
+    }
+
+    public function test_block_content_in_paragraph_origin_with_allowable_grandparent()
+    {
+        $this->runParserTest(
+            '<div><p>{{@45#content}}</p></div>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<div><pre id="content">doggos</pre></div>',
+        );
+    }
+
+    public function test_block_content_in_paragraph_origin_with_allowable_grandparent_with_adjacent_content()
+    {
+        $this->runParserTest(
+            '<div><p>Cute {{@45#content}} over there!</p></div>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<div><p>Cute </p><pre id="content">doggos</pre><p> over there!</p></div>',
+        );
+    }
+
+    public function test_block_content_in_child_within_paragraph_origin_with_allowable_grandparent_with_adjacent_content()
+    {
+        $this->runParserTest(
+            '<div><p><strong>Cute {{@45#content}} over there!</strong></p></div>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<div><pre id="content">doggos</pre><p><strong>Cute  over there!</strong></p></div>',
+        );
+    }
+
+    public function test_block_content_in_paragraph_origin_within_details()
+    {
+        $this->runParserTest(
+            '<details><p>{{@45#content}}</p></details>',
+            ['45' => '<pre id="content">doggos</pre>'],
+            '<details><pre id="content">doggos</pre></details>',
+        );
+    }
+
+    public function test_simple_whole_document()
+    {
+        $this->runParserTest(
+            '<p>{{@45}}</p>',
+            ['45' => '<p id="content">Testing</p>'],
+            '<p id="content">Testing</p>',
+        );
+    }
+
+    public function test_multi_source_elem_whole_document()
+    {
+        $this->runParserTest(
+            '<p>{{@45}}</p>',
+            ['45' => '<p>Testing</p><blockquote>This</blockquote>'],
+            '<p>Testing</p><blockquote>This</blockquote>',
+        );
+    }
+
+    public function test_multi_source_elem_whole_document_with_shared_content_origin()
+    {
+        $this->runParserTest(
+            '<p>This is {{@45}} some text</p>',
+            ['45' => '<p>Testing</p><blockquote>This</blockquote>'],
+            '<p>This is </p><p>Testing</p><blockquote>This</blockquote><p> some text</p>',
+        );
+    }
+
+    public function test_multi_source_elem_whole_document_with_nested_content_origin()
+    {
+        $this->runParserTest(
+            '<p><strong>{{@45}}</strong></p>',
+            ['45' => '<p>Testing</p><blockquote>This</blockquote>'],
+            '<p>Testing</p><blockquote>This</blockquote>',
+        );
+    }
+
+    public function test_multiple_tags_in_same_origin_with_inline_content()
+    {
+        $this->runParserTest(
+            '<p>This {{@45#content}}{{@45#content}} content is {{@45#content}}</p>',
+            ['45' => '<p id="content">inline</p>'],
+            '<p>This inlineinline content is inline</p>',
+        );
+    }
+
+    public function test_multiple_tags_in_same_origin_with_block_content()
+    {
+        $this->runParserTest(
+            '<p>This {{@45#content}}{{@45#content}} content is {{@45#content}}</p>',
+            ['45' => '<pre id="content">block</pre>'],
+            '<p>This </p><pre id="content">block</pre><pre id="content">block</pre><p> content is </p><pre id="content">block</pre>',
+        );
+    }
+
+    public function test_multiple_tags_in_differing_origin_levels_with_block_content()
+    {
+        $this->runParserTest(
+            '<div><p>This <strong>{{@45#content}}</strong> content is {{@45#content}}</p>{{@45#content}}</div>',
+            ['45' => '<pre id="content">block</pre>'],
+            '<div><pre id="content">block</pre><p>This  content is </p><pre id="content">block</pre><pre id="content">block</pre></div>',
+        );
+    }
+
+    public function test_multiple_tags_in_shallow_origin_with_multi_block_content()
+    {
+        $this->runParserTest(
+            '<p>{{@45}}C{{@45}}</p><div>{{@45}}{{@45}}</div>',
+            ['45' => '<p>A</p><p>B</p>'],
+            '<p>A</p><p>B</p><p>C</p><p>A</p><p>B</p><div><p>A</p><p>B</p><p>A</p><p>B</p></div>',
+        );
+    }
+
+    /**
+     * Run a single parse of the given HTML and assert the resulting body HTML.
+     * Include content is provided from $contentById, keyed by page ID as a string,
+     * with page IDs that are not found providing empty content.
+     *
+     * @param string                $html        HTML, containing include tags, to parse.
+     * @param array<string, string> $contentById Page HTML to include, keyed by page ID.
+     * @param string                $expected    Body HTML expected after parsing.
+     */
+    protected function runParserTest(string $html, array $contentById, string $expected): void
+    {
+        $doc = new HtmlDocument($html);
+        $parser = new PageIncludeParser($doc, function (PageIncludeTag $tag) use ($contentById): PageIncludeContent {
+            $html = $contentById[strval($tag->getPageId())] ?? '';
+            return PageIncludeContent::fromHtmlAndTag($html, $tag);
+        });
+
+        $parser->parse();
+        $this->assertEquals($expected, $doc->getBodyInnerHtml());
+    }
+}