From 09c606181084b78dba9c279dee91a633eae7dce4 Mon Sep 17 00:00:00 2001 From: Random Penguin <205060075+randompenguin1@users.noreply.github.com> Date: Sun, 20 Apr 2025 12:05:26 -0500 Subject: [PATCH 1/4] Strip HTML tags from content sent as Markdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "toMarkdown" function prepares content to be sent, primarily, to Diaspora. The HTML to Markdown converter by default "preserves HTML tags without Markdown equivalents like `<span>` and `<div>`
." At least according to the README in _/friendica/vendor/league/html-to-markdown/_ - which also says "To strip HTML tags that don’t have a Markdown equivalent while preserving the content inside them, set strip_tags..." Diaspora, however, does not appear to know what to DO with the HTML sent to it. It actually appears to _encode_ the HTML and displays the *code* in the post body rather than rendering it as HTML. In which case it would make more sense to strip out all tags that have no Markdown equivalents. --- src/Content/Text/HTML.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Content/Text/HTML.php b/src/Content/Text/HTML.php index f5cf2c6eca..f29f4a148e 100644 --- a/src/Content/Text/HTML.php +++ b/src/Content/Text/HTML.php @@ -689,7 +689,7 @@ class HTML public static function toMarkdown(string $html): string { DI::profiler()->startRecording('rendering'); - $converter = new HtmlConverter(['hard_break' => true]); + $converter = new HtmlConverter(['hard_break' => true, ‘strip_tags’ => true]]); $markdown = $converter->convert($html); DI::profiler()->stopRecording(); From 403b0672e688c8f61fafa3c50f31bc23e8a4bab3 Mon Sep 17 00:00:00 2001 From: Random Penguin <205060075+randompenguin1@users.noreply.github.com> Date: Sat, 26 Apr 2025 16:44:14 -0500 Subject: [PATCH 2/4] Strip HTML tags from content sent as Markdown MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "toMarkdown" function prepares content to be sent, primarily, to Diaspora. The HTML to Markdown converter by default "preserves HTML tags without Markdown equivalents like `<span>` and `<div>`
." At least according to the README in /friendica/vendor/league/html-to-markdown/ - which also says "To strip HTML tags that don’t have a Markdown equivalent while preserving the content inside them, set strip_tags..." Diaspora, however, does not appear to know what to DO with the HTML sent to it. It actually appears to encode the HTML and displays the code in the post body rather than rendering it as HTML. In which case it would make more sense to strip out all tags that have no Markdown equivalents. Example
The post as sent from Friendica mixed BBcode and Markdown: [class=postbox-ocean]Norddeutscher Bürger ![Noddeutscher Bürger - Bismark Brötchen (Roger Cziwerny - pixapay)](/rscamo/……)[/class] The BBcode gets converted into an HTML `<span>` tag. It also looks like Diaspora encoded the Markdown for the image rather than parsing it, or at the very least didn't parse it as Markdown. Yet another reason to strip out HTML that has no Markdown equivalents. It looks like it may be encoding the tags and the content inside the tags. And, yes, I'm aware the [class] BBcode was marked as "deprecated" in the Friendica code, but it's the only way my Bookface scheme for the "Frio" theme could implement both profile "Cover Photos" and "Postboxes." Which won't display to other platforms anyway, so I don't want to see this custom BBcode removed, I just think it needs to be stripped out before content is delivered to other platforms. (There is a related issue with Hubzilla where the raw BBcode is coming through unparsed because the [class] code apparently does not exist in Hubzilla's BBcodes. But I've no clue where or how to fix that in Friendica or if it can be as it may need to be handled on Hubzilla's end). 
--- src/Content/Text/HTML.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Content/Text/HTML.php b/src/Content/Text/HTML.php index f29f4a148e..72c61f38cc 100644 --- a/src/Content/Text/HTML.php +++ b/src/Content/Text/HTML.php @@ -689,7 +689,7 @@ class HTML public static function toMarkdown(string $html): string { DI::profiler()->startRecording('rendering'); - $converter = new HtmlConverter(['hard_break' => true, ‘strip_tags’ => true]]); + $converter = new HtmlConverter(['hard_break' => true, 'strip_tags' => true]); $markdown = $converter->convert($html); DI::profiler()->stopRecording(); From 3bd6335fa6668c2c3cd01c7ef50341f4578e5cb5 Mon Sep 17 00:00:00 2001 From: Philipp Date: Thu, 29 May 2025 19:11:24 +0200 Subject: [PATCH 3/4] Add test --- tests/src/Content/Text/BBCodeTest.php | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/tests/src/Content/Text/BBCodeTest.php b/tests/src/Content/Text/BBCodeTest.php index 9c61e10887..17f48ddae6 100644 --- a/tests/src/Content/Text/BBCodeTest.php +++ b/tests/src/Content/Text/BBCodeTest.php @@ -294,26 +294,31 @@ Karl Marx - Die ursprüngliche Akkumulation public function dataBBCodesToMarkdown() { return [ - 'bug-7808-gt' => [ + 'bug-7808-gt' => [ 'expected' => '>`>`', 'text' => '>[code]>[/code]', ], - 'bug-7808-lt' => [ + 'bug-7808-lt' => [ 'expected' => '<`<`', 'text' => '<[code]<[/code]', ], - 'bug-7808-amp' => [ + 'bug-7808-amp' => [ 'expected' => '&`&`', 'text' => '&[code]&[/code]', ], - 'bug-12701-quotes' => [ + 'bug-12701-quotes' => [ 'expected' => '[![abc"fgh](https://domain.tld/photo/86912721086415cdc8e0a03226831581-1.png)](https://domain.tld/photos/user/image/86912721086415cdc8e0a03226831581)', 'text' => '[url=https://domain.tld/photos/user/image/86912721086415cdc8e0a03226831581][img=https://domain.tld/photo/86912721086415cdc8e0a03226831581-1.png]abc"fgh[/img][/url]' ], - 'bug-12701-no-quotes' => [ + 'bug-12701-no-quotes' => [ 'expected' => 
'[![abcfgh](https://domain.tld/photo/86912721086415cdc8e0a03226831581-1.png "abcfgh")](https://domain.tld/photos/user/image/86912721086415cdc8e0a03226831581)', 'text' => '[url=https://domain.tld/photos/user/image/86912721086415cdc8e0a03226831581][img=https://domain.tld/photo/86912721086415cdc8e0a03226831581-1.png]abcfgh[/img][/url]' ], + /** @see https://github.com/friendica/friendica/pull/14908 */ + 'task-14908-strip-tags' => [ + 'expected' => 'Norddeutscher Bürger !\[Noddeutscher Bürger - Bismark Brötchen (Roger Cziwerny - pixapay)\](/rscamo/……)', + 'text' => '[class=postbox-ocean]Norddeutscher Bürger ![Noddeutscher Bürger - Bismark Brötchen (Roger Cziwerny - pixapay)](/rscamo/……)[/class]', + ], ]; } From f2ee3602ac0b48e7e53e9fe55ebe23ddcacd197c Mon Sep 17 00:00:00 2001 From: Philipp Date: Thu, 29 May 2025 19:14:43 +0200 Subject: [PATCH 4/4] Fix PHP-CS --- tests/src/Content/Text/BBCodeTest.php | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/src/Content/Text/BBCodeTest.php b/tests/src/Content/Text/BBCodeTest.php index e51bb997d0..472ebc52dd 100644 --- a/tests/src/Content/Text/BBCodeTest.php +++ b/tests/src/Content/Text/BBCodeTest.php @@ -294,23 +294,23 @@ Karl Marx - Die ursprüngliche Akkumulation public function dataBBCodesToMarkdown() { return [ - 'bug-7808-gt' => [ + 'bug-7808-gt' => [ 'expected' => '>`>`', 'text' => '>[code]>[/code]', ], - 'bug-7808-lt' => [ + 'bug-7808-lt' => [ 'expected' => '<`<`', 'text' => '<[code]<[/code]', ], - 'bug-7808-amp' => [ + 'bug-7808-amp' => [ 'expected' => '&`&`', 'text' => '&[code]&[/code]', ], - 'bug-12701-quotes' => [ + 'bug-12701-quotes' => [ 'expected' => '[![abc"fgh](https://domain.tld/photo/86912721086415cdc8e0a03226831581-1.png)](https://domain.tld/photos/user/image/86912721086415cdc8e0a03226831581)', 'text' => '[url=https://domain.tld/photos/user/image/86912721086415cdc8e0a03226831581][img=https://domain.tld/photo/86912721086415cdc8e0a03226831581-1.png]abc"fgh[/img][/url]', ], - 
'bug-12701-no-quotes' => [ + 'bug-12701-no-quotes' => [ 'expected' => '[![abcfgh](https://domain.tld/photo/86912721086415cdc8e0a03226831581-1.png "abcfgh")](https://domain.tld/photos/user/image/86912721086415cdc8e0a03226831581)', 'text' => '[url=https://domain.tld/photos/user/image/86912721086415cdc8e0a03226831581][img=https://domain.tld/photo/86912721086415cdc8e0a03226831581-1.png]abcfgh[/img][/url]', ],