Created
June 30, 2015 09:32
-
-
Save rdohms/73afce744f7e8b894174 to your computer and use it in GitHub Desktop.
A simple HTML to email-friendly plain-text converter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
namespace Symbid\Library\Bundle\MessagingBundle\Twig; | |
use Twig_Extension; | |
/**
 * Twig extension exposing the `textSafe` filter, which flattens an HTML
 * fragment into plain text (e.g. for the text part of an e-mail).
 *
 * Every transformation step takes an HTML string and returns an HTML string,
 * so the steps can be chained or called individually. Entities are decoded
 * only once, at the very end of makeTextSafe().
 */
class TextTransformer extends Twig_Extension
{
    /**
     * Returns the name of the extension.
     *
     * @return string The extension name
     */
    public function getName()
    {
        return 'symbid_messaging.twig.text_transformer';
    }

    /**
     * Registers the `textSafe` filter, backed by makeTextSafe().
     *
     * @return \Twig_SimpleFilter[]
     */
    public function getFilters()
    {
        return [
            new \Twig_SimpleFilter('textSafe', [$this, 'makeTextSafe']),
        ];
    }

    /**
     * Converts an HTML fragment into plain text.
     *
     * Links become "text (href)", paragraphs become their text followed by a
     * line break, every remaining tag is stripped and entities are decoded.
     *
     * The result is plain text, NOT HTML-safe output: input such as
     * "&lt;b&gt;" comes back as a literal "<b>".
     *
     * @param string $content HTML fragment (empty input yields an empty string)
     * @return string
     */
    public function makeTextSafe($content)
    {
        // Clean up
        $content = $this->purifyHTML($content);
        // a[href] => content (href)
        $content = $this->linksToText($content);
        // <p>text</p> => text <line break>
        $content = $this->paragraphsToText($content);
        // all else => strip
        $content = strip_tags($content);

        // The steps above keep text entity-encoded so that strip_tags() cannot
        // mistake a literal "<" for markup; decode exactly once, at the end.
        return html_entity_decode($content, ENT_QUOTES | ENT_HTML5, 'UTF-8');
    }

    /**
     * Replaces every anchor with its text, followed by the target in
     * parentheses when the anchor has a non-empty href.
     *
     * @param string $content HTML fragment
     * @return string HTML fragment without <a> elements
     */
    protected function linksToText($content)
    {
        $dom = $this->loadFragment($content);
        if ($dom === null) {
            return '';
        }

        // getElementsByTagName() returns a live list; take a snapshot because
        // the loop removes the very nodes the list is made of.
        foreach (iterator_to_array($dom->getElementsByTagName('a')) as $tag) {
            /** @var \DOMElement $tag */
            if ($tag->parentNode === null) {
                continue;
            }

            $href = $tag->getAttribute('href');
            // Strict comparison: empty() would also discard a href of "0".
            $output = ($href !== '') ? "{$tag->textContent} ($href)" : $tag->textContent;

            // A text node is escaped again on serialisation, so the returned
            // fragment stays valid HTML even if the text contains "&" or "<".
            $tag->parentNode->replaceChild($dom->createTextNode($output), $tag);
        }

        return $this->saveFragment($dom);
    }

    /**
     * Trim and Tidy's up HTML so that DOM Parser can handle it
     *
     * The fragment is parsed (the parser wraps it in html/body, which lets it
     * cope with loose segments of HTML) and the content of the body is
     * serialised again, without the wrapper.
     *
     * @param string $content
     * @return string
     */
    public function purifyHTML($content)
    {
        $dom = $this->loadFragment($content);

        return ($dom === null) ? '' : $this->saveFragment($dom);
    }

    /**
     * Replaces all paragraphs with text and line breaks.
     *
     * Markup nested inside a paragraph is flattened to its text content.
     *
     * @param string $content HTML fragment
     * @return string HTML fragment without <p> elements
     */
    public function paragraphsToText($content)
    {
        $dom = $this->loadFragment($content);
        if ($dom === null) {
            return '';
        }

        // Snapshot of the live node list, see linksToText().
        foreach (iterator_to_array($dom->getElementsByTagName('p')) as $tag) {
            /** @var \DOMElement $tag */
            if ($tag->parentNode === null) {
                continue;
            }

            $tag->parentNode->replaceChild($dom->createTextNode("{$tag->textContent}\n"), $tag);
        }

        return $this->saveFragment($dom);
    }

    /**
     * Replaces the serialised form of $tag inside $content with $output.
     *
     * No longer used by this class: matching on serialised markup is brittle
     * (the tag must re-serialise byte-for-byte as it appears in $content).
     * Kept unchanged for subclasses that may still call it.
     *
     * @param \DOMElement $tag
     * @param string $output
     * @param string $content
     * @return string
     */
    protected function replaceTagInContent(\DOMElement $tag, $output, $content)
    {
        $tmpDoc = new \DOMDocument();
        $tmpDoc->appendChild($tmpDoc->importNode($tag, true));
        $htmlRepresentation = $tmpDoc->saveHTML();

        return str_replace(trim($htmlRepresentation), $output, $content);
    }

    /**
     * Parses an HTML fragment as UTF-8 without leaking libxml state.
     *
     * @param string $content HTML fragment
     * @return \DOMDocument|null Null when there is nothing to parse or parsing failed
     */
    private function loadFragment($content)
    {
        $content = (string) $content;

        // loadHTML() rejects an empty string (warning, or ValueError on PHP 8).
        if (trim($content) === '') {
            return null;
        }

        // Collect parser complaints about sloppy markup instead of raising
        // PHP warnings, and remember the caller's setting so it can be restored.
        $previous = libxml_use_internal_errors(true);

        $dom = new \DOMDocument();
        // Without a declared charset libxml does not treat the input as UTF-8
        // and mangles multi-byte characters. The meta element ends up in
        // <head>, which saveFragment() never serialises.
        // LIBXML_NOENT (entity substitution, an XML option) is intentionally
        // not passed: it is not needed for HTML input.
        $loaded = $dom->loadHTML(
            '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' . $content,
            LIBXML_HTML_NODEFDTD | LIBXML_NOERROR | LIBXML_NOWARNING
        );

        if (!$previous) {
            // The caller was not collecting errors: drop the ones gathered here
            // so the buffer does not grow with every call.
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previous);

        return $loaded ? $dom : null;
    }

    /**
     * Serialises the children of <body>, i.e. the fragment without the
     * html/head/body wrapper added by the parser.
     *
     * @param \DOMDocument $dom
     * @return string Trimmed HTML fragment, empty when the document has no body
     */
    private function saveFragment(\DOMDocument $dom)
    {
        $body = $dom->getElementsByTagName('body')->item(0);
        if ($body === null) {
            return '';
        }

        $html = '';
        foreach ($body->childNodes as $child) {
            $html .= $dom->saveHTML($child);
        }

        return trim($html);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Obviously needed to point out here: