rdohms/TextTransformer.php

## TextTransformer.php
<?php

namespace Symbid\Library\Bundle\MessagingBundle\Twig;

use Twig_Extension;

/**
 * Twig extension exposing the `textSafe` filter, which flattens an HTML
 * fragment into plain text: links become "text (href)", paragraphs become
 * lines of text, and every remaining tag is stripped.
 *
 * NOTE(review): loadHTML() is called without any charset hint, so libxml
 * presumably falls back to its default encoding for non-ASCII bytes -
 * confirm that UTF-8 input survives the round trip.
 * NOTE(review): LIBXML_NOENT is kept exactly as in the original flags;
 * confirm it is actually required for HTML input.
 */
class TextTransformer extends Twig_Extension
{
    /**
     * Returns the name of the extension.
     *
     * @return string The extension name
     */
    public function getName()
    {
        return 'symbid_messaging.twig.text_transformer';
    }

    /**
     * Registers the `textSafe` filter, which is handled by makeTextSafe().
     *
     * @return \Twig_SimpleFilter[]
     */
    public function getFilters()
    {
        return [
            new \Twig_SimpleFilter('textSafe', [$this, 'makeTextSafe']),
        ];
    }

    /**
     * Converts an HTML fragment into plain text.
     *
     * Steps, in order: normalise the markup, rewrite links as "text (href)",
     * rewrite paragraphs as text followed by a line break, strip all other tags.
     *
     * @param string $content HTML fragment
     * @return string Plain-text rendition of the fragment
     */
    public function makeTextSafe($content)
    {
        // Collect libxml parse errors internally instead of emitting PHP warnings.
        // The previous setting is remembered so it can be restored, rather than
        // unconditionally switching internal errors off for the rest of the request.
        $previousErrorMode = libxml_use_internal_errors(true);

        // Clean up
        $content = $this->purifyHTML($content);

        // a[href] => content (href)
        $content = $this->linksToText($content);

        // <p>text</p> => text <line break>
        $content = $this->paragraphsToText($content);

        // all else => strip
        $content = strip_tags($content);

        // Drop the errors collected while parsing, then restore the caller's mode.
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);

        return $content;
    }

    /**
     * Replaces every anchor with its text, followed by the link target in
     * parentheses when the anchor has a non-empty href.
     *
     * @param string $content HTML fragment
     * @return string Fragment with the anchors replaced, trimmed
     */
    protected function linksToText($content)
    {
        $content = (string) $content;

        // loadHTML() rejects empty input, and there is nothing to convert anyway.
        if (trim($content) === '') {
            return '';
        }

        $dom = new \DOMDocument();
        $dom->loadHTML($content, LIBXML_NOERROR | LIBXML_NOENT);

        foreach ($dom->getElementsByTagName('a') as $tag) {
            /** @var \DOMElement $tag */
            $href = $tag->getAttribute('href');

            // Compare against '' explicitly: empty() would also discard href="0".
            $output = ($href !== '') ? "{$tag->textContent} ({$href})" : $tag->textContent;
            $content = $this->replaceTagInContent($tag, $output, $content);
        }

        return trim($content);
    }

    /**
     * Trim and Tidy's up HTML so that DOM Parser can handle it
     *
     * Allowing it to wrap with body/html allows it to properly handle segments of html with paragraphs without getting
     * lost. The html/body wrapper is removed again before returning.
     *
     * @param string $content
     * @return string
     */
    public function purifyHTML($content)
    {
        $content = (string) $content;

        // loadHTML() rejects empty input; an empty fragment is already "pure".
        if (trim($content) === '') {
            return '';
        }

        $dom = new \DOMDocument();
        $dom->loadHTML($content, LIBXML_HTML_NODEFDTD | LIBXML_NOENT);

        // The "s" modifier lets "." match line breaks; without it any fragment
        // spanning several lines would keep its <html><body> wrapper.
        return trim(preg_replace('/<html><body>(.*)<\/body><\/html>/s', '$1', $dom->saveHTML()));
    }

    /**
     * Replaces all paragraphs with text and line breaks.
     *
     * @param string $content HTML fragment
     * @return string Fragment with each paragraph replaced by its text plus "\n", trimmed
     */
    public function paragraphsToText($content)
    {
        $content = (string) $content;

        // loadHTML() rejects empty input, and there is nothing to convert anyway.
        if (trim($content) === '') {
            return '';
        }

        $dom = new \DOMDocument();
        $dom->loadHTML($content, LIBXML_NOERROR | LIBXML_NOENT);

        foreach ($dom->getElementsByTagName('p') as $tag) {
            /** @var \DOMElement $tag */
            $output = "{$tag->textContent}\n";
            $content = $this->replaceTagInContent($tag, $output, $content);
        }

        return trim($content);
    }

    /**
     * Replaces the serialised HTML of $tag inside $content with $output.
     *
     * The element is copied into a scratch document so that it can be
     * serialised on its own; every occurrence of that markup in $content is
     * then substituted. If the serialised markup does not occur in $content,
     * $content is returned unchanged.
     *
     * @param \DOMElement $tag
     * @param string $output
     * @param string $content
     * @return string
     */
    protected function replaceTagInContent(\DOMElement $tag, $output, $content)
    {
        $tmpDoc = new \DOMDocument();
        $tmpDoc->appendChild($tmpDoc->importNode($tag, true));
        $htmlRepresentation = $tmpDoc->saveHTML();

        // saveHTML() appends a trailing newline that is not part of the element.
        return str_replace(trim($htmlRepresentation), $output, $content);
    }
}
	<?php

	namespace Symbid\Library\Bundle\MessagingBundle\Twig;

	use Twig_Extension;

	/**
	 * Twig extension exposing the `textSafe` filter, which flattens an HTML
	 * fragment into plain text: links become "text (href)", paragraphs become
	 * lines of text, and every remaining tag is stripped.
	 *
	 * NOTE(review): loadHTML() is called without any charset hint, so libxml
	 * presumably falls back to its default encoding for non-ASCII bytes -
	 * confirm that UTF-8 input survives the round trip.
	 * NOTE(review): LIBXML_NOENT is kept exactly as in the original flags;
	 * confirm it is actually required for HTML input.
	 */
	class TextTransformer extends Twig_Extension
	{
	    /**
	     * Returns the name of the extension.
	     *
	     * @return string The extension name
	     */
	    public function getName()
	    {
	        return 'symbid_messaging.twig.text_transformer';
	    }

	    /**
	     * Registers the `textSafe` filter, which is handled by makeTextSafe().
	     *
	     * @return \Twig_SimpleFilter[]
	     */
	    public function getFilters()
	    {
	        return [
	            new \Twig_SimpleFilter('textSafe', [$this, 'makeTextSafe']),
	        ];
	    }

	    /**
	     * Converts an HTML fragment into plain text.
	     *
	     * Steps, in order: normalise the markup, rewrite links as "text (href)",
	     * rewrite paragraphs as text followed by a line break, strip all other tags.
	     *
	     * @param string $content HTML fragment
	     * @return string Plain-text rendition of the fragment
	     */
	    public function makeTextSafe($content)
	    {
	        // Collect libxml parse errors internally instead of emitting PHP warnings.
	        // The previous setting is remembered so it can be restored, rather than
	        // unconditionally switching internal errors off for the rest of the request.
	        $previousErrorMode = libxml_use_internal_errors(true);

	        // Clean up
	        $content = $this->purifyHTML($content);

	        // a[href] => content (href)
	        $content = $this->linksToText($content);

	        // <p>text</p> => text <line break>
	        $content = $this->paragraphsToText($content);

	        // all else => strip
	        $content = strip_tags($content);

	        // Drop the errors collected while parsing, then restore the caller's mode.
	        libxml_clear_errors();
	        libxml_use_internal_errors($previousErrorMode);

	        return $content;
	    }

	    /**
	     * Replaces every anchor with its text, followed by the link target in
	     * parentheses when the anchor has a non-empty href.
	     *
	     * @param string $content HTML fragment
	     * @return string Fragment with the anchors replaced, trimmed
	     */
	    protected function linksToText($content)
	    {
	        $content = (string) $content;

	        // loadHTML() rejects empty input, and there is nothing to convert anyway.
	        if (trim($content) === '') {
	            return '';
	        }

	        $dom = new \DOMDocument();
	        $dom->loadHTML($content, LIBXML_NOERROR | LIBXML_NOENT);

	        foreach ($dom->getElementsByTagName('a') as $tag) {
	            /** @var \DOMElement $tag */
	            $href = $tag->getAttribute('href');

	            // Compare against '' explicitly: empty() would also discard href="0".
	            $output = ($href !== '') ? "{$tag->textContent} ({$href})" : $tag->textContent;
	            $content = $this->replaceTagInContent($tag, $output, $content);
	        }

	        return trim($content);
	    }

	    /**
	     * Trim and Tidy's up HTML so that DOM Parser can handle it
	     *
	     * Allowing it to wrap with body/html allows it to properly handle segments of html with paragraphs without getting
	     * lost. The html/body wrapper is removed again before returning.
	     *
	     * @param string $content
	     * @return string
	     */
	    public function purifyHTML($content)
	    {
	        $content = (string) $content;

	        // loadHTML() rejects empty input; an empty fragment is already "pure".
	        if (trim($content) === '') {
	            return '';
	        }

	        $dom = new \DOMDocument();
	        $dom->loadHTML($content, LIBXML_HTML_NODEFDTD | LIBXML_NOENT);

	        // The "s" modifier lets "." match line breaks; without it any fragment
	        // spanning several lines would keep its <html><body> wrapper.
	        return trim(preg_replace('/<html><body>(.*)<\/body><\/html>/s', '$1', $dom->saveHTML()));
	    }

	    /**
	     * Replaces all paragraphs with text and line breaks.
	     *
	     * @param string $content HTML fragment
	     * @return string Fragment with each paragraph replaced by its text plus "\n", trimmed
	     */
	    public function paragraphsToText($content)
	    {
	        $content = (string) $content;

	        // loadHTML() rejects empty input, and there is nothing to convert anyway.
	        if (trim($content) === '') {
	            return '';
	        }

	        $dom = new \DOMDocument();
	        $dom->loadHTML($content, LIBXML_NOERROR | LIBXML_NOENT);

	        foreach ($dom->getElementsByTagName('p') as $tag) {
	            /** @var \DOMElement $tag */
	            $output = "{$tag->textContent}\n";
	            $content = $this->replaceTagInContent($tag, $output, $content);
	        }

	        return trim($content);
	    }

	    /**
	     * Replaces the serialised HTML of $tag inside $content with $output.
	     *
	     * The element is copied into a scratch document so that it can be
	     * serialised on its own; every occurrence of that markup in $content is
	     * then substituted. If the serialised markup does not occur in $content,
	     * $content is returned unchanged.
	     *
	     * @param \DOMElement $tag
	     * @param string $output
	     * @param string $content
	     * @return string
	     */
	    protected function replaceTagInContent(\DOMElement $tag, $output, $content)
	    {
	        $tmpDoc = new \DOMDocument();
	        $tmpDoc->appendChild($tmpDoc->importNode($tag, true));
	        $htmlRepresentation = $tmpDoc->saveHTML();

	        // saveHTML() appends a trailing newline that is not part of the element.
	        return str_replace(trim($htmlRepresentation), $output, $content);
	    }
	}