mechkurt | Bon ben impossible de trouver une solution en regex pur, je suis passé par php DOM...
Code :
/**
 * Strips presentational <span style="..."> wrappers from an HTML fragment.
 *
 * Opening span tags whose style attribute holds exactly one font-family,
 * font-size or font-weight declaration (plus color when $remove_color is set)
 * are rewritten to <span class="delete_me">. When $clean_span is set, those
 * marked spans are then unwrapped through the DOM extension: their children
 * stay where they are and the span element itself is dropped. Finally every
 * run of two or more whitespace characters is collapsed to a single space.
 *
 * @param string $content      HTML fragment to clean.
 * @param bool   $remove_color Also mark spans whose style is a single color declaration.
 * @param bool   $clean_span   Unwrap the marked spans instead of leaving the marker
 *                             class in the output. This needs the fragment to be
 *                             well-formed XML; when it is not, the marked spans are
 *                             left in place rather than losing the content.
 *
 * @return string The cleaned fragment, or '' when $content is empty after trimming.
 */
function CleanHtml($content, $remove_color = false, $clean_span = false) {
    $content = trim((string) $content);
    // Strict comparison: empty() would also discard the legitimate content "0".
    if ($content === '') {
        return '';
    }

    // Mark the spans to remove by swapping their style attribute for a marker class.
    if ($remove_color) {
        $pattern = '/<span style="(color|font-(family|size|weight)):([a-z0-9-,\s#]*);">/isU';
    } else {
        $pattern = '/<span style="(font-(family|size|weight)):([a-z0-9-,\s]*);">/isU';
    }
    $count = 0;
    $marked = preg_replace($pattern, '<span class="delete_me">', $content, -1, $count);
    if ($marked === null) {
        // PCRE failure (e.g. backtrack limit): keep the input rather than wiping it.
        $count = 0;
    } else {
        $content = $marked;
    }

    if ($clean_span && $count > 0) {
        // Unwrap the marked spans. Parser errors are collected internally so that
        // a malformed fragment does not emit warnings; the caller's libxml
        // error-handling mode is restored right after parsing.
        $previous = libxml_use_internal_errors(true);
        $xml = new DOMDocument();
        $loaded = $xml->loadXML('<?xml version="1.0" encoding="utf-8"?><root>' . "\n" . $content . "\n" . '</root>');
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        // On a parse failure the document is empty, so the marked content is kept as is.
        if ($loaded) {
            $xpath = new DOMXPath($xml);
            $spans = $xpath->query('//span[@class="delete_me"]');
            if ($spans !== false) {
                foreach ($spans as $span) {
                    // insertBefore() moves the node, so firstChild advances on each
                    // pass; an empty span simply skips the loop.
                    while ($span->firstChild !== null) {
                        $span->parentNode->insertBefore($span->firstChild, $span);
                    }
                    $span->parentNode->removeChild($span);
                }
            }

            // saveXML() emits the declaration on its own line followed by the
            // <root> wrapper; cut exactly that wrapper off both ends.
            $head = strlen('<?xml version="1.0" encoding="utf-8"?>' . "\n" . '<root>' . "\n");
            $tail = strlen("\n" . '</root>' . "\n");
            $content = substr($xml->saveXML(), $head, -$tail);
        }
    }

    // Collapse runs of multiple whitespace characters.
    return preg_replace('/\s\s+/', ' ', $content);
}
// Sample fragment for CleanHtml(): deeply nested font/color spans around text
// and an underline span. The blank first line keeps the leading newline of the
// original quoted literal.
$content = <<<'HTML'

<p><span style="font-family: Trebuchet MS,sans-serif;">
<span style="font-size: x-small;">
<span style="color: #FF0000;">
<span style="font-family: Times New Roman,serif;">
<span style="font-size: small;">
<span style="font-weight: normal;">
ABlah1
<span style="text-decoration: underline;">
blah2
</span>
blah3
</span>
</span>
</span>
</span>
</span>
</span></p>
<p><span style="font-weight: normal;">
BBlah1
<span style="text-decoration: underline;">
blah2
</span>
blah3
</span>
<br/>
<span style="color: #FF0000;">
CBlah1
<span style="font-size: x-large;">
blah2
</span>
blah3
</span></p>
HTML;

// Print the fragment cleaned with the default options.
echo CleanHtml($content);
|
|