|
[geeklog-cvs] geeklog: New function COM_getTextContent turns HTML into continuous text (msg#00026, geeklog-cvs)
details: http://project.geeklog.net/cgi-bin/hgweb.cgi/rev/dd235920fb85 changeset: 7186:dd235920fb85 user: Dirk Haun <dirk@xxxxxxxxxxxxxx> date: Sat Jul 25 19:38:57 2009 +0200 description: New function COM_getTextContent turns HTML into continuous text, e.g. for word counts and text excerpts diffstat: public_html/lib-common.php | 34 ++++++++++++++++++++++++++++++++++ system/classes/search.class.php | 5 +---- system/lib-story.php | 2 +- 3 files changed, 36 insertions(+), 5 deletions(-) diffs (71 lines): diff -r 85ab60faa47f -r dd235920fb85 public_html/lib-common.php --- a/public_html/lib-common.php Sat Jul 25 10:04:23 2009 +0200 +++ b/public_html/lib-common.php Sat Jul 25 19:38:57 2009 +0200 @@ -6933,6 +6933,40 @@ } /** +* Turn a piece of HTML into continuous(!) plain text +* +* This function removes HTML tags, line breaks, etc. and returns one long +* line of text. This is useful for word counts (do an explode() on the result) +* and for text excerpts. +* +* @param string $text original text, including HTML and line breaks +* @return string continuous plain text +* +*/ +function COM_getTextContent($text) +{ + // replace <br> with spaces so that Text<br>Text becomes two words + $text = preg_replace('/\<br(\s*)?\/?\>/i', ' ', $text); + + // add extra space between tags, e.g. 
<p>Text</p><p>Text</p> + $text = str_replace('><', '> <', $text); + + // only now remove all HTML tags + $text = strip_tags($text); + + // replace all tabs, newlines, and carrriage returns with spaces + $text = str_replace(array("\011", "\012", "\015"), ' ', $text); + + // replace entities with plain spaces + $text = str_replace(array('&#20;', '&#160;', '&nbsp;'), ' ', $text); + + // collapse whitespace + $text = preg_replace('/\s\s+/', ' ', $text); + + return trim($text); +} + +/** * Now include all plugin functions */ foreach ($_PLUGINS as $pi_name) { diff -r 85ab60faa47f -r dd235920fb85 system/classes/search.class.php --- a/system/classes/search.class.php Sat Jul 25 10:04:23 2009 +0200 +++ b/system/classes/search.class.php Sat Jul 25 19:38:57 2009 +0200 @@ -795,10 +795,7 @@ */ function _shortenText($keyword, $text, $num_words = 7) { - $text = strip_tags($text); - $text = str_replace(array("\011", "\012", "\015"), ' ', trim($text)); - $text = str_replace('&nbsp;', ' ', $text); - $text = preg_replace('/\s\s+/', ' ', $text); + $text = COM_getTextContent($text); $words = explode(' ', $text); $word_count = count($words); if ($word_count <= $num_words) { diff -r 85ab60faa47f -r dd235920fb85 system/lib-story.php --- a/system/lib-story.php Sat Jul 25 10:04:23 2009 +0200 +++ b/system/lib-story.php Sat Jul 25 19:38:57 2009 +0200 @@ -325,7 +325,7 @@ { $article->set_var( 'lang_readmore', $LANG01[2] ); $article->set_var( 'lang_readmore_words', $LANG01[62] ); - $numwords = COM_numberFormat (sizeof( explode( ' ', strip_tags( $bodytext )))); + $numwords = COM_numberFormat(count(explode(' ', COM_getTextContent($bodytext)))); $article->set_var( 'readmore_words', $numwords ); $article->set_var( 'readmore_link', _______________________________________________ geeklog-cvs mailing list geeklog-cvs@xxxxxxxxxxxxxxxxx http://eight.pairlist.net/mailman/listinfo/geeklog-cvs
|
|
||||||||||||||||||||||||||
| News | Mail Home | sitemap | FAQ | advertise |