Skip to content

Commit 18b5dd8

Browse files
committed
Merge pull request #21 from GabrieleNunez/master
- Moved Regular Expression generation into function generateCensorChec…
2 parents f75d765 + 1065ea5 commit 18b5dd8

File tree

1 file changed

+60
-42
lines changed

1 file changed

+60
-42
lines changed

src/CensorWords.php

Lines changed: 60 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,12 @@
44

55
class CensorWords
66
{
7+
/*
8+
* A ton of regular expression strings are generated from the loaded dictionary the first time censorString is called
9+
* These regular expressions are used to perform the profanity checks.
10+
* Store them here so when we call censorString we don't need to regenerate them on every call
11+
*/
12+
private $censorChecks = null;
713

814
public function __construct() {
915
$this->replacer = '*';
@@ -45,7 +51,6 @@ public function setDictionary($dictionary) {
4551
}
4652
}
4753
$this->badwords = $badwords;
48-
4954
}
5055

5156

@@ -72,7 +77,55 @@ public function randCensor($chars, $len) {
7277
substr($chars, 0, ($len%strlen($chars))));
7378

7479
}
80+
81+
/**
 * Generates the regular expressions that are going to be used to check for profanity
 * and stores them in $this->censorChecks (one pattern per entry of $this->badwords).
 *
 * Every ASCII letter of a bad word is expanded into an alternation group that also
 * matches its "leet" look-alikes (digits, symbols, accented and Greek characters).
 * All other characters of the word are copied into the pattern unchanged.
 *
 * NOTE(review): the generated patterns do not record which $fullWords mode they were
 * built for. The visible caller (censorString) only regenerates them when the cache is
 * empty, so a later call with a different $fullWords value appears to reuse these
 * patterns - confirm whether that is intended.
 *
 * @param boolean $fullWords Option to generate regular expressions used for full words instead. Default is false
 * @return void
 */
private function generateCensorChecks($fullWords = false) {

    // Leet equivalents, keyed by the lower-case ASCII letter they stand in for.
    $leetReplace = array(
        'a' => '(a|a\.|a\-|4|@|Á|á|À|Â|à|Â|â|Ä|ä|Ã|ã|Å|å|α|Δ|Λ|λ)',
        'b' => '(b|b\.|b\-|8|\|3|ß|Β|β)',
        'c' => '(c|c\.|c\-|Ç|ç|¢|€|<|\(|{|©)',
        'd' => '(d|d\.|d\-|&part;|\|\)|Þ|þ|Ð|ð)',
        'e' => '(e|e\.|e\-|3|€|È|è|É|é|Ê|ê|∑)',
        'f' => '(f|f\.|f\-|ƒ)',
        'g' => '(g|g\.|g\-|6|9)',
        'h' => '(h|h\.|h\-|Η)',
        'i' => '(i|i\.|i\-|!|\||\]\[|]|1|∫|Ì|Í|Î|Ï|ì|í|î|ï)',
        'j' => '(j|j\.|j\-)',
        'k' => '(k|k\.|k\-|Κ|κ)',
        // "l\." was missing (the entry only had "1\."); both are accepted now so
        // existing matches are preserved.
        'l' => '(l|l\.|1\.|l\-|!|\||\]\[|]|£|∫|Ì|Í|Î|Ï)',
        'm' => '(m|m\.|m\-)',
        'n' => '(n|n\.|n\-|η|Ν|Π)',
        'o' => '(o|o\.|o\-|0|Ο|ο|Φ|¤|°|ø)',
        'p' => '(p|p\.|p\-|ρ|Ρ|¶|þ)',
        'q' => '(q|q\.|q\-)',
        'r' => '(r|r\.|r\-|®)',
        's' => '(s|s\.|s\-|5|\$|§)',
        't' => '(t|t\.|t\-|Τ|τ)',
        'u' => '(u|u\.|u\-|υ|µ)',
        'v' => '(v|v\.|v\-|υ|ν)',
        'w' => '(w|w\.|w\-|ω|ψ|Ψ)',
        'x' => '(x|x\.|x\-|Χ|χ)',
        'y' => '(y|y\.|y\-|¥|γ|ÿ|ý|Ÿ|Ý)',
        'z' => '(z|z\.|z\-|Ζ)',
    );

    // Word-boundary anchors are only added in full-word mode.
    $boundary = $fullWords ? '\b' : '';

    // Tolerate a dictionary that has not been loaded yet and re-index the list so the
    // generated patterns are always keyed 0..n-1.
    $badwords = is_array($this->badwords) ? array_values($this->badwords) : array();

    // Expands a single matched letter into its leet alternation group.
    // $leetReplace is passed through "use" because closures cannot see $this on PHP 5.3.
    $expandLetter = function ($letter) use ($leetReplace) {
        return $leetReplace[strtolower($letter[0])];
    };

    $censorChecks = array();
    foreach ($badwords as $badword) {
        // Expand each letter in ONE pass over the word. Replacing letter by letter over
        // the whole string (str_ireplace with arrays) re-scans text inserted by earlier
        // replacements, which corrupted the "&part;" alternative of the "d" group once
        // "p", "r" and "t" were substituted.
        $expanded = preg_replace_callback('/[A-Za-z]/', $expandLetter, $badword);

        $censorChecks[] = '/'.$boundary.$expanded.$boundary.'/i';
    }

    $this->censorChecks = $censorChecks;

}
76129

77130
/**
78131
* Apply censorship to $string, replacing $badwords with $censorChar.
@@ -81,53 +134,18 @@ public function randCensor($chars, $len) {
81134
* string[string]
82135
*/
83136
public function censorString($string, $fullWords = false) {
84-
$badwords = $this->badwords;
85-
$anThis = &$this;
86137

87-
$leet_replace = array();
88-
$leet_replace['a']= '(a|a\.|a\-|4|@|Á|á|À|Â|à|Â|â|Ä|ä|Ã|ã|Å|å|α|Δ|Λ|λ)';
89-
$leet_replace['b']= '(b|b\.|b\-|8|\|3|ß|Β|β)';
90-
$leet_replace['c']= '(c|c\.|c\-|Ç|ç|¢|€|<|\(|{|©)';
91-
$leet_replace['d']= '(d|d\.|d\-|&part;|\|\)|Þ|þ|Ð|ð)';
92-
$leet_replace['e']= '(e|e\.|e\-|3|€|È|è|É|é|Ê|ê|∑)';
93-
$leet_replace['f']= '(f|f\.|f\-|ƒ)';
94-
$leet_replace['g']= '(g|g\.|g\-|6|9)';
95-
$leet_replace['h']= '(h|h\.|h\-|Η)';
96-
$leet_replace['i']= '(i|i\.|i\-|!|\||\]\[|]|1|∫|Ì|Í|Î|Ï|ì|í|î|ï)';
97-
$leet_replace['j']= '(j|j\.|j\-)';
98-
$leet_replace['k']= '(k|k\.|k\-|Κ|κ)';
99-
$leet_replace['l']= '(l|1\.|l\-|!|\||\]\[|]|£|∫|Ì|Í|Î|Ï)';
100-
$leet_replace['m']= '(m|m\.|m\-)';
101-
$leet_replace['n']= '(n|n\.|n\-|η|Ν|Π)';
102-
$leet_replace['o']= '(o|o\.|o\-|0|Ο|ο|Φ|¤|°|ø)';
103-
$leet_replace['p']= '(p|p\.|p\-|ρ|Ρ|¶|þ)';
104-
$leet_replace['q']= '(q|q\.|q\-)';
105-
$leet_replace['r']= '(r|r\.|r\-|®)';
106-
$leet_replace['s']= '(s|s\.|s\-|5|\$|§)';
107-
$leet_replace['t']= '(t|t\.|t\-|Τ|τ)';
108-
$leet_replace['u']= '(u|u\.|u\-|υ|µ)';
109-
$leet_replace['v']= '(v|v\.|v\-|υ|ν)';
110-
$leet_replace['w']= '(w|w\.|w\-|ω|ψ|Ψ)';
111-
$leet_replace['x']= '(x|x\.|x\-|Χ|χ)';
112-
$leet_replace['y']= '(y|y\.|y\-|¥|γ|ÿ|ý|Ÿ|Ý)';
113-
$leet_replace['z']= '(z|z\.|z\-|Ζ)';
114-
115-
$words = explode(" ", $string);
116-
117-
for ($x=0; $x<count($badwords); $x++) {
118-
if($fullWords) {
119-
$badwords[$x] = '/\b'.str_ireplace(array_keys($leet_replace),array_values($leet_replace), $badwords[$x]).'\b/i';
120-
} else {
121-
$badwords[$x] = '/'.str_ireplace(array_keys($leet_replace),array_values($leet_replace), $badwords[$x]).'/i';
122-
}
123-
}
124-
138+
// generate our censor checks if they are not defined yet
139+
if(!$this->censorChecks)
140+
$this->generateCensorChecks($fullWords);
141+
142+
$anThis = &$this;
125143
$counter=0;
126144
$match = array();
127145
$newstring = array();
128146
$newstring['orig'] = html_entity_decode($string);
129147
// $anThis for <= PHP5.3
130-
$newstring['clean'] = preg_replace_callback($badwords, function($matches) use (&$anThis,&$counter,&$match) {
148+
$newstring['clean'] = preg_replace_callback($this->censorChecks, function($matches) use (&$anThis,&$counter,&$match) {
131149
$match[$counter++] = $matches[0];
132150

133151
// is $anThis->replacer a single char?

0 commit comments

Comments
 (0)