Created
July 15, 2021 16:51
-
-
Save mikkorantalainen/1a913bb9b3410f3274a062a5390e07b5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Escape the characters that the Elasticsearch `query_string` syntax reserves.
 *
 * Each reserved character is prefixed with a backslash. The exceptions are
 * `<` and `>`: Elasticsearch has no escaped form for them, so they are
 * removed from the text instead of being escaped.
 *
 * @param string $string raw text to embed in a `query_string` query
 * @return string the text with reserved characters escaped and `<` / `>` removed
 */
function escapeElasticReservedChars($string) {
    # A backslash in front of `<` or `>` does not stop them from starting a
    # range query, so the only reliable treatment is to drop them.
    $string = str_replace(array('<', '>'), '', $string);

    # Character class of the remaining reserved characters:
    # - + = & | ! ( ) { } [ ] ^ " ~ * ? : \ /
    $regex = '/[-+=&|!(){}\[\]^"~*?:\\\\\/]/';

    # In a preg replacement `\\` yields one literal backslash and `$0` is the
    # matched character, so every match becomes "\<char>".
    return preg_replace($regex, '\\\\$0', $string);
}
/**
 * Neutralise every character that the Elasticsearch `query_string` syntax
 * treats as reserved.
 *
 * Reserved characters receive a backslash prefix; `<` and `>` are stripped
 * because they cannot be safely encoded.
 *
 * @param string $s untrusted user input
 * @return string the input with reserved characters escaped or removed, for use as a `query_string` argument to elasticsearch
 */
function escapeForElasticSearch($s)
{
    # The lookup lists are built on the first call and reused afterwards.
    static $search = array();
    static $replace = array();

    if (count($search) === 0)
    {
        # https://www.elastic.co/guide/en/elasticsearch/reference/5.5/query-dsl-query-string-query.html#_reserved_characters

        # The backslash pair goes first: str_replace() applies the pairs in
        # order, so handling it later would double the backslashes that the
        # other pairs insert.
        $search[] = "\\";
        $replace[] = "\\\\";

        foreach (str_split('+-=&|><!(){}[]^"~*?:/') as $char)
        {
            $search[] = $char;
            # `>` and `<` cannot be safely encoded, so they map to nothing.
            $replace[] = ($char === ">" || $char === "<") ? "" : "\\" . $char;
        }
    }

    return str_replace($search, $replace, $s);
}
# Sample inputs: plain text, whitespace, and strings that contain the
# characters reserved by the Elasticsearch query_string syntax.
$tests = array(
    "",
    "abc",
    "123",
    " abc 123 ",
    "!\"#%&/()=",
    "<b>foo</b>",
    "a|b",
    "a||b",
    "a[5]='cat';",
    "a = b * c ?",
    "a&b",
    "&",
    "c = printf(\"%s\n\", &xyz);",
    "for (;;) {...}",
    "a = b^c",
    "a = b^^c",
    "a = b ^ ~c",
    "a := b / c",
    "/dev/null",
    "C:\\Windows\\Windows.ini",
);

# Correctness comparison: print every sample on which the two
# implementations produce different output.
foreach ($tests as $test)
{
    $a = escapeElasticReservedChars($test);
    $b = escapeForElasticSearch($test);
    if ($a === $b)
        continue;
    printf("Found a difference!\ni='%s'\na='%s'\nb='%s'\n\n", $test, $a, $b);
}

# Benchmark: each of the $max passes escapes every sample once, so the
# number of function calls actually timed is $max * count($tests).
$max = 200000;
$calls = $max * count($tests);

# (1) regex-based implementation
$start = microtime(true);
for ($i = 0; $i < $max; $i++)
    foreach ($tests as $test)
    {
        $a = escapeElasticReservedChars($test);
    }
$elapsed = microtime(true) - $start;
# %.4f formats the elapsed seconds as a number with four decimals.
printf("(1) Looping %d calls took %.4f s\n", $calls, $elapsed);

# (2) str_replace-based implementation
$start = microtime(true);
for ($i = 0; $i < $max; $i++)
    foreach ($tests as $test)
    {
        $b = escapeForElasticSearch($test);
    }
$elapsed = microtime(true) - $start;
printf("(2) Looping %d calls took %.4f s\n", $calls, $elapsed);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment