This Python function, `json_comquotes`, is a handy tool for preprocessing JSON data that contains unescaped quotes within string values. It takes a JSON string as input and transforms it by backslash-escaping the double and single quotes found within the string values, allowing you to parse the JSON data without errors.
Take a look into json_esquotes
- Replaces double quotes `"` within string values with escaped double quotes `\"`.
- Replaces single quotes `'` within string values with escaped single quotes `\'`.
- Outputs the processed JSON as a dictionary.
- Pass your raw JSON string as input to the `json_comquotes` function.
- The function will:
  - On success: return the processed JSON as a dictionary;
  - On failure: raise `ValueError`.
import json, re, ast
def json_comquotes(raw_json: str, lone_char_searches: int = 2, debug: bool = False):
    """Parse JSON-like text whose string values contain unescaped quotes.

    The text is first tried unchanged with ``json.loads`` and then with
    ``ast.literal_eval``.  If both fail, spaces next to structural quotes
    are removed, and every quote that sits directly between two
    word/whitespace characters is backslash-escaped.  Both parsers are then
    tried again on the repaired text.

    Args:
        raw_json: The text to parse.
        lone_char_searches: Number of escape passes to run.  One regex pass
            cannot escape two quotes that share a neighbouring character
            (as in ``a"a"a``), so the substitution is re-applied to its own
            output this many times.
        debug: When true, print the intermediate text after each pass.

    Returns:
        The object produced by the first parser that succeeds (a dict for
        object-shaped input).

    Raises:
        ValueError: If the text still cannot be parsed after the last pass,
            or if the direct parse failed and ``lone_char_searches`` is
            less than 1 so that no repair pass could run.

    Note:
        The whitespace clean-up is a plain text replacement over the whole
        input, so matching sequences inside string values are altered too.
        In particular an empty string value in input that needed repair
        comes back as a single space.
    """
    # Fast path: the input may already be valid JSON or a Python literal.
    for parser in (json.loads, ast.literal_eval):
        try:
            return parser(raw_json)
        except Exception:
            # Deliberate fall-through to the next parser / the repair path.
            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            pass

    # Remove the space between a structural character and its quote, then
    # open up adjacent quotes ('' / "") with a space so the escape regex,
    # which needs a word/whitespace character on both sides, can see them.
    # Order matters: single-quote fixes first, then double-quote fixes.
    whitespace_fixes = (
        (": '", ":'"), (", '", ",'"), ("{ '", "{'"), ("[ '", "['"),
        ("' }", "'}"), ("''", "' '"),
        (': "', ':"'), (', "', ',"'), ('{ "', '{"'), ('[ "', '["'),
        ('" }', '"}'), ('""', '" "'),
    )
    candidate = raw_json
    for old, new in whitespace_fixes:
        candidate = candidate.replace(old, new)

    # dq / sq stand for double / single quote.
    dq_pattern = re.compile(r'([\s\w])"([\s\w])')
    dq_sub = r"\1\"\2"
    sq_pattern = re.compile(r"([\s\w])'([\s\w])")
    sq_sub = r'\1\'\2'

    json_error = None
    ast_error = None
    for attempt in range(lone_char_searches):
        # Each pass escapes the quotes the previous pass had to skip because
        # their neighbour was consumed by an overlapping match.
        candidate = dq_pattern.sub(dq_sub, candidate)
        candidate = sq_pattern.sub(sq_sub, candidate)
        if debug:
            print(f"Iteration #{attempt+1}:", candidate)

        try:
            # Gate: only once the text parses is it safe to take back the
            # spaces inserted between adjacent quotes above.
            json.loads(candidate)
            candidate = (
                candidate.replace('\\" "', '\\""')
                .replace('\\" \\"', '\\"\\"')
                .replace("\\' '", "\\''")
                .replace("\\' \\'", "\\'\\'")
            )
            return json.loads(candidate)
        except Exception as ej:
            json_error = ej

        try:
            ast.literal_eval(candidate)
            candidate = candidate.replace('\\" "', '\\""').replace("\\' '", "\\''")
            return ast.literal_eval(candidate)
        except Exception as ea:
            ast_error = ea

    if ast_error is None:
        # No repair pass ran (lone_char_searches < 1); report the failure
        # instead of silently returning None.
        raise ValueError(
            "Json could not be parsed and no escape pass was run "
            f"(lone_char_searches={lone_char_searches})"
        )
    raise ValueError(
        f"Json Parse exception: {json_error}\nAst Parse exception : {ast_error}\nProcessed Json : {candidate}"
    ) from ast_error
if __name__ == "__main__":
    # Demo: run a handful of malformed payloads through json_comquotes and
    # print either the repaired result or the reason the repair failed.
    samples = (
        '{"na"me": "Jack O"Sullivan", "id": "1"}',
        '{"name": "Jack: The "OG" O"Sullivan"", "id": "2"}',
        '{"name": "Jack: The "OG"", "surname": \'O\'Sullivan\', "id": "3"}',
        '{"test_str": {"1singlechar": "a""a""a", "2singlechars": "a"a"a"a"a"a"a"a"a"}, "id": "5"}',
        "{'name': 'Jack O'Sullivan, 'id': '6'}",
    )
    for sample in samples:
        try:
            parsed = json_comquotes(sample)
            # The raw text is echoed only after a successful parse, so a
            # failing sample shows up solely in the error branch below.
            print("Raw json :", sample)
            pretty = json.dumps(parsed, indent=2)
            print("Processed json:", pretty, "\n")
        except Exception as err:
            print("Something went wrong!")
            print("Raw json:", sample)
            print(f"{err}\n")
Raw json : {"na"me": "Jack O"Sullivan", "id": "1"}
Processed json: {
"na\"me": "Jack O\"Sullivan",
"id": "1"
}
Raw json : {"name": "Jack: The "OG" O"Sullivan"", "id": "2"}
Processed json: {
"name": "Jack: The \"OG\" O\"Sullivan\"",
"id": "2"
}
Raw json : {"name": "Jack: The "OG"", "surname": 'O'Sullivan', "id": "3"}
Processed json: {
"name": "Jack: The \"OG\"",
"surname": "O'Sullivan",
"id": "3"
}
Raw json : {"test_str": {"1singlechar": "a""a""a", "2singlechars": "a"a"a"a"a"a"a"a"a"}, "id": "5"}
Processed json: {
"test_str": {
"1singlechar": "a\"\"a\"\"a",
"2singlechars": "a\"a\"a\"a\"a\"a\"a\"a\"a"
},
"id": "5"
}
Something went wrong!
Raw json: {'name': 'Jack O'Sullivan, 'id': '6'}
Json Parse exception: Expecting property name enclosed in double quotes: line 1 column 2 (char 1)
Ast Parse exception : unterminated string literal (detected at line 1) (<unknown>, line 1)
Processed Json : {'name':'Jack O\'Sullivan,'id':'6'}