Using Python's json module
By John Lekberg on December 11, 2020.
This week's post is about Python's json module.
JavaScript Object Notation (JSON) is a popular data interchange format. (See RFC 7159.) Encoding and decoding data from JSON is an easy way to share data between different systems and programming environments.
You will learn:
- How to read and write JSON.
- How to encode custom objects into JSON and also decode them.
Reading and writing JSON data
Use json.loads to read JSON data into Python:
import json

# json.loads parses a JSON document held in a string into Python objects.
json.loads("""
{
    "a": [1, 2, 3],
    "b": { "c": "d" }
}
""")
{'a': [1, 2, 3], 'b': {'c': 'd'}}
And use json.dumps to write Python data into JSON:
# Serialize nested Python containers into a JSON-formatted string.
json.dumps({"a": [1, 2, 3], "b": {"c": "d"}})
'{"a": [1, 2, 3], "b": {"c": "d"}}'
json.loads and json.dumps deal with string objects (json.loads also supports bytes objects). But if you want to read and write with file objects, then use json.load and json.dump instead:
data = {"a": [1, 2, 3], "b": {"c": "d"}}

# json.dump writes the serialized text straight to an open file object.
with open("document.json", "w", encoding="utf-8") as file:
    json.dump(data, file)

# json.load parses the JSON text back out of a file object.
with open("document.json", "r", encoding="utf-8") as file:
    new_data = json.load(file)

new_data
{'a': [1, 2, 3], 'b': {'c': 'd'}}
# Check that the data read back from the file equals what was written.
new_data == data
True
Encoding custom objects into JSON
Here's the Python data that json.dumps supports:
- Dictionary objects (dict).
- List objects (list) and tuple objects (tuple).
- String objects (str).
- Integer objects (int) and floating-point objects (float).
- Boolean objects (True, False) and None objects (None).
(json.dumps also supports enum.IntEnum and enum.IntFlag objects.)
But what if you need to encode binary data or timestamps?
# bytes is not among the types json.dumps can serialize, so this raises TypeError.
json.dumps(b"\xc9\xe3\x9d\xd0/\xdb\xf0S\x05\x98")
TypeError: Object of type bytes is not JSON serializable
import datetime

# datetime is not among the types json.dumps can serialize, so this raises TypeError.
json.dumps(datetime.datetime(2019, 6, 17, 3, 46, 23))
TypeError: Object of type datetime is not JSON serializable
You could manually encode the data. E.g.,
import base64

data = b"\xc9\xe3\x9d\xd0/\xdb\xf0S\x05\x98"
# Base64 turns the raw bytes into ASCII bytes; .decode() yields a str that
# json.dumps can serialize.
data = base64.b64encode(data).decode()
json.dumps(data)
'"yeOd0C/b8FMFmA=="'
data = datetime.datetime(2019, 6, 17, 3, 46, 23)
# An ISO 8601 string is a plain str, which json.dumps can serialize.
data = data.isoformat()
json.dumps(data)
'"2019-06-17T03:46:23"'
But, what if the data that you need to encode is embedded inside other data? E.g.,
# Binary secrets nested at different depths: directly inside a list, and as
# the "value" entry of a dict inside a list.
data = {
    "server-a": {
        "secrets": [
            b"\xc9\xe3\x9d\xd0/\xdb\xf0S\x05\x98",
            b"\xe3\x9d\x98\xc9\xf0S\xdb\xd0/\x05",
        ],
    },
    "server-b": {
        "secrets": [
            b"\xc9\xf0S\xe3\x9d\xd0/\xdb\x05\x98",
            {
                "compromised": "2020-10-01",
                "value": b"\xc9\x9d\x98\xd0/\xdb\x05\xf0S\xe3",
            },
        ],
    },
}
Now the manual solution is more complicated:
# Walk every server's secret list and replace raw bytes with Base64 text,
# because json.dumps cannot serialize bytes objects.
for server_data in data.values():
    secrets = server_data["secrets"]
    for i, secret in enumerate(secrets):
        if isinstance(secret, bytes):
            secrets[i] = base64.b64encode(secret).decode()
        elif isinstance(secret, dict) and isinstance(
            secret.get("value"), bytes
        ):
            # The dict lives inside the list, so updating it in place is
            # enough; no reassignment by index is needed.
            secret["value"] = base64.b64encode(secret["value"]).decode()

print(json.dumps(data, indent=2))
{
"server-a": {
"secrets": [
"yeOd0C/b8FMFmA==",
"452YyfBT29AvBQ=="
]
},
"server-b": {
"secrets": [
"yfBT453QL9sFmA==",
{
"compromised": "2020-10-01",
"value": "yZ2Y0C/bBfBT4w=="
}
]
}
}
Instead of doing it manually, use the default= parameter for json.dumps:
data = {
    "server-a": {
        "last-reboot": datetime.datetime(2010, 1, 2, 19, 23, 18),
        "secrets": [
            b"\xc9\xe3\x9d\xd0/\xdb\xf0S\x05\x98",
            b"\xe3\x9d\x98\xc9\xf0S\xdb\xd0/\x05",
        ],
    },
    "server-b": {
        "last-reboot": datetime.datetime(2011, 2, 6, 2, 4, 21),
        "secrets": [
            b"\xc9\xf0S\xe3\x9d\xd0/\xdb\x05\x98",
            {
                "compromised": datetime.datetime(2017, 4, 23, 8, 2, 43),
                "value": b"\xc9\x9d\x98\xd0/\xdb\x05\xf0S\xe3",
            },
        ],
    },
}


def custom_encoder(x):
    """Convert an object json.dumps cannot serialize into one it can.

    Passed as ``default=`` to json.dumps, which calls it for each object it
    does not know how to serialize.

    Args:
        x: The unsupported object.

    Returns:
        An ISO 8601 string for datetime objects, or a Base64 string for
        bytes objects.

    Raises:
        TypeError: If ``x`` is neither a datetime nor a bytes object.
    """
    if isinstance(x, datetime.datetime):
        return x.isoformat()
    elif isinstance(x, bytes):
        return base64.b64encode(x).decode()
    else:
        # Name the offending type so the failure is easy to diagnose.
        raise TypeError(
            f"Object of type {type(x).__name__} is not JSON serializable"
        )


print(json.dumps(data, default=custom_encoder, indent=2))
{
"server-a": {
"last-reboot": "2010-01-02T19:23:18",
"secrets": [
"yeOd0C/b8FMFmA==",
"452YyfBT29AvBQ=="
]
},
"server-b": {
"last-reboot": "2011-02-06T02:04:21",
"secrets": [
"yfBT453QL9sFmA==",
{
"compromised": "2017-04-23T08:02:43",
"value": "yZ2Y0C/bBfBT4w=="
}
]
}
}
(You can also subclass json.JSONEncoder and use the cls= parameter for json.dumps.)
Decoding custom objects from JSON
To decode custom objects, use the object_hook= parameter for json.loads:
import json

# Each account balance is stored as a tagged JSON object: "type" names the
# Python type and "value" holds its string form.
document = """
{
    "account/bob": { "type": "decimal", "value": "189.69" },
    "account/john": { "type": "decimal", "value": "175.62" }
}
"""

json.loads(document)
{'account/bob': {'type': 'decimal', 'value': '189.69'},
'account/john': {'type': 'decimal', 'value': '175.62'}}
from decimal import Decimal


def custom_decoder(x):
    """Turn a tagged JSON object into a Decimal.

    Passed as ``object_hook=`` to json.loads, which calls it with the dict
    built for each decoded JSON object.

    Args:
        x: The dict decoded from a JSON object.

    Returns:
        ``Decimal(x["value"])`` when ``x["type"]`` is ``"decimal"``;
        otherwise ``x`` unchanged.
    """
    if x.get("type") == "decimal":
        return Decimal(x["value"])
    else:
        return x


json.loads(document, object_hook=custom_decoder)
{'account/bob': Decimal('189.69'),
'account/john': Decimal('175.62')}
Because the object_hook= parameter is only triggered when reading JSON objects (as opposed to JSON arrays, JSON strings, etc.), I need to change the way I encode binary data and timestamps:
data = {
    "server-a": {
        "last-reboot": datetime.datetime(2010, 1, 2, 19, 23, 18),
        "secrets": [
            b"\xc9\xe3\x9d\xd0/\xdb\xf0S\x05\x98",
            b"\xe3\x9d\x98\xc9\xf0S\xdb\xd0/\x05",
        ],
    },
    "server-b": {
        "last-reboot": datetime.datetime(2011, 2, 6, 2, 4, 21),
        "secrets": [
            b"\xc9\xf0S\xe3\x9d\xd0/\xdb\x05\x98",
            {
                "compromised": datetime.datetime(2017, 4, 23, 8, 2, 43),
                "value": b"\xc9\x9d\x98\xd0/\xdb\x05\xf0S\xe3",
            },
        ],
    },
}


def custom_encoder(x):
    """Wrap an unsupported object in a tagged dict that json.dumps accepts.

    Passed as ``default=`` to json.dumps. The result is a JSON object, not a
    bare string, so that a decoder's ``object_hook=`` can recognise it.

    Args:
        x: The object json.dumps could not serialize.

    Returns:
        ``{"type": "datetime", "value": <ISO 8601 string>}`` for datetime
        objects, or ``{"type": "bytes", "value": <Base64 string>}`` for
        bytes objects.

    Raises:
        TypeError: If ``x`` is neither a datetime nor a bytes object.
    """
    if isinstance(x, datetime.datetime):
        return {"type": "datetime", "value": x.isoformat()}
    elif isinstance(x, bytes):
        return {
            "type": "bytes",
            "value": base64.b64encode(x).decode(),
        }
    else:
        # Name the offending type so the failure is easy to diagnose.
        raise TypeError(
            f"Object of type {type(x).__name__} is not JSON serializable"
        )


document = json.dumps(data, default=custom_encoder, indent=2)
print(document)
{
"server-a": {
"last-reboot": {
"type": "datetime",
"value": "2010-01-02T19:23:18"
},
"secrets": [
{
"type": "bytes",
"value": "yeOd0C/b8FMFmA=="
},
{
"type": "bytes",
"value": "452YyfBT29AvBQ=="
}
]
},
"server-b": {
"last-reboot": {
"type": "datetime",
"value": "2011-02-06T02:04:21"
},
"secrets": [
{
"type": "bytes",
"value": "yfBT453QL9sFmA=="
},
{
"compromised": {
"type": "datetime",
"value": "2017-04-23T08:02:43"
},
"value": {
"type": "bytes",
"value": "yZ2Y0C/bBfBT4w=="
}
}
]
}
}
Now, here's a custom decoder:
def custom_decoder(x):
    """Rebuild datetime and bytes objects from their tagged JSON form.

    Passed as ``object_hook=`` to json.loads, which calls it with the dict
    built for each decoded JSON object.

    Args:
        x: The dict decoded from a JSON object.

    Returns:
        A datetime when ``x["type"]`` is ``"datetime"``, a bytes object when
        it is ``"bytes"``; otherwise ``x`` unchanged.
    """
    kind = x.get("type")
    if kind == "datetime":
        return datetime.datetime.fromisoformat(x["value"])
    elif kind == "bytes":
        return base64.b64decode(x["value"].encode())
    else:
        return x


json.loads(document, object_hook=custom_decoder)
{
"server-a": {
"last-reboot": datetime.datetime(
2010, 1, 2, 19, 23, 18
),
"secrets": [
b"\xc9\xe3\x9d\xd0/\xdb\xf0S\x05\x98",
b"\xe3\x9d\x98\xc9\xf0S\xdb\xd0/\x05",
],
},
"server-b": {
"last-reboot": datetime.datetime(
2011, 2, 6, 2, 4, 21
),
"secrets": [
b"\xc9\xf0S\xe3\x9d\xd0/\xdb\x05\x98",
{
"compromised": datetime.datetime(
2017, 4, 23, 8, 2, 43
),
"value": b"\xc9\x9d\x98\xd0/\xdb\x05\xf0S\xe3",
},
],
},
}
(You can also subclass json.JSONDecoder and use the cls= parameter for json.loads.)
In conclusion...
In this week's post, you learned how to read and write JSON with json.dumps and json.loads (and json.dump and json.load for file objects). You also learned how to encode and decode custom objects, like timestamps and binary data.
My challenge to you:
Besides integer objects and floating-point objects, Python has other numeric types: complex objects, decimal.Decimal, fractions.Fraction. Create a custom encoder and decoder for these types.
If you enjoyed this week's post, share it with your friends and stay tuned for next week's post. See you then!
(If you spot any errors or typos on this post, contact me via my contact page.)