My solution was to open the BSON files manually (with Python), find the large documents and remove part of them, then write the BSON objects to a new BSON file and load that edited BSON file, which then imported into mongo successfully.
This does not fulfil my wish of being able to load the dumped DB into the system without modifying it!
Python3:

import bson  # the bson module that ships with pymongo
from pprint import pprint


def get_bson_data(filename):
    # Read the whole .bson dump and decode every document into a dict
    with open(filename, "rb") as f:
        data = bson.decode_all(f.read())
    return data

def report_problematics_documents(data):
    # Show every document that is considered too big, pausing after each one
    problematics = []
    for item in data:
        if is_too_big(item):
            print(item)
            input("give me some more...")
            problematics.append(item)
    print(f"data len: {len(data)}")
    print(f"problematics: {problematics}")
    print(f"problematics len: {len(problematics)}")

def shrink_data(data):
    # Replace every oversized document with a shrunken version, in place
    for i, item in enumerate(data):
        if is_too_big(item):
            data[i] = shrink_item(item)  # or delete it...
            print(f"item shrunk: {i}")

def write_bson_file(data, filename):
    # Re-encode every document and write them back to back into a new .bson file
    with open(filename, "wb") as f:
        for event in data:
            bson_data = bson.BSON.encode(event)
            f.write(bson_data)

def is_too_big(item):
    # you need to implement this one...
    pass


def shrink_item(item):
    # you need to implement this one...
    pass

def main():
    bson_file_name = "/path/to/file.bson"
    data = get_bson_data(bson_file_name)
    report_problematics_documents(data)

    shrink_data(data)
    report_problematics_documents(data)

    new_filename = bson_file_name + ".new"
    write_bson_file(data, new_filename)

    print("Load new data")
    data = get_bson_data(new_filename)
    report_problematics_documents(data)


if __name__ == '__main__':
    main()
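
For completeness, here is one way the two placeholder functions could be filled in. This is only a sketch under assumptions that are not part of the actual fix above: it treats a document as too big once its encoded size reaches MongoDB's 16 MB BSON document limit, and it shrinks a document by dropping a hypothetical field named big_field (both the threshold and the field name are placeholders to adapt to your data):

import bson

MAX_DOC_SIZE = 16 * 1024 * 1024  # MongoDB's maximum BSON document size


def is_too_big(item):
    # "Too big" here means the encoded document is at or above the 16 MB limit
    return len(bson.BSON.encode(item)) >= MAX_DOC_SIZE


def shrink_item(item):
    # Drop a hypothetical oversized field; pick whatever field is bloating your documents
    item.pop("big_field", None)
    return item

The 16 MB figure is MongoDB's documented per-document limit; if your import fails at a smaller size, lower the threshold accordingly.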
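
If the dump is too large to comfortably read into memory with decode_all, pymongo's bson module can also decode a file incrementally. A minimal streaming variant of get_bson_data (my own sketch, with a made-up name iter_bson_data) would look like this:

import bson


def iter_bson_data(filename):
    # Yield one decoded document at a time instead of loading the whole dump at once
    with open(filename, "rb") as f:
        for doc in bson.decode_file_iter(f):
            yield doc

Note that shrink_data above assigns back into a list (data[i] = ...), so with a generator you would need to collect the documents or rework that part; this is only meant to show the streaming decode.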