From 0b010d5c10c5010963a8b8bc0e61cff914dd9b56 Mon Sep 17 00:00:00 2001 From: MasterofJOKers Date: Tue, 18 Jan 2022 21:50:16 +0100 Subject: [PATCH] Upload to uploaddir instead of /tmp When uploading larger files, cgi.FieldStorage decides to store the files in an unnamed temporary file in /tmp while parsing the form-data. This is counter-intuitive and might not work, if the partition hosting /tmp/ is too small. Therefore, we overwrite FieldStorage's make_file() method to use the targetDir as upload path. While we're at it, we also use NamedTemporaryFile instead of TemporaryFile, because that lets us use os.link() to create a "copy" of the file-data without writing it to disk a second time. This does not work for small data, because small data is kept in an BytesIO object and thus never written to file automatically. For this case, we keep the old code, that's writing down files manually. We have to inline-define CustomFieldStorage, because FieldStorage will instantiate a new FieldStorage instance for parsing the parts of a multipart/form-data body and thus we cannot pass targetDir via __init__() argument. Signed-off-by: MasterofJOKers --- servefile/servefile.py | 43 ++++++++++++++++++++++++++++++++--------- tests/test_servefile.py | 14 ++++++++++++++ 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/servefile/servefile.py b/servefile/servefile.py index 4363de9..39d2789 100755 --- a/servefile/servefile.py +++ b/servefile/servefile.py @@ -21,6 +21,7 @@ import select import socket from subprocess import Popen, PIPE import sys +import tempfile import time # fix imports for python2/python3 @@ -611,7 +612,24 @@ class FilePutter(BaseHTTPServer.BaseHTTPRequestHandler): # create FieldStorage object for multipart parsing env = os.environ env['REQUEST_METHOD'] = "POST" - fstorage = cgi.FieldStorage(fp=self.rfile, headers=self.headers, environ=env) + targetDir = self.targetDir + + class CustomFieldStorage(cgi.FieldStorage): + + def make_file(self, *args, **kwargs): + """Overwritten to use a named file and the upload directory + + Python 2.7 has an unused "binary" argument while Python 3 does + not have any arguments. Python 2.7 does not have a + self._binary_file attribute. + """ + if sys.version_info.major == 2 or self._binary_file: + return tempfile.NamedTemporaryFile("wb+", dir=targetDir) + else: + return tempfile.NamedTemporaryFile( + "w+", encoding=self.encoding, newline='\n', dir=targetDir) + + fstorage = CustomFieldStorage(fp=self.rfile, headers=self.headers, environ=env) if "file" not in fstorage: self.sendResponse(400, "No file found in request.") return @@ -621,14 +639,21 @@ class FilePutter(BaseHTTPServer.BaseHTTPRequestHandler): self.sendResponse(400, "Filename was empty or invalid") return - # write file down to disk, send a 200 afterwards - target = open(destFileName, "wb") - bytesLeft = length - while bytesLeft > 0: - bytesToRead = min(self.blockSize, bytesLeft) - target.write(fstorage["file"].file.read(bytesToRead)) - bytesLeft -= bytesToRead - target.close() + # put the file at the right place, send 200 afterwards + if getattr(fstorage["file"].file, "name", None): + # the sent file was large, so we can just hard link the temporary + # file and are done + os.link(fstorage["file"].file.name, destFileName) + else: + # write file to disk. it was small enough so no temporary file was + # created + target = open(destFileName, "wb") + bytesLeft = length + while bytesLeft > 0: + bytesToRead = min(self.blockSize, bytesLeft) + target.write(fstorage["file"].file.read(bytesToRead)) + bytesLeft -= bytesToRead + target.close() self.sendResponse(200, "OK! Thanks for uploading") print("Received file '%s' from %s." % (destFileName, self.client_address[0])) diff --git a/tests/test_servefile.py b/tests/test_servefile.py index 8400122..1d7e0f7 100644 --- a/tests/test_servefile.py +++ b/tests/test_servefile.py @@ -357,6 +357,20 @@ def test_upload_size_limit(run_servefile, tmp_path): assert r.status_code == 200 +def test_upload_large_file(run_servefile, tmp_path): + # small files end up in BytesIO while large files get temporary files. this + # test makes sure we hit the large file codepath at least once + uploaddir = tmp_path / 'upload' + run_servefile(['-u', str(uploaddir)]) + + data = "asdf" * 1024 + files = {'file': ('more_data.txt', data)} + r = _retry_while(ConnectionError, make_request)(method='post', files=files) + assert r.status_code == 200 + with open(str(uploaddir / 'more_data.txt')) as f: + assert f.read() == data + + def test_tar_mode(run_servefile, datadir): d = { 'foo': {