Upload to uploaddir instead of /tmp

When uploading larger files, cgi.FieldStorage decides to store the files
in an unnamed temporary file in /tmp while parsing the form-data. This
is counter-intuitive and might not work, if the partition hosting /tmp/
is too small. Therefore, we overwrite FieldStorage's make_file() method
to use the targetDir as upload path.

While we're at it, we also use NamedTemporaryFile instead of
TemporaryFile, because that lets us use os.link() to create a "copy" of
the file-data without writing it to disk a second time. This does not
work for small data, because small data is kept in an BytesIO object and
thus never written to file automatically. For this case, we keep the old
code, that's writing down files manually.

We have to inline-define CustomFieldStorage, because FieldStorage will
instantiate a new FieldStorage instance for parsing the parts of a
multipart/form-data body and thus we cannot pass targetDir via
__init__() argument.

Signed-off-by: MasterofJOKers <joker@someserver.de>
This commit is contained in:
MasterofJOKers 2022-01-18 21:50:16 +01:00 committed by Sebastian Lohff
parent 4f3b916b9f
commit 0b010d5c10
2 changed files with 48 additions and 9 deletions

View File

@ -21,6 +21,7 @@ import select
import socket
from subprocess import Popen, PIPE
import sys
import tempfile
import time
# fix imports for python2/python3
@ -611,7 +612,24 @@ class FilePutter(BaseHTTPServer.BaseHTTPRequestHandler):
# create FieldStorage object for multipart parsing
env = os.environ
env['REQUEST_METHOD'] = "POST"
fstorage = cgi.FieldStorage(fp=self.rfile, headers=self.headers, environ=env)
targetDir = self.targetDir
class CustomFieldStorage(cgi.FieldStorage):
def make_file(self, *args, **kwargs):
"""Overwritten to use a named file and the upload directory
Python 2.7 has an unused "binary" argument while Python 3 does
not have any arguments. Python 2.7 does not have a
self._binary_file attribute.
"""
if sys.version_info.major == 2 or self._binary_file:
return tempfile.NamedTemporaryFile("wb+", dir=targetDir)
else:
return tempfile.NamedTemporaryFile(
"w+", encoding=self.encoding, newline='\n', dir=targetDir)
fstorage = CustomFieldStorage(fp=self.rfile, headers=self.headers, environ=env)
if "file" not in fstorage:
self.sendResponse(400, "No file found in request.")
return
@ -621,14 +639,21 @@ class FilePutter(BaseHTTPServer.BaseHTTPRequestHandler):
self.sendResponse(400, "Filename was empty or invalid")
return
# write file down to disk, send a 200 afterwards
target = open(destFileName, "wb")
bytesLeft = length
while bytesLeft > 0:
bytesToRead = min(self.blockSize, bytesLeft)
target.write(fstorage["file"].file.read(bytesToRead))
bytesLeft -= bytesToRead
target.close()
# put the file at the right place, send 200 afterwards
if getattr(fstorage["file"].file, "name", None):
# the sent file was large, so we can just hard link the temporary
# file and are done
os.link(fstorage["file"].file.name, destFileName)
else:
# write file to disk. it was small enough so no temporary file was
# created
target = open(destFileName, "wb")
bytesLeft = length
while bytesLeft > 0:
bytesToRead = min(self.blockSize, bytesLeft)
target.write(fstorage["file"].file.read(bytesToRead))
bytesLeft -= bytesToRead
target.close()
self.sendResponse(200, "OK! Thanks for uploading")
print("Received file '%s' from %s." % (destFileName, self.client_address[0]))

View File

@ -357,6 +357,20 @@ def test_upload_size_limit(run_servefile, tmp_path):
assert r.status_code == 200
def test_upload_large_file(run_servefile, tmp_path):
# small files end up in BytesIO while large files get temporary files. this
# test makes sure we hit the large file codepath at least once
uploaddir = tmp_path / 'upload'
run_servefile(['-u', str(uploaddir)])
data = "asdf" * 1024
files = {'file': ('more_data.txt', data)}
r = _retry_while(ConnectionError, make_request)(method='post', files=files)
assert r.status_code == 200
with open(str(uploaddir / 'more_data.txt')) as f:
assert f.read() == data
def test_tar_mode(run_servefile, datadir):
d = {
'foo': {