Ticket #2070: 4459-streaming-file-upload.2.diff
File 4459-streaming-file-upload.2.diff, 26.7 KB (added by , 18 years ago) |
---|
-
django/http/__init__.py
1 import os 1 import os, pickle 2 2 from Cookie import SimpleCookie 3 3 from pprint import pformat 4 4 from urllib import urlencode, quote 5 5 from django.utils.datastructures import MultiValueDict 6 6 7 try: 8 from cStringIO import StringIO 9 except ImportError: 10 from StringIO import StringIO 11 7 12 RESERVED_CHARS="!*'();:@&=+$,/?%#[]" 8 13 9 14 try: … … 42 47 def is_secure(self): 43 48 return os.environ.get("HTTPS") == "on" 44 49 45 def parse_file_upload(header_dict, post_data): 46 "Returns a tuple of (POST MultiValueDict, FILES MultiValueDict)" 47 import email, email.Message 48 from cgi import parse_header 49 raw_message = '\r\n'.join(['%s:%s' % pair for pair in header_dict.items()]) 50 raw_message += '\r\n\r\n' + post_data 51 msg = email.message_from_string(raw_message) 52 POST = MultiValueDict() 53 FILES = MultiValueDict() 54 for submessage in msg.get_payload(): 55 if isinstance(submessage, email.Message.Message): 56 name_dict = parse_header(submessage['Content-Disposition'])[1] 57 # name_dict is something like {'name': 'file', 'filename': 'test.txt'} for file uploads 58 # or {'name': 'blah'} for POST fields 59 # We assume all uploaded files have a 'filename' set. 60 if name_dict.has_key('filename'): 61 assert type([]) != type(submessage.get_payload()), "Nested MIME messages are not supported" 62 if not name_dict['filename'].strip(): 63 continue 64 # IE submits the full path, so trim everything but the basename. 65 # (We can't use os.path.basename because it expects Linux paths.) 
66 filename = name_dict['filename'][name_dict['filename'].rfind("\\")+1:] 67 FILES.appendlist(name_dict['name'], { 68 'filename': filename, 69 'content-type': (submessage.has_key('Content-Type') and submessage['Content-Type'] or None), 70 'content': submessage.get_payload(), 71 }) 50 def parse_file_upload(headers, input): 51 from django.conf import settings 52 53 # Only stream files to disk if FILE_STREAMING_DIR is set 54 file_upload_dir = getattr(settings, 'FILE_UPLOAD_DIR', None) 55 file_upload_min_size = getattr(settings, 'FILE_UPLOAD_MIN_SIZE', 100000) 56 57 try: 58 parser = MultiPartParser(headers, input, file_upload_dir, file_upload_min_size) 59 return parser.parse() 60 except MultiPartParserError, e: 61 return MultiValueDict({ '_file_upload_error': [e.message] }), {} 62 63 class MultiPartParserError(Exception): 64 def __init__(self, message): 65 self.message = message 66 def __str__(self): 67 return repr(self.message) 68 69 class MultiPartParser(object): 70 """ 71 A rfc2388 multipart/form-data parser. 72 73 parse() reads the input stream in chunk_size chunks and returns a 74 tuple of (POST MultiValueDict, FILES MultiValueDict). If 75 file_upload_dir is defined files will be streamed to temporary 76 files in the specified directory. 77 78 The FILES dictionary will have 'filename', 'content-type', 79 'content' and 'content-length' entries. For streamed files it will 80 also have 'tmpfilename' and 'tmpfile'. The 'content' entry will 81 only be read from disk when referenced for streamed files. 82 83 If the header X-Progress-ID is sent with a 32 character hex string 84 a temporary file with the same name will be created in 85 `file_upload_dir`` with a pickled { 'received', 'size' } 86 dictionary with the number of bytes received and the size expected 87 respectively. The file will be unlinked when the parser finishes. 
88 89 """ 90 91 def __init__(self, headers, input, file_upload_dir=None, file_upload_min_size=None, chunk_size=1024*64): 92 try: 93 content_length = int(headers['Content-Length']) 94 except: 95 raise MultiPartParserError('Invalid Content-Length: %s' % headers.get('Content-Length')) 96 97 content_type = headers.get('Content-Type') 98 99 if not content_type or not content_type.startswith('multipart/'): 100 raise MultiPartParserError('Invalid Content-Type: %s' % content_type) 101 102 ctype, opts = self.parse_header(content_type) 103 boundary = opts.get('boundary') 104 from cgi import valid_boundary 105 if not boundary or not valid_boundary(boundary): 106 raise MultiPartParserError('Invalid boundary in multipart form: %s' % boundary) 107 108 # check if we got a valid X-Progress-ID id 109 progress_id = headers.get('X-Progress-ID') 110 if file_upload_dir and progress_id: 111 import re 112 if re.match(r'^[0-9a-zA-Z]{32}$', progress_id): 113 self._progress_filename = os.path.join(file_upload_dir, progress_id) 72 114 else: 73 POST.appendlist(name_dict['name'], submessage.get_payload()) 74 return POST, FILES 115 raise MultiPartParserError('Invalid X-Progress-ID: %s' % progress_id) 116 else: 117 self._progress_filename = None 75 118 119 self._boundary = '--' + boundary 120 self._input = input 121 self._size = content_length 122 self._received = 0 123 self._file_upload_dir = file_upload_dir 124 self._chunk_size = chunk_size 125 self._state = 'PREAMBLE' 126 self._partial = '' 127 self._post = MultiValueDict() 128 self._files = MultiValueDict() 129 130 if file_upload_min_size is not None and content_length < file_upload_min_size: 131 self._file_upload_dir = None # disable file streaming for small request 132 133 try: 134 # use mx fast string search if available 135 from mx.TextTools import FS 136 self._fs = FS(self._boundary) 137 except ImportError: 138 self._fs = None 139 140 def parse(self): 141 try: 142 self._parse() 143 finally: 144 if self._progress_filename: 145 try: 146 
os.unlink(self._progress_filename) 147 except OSError: 148 pass 149 150 return self._post, self._files 151 152 def _parse(self): 153 size = self._size 154 155 try: 156 while size > 0: 157 n = self._read(self._input, min(self._chunk_size, size)) 158 if not n: 159 break 160 size -= n 161 except: 162 # consume any remaining data so we dont generate a "Connection Reset" error 163 size = self._size - self._received 164 while size > 0: 165 data = self._input.read(min(self._chunk_size, size)) 166 size -= len(data) 167 raise 168 169 def _find_boundary(self, data, start, stop): 170 """ 171 Find the next boundary and return the end of current part 172 and start of next part. 173 """ 174 if self._fs: 175 boundary = self._fs.find(data, start, stop) 176 else: 177 boundary = data.find(self._boundary, start, stop) 178 if boundary >= 0: 179 end = boundary 180 next = boundary + len(self._boundary) 181 182 # backup over CRLF 183 if end > 0 and data[end-1] == '\n': end -= 1 184 if end > 0 and data[end-1] == '\r': end -= 1 185 # skip over --CRLF 186 if next < stop and data[next] == '-': next += 1 187 if next < stop and data[next] == '-': next += 1 188 if next < stop and data[next] == '\r': next += 1 189 if next < stop and data[next] == '\n': next += 1 190 191 return True, end, next 192 else: 193 return False, stop, stop 194 195 class TemporaryFile(object): 196 "A temporary file that tries to delete itself when garbage collected." 197 def __init__(self, dir): 198 import tempfile 199 (fd, name) = tempfile.mkstemp(suffix='.upload', dir=dir) 200 self.file = os.fdopen(fd, 'w+b') 201 self.name = name 202 203 def __getattr__(self, name): 204 a = getattr(self.__dict__['file'], name) 205 if type(a) != type(0): 206 setattr(self, name, a) 207 return a 208 209 def __del__(self): 210 try: 211 os.unlink(self.name) 212 except OSError: 213 pass 214 215 class LazyContent(dict): 216 """ 217 A lazy FILES dictionary entry that reads the contents from 218 tmpfile only when referenced. 
219 """ 220 def __init__(self, data): 221 dict.__init__(self, data) 222 223 def __getitem__(self, key): 224 if key == 'content' and not self.has_key(key): 225 self['tmpfile'].seek(0) 226 self['content'] = self['tmpfile'].read() 227 return dict.__getitem__(self, key) 228 229 def _read(self, input, size): 230 data = input.read(size) 231 232 if not data: 233 return 0 234 235 read_size = len(data) 236 self._received += read_size 237 238 if self._partial: 239 data = self._partial + data 240 241 start = 0 242 stop = len(data) 243 244 while start < stop: 245 boundary, end, next = self._find_boundary(data, start, stop) 246 247 if not boundary and read_size: 248 # make sure we dont treat a partial boundary (and its separators) as data 249 stop -= len(self._boundary) + 16 250 end = next = stop 251 if end <= start: 252 break # need more data 253 254 if self._state == 'PREAMBLE': 255 # Preamble, just ignore it 256 self._state = 'HEADER' 257 258 elif self._state == 'HEADER': 259 # Beginning of header, look for end of header and parse it if found. 
260 261 header_end = data.find('\r\n\r\n', start, stop) 262 if header_end == -1: 263 break # need more data 264 265 header = data[start:header_end] 266 267 self._fieldname = None 268 self._filename = None 269 self._content_type = None 270 271 for line in header.split('\r\n'): 272 ctype, opts = self.parse_header(line) 273 if ctype == 'content-disposition: form-data': 274 self._fieldname = opts.get('name') 275 self._filename = opts.get('filename') 276 elif ctype.startswith('content-type: '): 277 self._content_type = ctype[14:] 278 279 if self._filename is not None: 280 # cleanup filename from IE full paths: 281 self._filename = self._filename[self._filename.rfind("\\")+1:].strip() 282 283 if self._filename: # ignore files without filenames 284 if self._file_upload_dir: 285 try: 286 self._file = self.TemporaryFile(dir=self._file_upload_dir) 287 except: 288 raise MultiPartParserError("Failed to create temporary file.") 289 else: 290 self._file = StringIO() 291 else: 292 self._file = None 293 self._filesize = 0 294 self._state = 'FILE' 295 else: 296 self._field = StringIO() 297 self._state = 'FIELD' 298 next = header_end + 4 299 300 elif self._state == 'FIELD': 301 # In a field, collect data until a boundary is found. 302 303 self._field.write(data[start:end]) 304 if boundary: 305 if self._fieldname: 306 self._post.appendlist(self._fieldname, self._field.getvalue()) 307 self._field.close() 308 self._state = 'HEADER' 309 310 elif self._state == 'FILE': 311 # In a file, collect data until a boundary is found. 
312 313 if self._file: 314 try: 315 self._file.write(data[start:end]) 316 except IOError, e: 317 raise MultiPartParserError("Failed to write to temporary file.") 318 self._filesize += end-start 319 320 if self._progress_filename: 321 f = open(os.path.join(self._file_upload_dir, self._progress_filename), 'w') 322 pickle.dump({ 'received': self._received, 'size': self._size }, f) 323 f.close() 324 325 if boundary: 326 if self._file: 327 if self._file_upload_dir: 328 self._file.seek(0) 329 file = self.LazyContent({ 330 'filename': self._filename, 331 'content-type': self._content_type, 332 # 'content': is read on demand 333 'content-length': self._filesize, 334 'tmpfilename': self._file.name, 335 'tmpfile': self._file 336 }) 337 else: 338 file = { 339 'filename': self._filename, 340 'content-type': self._content_type, 341 'content': self._file.getvalue(), 342 'content-length': self._filesize 343 } 344 self._file.close() 345 346 self._files.appendlist(self._fieldname, file) 347 348 self._state = 'HEADER' 349 350 start = next 351 352 self._partial = data[start:] 353 354 return read_size 355 356 def parse_header(self, line): 357 from cgi import parse_header 358 return parse_header(line) 359 360 76 361 class QueryDict(MultiValueDict): 77 362 """A specialized MultiValueDict that takes a query string when initialized. 78 363 This is immutable unless you create a copy of it.""" … … 302 587 if not host: 303 588 host = request.META.get('HTTP_HOST', '') 304 589 return host 590 -
django/oldforms/__init__.py
661 661 self.validator_list = [self.isNonEmptyFile] + validator_list 662 662 663 663 def isNonEmptyFile(self, field_data, all_data): 664 try:665 content = field_data['content']666 except TypeError:664 if field_data.has_key('_file_upload_error'): 665 raise validators.CriticalValidationError, field_data['_file_upload_error'] 666 if not field_data.has_key('filename'): 667 667 raise validators.CriticalValidationError, gettext("No file was submitted. Check the encoding type on the form.") 668 if not content:668 if not field_data['content-length']: 669 669 raise validators.CriticalValidationError, gettext("The submitted file is empty.") 670 670 671 671 def render(self, data): 672 672 return '<input type="file" id="%s" class="v%s" name="%s" />' % \ 673 673 (self.get_id(), self.__class__.__name__, self.field_name) 674 674 675 def prepare(self, new_data): 676 if new_data.has_key('_file_upload_error'): 677 # pretend we got something in the field to raise a validation error later 678 new_data[self.field_name] = { '_file_upload_error': new_data['_file_upload_error'] } 679 675 680 def html2python(data): 676 681 if data is None: 677 682 raise EmptyValue -
django/db/models/base.py
321 321 def _get_FIELD_size(self, field): 322 322 return os.path.getsize(self._get_FIELD_filename(field)) 323 323 324 def _save_FIELD_file(self, field, filename, raw_ contents):324 def _save_FIELD_file(self, field, filename, raw_field): 325 325 directory = field.get_directory_name() 326 326 try: # Create the date-based directory if it doesn't exist. 327 327 os.makedirs(os.path.join(settings.MEDIA_ROOT, directory)) … … 343 343 setattr(self, field.attname, filename) 344 344 345 345 full_filename = self._get_FIELD_filename(field) 346 fp = open(full_filename, 'wb') 347 fp.write(raw_contents) 348 fp.close() 346 if raw_field.has_key('tmpfilename'): 347 raw_field['tmpfile'].close() 348 os.rename(raw_field['tmpfilename'], full_filename) 349 else: 350 fp = open(full_filename, 'wb') 351 fp.write(raw_field['content']) 352 fp.close() 349 353 350 354 # Save the width and/or height, if applicable. 351 355 if isinstance(field, ImageField) and (field.width_field or field.height_field): -
django/db/models/fields/__init__.py
625 625 setattr(cls, 'get_%s_filename' % self.name, curry(cls._get_FIELD_filename, field=self)) 626 626 setattr(cls, 'get_%s_url' % self.name, curry(cls._get_FIELD_url, field=self)) 627 627 setattr(cls, 'get_%s_size' % self.name, curry(cls._get_FIELD_size, field=self)) 628 setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_ contents: instance._save_FIELD_file(self, filename, raw_contents))628 setattr(cls, 'save_%s_file' % self.name, lambda instance, filename, raw_field: instance._save_FIELD_file(self, filename, raw_field)) 629 629 dispatcher.connect(self.delete_file, signal=signals.post_delete, sender=cls) 630 630 631 631 def delete_file(self, instance): … … 648 648 if new_data.get(upload_field_name, False): 649 649 func = getattr(new_object, 'save_%s_file' % self.name) 650 650 if rel: 651 func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0] ["content"])651 func(new_data[upload_field_name][0]["filename"], new_data[upload_field_name][0]) 652 652 else: 653 func(new_data[upload_field_name]["filename"], new_data[upload_field_name] ["content"])653 func(new_data[upload_field_name]["filename"], new_data[upload_field_name]) 654 654 655 655 def get_directory_name(self): 656 656 return os.path.normpath(datetime.datetime.now().strftime(self.upload_to)) -
django/core/handlers/wsgi.py
111 111 if self.environ.get('CONTENT_TYPE', '').startswith('multipart'): 112 112 header_dict = dict([(k, v) for k, v in self.environ.items() if k.startswith('HTTP_')]) 113 113 header_dict['Content-Type'] = self.environ.get('CONTENT_TYPE', '') 114 self._post, self._files = http.parse_file_upload(header_dict, self.raw_post_data) 114 header_dict['Content-Length'] = self.environ.get('CONTENT_LENGTH', '') 115 header_dict['X-Progress-ID'] = self.environ.get('HTTP_X_PROGRESS_ID', '') 116 try: 117 self._post, self._files = http.parse_file_upload(header_dict, self.environ['wsgi.input']) 118 except: 119 self._post, self._files = {}, {} # make sure we dont read the input stream again 120 raise 121 self._raw_post_data = None # raw data is not available for streamed multipart messages 115 122 else: 116 123 self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict() 117 124 else: -
django/core/handlers/modpython.py
47 47 def _load_post_and_files(self): 48 48 "Populates self._post and self._files" 49 49 if self._req.headers_in.has_key('content-type') and self._req.headers_in['content-type'].startswith('multipart'): 50 self._post, self._files = http.parse_file_upload(self._req.headers_in, self.raw_post_data) 50 self._raw_post_data = None # raw data is not available for streamed multipart messages 51 try: 52 self._post, self._files = http.parse_file_upload(self._req.headers_in, self._req) 53 except: 54 self._post, self._files = {}, {} # make sure we dont read the input stream again 55 raise 51 56 else: 52 57 self._post, self._files = http.QueryDict(self.raw_post_data), datastructures.MultiValueDict() 53 58 -
tests/modeltests/test_client/views.py
22 22 23 23 return HttpResponse(t.render(c)) 24 24 25 def post_file_view(request): 26 "A view that expects a multipart post and returns a file in the context" 27 t = Template('File {{ file.filename }} received', name='POST Template') 28 c = Context({'file': request.FILES['file_file']}) 29 return HttpResponse(t.render(c)) 30 25 31 def redirect_view(request): 26 32 "A view that redirects all requests to the GET view" 27 33 return HttpResponseRedirect('/test_client/get_view/') … … 32 38 c = Context({'user': request.user}) 33 39 34 40 return HttpResponse(t.render(c)) 35 login_protected_view = login_required(login_protected_view) 36 No newline at end of file 41 login_protected_view = login_required(login_protected_view) -
tests/modeltests/test_client/models.py
66 66 self.assertEqual(response.template.name, 'POST Template') 67 67 self.failUnless('Data received' in response.content) 68 68 69 def test_post_file_view(self): 70 "POST this python file to a view" 71 import os, tempfile 72 from django.conf import settings 73 file = __file__.replace('.pyc', '.py') 74 for upload_dir in [None, tempfile.gettempdir()]: 75 settings.FILE_UPLOAD_DIR = upload_dir 76 post_data = { 'name': file, 'file': open(file) } 77 response = self.client.post('/test_client/post_file_view/', post_data) 78 self.failUnless('models.py' in response.context['file']['filename']) 79 self.failUnless(len(response.context['file']['content']) == os.path.getsize(file)) 80 if upload_dir: 81 self.failUnless(response.context['file']['tmpfilename']) 82 69 83 def test_redirect(self): 70 84 "GET a URL that redirects elsewhere" 71 85 response = self.client.get('/test_client/redirect_view/') -
tests/modeltests/test_client/urls.py
4 4 urlpatterns = patterns('', 5 5 (r'^get_view/$', views.get_view), 6 6 (r'^post_view/$', views.post_view), 7 (r'^post_file_view/$', views.post_file_view), 7 8 (r'^redirect_view/$', views.redirect_view), 8 9 (r'^login_protected_view/$', views.login_protected_view), 9 10 ) -
docs/request_response.txt
72 72 ``FILES`` 73 73 A dictionary-like object containing all uploaded files. Each key in 74 74 ``FILES`` is the ``name`` from the ``<input type="file" name="" />``. Each 75 value in ``FILES`` is a standard Python dictionary with the following three 75 value in ``FILES`` is a standard Python dictionary with the following four 76 76 keys: 77 77 78 78 * ``filename`` -- The name of the uploaded file, as a Python string. 79 79 * ``content-type`` -- The content type of the uploaded file. 80 80 * ``content`` -- The raw content of the uploaded file. 81 * ``content-length`` -- The length of the content in bytes. 81 82 83 If streaming file uploads are enabled, two additional keys 84 describing the uploaded file will be present: 85 86 * ``tmpfilename`` -- The filename for the temporary file. 87 * ``tmpfile`` -- An open file object for the temporary file. 88 89 The temporary file will be removed when the request finishes. 90 91 Note that accessing ``content`` when streaming uploads are enabled 92 will read the whole file into memory, which may not be what you want. 93 82 94 Note that ``FILES`` will only contain data if the request method was POST 83 95 and the ``<form>`` that posted to the request had 84 96 ``enctype="multipart/form-data"``. Otherwise, ``FILES`` will be a blank -
docs/settings.txt
409 409 or ``django.core.mail.mail_managers``. You'll probably want to include the 410 410 trailing space. 411 411 412 FILE_UPLOAD_DIR 413 --------------- 414 415 Default: Not defined 416 417 Path to a directory where temporary files should be written during 418 file uploads. If this is left unset, uploaded files are read into memory. 419 420 FILE_UPLOAD_MIN_SIZE 421 -------------------- 422 423 Default: 100000 424 425 An integer specifying the minimum request size, in bytes (as given by the 426 ``Content-Length`` header), for file upload streaming to take place. Any request smaller 427 than this will be handled in memory. Note: ``FILE_UPLOAD_DIR`` has to 428 be defined to enable streaming. 429 412 430 IGNORABLE_404_ENDS 413 431 ------------------ 414 432 -
docs/forms.txt
454 454 new_data = request.POST.copy() 455 455 new_data.update(request.FILES) 456 456 457 Streaming file uploads 458 ---------------------- 459 460 File uploads will be read into memory by default. This works fine for 461 small to medium-sized uploads (from 1MB to 100MB, depending on your 462 setup and usage). If you want to support larger uploads, you can enable 463 upload streaming, where only a small part of the file will be in memory 464 at any time. To do this, you need to specify the ``FILE_UPLOAD_DIR`` 465 setting (see the settings_ document for more details). 466 467 See `request object`_ for more details about ``request.FILES`` objects 468 with streaming file uploads enabled. 469 457 470 Validators 458 471 ========== 459 472 … … 668 681 .. _`generic views`: ../generic_views/ 669 682 .. _`models API`: ../model_api/ 670 683 .. _settings: ../settings/ 684 .. _`request object`: ../request_response/#httprequest-objects