diff mercurial/mail.py @ 39118:7b12a2d2eedc

py3: ditch email.parser.BytesParser which appears to be plain crap. As I said before, BytesParser is a thin wrapper over the unicode Parser, and it's too thin to return bytes back. Today, I found it does normalize newline characters to '\n's thanks to the careless use of TextIOWrapper. So, this patch replaces BytesParser with Parser + TextIOWrapper, and fixes newline handling. Since I don't know what the least bad encoding strategy is here, I just copied it from BytesParser. I've moved the new parse() function from pycompat, as it is no longer a trivial wrapper.
author Yuya Nishihara <yuya@tcha.org>
date Sat, 16 Jun 2018 19:31:07 +0900 (2018-06-16)
parents 7edf68862fe3
children 858fe9625dab
line wrap: on
line diff
--- a/mercurial/mail.py
+++ b/mercurial/mail.py
@@ -11,6 +11,8 @@
 import email.charset
 import email.header
 import email.message
+import email.parser
+import io
 import os
 import smtplib
 import socket
@@ -322,6 +324,23 @@
         s, cs = _encode(ui, s, charsets)
     return mimetextqp(s, 'plain', cs)
 
+if pycompat.ispy3:
+    def parse(fp):  # py3: parse an email message from binary stream fp
+        ep = email.parser.Parser()
+        # newline=chr(10) disables the "universal newlines" mode, which
+        # isn't binary safe. ascii/surrogateescape may not be correct, but
+        # that's what the standard Python email parser does.
+        fp = io.TextIOWrapper(fp, encoding=r'ascii',
+                              errors=r'surrogateescape', newline=chr(10))
+        try:
+            return ep.parse(fp)
+        finally:
+            fp.detach()  # unwrap so the caller's stream is left open
+else:
+    def parse(fp):  # py2: the parser reads fp directly, no text wrapper
+        ep = email.parser.Parser()
+        return ep.parse(fp)
+
 def headdecode(s):
     '''Decodes RFC-2047 header'''
     uparts = []