[hwasan] support python3 in hwasan_symbolize

Verified that, for an example stack, the output is identical between the previous version, the new version run under Python 2, and the new version run under Python 3.

Reviewed By: eugenis

Differential Revision: https://reviews.llvm.org/D114404

GitOrigin-RevId: 26d1edfb105bdc857733c3bb8697a9f73828bde7
diff --git a/lib/hwasan/scripts/hwasan_symbolize b/lib/hwasan/scripts/hwasan_symbolize
index f4c946e..e0a6554 100755
--- a/lib/hwasan/scripts/hwasan_symbolize
+++ b/lib/hwasan/scripts/hwasan_symbolize
@@ -10,6 +10,10 @@
 # HWAddressSanitizer offline symbolization script.
 #
 #===------------------------------------------------------------------------===#
+
+from __future__ import print_function
+from __future__ import unicode_literals
+
 import glob
 import os
 import re
@@ -18,6 +22,12 @@
 import subprocess
 import argparse
 
+if sys.version_info.major < 3:
+  # Simulate Python 3.x behaviour of defaulting to UTF-8 for print. This is
+  # important in case any symbols are non-ASCII.
+  import codecs
+  sys.stdout = codecs.getwriter("utf-8")(sys.stdout)
+
 last_access_address = None
 last_access_tag = None
 
@@ -35,21 +45,26 @@
 
   def __open_pipe(self):
     if not self.__pipe:
+      opt = {}
+      if sys.version_info.major > 2:
+        opt['encoding'] = 'utf-8'
       self.__pipe = subprocess.Popen([self.__path, "--inlining", "--functions"],
-                                     stdin=subprocess.PIPE, stdout=subprocess.PIPE)
+                                     stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+                                     **opt)
 
-  class __EOF:
+  class __EOF(Exception):
     pass
 
   def __write(self, s):
-    print >>self.__pipe.stdin, s
+    print(s, file=self.__pipe.stdin)
+    self.__pipe.stdin.flush()
     if self.__log:
-      print >>sys.stderr, ("#>>  |%s|" % (s,))
+      print("#>>  |%s|" % (s,), file=sys.stderr)
 
   def __read(self):
     s = self.__pipe.stdout.readline().rstrip()
     if self.__log:
-      print >>sys.stderr, ("# << |%s|" % (s,))
+      print("# << |%s|" % (s,), file=sys.stderr)
     if s == '':
       raise Symbolizer.__EOF
     return s
@@ -75,7 +90,7 @@
       if os.path.exists(full_path):
         return full_path
     if name not in self.__warnings:
-      print >>sys.stderr, "Could not find symbols for", name
+      print("Could not find symbols for", name, file=sys.stderr)
       self.__warnings.add(name)
     return None
 
@@ -128,16 +143,16 @@
     frames = list(symbolizer.iter_call_stack(binary, addr))
 
     if len(frames) > 0:
-      print "%s#%s%s%s in %s" % (match.group(1).encode('utf-8'), match.group(2).encode('utf-8'),
-                                 match.group(3).encode('utf-8'), frames[0][0], frames[0][1])
+      print("%s#%s%s%s in %s" % (match.group(1), match.group(2),
+                                 match.group(3), frames[0][0], frames[0][1]))
       for i in range(1, len(frames)):
         space1 = ' ' * match.end(1)
         space2 = ' ' * (match.start(4) - match.end(1) - 2)
-        print "%s->%s%s in %s" % (space1, space2, frames[i][0], frames[i][1])
+        print("%s->%s%s in %s" % (space1, space2, frames[i][0], frames[i][1]))
     else:
-      print line.rstrip().encode('utf-8')
+      print(line.rstrip())
   else:
-    print line.rstrip().encode('utf-8')
+    print(line.rstrip())
 
 def save_access_address(line):
   global last_access_address, last_access_tag
@@ -177,10 +192,10 @@
       tag_offset = local[5]
       if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != last_access_tag):
         continue
-      print ''
-      print 'Potentially referenced stack object:'
-      print '  %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0])
-      print '  at %s' % (local[1],)
+      print('')
+      print('Potentially referenced stack object:')
+      print('  %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]))
+      print('  at %s' % (local[1],))
     return True
   return False
 
@@ -204,7 +219,7 @@
 
 for p in binary_prefixes:
   if not os.path.isdir(p):
-    print >>sys.stderr, "Symbols path does not exist or is not a directory:", p
+    print("Symbols path does not exist or is not a directory:", p, file=sys.stderr)
     sys.exit(1)
 
 # Source location.
@@ -262,24 +277,25 @@
       break
 
 if not os.path.exists(symbolizer_path):
-  print >>sys.stderr, "Symbolizer path does not exist:", symbolizer_path
+  print("Symbolizer path does not exist:", symbolizer_path, file=sys.stderr)
   sys.exit(1)
 
 if args.v:
-  print "Looking for symbols in:"
+  print("Looking for symbols in:")
   for s in binary_prefixes:
-    print "  %s" % (s,)
-  print "Stripping source path prefixes:"
+    print("  %s" % (s,))
+  print("Stripping source path prefixes:")
   for s in paths_to_cut:
-    print "  %s" % (s,)
-  print "Using llvm-symbolizer binary in:\n  %s" % (symbolizer_path,)
-  print
+    print("  %s" % (s,))
+  print("Using llvm-symbolizer binary in:\n  %s" % (symbolizer_path,))
+  print()
 
 symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
 symbolizer.enable_logging(args.d)
 
 for line in sys.stdin:
-  line = line.decode('utf-8')
+  if sys.version_info.major < 3:
+    line = line.decode('utf-8')
   save_access_address(line)
   if process_stack_history(line, symbolizer, ignore_tags=args.ignore_tags):
     continue