changeset 60:edc42ee97c41

- handle encodings other than ASCII / UTF-8 gracefully -- if a string can't be decoded, assume it uses an 8-bit single-byte encoding when determining the number of characters in the string.
author Mark Edgington <edgimar@gmail.com>
date Wed, 05 May 2010 22:35:52 +0200
parents 648a08c398a1
children ee97b8215eaa
files crecord/chunk_selector.py
diffstat 1 files changed, 8 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/crecord/chunk_selector.py
+++ b/crecord/chunk_selector.py
@@ -6,8 +6,10 @@
 demandimport.ignore.append('mercurial.encoding')
 try:
     import mercurial.encoding as encoding
+    code = encoding.encoding
 except ImportError:
     encoding = util
+    code = encoding._encoding
 
 import os
 import re
@@ -16,7 +18,6 @@
 import struct
 import termios
 import signal
-import locale
 
 from crpatch import Patch, header, hunk, HunkLine
 
@@ -36,10 +37,6 @@
     raise util.Abort(_('the python curses/wcurses module is not available/installed'))
     
 
-# deal with unicode correctly
-locale.setlocale(locale.LC_ALL, '')
-code = locale.getpreferredencoding()
-
 orig_stdout = sys.__stdout__ # used by gethw()
 
 def gethw():
@@ -399,7 +396,12 @@
         width = self.xScreenSize
         # turn tabs into spaces
         inStr = inStr.expandtabs(4)
-        strLen = len(unicode(encoding.fromlocal(inStr), code))
+        try:
+            strLen = len(unicode(encoding.fromlocal(inStr), code))
+        except:
+            # if the text cannot be decoded, assume an 8-bit single-byte encoding (one byte per character).
+            strLen = len(inStr)
+
         numSpaces = (width - ((strLen + xStart) % width) - 1)
         return inStr + " " * numSpaces + "\n"