Skip to content

Instantly share code, notes, and snippets.

@chaomai
Created September 8, 2012 09:19
Show Gist options
  • Save chaomai/3672990 to your computer and use it in GitHub Desktop.
Save chaomai/3672990 to your computer and use it in GitHub Desktop.
Python 3: convert files to UTF8
# Convert files to UTF-8 encoding (requires the third-party `chardet` package).
# Tested under Python 3.
from chardet.universaldetector import UniversalDetector
import os
import os.path
def convert_to_uft8(filename, filetype, out_enc='utf-8'):
    """Re-encode *filename* in place as *out_enc* if its suffix is *filetype*.

    The function name keeps its original ("uft8") spelling so existing
    callers continue to work.

    Args:
        filename: Path of the file to inspect and, if needed, rewrite.
        filetype: Suffix to match, including the leading dot (e.g. ``'.txt'``),
            compared against ``os.path.splitext`` of the file name.
        out_enc: Name of the target encoding.

    Returns:
        None. Progress is reported on stdout with ``Ignore:``, ``Success:``
        or ``Error:`` lines. Files whose suffix does not match are skipped
        silently.
    """
    # Only the last path component can carry the suffix.
    if filetype != os.path.splitext(os.path.basename(filename))[1]:
        return

    # Bound before the try block so the error message can always be built,
    # even when the failure happens before detection has run.
    in_enc = None
    try:
        with open(filename, 'rb') as src:
            raw = src.read()

        # Detection looks at a one-space prefix plus the first 1024 bytes;
        # the prefix guarantees the detector is fed at least one byte, so an
        # empty file is reported as ascii.
        detector = UniversalDetector()
        detector.reset()
        detector.feed(b' ' + raw[:1024])
        detector.close()
        in_enc = detector.result['encoding']

        if in_enc is None:
            # The detector could not decide; decoding with None would raise.
            print('Error:' + filename + ' encoding could not be detected')
            return

        # Compare case-insensitively so the spelling of the reported
        # encoding name does not matter.
        detected = in_enc.lower()
        if detected == 'ascii':
            print('Ignore:' + filename + ' is ascii')
        elif detected == 'utf-8':
            print('Ignore:' + filename + ' is utf-8')
        else:
            # 'ignore' drops bytes that cannot be decoded (best effort).
            new_content = raw.decode(in_enc, 'ignore')
            # Encode before opening for writing: if encoding fails the
            # original file has not been truncated yet. Writing bytes also
            # keeps the file's line endings exactly as they were.
            encoded = new_content.encode(out_enc)
            with open(filename, 'wb') as dst:
                dst.write(encoded)
            print('Success:' + filename + ' converted from ' + in_enc + ' to ' + out_enc)
    except (OSError, LookupError, UnicodeError):
        # OSError: file could not be read/written; LookupError: unknown codec
        # name; UnicodeError: content not representable in out_enc.
        print('Error:' + filename + ' fail to converted from '
              + (in_enc or 'unknown') + ' to ' + out_enc)
def find_and_operate(sou_dir, filetype, isloop_subdir=True):
    """Run ``convert_to_uft8`` on the files found in *sou_dir*.

    Args:
        sou_dir: Directory whose entries are examined.
        filetype: File suffix passed on to ``convert_to_uft8``; an empty
            string aborts the operation with a message.
        isloop_subdir: When true, sub-directories are processed recursively;
            otherwise only the direct entries of *sou_dir* are examined.

    Returns:
        None. Problems are reported on stdout.
    """
    if '' == filetype:
        print('filetype is not defined')
        print('Quit')
        return

    try:
        entries = os.listdir(sou_dir)
    except OSError:
        # Missing, unreadable or non-directory path: report it and keep the
        # rest of the traversal going instead of aborting with a traceback.
        print('Error:' + sou_dir + ' cannot be listed')
        return

    for item in entries:
        path = os.path.join(sou_dir, item)
        if os.path.isfile(path):
            convert_to_uft8(path, filetype, 'utf-8')
        elif isloop_subdir and os.path.isdir(path):
            # Recurse only into real directories; entries that are neither
            # file nor directory (e.g. dangling symlinks) are skipped.
            find_and_operate(path, filetype, isloop_subdir)
def main():
    """Prompt for a directory, recursion flag and suffix, then convert.

    The recursion answer is compared after trimming whitespace and
    lower-casing; an empty answer, ``t`` or ``true`` enables recursion and
    anything else disables it. A suffix entered without a leading dot gets
    one prepended, because the suffix is later compared with an
    ``os.path.splitext`` extension, which always starts with a dot.
    """
    directory = input('directory:')
    flag = input('is loop sub dir(t or f, Default is True):').strip().lower()
    isloop_subdir = flag in ('', 't', 'true')
    filetype = input('file suffix(must defined):').strip()
    if filetype and not filetype.startswith('.'):
        filetype = '.' + filetype
    find_and_operate(directory, filetype, isloop_subdir)
# Start the interactive converter only when run as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment