The following Python script fixes this problem. It takes your Microsoft Expression Web directory tree and copies it to another location, testing each HTML file for the UTF-8 BOM and removing it if present. It skips files that are unchanged, so it is fast to run just before you upload your changes. You might also be interested in RemoveBOM v2.
No guarantees or support are given with this code. Enjoy!
# RemoveBOM
# Clones a directory structure but removing the BOM from UTF-8 files
# (C) 2007 Niall Douglas
# 23rd April 2007
import sys, os, shutil
def enumeratedir(path):
    """Return a dict describing every entry found beneath *path*.

    Keys are entry paths built by joining each ``os.walk`` root with the
    entry name, so they carry *path* as their prefix. Values are
    ``(kind, stat_result)`` tuples: ``kind`` is 1 for names os.walk reports
    as files and 2 for names it reports as directories, and ``stat_result``
    is whatever ``os.stat`` returns for that entry.

    *path* itself is not included in the result. ``os.walk`` ignores listing
    errors by default, so a directory that does not exist yields an empty
    dict rather than an exception. ``os.stat`` errors (for example on a
    dangling symlink) are not caught and propagate to the caller.
    """
    ret = {}
    for root, dirs, files in os.walk(path, topdown=False):
        # Files are recorded before directories within each root, matching
        # the order in which the two name lists were originally processed.
        for kind, names in ((1, files), (2, dirs)):
            for name in names:
                full = os.path.join(root, name)
                ret[full] = (kind, os.stat(full))
    return ret
def replaceroot(root, path, pathroot):
    """Re-base *path* from the tree rooted at *pathroot* onto *root*.

    *path* is expected to lie beneath *pathroot*; the part of it below
    *pathroot* is joined onto *root* and returned. For example
    ``replaceroot("out", "in/sub/f.html", "in")`` gives ``"out/sub/f.html"``
    (with the platform's separator).

    The relative part is computed with ``os.path.relpath`` rather than by
    slicing on string lengths, so a *pathroot* written with a trailing
    separator (``"in/"``) no longer chops the first character off the
    result. When *path* is *pathroot* itself, *root* is returned unchanged.
    """
    relative = os.path.relpath(path, pathroot)
    if relative == os.curdir:
        # path and pathroot name the same directory: nothing to append.
        return root
    return os.path.join(root, relative)
def ensuredir(path):
    """Create directory *path* and any missing parents, announcing each one.

    Does nothing when *path* already exists. Otherwise the parent is ensured
    first (recursively) and then *path* itself is created with ``os.mkdir``,
    printing a 'Making directory' line for every directory created.

    An empty *path* is treated as "nothing to create". That is what
    ``os.path.dirname`` returns for a bare relative name such as
    ``"public_html_bomfixed"``; without this guard the recursion never
    terminates, because ``""`` never exists and is its own dirname.
    """
    if not path or os.path.exists(path):
        return
    ensuredir(os.path.dirname(path))
    # Single-argument print call: identical output on Python 2 and 3.
    print('Making directory %s' % path)
    os.mkdir(path)
# The UTF-8 byte order mark as raw bytes, so it compares equal to data read
# from a file opened in binary mode on both Python 2 and Python 3.
UTF8BOM = b'\xef\xbb\xbf'


def _copy_stripping_bom(src, dst):
    """Copy *src* to *dst* without its leading UTF-8 BOM.

    Returns True when *src* starts with the BOM; in that case *dst* (and any
    missing parent directories) is written with the remaining bytes and then
    given *src*'s mode and timestamps. Returns False, leaving *dst*
    untouched, when *src* does not start with the BOM.
    """
    with open(src, 'rb') as infile:
        if infile.read(len(UTF8BOM)) != UTF8BOM:
            return False
        print('File %s changed, removing UTF-8 BOM to %s' % (src, dst))
        ensuredir(os.path.dirname(dst))
        with open(dst, 'wb') as outfile:
            # infile is already positioned just past the BOM.
            shutil.copyfileobj(infile, outfile)
    # copystat copies the permission bits as well as the timestamps, so the
    # next run's timestamp comparison sees this file as unchanged.
    shutil.copystat(src, dst)
    return True


try:
    indir = sys.argv[1]
    outdir = sys.argv[2]
except IndexError:
    # Fewer than two arguments given: fall back to the default pair.
    indir = "public_html"
    outdir = "public_html_bomfixed"
if not indir or not outdir:
    raise Exception("Missing input or output dirs")
if not os.path.isdir(indir):
    # A missing input tree enumerates as empty, which would make the
    # deletion pass below remove everything in the output tree.
    raise Exception("Input directory does not exist: %s" % indir)

indircontents = enumeratedir(indir)
outdircontents = enumeratedir(outdir)

# Pass 1: remove output entries that no longer have a counterpart in the
# input tree. An entry's path is always longer than its parent directory's
# path, so going longest-first empties each directory before os.rmdir runs
# on it.
for path in sorted(outdircontents, key=len, reverse=True):
    ipath = replaceroot(indir, path, outdir)
    if ipath not in indircontents:
        print('Deleting %s' % path)
        if os.path.isfile(path) or os.path.islink(path):
            os.remove(path)
        elif os.path.isdir(path):
            os.rmdir(path)

# Pass 2: copy new or changed files, stripping the BOM from .html files.
for path, st in indircontents.items():
    if st[0] != 1:
        # Directories are created on demand by ensuredir when a file inside
        # them is copied.
        continue
    f2path = replaceroot(outdir, path, indir)
    if f2path in outdircontents:
        outst = outdircontents[f2path][1]
        # Unchanged when both timestamps agree to within 2 seconds
        # (presumably to tolerate coarse filesystem timestamp resolution).
        # NOTE(review): comparing atime means that merely reading a source
        # file may trigger a recopy; kept to preserve existing behaviour.
        if abs(st[1].st_atime - outst.st_atime) < 2 and \
                abs(st[1].st_mtime - outst.st_mtime) < 2:
            continue
    done = path.endswith('.html') and _copy_stripping_bom(path, f2path)
    if not done:
        print('File %s changed, copying to %s' % (path, f2path))
        ensuredir(os.path.dirname(f2path))
        shutil.copy2(path, f2path)
print("All up to date!")