This page has been accessed 1,200 times since the 21st August 2007.
| View this page in: |
English |
Any language: |
Chinese |
French |
German |
Japanese |
Portuguese |
Russian |
Spanish |
Translation to non-English languages provided by Google Language
|
|
The following Python script fixes this problem and is an enhancement of RemoveBOM v1. It takes your Microsoft Expression Web directory tree and copies it to another location, testing HTML files for the UTF-8 BOM and replacing that with the above php header rewriting code. It knows when to not copy files which are unchanged, so it is fast to run just before you upload your changes. It also rewrites the header via php such that HTTP Last-Modified is set to the last modified time of the php-containing html file, which ensures that a HTTP 304 Not Modified response is given by Apache should the web browser send a "send if modified since X" request (which most do), thus greatly lowering bandwidth costs and indeed server load thanks to idiotic spider robots. Lastly, it also uses PHP output buffering to determine a correct Content-Length header and enables zlib compression should the source file exceed 64 KB - this adds latency for the compression and decompression, but halves or quarters the amount of data needing to be transmitted. No guarantees or support are given with this code. Enjoy!
# RemoveBOM v2.0
# Clones a directory structure but removing the BOM from UTF-8 files
# (C) 2007 Niall Douglas
# 23rd April 2007 (last modified: 21st August 2007)
import sys, os, shutil
def enumeratedir(path):
    """Return a dict describing every file and directory found below *path*.

    Keys are the entry paths as produced by joining os.walk()'s root with the
    entry name (so they start with *path* exactly as it was passed in).
    Values are ``(kind, stat_result)`` tuples where ``kind`` is 1 for an
    entry os.walk() reported as a file and 2 for one it reported as a
    directory, and ``stat_result`` is what os.stat() returned for the entry.

    *path* itself is not included.  A *path* that os.walk() yields nothing
    for produces an empty dict.
    """
    ret = {}
    # topdown=False: os.walk() yields the deepest directories first.
    for root, dirs, files in os.walk(path, False):
        for kind, names in ((1, files), (2, dirs)):
            for name in names:
                full = os.path.join(root, name)
                ret[full] = (kind, os.stat(full))
    return ret
def replaceroot(root, path, pathroot):
    """Re-base *path* from the tree rooted at *pathroot* onto *root*.

    *path* must start with *pathroot*.  The part of *path* after that prefix
    is joined onto *root* and returned, e.g.
    ``replaceroot("out", "in/a/b.html", "in")`` gives ``"out/a/b.html"``.

    *pathroot* may be given with or without a trailing path separator; the
    separator between the prefix and the remainder is stripped either way.
    If *path* equals *pathroot* there is no remainder and *root* is returned
    unchanged.
    """
    separators = os.sep + (os.altsep or '')
    # Drop the old root, then any separator left at the front, so the
    # remainder is always relative and os.path.join() cannot discard *root*.
    relative = path[len(pathroot):].lstrip(separators)
    if not relative:
        return root
    return os.path.join(root, relative)
def ensuredir(path):
    """Create directory *path*, and any missing parents, if it does not exist.

    Prints ``Making directory <path>`` for each directory it creates.  Does
    nothing when *path* already exists or is the empty string.
    """
    # os.path.dirname("out") is "" and os.path.exists("") is False, so an
    # empty path must be treated as "nothing to create" or a relative
    # single-component path would recurse without end.
    if not path or os.path.exists(path):
        return
    parent = os.path.dirname(path)
    # dirname() of a filesystem root is the root itself; don't recurse on it.
    if parent != path:
        ensuredir(parent)
    # With a trailing separator ("a/b/") the parent call has just created
    # this very directory, so check again before calling mkdir.
    if os.path.exists(path):
        return
    print('Making directory %s' % path)
    os.mkdir(path)
# Script body: mirror the tree at `indir` into `outdir`.
#   1. Delete anything in `outdir` that no longer has a counterpart in `indir`.
#   2. Copy every new or changed file across.  '.html' files (outside
#      '_vti_cnf' paths) have a leading UTF-8 BOM stripped and, unless they
#      already start with '<?php', are wrapped in a PHP prologue/epilogue
#      that sets response headers; all other files are copied verbatim.
# Usage: script.py [indir outdir]

# The UTF-8 byte order mark as it appears in a file's raw bytes.
UTF8BOM = b'\xef\xbb\xbf'

try:
    indir = sys.argv[1]
    outdir = sys.argv[2]
except IndexError:
    # Fewer than two arguments given: use the default directory names.
    indir = "public_html"
    outdir = "public_html_bomfixed"
if not indir or not outdir:
    raise Exception("Missing input or output dirs")

indircontents = enumeratedir(indir)
outdircontents = enumeratedir(outdir)

# Pass 1: remove stale output entries.  A child's path is always longer than
# its parent's, so going longest-first empties each stale directory before
# os.rmdir() is asked to remove it (rmdir fails on a non-empty directory).
for path in sorted(outdircontents, key=len, reverse=True):
    ipath = replaceroot(indir, path, outdir)
    if ipath not in indircontents:
        print('Deleting %s' % path)
        if os.path.isfile(path) or os.path.islink(path):
            os.remove(path)
        elif os.path.isdir(path):
            os.rmdir(path)

# Pass 2: bring across every file that is new or whose mtime has moved.
for path, st in indircontents.items():
    if st[0] != 1:
        # Directories are created on demand by ensuredir() below.
        continue
    f2path = replaceroot(outdir, path, indir)
    if f2path in outdircontents and \
            abs(st[1].st_mtime - outdircontents[f2path][1].st_mtime) < 2:
        # Unchanged: modification times agree to within two seconds.
        continue

    if path.endswith('.html') and '_vti_cnf' not in path:
        with open(path, 'rb') as f:
            data = f.read()
        isUTF8 = data.startswith(UTF8BOM)
        if isUTF8:
            # Drop the three BOM bytes.
            data = data[len(UTF8BOM):]
        ensuredir(os.path.dirname(f2path))
        with open(f2path, 'wb') as f2:
            if not data.startswith(b'<?php'):
                f2.write(b'<?php ')
                if isUTF8:
                    # Get PHP to mark server side that this is UTF-8 html
                    print('File %s changed, removing UTF-8 BOM to %s' % (path, f2path))
                    f2.write(b'header("Content-type: text/html; charset=utf-8");\r\n')
                else:
                    print('File %s changed, modifying to %s' % (path, f2path))
                f2.write(b'header("Last-Modified: " . gmdate("D, d M Y H:i:s", getlastmod()) . " GMT");\r\n')
                # Outer buffer lets the epilogue measure the final body for
                # Content-Length; the inner gzip buffer is only started for
                # files larger than 65535 bytes.
                f2.write(b'ob_start();\r\n')
                f2.write(b'$gzipthis = (filesize(__FILE__)>65535);\r\n')
                f2.write(b'if($gzipthis) ob_start("ob_gzhandler");\r\n')
                f2.write(b'?>\r\n')
                f2.write(data)
                f2.write(b'<?php ')
                f2.write(b'if($gzipthis) ob_end_flush();\r\n')
                f2.write(b'header("Content-Length: " . ob_get_length());\r\n')
                f2.write(b'ob_end_flush();\r\n')
                f2.write(b'?>\r\n')
            else:
                # Already begins with PHP code: write it through untouched
                # (minus any BOM).
                f2.write(data)
        # Give the output the source's mode and timestamps so the mtime
        # comparison above treats it as unchanged on the next run.
        shutil.copymode(path, f2path)
        shutil.copystat(path, f2path)
    else:
        print('File %s changed, copying to %s' % (path, f2path))
        ensuredir(os.path.dirname(f2path))
        shutil.copy2(path, f2path)

print("All up to date!")
|