OLD | NEW |
(Empty) | |
| 1 #! /usr/bin/python |
| 2 |
| 3 # Copyright (C) 2009-2010, International Business Machines Corporation, Google a
nd Others. |
| 4 # All rights reserved. |
| 5 |
| 6 # |
| 7 # Script to check and fix svn property settings for ICU source files. |
| 8 # Also check for the correct line endings on files with svn:eol-style = native |
| 9 # |
| 10 # THIS SCRIPT DOES NOT WORK ON WINDOWS |
| 11 # It only works correctly on platforms where the native line ending is a pla
in \n |
| 12 # |
| 13 # usage: |
| 14 # icu-svnprops-check.py [options] |
| 15 # |
| 16 # options: |
| 17 # -f | --fix Fix any problems that are found |
| 18 # -h | --help Print a usage line and exit. |
| 19 # |
| 20 # The tool operates recursively on the directory from which it is run. |
| 21 # Only files from the svn repository are checked. |
| 22 # No changes are made to the repository; only the working copy will be altered. |
| 23 |
| 24 import sys |
| 25 import os |
| 26 import os.path |
| 27 import re |
| 28 import getopt |
| 29 |
#
#  svn autoprops definitions.
#      Copy and paste here the ICU recommended auto-props from
#      http://icu-project.org/docs/subversion_howto/index.html
#
#  parse_auto_props() parses this string, and main() then verifies that
#  files in the repository have the recommended properties set.
#
#  Inside the string, blank lines, lines starting with '#' and the
#  "[auto-props]" header are skipped by the parser.  Every other line
#  must have the form
#      file-name-pattern = propname[=value][;propname[=value]...]
#  A literal ';' inside a property value is written as ';;' (see the
#  *.java entry).
#
#  NOTE(review): the *.rtf and *.pdf entries name the property
#  "mime-type" rather than "svn:mime-type" - confirm against the ICU
#  howto page whether that is intended.
#
svn_auto_props = """
### Section for configuring automatic properties.
[auto-props]
### The format of the entries is:
### file-name-pattern = propname[=value][;propname[=value]...]
### The file-name-pattern can contain wildcards (such as '*' and
### '?'). All entries which match will be applied to the file.
### Note that auto-props functionality must be enabled, which
### is typically done by setting the 'enable-auto-props' option.
*.c = svn:eol-style=native
*.cc = svn:eol-style=native
*.cpp = svn:eol-style=native
*.h = svn:eol-style=native
*.rc = svn:eol-style=native
*.dsp = svn:eol-style=native
*.dsw = svn:eol-style=native
*.sln = svn:eol-style=native
*.vcproj = svn:eol-style=native
configure = svn:eol-style=native;svn:executable
*.sh = svn:eol-style=native;svn:executable
*.pl = svn:eol-style=native;svn:executable
*.py = svn:eol-style=native;svn:executable
*.txt = svn:mime-type=text/plain;svn:eol-style=native
*.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
*.ucm = svn:eol-style=native
*.html = svn:eol-style=native;svn:mime-type=text/html
*.htm = svn:eol-style=native;svn:mime-type=text/html
*.xml = svn:eol-style=native
Makefile = svn:eol-style=native
*.in = svn:eol-style=native
*.mak = svn:eol-style=native
*.mk = svn:eol-style=native
*.png = svn:mime-type=image/png
*.jpeg = svn:mime-type=image/jpeg
*.jpg = svn:mime-type=image/jpeg
*.bin = svn:mime-type=application/octet-stream
*.brk = svn:mime-type=application/octet-stream
*.cnv = svn:mime-type=application/octet-stream
*.dat = svn:mime-type=application/octet-stream
*.icu = svn:mime-type=application/octet-stream
*.res = svn:mime-type=application/octet-stream
*.spp = svn:mime-type=application/octet-stream
# new additions 2007-dec-5 srl
*.rtf = mime-type=text/rtf
*.pdf = mime-type=application/pdf
# changed 2008-04-08: modified .txt, above, adding mime-type
# changed 2010-11-09: modified .java, adding mime-type
# Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
"""
| 87 |
| 88 |
# file_types:  The parsed form of the svn auto-props specification.
#    A list with one entry per file type - .cc, .cpp, .txt, etc.
#    Each entry is a (type, proplist) pair:
#       "type" is a regular expression string that will match a file name.
#       "proplist" is another list, one element per property.
#           Each property item is a (prop name, prop value) pair.
#    The list starts out empty; parse_auto_props() appends the entries.
file_types = []
| 96 |
def parse_auto_props(auto_props=None, parsed_types=None):
    """Parse an svn auto-props specification into (pattern, proplist) entries.

    auto_props:   text of the auto-props definitions to parse.  Defaults to
                  the module-level svn_auto_props string.
    parsed_types: list that the parsed entries are appended to.  Defaults to
                  the module-level file_types list.
    return:       the list that the entries were appended to.

    Each appended entry is a (pattern, proplist) tuple.  "pattern" is a
    regular expression string built from the auto-props file name pattern,
    e.g. "*.cpp" ==> ".*\\.cpp$".  "proplist" is a list of
    (prop name, prop value) tuples; a property given without "=value",
    e.g. svn:executable, gets the value "".

    Blank lines, '#' comment lines and the "[auto-props]" header are skipped.
    Any other line that does not start with "<file-type> =" is reported on
    stdout and skipped.
    """
    if auto_props is None:
        auto_props = svn_auto_props
    if parsed_types is None:
        parsed_types = file_types

    for propline in auto_props.splitlines():
        if re.match(r"\s*(#.*)?$", propline):         # comment and blank lines
            continue
        if re.match(r"\s*\[auto-props\]", propline):  # the [auto-props] line
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):  # minimal syntax check
            print("Bad line from autoprops definitions: " + propline)
            continue
        file_type, string_proplist = propline.split("=", 1)

        # Transform the file type expression from autoprops into a normal
        # regular expression, e.g. "*.cpp" ==> ".*\.cpp$".
        # Escape everything first so that characters such as '.' or '+' are
        # matched literally, then turn the two wildcards back into their
        # regular expression equivalents.
        pattern = re.escape(file_type.strip())
        pattern = pattern.replace(r"\*", ".*").replace(r"\?", ".")
        pattern += "$"

        # Example string_proplist at this point:
        #     " svn:eol-style=native;svn:executable"
        # Split on ';' into a list of properties.  The negative lookahead and
        # lookbehind keep the split from matching ';;', which is an escaped
        # ';' within a property value.
        proplist = []
        for prop in re.split(r"(?<!;);(?!;)", string_proplist):
            # Properties with no explicit value, e.g. svn:executable, end up
            # with an empty prop_val.
            prop_name, _, prop_val = prop.partition("=")
            prop_name = prop_name.strip()
            if not prop_name:
                # A stray or trailing ';' yields an empty item; there is no
                # property to check for it.
                continue
            # Unescape any ";;" in a property value, e.g. the mime-type from
            #   *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            proplist.append((prop_name, prop_val))

        parsed_types.append((pattern, proplist))

    return parsed_types
| 137 |
| 138 |
def runCommand(cmd):
    """Run a shell command and return everything it wrote to stdout.

    cmd:     the command line; it is handed to the shell by os.popen().
    return:  the command's standard output, as a string.

    If the command fails, a message is written to stderr and the script
    exits with the command's exit code (or 1 if it has none, e.g. when the
    command was killed by a signal).
    """
    output_file = os.popen(cmd)
    try:
        output_text = output_file.read()
    finally:
        # Always reap the child, even if reading its output failed.
        exit_status = output_file.close()
    if exit_status:
        sys.stderr.write('" %s " failed. Exiting.\n' % cmd)
        # On POSIX, close() reports the exit code shifted left by one byte.
        # Passing that straight to sys.exit() would truncate e.g. 256 (exit
        # code 1) to 0 and make the failure look like success.
        exit_code = (exit_status >> 8) & 0xff if exit_status > 0 else 0
        sys.exit(exit_code or 1)
    return output_text
| 147 |
| 148 |
def usage():
    """Print a one-line usage summary for this script to stdout."""
    # Written as a single parenthesized expression so that the line is valid
    # both as a Python 2 print statement and as a Python 3 print() call.
    print("usage: %s [-f | --fix] [-h | --help]" % sys.argv[0])
| 151 |
| 152 |
def check_utf8(file_name, base_mime_type, actual_mime_type):
    """UTF-8 file check: add a charset to a text file's mime-type if needed.

    file_name:        name of a text file.
    base_mime_type:   svn:mime-type property value from the auto-props
                      definitions.  It may or may not already include a
                      charset= part.
    actual_mime_type: existing svn:mime-type property value for the file.
    return:           svn:mime-type property value, with charset added when
                      appropriate.

    The file is left untouched; only its contents are inspected.  Warnings
    about files that are neither ASCII nor UTF-8, and about UTF-8 files
    without a BOM, are printed to stdout.
    """
    # If the file already has a charset in its mime-type, don't make any change.
    if "charset=" in actual_mime_type:
        return actual_mime_type

    # Read raw bytes so that the check does not depend on the locale encoding.
    with open(file_name, "rb") as f:
        contents = f.read()

    # bytearray() yields integers under both Python 2 and Python 3.
    if all(byte < 128 for byte in bytearray(contents)):
        # Pure ASCII (this also covers an empty file).
        return base_mime_type

    try:
        contents.decode("UTF-8")
    except UnicodeDecodeError:
        print("warning: %s: not ASCII, not UTF-8" % file_name)
        return base_mime_type

    if not contents.startswith(b"\xef\xbb\xbf"):
        print("UTF-8 file with no BOM: " + file_name)

    # The auto-props value may already name a charset (e.g. the *.java
    # entry); appending a second one would produce an invalid mime-type.
    if "charset=" in base_mime_type:
        return base_mime_type

    # Append charset=utf-8.  The ';' is not backslash-escaped: main() puts the
    # value inside single quotes for the shell, where a backslash would be
    # kept as a literal character of the property value.
    return base_mime_type + ";charset=utf-8"
| 187 |
| 188 |
def _shell_quote(arg):
    """Return arg single-quoted so a POSIX shell passes it through unchanged."""
    return "'" + arg.replace("'", "'\\''") + "'"


def _has_dos_line_endings(file_name):
    """Return True if the file contains at least one CR LF line ending."""
    with open(file_name, "rb") as f:
        return b"\r\n" in f.read()


def _remove_dos_line_endings(file_name):
    """Rewrite the file in place with every CR LF replaced by a plain LF."""
    with open(file_name, "rb") as f:
        contents = f.read()
    with open(file_name, "wb") as f:
        f.write(contents.replace(b"\r\n", b"\n"))


def main(argv):
    """Check, and optionally fix, the svn properties of the working copy.

    argv:  the command line arguments, without the program name.
           -f | --fix    fix any problems that are found
           -h | --help   print a usage line and exit

    Every file listed by "svn ls -R" is compared against the auto-props
    definitions.  For each property that differs, the corresponding
    "svn propset" command is printed, and also run when --fix was given.
    Files whose svn:eol-style should be native are additionally checked for
    DOS line endings.

    Exits with status 2 on a command line error.
    """
    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # The exception text names the offending option; argv[0] need not be it.
        print(str(err))
        usage()
        sys.exit(2)
    for opt, _ in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)    # a usage error must not look like success

    parse_auto_props()
    file_list = runCommand("svn ls -R ").splitlines()

    for f in file_list:
        if os.path.isdir(f):
            # print "Skipping dir " + f
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue

        # Auto-props patterns describe file names, not paths.  Matching the
        # whole relative path would keep patterns without a leading wildcard
        # (Makefile, configure) from ever matching inside a subdirectory.
        base_name = os.path.basename(f)
        # Quoted form for shell command lines, so that file names containing
        # spaces or shell metacharacters are passed through intact.
        quoted_f = _shell_quote(f)

        for file_pattern, props in file_types:
            if not re.match(file_pattern, base_name):
                continue
            # print "doing " + f
            for propname, propval in props:
                actual_propval = runCommand(
                    "svn propget --strict " + propname + " " + quoted_f)
                # print propname + ": " + actual_propval
                if propname == "svn:mime-type" and propval.startswith("text/"):
                    # Check for UTF-8 text files; they should have
                    # svn:mime-type=text/something;charset=utf-8
                    propval = check_utf8(f, propval, actual_propval)
                # A property without a value (propval "") is accepted when
                # svn reports it as "*".
                if not (propval == actual_propval
                        or (propval == "" and actual_propval == "*")):
                    propset_cmd = "svn propset %s %s %s" % (
                        propname, _shell_quote(propval), quoted_f)
                    print(propset_cmd)
                    if fix_problems:
                        os.system(propset_cmd)
                if propname == "svn:eol-style" and propval == "native":
                    if _has_dos_line_endings(f):
                        if fix_problems:
                            print(f + ": Removing DOS CR characters.")
                            _remove_dos_line_endings(f)
                        else:
                            print(f + " contains DOS CR characters.")


if __name__ == "__main__":
    main(sys.argv[1:])
OLD | NEW |