OLD | NEW |
(Empty) | |
| 1 """SCons.Scanner.LaTeX |
| 2 |
| 3 This module implements the dependency scanner for LaTeX code. |
| 4 |
| 5 """ |
| 6 |
| 7 # |
| 8 # Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 The S
Cons Foundation |
| 9 # |
| 10 # Permission is hereby granted, free of charge, to any person obtaining |
| 11 # a copy of this software and associated documentation files (the |
| 12 # "Software"), to deal in the Software without restriction, including |
| 13 # without limitation the rights to use, copy, modify, merge, publish, |
| 14 # distribute, sublicense, and/or sell copies of the Software, and to |
| 15 # permit persons to whom the Software is furnished to do so, subject to |
| 16 # the following conditions: |
| 17 # |
| 18 # The above copyright notice and this permission notice shall be included |
| 19 # in all copies or substantial portions of the Software. |
| 20 # |
| 21 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY |
| 22 # KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE |
| 23 # WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 24 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE |
| 25 # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
| 26 # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
| 27 # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
| 28 # |
| 29 |
| 30 __revision__ = "src/engine/SCons/Scanner/LaTeX.py 5134 2010/08/16 23:02:40 bdeeg
an" |
| 31 |
| 32 import os.path |
| 33 import re |
| 34 |
| 35 import SCons.Scanner |
| 36 import SCons.Util |
| 37 |
# Graphics file extensions handed to the scanners as their
# graphics_extensions argument: TexGraphics by LaTeXScanner() (latex),
# LatexGraphics by PDFLaTeXScanner() (pdflatex).  List order is the order
# in which the extensions are tried.
TexGraphics = [
    '.eps',
    '.ps',
]
LatexGraphics = [
    '.pdf',
    '.png',
    '.jpg',
    '.gif',
    '.tif',
]
| 41 |
class _Null(object):
    """Marker type returned by modify_env_var when env['ENV'][var] is unset."""


# The sentinel value is the class object itself (not an instance), so
# callers can compare against it by identity.
_null = _Null
| 46 |
| 47 # The user specifies the paths in env[variable], similar to other builders. |
| 48 # They may be relative and must be converted to absolute, as expected |
| 49 # by LaTeX and Co. The environment may already have some paths in |
| 50 # env['ENV'][var]. These paths are honored, but the env[var] paths have |
| 51 # higher precedence. All changes are un-done on exit. |
def modify_env_var(env, var, abspath):
    """Prepend search directories to env['ENV'][var]; return the old value.

    The resulting env['ENV'][var] is a single os.pathsep-joined string
    that ends with a trailing os.pathsep.  Entries are prepended in this
    order, so the later ones end up with higher precedence:

      1. ``abspath``
      2. every entry of env[var] (if set), converted to an absolute path

    Returns the value env['ENV'][var] had on entry, or ``_null`` if it
    was not set, so that the caller can restore it afterwards.
    """
    try:
        previous = env['ENV'][var]
    except KeyError:
        previous = _null

    env.PrependENVPath(var, abspath)

    try:
        if SCons.Util.is_List(env[var]):
            absolute_dirs = [os.path.abspath(str(entry)) for entry in env[var]]
        else:
            # A plain string: split at os.pathsep to make each part absolute.
            absolute_dirs = [os.path.abspath(entry)
                             for entry in str(env[var]).split(os.pathsep)]
        env.PrependENVPath(var, absolute_dirs)
    except KeyError:
        # env[var] is not set: nothing more to prepend.
        pass

    # Build the final value as an explicit string so a trailing os.pathsep
    # can be added (without it the system paths are not searched as well);
    # env.AppendENVPath(var, os.pathsep) refuses to append a bare separator.
    # The separator is added unconditionally to cover the case where
    # env[var] was not set.
    joined = env['ENV'][var]
    if SCons.Util.is_List(joined):
        joined = os.pathsep.join(joined)
    env['ENV'][var] = joined + os.pathsep

    return previous
| 77 |
class FindENVPathDirs(object):
    """Callable that resolves the directories listed in env['ENV'][variable].

    An instance is bound to one *PATH variable name at construction time.
    """

    def __init__(self, variable):
        # Name of the key to look up inside env['ENV'] on each call.
        self.variable = variable

    def __call__(self, env, dir=None, target=None, source=None, argument=None):
        """Return a tuple of the directories found for the bound variable.

        Returns an empty tuple when env['ENV'] has no entry for the
        variable.  ``dir`` falls back to env.fs._cwd when it is falsy;
        ``argument`` is accepted but not used.
        """
        import SCons.PathList
        try:
            raw_path = env['ENV'][self.variable]
        except KeyError:
            return ()

        base_dir = dir if dir else env.fs._cwd
        path_list = SCons.PathList.PathList(raw_path)
        substituted = path_list.subst_path(env, target, source)
        return tuple(base_dir.Rfindalldirs(substituted))
| 93 |
| 94 |
| 95 |
def LaTeXScanner():
    """Create the prototype Scanner used for LaTeX sources built with latex.

    The scanner is a LaTeX instance whose graphics extensions are the
    TexGraphics list, tried in list order.
    """
    return LaTeX(
        name="LaTeXScanner",
        suffixes='$LATEXSUFFIXES',
        # listed in search order
        graphics_extensions=TexGraphics,
        recursive=0,
    )
| 106 |
def PDFLaTeXScanner():
    """Create the prototype Scanner used for LaTeX sources built with pdflatex.

    The scanner is a LaTeX instance whose graphics extensions are the
    LatexGraphics list, tried in list order.
    """
    return LaTeX(
        name="PDFLaTeXScanner",
        suffixes='$LATEXSUFFIXES',
        # listed in search order
        graphics_extensions=LatexGraphics,
        recursive=0,
    )
| 117 |
class LaTeX(SCons.Scanner.Base):
    """Class for scanning LaTeX files for included files.

    Unlike most scanners, which use regular expressions that just
    return the included file name, this returns a tuple consisting
    of the keyword for the inclusion ("include", "includegraphics",
    "input", "bibliography", "usepackage" or "lstinputlisting"), and
    then the file name itself.
    Based on a quick look at LaTeX documentation, it seems that we
    should append .tex suffix for the "include" keywords, append .tex if
    there is no extension for the "input" keyword, and need to add .bib
    for the "bibliography" keyword when no extension is given.  Likewise
    .sty is appended for an extension-less "usepackage" argument.

    Finally, if there is no extension for an "includegraphics" keyword
    latex will append .ps or .eps to find the file, while pdftex may use .pdf,
    .jpg, .tif, .mps, or .png.

    The actual subset and search order may be altered by
    DeclareGraphicsExtensions command. This complication is ignored.
    The default order corresponds to experimentation with teTeX
        $ latex --version
        pdfeTeX 3.141592-1.21a-2.2 (Web2C 7.5.4)
        kpathsea version 3.5.4
    The extensions are tried in the order of the graphics_extensions
    list given to the constructor (the module-level TexGraphics list for
    latex, LatexGraphics for pdflatex).

    Another difference is that the search path is determined by the type
    of the file being searched:
    env['TEXINPUTS'] for "input" and "include" keywords
    env['TEXINPUTS'] for "includegraphics" keyword
    env['TEXINPUTS'] for "lstinputlisting" keyword
    env['TEXINPUTS'] for "usepackage" keyword
    env['BIBINPUTS'] for "bibliography" keyword
    env['BSTINPUTS'] for "bibliographystyle" keyword

    FIXME: also look for the class or style in document[class|style]{}
    FIXME: also look for the argument of bibliographystyle{}
    """
    keyword_paths = {'include': 'TEXINPUTS',
                     'input': 'TEXINPUTS',
                     'includegraphics': 'TEXINPUTS',
                     'bibliography': 'BIBINPUTS',
                     'bibliographystyle': 'BSTINPUTS',
                     'usepackage': 'TEXINPUTS',
                     'lstinputlisting': 'TEXINPUTS'}
    env_variables = SCons.Util.unique(list(keyword_paths.values()))

    # Strips an "[options]" group (with optional whitespace before the '[')
    # from a matched keyword.  re.DOTALL lets it remove options that span
    # several lines.  Compiled once here instead of on every scan() call.
    _noopt_cre = re.compile(r'\s*\[.*$', re.DOTALL)

    # Suffix appended when the file name of these keywords has no extension.
    _default_suffix = {'input': '.tex',
                       'bibliography': '.bib',
                       'usepackage': '.sty'}

    def __init__(self, name, suffixes, graphics_extensions, *args, **kw):
        """Set up the regular expressions and the Scanner.Base machinery.

        name                -- scanner name, passed on as kw['name']
        suffixes            -- suffixes of scannable files; used both as
                               the scanner keys and by the scan check
        graphics_extensions -- extensions tried, in order, for an
                               "includegraphics" argument without extension
        Any further positional/keyword arguments go to SCons.Scanner.Base;
        the 'function', 'path_function', 'recursive', 'skeys',
        'scan_check' and 'name' keywords are always overwritten here.
        """

        # We have to include \n with the % we exclude from the first part
        # part of the regex because the expression is compiled with re.M.
        # Without the \n,  the ^ could match the beginning of a *previous*
        # line followed by one or more newline characters (i.e. blank
        # lines), interfering with a match on the next line.
        # add option for whitespace before the '[options]' or the '{filename}'
        regex = r'^[^%\n]*\\(include|includegraphics(?:\s*\[[^\]]+\])?|lstinputlisting(?:\[[^\]]+\])?|input|bibliography|usepackage)\s*{([^}]*)}'
        self.cre = re.compile(regex, re.M)
        # Group 1: the line up to the first unescaped '%'; group 2: the rest
        # of the line (the comment, '%' included).
        self.comment_re = re.compile(r'^((?:(?:\\%)|[^%\n])*)(.*)$', re.M)

        self.graphics_extensions = graphics_extensions

        def _scan(node, env, path=(), self=self):
            node = node.rfile()
            if not node.exists():
                return []
            return self.scan_recurse(node, path)

        class FindMultiPathDirs(object):
            """The stock FindPathDirs function has the wrong granularity:
            it is called once per target, while we need the path that depends
            on what kind of included files is being searched. This wrapper
            hides multiple instances of FindPathDirs, one per the LaTeX path
            variable in the environment. When invoked, the function calculates
            and returns all the required paths as a dictionary (converted into
            a tuple to become hashable). Then the scan function converts it
            back and uses a dictionary of tuples rather than a single tuple
            of paths.
            """
            def __init__(self, dictionary):
                # keyword -> (finder for env[var], finder for env['ENV'][var])
                self.dictionary = {}
                for k, n in dictionary.items():
                    self.dictionary[k] = (SCons.Scanner.FindPathDirs(n),
                                          FindENVPathDirs(n))

            def __call__(self, env, dir=None, target=None, source=None,
                         argument=None):
                # NOTE: the dir/target/source/argument values received here
                # are not forwarded; every finder is called with None.
                di = {}
                for k, (c, cENV) in self.dictionary.items():
                    di[k] = (c(env, dir=None, target=None, source=None,
                               argument=None),
                             cENV(env, dir=None, target=None, source=None,
                                  argument=None))
                # To prevent "dict is not hashable error"
                return tuple(di.items())

        class LaTeXScanCheck(object):
            """Skip all but LaTeX source files, i.e., do not scan *.eps,
            *.pdf, *.jpg, etc.
            """
            def __init__(self, suffixes):
                self.suffixes = suffixes

            def __call__(self, node, env):
                current = not node.has_builder() or node.is_up_to_date()
                scannable = node.get_suffix() in env.subst_list(self.suffixes)[0]
                # Returning false means that the file is not scanned.
                return scannable and current

        kw['function'] = _scan
        kw['path_function'] = FindMultiPathDirs(LaTeX.keyword_paths)
        kw['recursive'] = 0
        kw['skeys'] = suffixes
        kw['scan_check'] = LaTeXScanCheck(suffixes)
        kw['name'] = name

        SCons.Scanner.Base.__init__(self, *args, **kw)

    def _latex_names(self, include):
        """Return the candidate file names for a (keyword, filename) pair.

        The result is a list, in the order the names should be tried.
        """
        keyword = include[0]
        filename = include[1]

        if keyword == 'include':
            # "include" always gets .tex appended.
            return [filename + '.tex']

        has_extension = os.path.splitext(filename)[1] != ""
        if not has_extension:
            if keyword in self._default_suffix:
                return [filename + self._default_suffix[keyword]]
            if keyword == 'includegraphics':
                # Adding TexGraphics to the extensions tried here would
                # find dependencies for the PDF builder when only an .eps
                # figure is present.  Since it will be found if the user
                # tells scons how to make the pdf figure, leave it out
                # for now.
                return [filename + e for e in self.graphics_extensions]

        return [filename]

    def sort_key(self, include):
        """Return the case-normalized string form of include, used for sorting."""
        return SCons.Node.FS._my_normcase(str(include))

    def find_include(self, include, source_dir, path):
        """Locate the file named by a (keyword, filename) pair.

        path maps a keyword to a pair of directory tuples: the directories
        from env[var] and those from env['ENV'][var].  For every candidate
        name, source_dir plus the env[var] directories are searched first,
        then source_dir plus the env['ENV'][var] directories.

        Returns (node, include); node is None when nothing was found.
        """
        try:
            sub_path = path[include[0]]
        except (IndexError, KeyError):
            # No search path known for this keyword: use an empty pair so
            # that only source_dir is searched (a bare () would raise
            # IndexError when indexed below).
            sub_path = ((), ())

        for n in self._latex_names(include):
            # First the path in env[var], then the one in env['ENV'][var].
            for extra_dirs in (sub_path[0], sub_path[1]):
                i = SCons.Node.FS.find_file(n, (source_dir,) + extra_dirs)
                if i:
                    return i, include
        return None, include

    def canonical_text(self, text):
        """Standardize an input TeX-file contents.

        Currently:
          * removes comments, unwrapping comment-wrapped lines.
        """
        out = []
        line_continues_a_comment = False
        for line in text.splitlines():
            line, comment = self.comment_re.findall(line)[0]
            if line_continues_a_comment:
                # The previous line ended in a comment: glue this line onto
                # it, dropping the leading whitespace.
                out[-1] = out[-1] + line.lstrip()
            else:
                out.append(line)
            line_continues_a_comment = len(comment) > 0
        return '\n'.join(out).rstrip() + '\n'

    def scan(self, node):
        """Return the list of (keyword, filename) pairs included by node.

        The result is cached in node.includes, so the file contents are
        only scanned once per node.
        """
        # Modify the default scan function to allow for the regular
        # expression to return a comma separated list of file names
        # as can be the case with the bibliography keyword.
        if node.includes is not None:
            return node.includes

        text = self.canonical_text(node.get_text_contents())
        # 1. Split comma-separated lines, e.g.
        #      ('bibliography', 'phys,comp')
        #    should become two entries
        #      ('bibliography', 'phys')
        #      ('bibliography', 'comp')
        # 2. Remove the options, e.g., such as
        #      ('includegraphics[clip,width=0.7\\linewidth]', 'picture.eps')
        #    should become
        #      ('includegraphics', 'picture.eps')
        split_includes = []
        for include in self.cre.findall(text):
            inc_type = self._noopt_cre.sub('', include[0])
            for inc_name in include[1].split(','):
                split_includes.append((inc_type, inc_name))

        node.includes = split_includes
        return split_includes

    def scan_recurse(self, node, path=()):
        """ do a recursive scan of the top level target file
        This lets us search for included files based on the
        directory of the main file just as latex does

        path is the tuple of (keyword, (dirs, ENV dirs)) items built by
        the path function.  Returns the found nodes sorted by sort_key.
        """

        path_dict = dict(list(path))

        queue = []
        queue.extend(self.scan(node))
        # File names (as written in the source) already handled.
        seen = set()

        # This is a hand-coded DSU (decorate-sort-undecorate, or
        # Schwartzian transform) pattern.  The sort key is the raw name
        # of the file as specifed on the \include, \input, etc. line.
        # TODO: what about the comment in the original Classic scanner:
        # """which lets
        # us keep the sort order constant regardless of whether the file
        # is actually found in a Repository or locally."""
        nodes = []
        source_dir = node.get_dir()
        while queue:
            include = queue.pop()
            if include[1] in seen:
                continue
            seen.add(include[1])

            n, i = self.find_include(include, source_dir, path_dict)
            if n is None:
                # Do not bother with 'usepackage' warnings, as they most
                # likely refer to system-level files
                if include[0] != 'usepackage':
                    SCons.Warnings.warn(SCons.Warnings.DependencyWarning,
                                        "No dependency generated for file: %s (included from: %s) -- file not found" % (i, node))
            else:
                sortkey = self.sort_key(n)
                nodes.append((sortkey, n))
                # recurse down
                queue.extend(self.scan(n))

        return [pair[1] for pair in sorted(nodes)]
| 379 |
| 380 # Local Variables: |
| 381 # tab-width:4 |
| 382 # indent-tabs-mode:nil |
| 383 # End: |
| 384 # vim: set expandtab tabstop=4 shiftwidth=4: |
OLD | NEW |