| 1 | """Common operations on Posix pathnames.
 | 
| 2 | 
 | 
| 3 | Instead of importing this module directly, import os and refer to
 | 
| 4 | this module as os.path.  The "os.path" name is an alias for this
 | 
| 5 | module on Posix systems; on other systems (e.g. Mac, Windows),
 | 
| 6 | os.path provides the same operations in a manner specific to that
 | 
| 7 | platform, and is an alias to another module (e.g. macpath, ntpath).
 | 
| 8 | 
 | 
| 9 | Some of this can actually be useful on non-Posix systems too, e.g.
 | 
| 10 | for manipulation of the pathname component of URLs.
 | 
| 11 | """
 | 
| 12 | 
 | 
| 13 | import os
 | 
| 14 | import sys
 | 
| 15 | import stat
 | 
| 16 | import genericpath
 | 
| 17 | import warnings
 | 
| 18 | from genericpath import *
 | 
| 19 | from genericpath import _unicode
 | 
| 20 | 
 | 
# Public names exported by "from posixpath import *".
__all__ = [
    "normcase", "isabs", "join", "splitdrive", "split", "splitext",
    "basename", "dirname", "commonprefix", "getsize", "getmtime",
    "getatime", "getctime", "islink", "exists", "lexists", "isdir", "isfile",
    "ismount", "walk", "expanduser", "expandvars", "normpath", "abspath",
    "samefile", "sameopenfile", "samestat",
    "curdir", "pardir", "sep", "pathsep", "defpath", "altsep", "extsep",
    "devnull", "realpath", "supports_unicode_filenames", "relpath",
]
 | 
| 28 | 
 | 
| 29 | # strings representing various path-related bits and pieces
 | 
curdir = '.'                 # name of the current directory
pardir = '..'                # name of the parent directory
extsep = '.'                 # separates a base name from its extension
sep = '/'                    # separates pathname components
pathsep = ':'                # separates entries in a search path
defpath = ':/bin:/usr/bin'   # default search path
altsep = None                # no alternative component separator
devnull = '/dev/null'        # path of the null device
 | 
| 38 | 
 | 
| 39 | # Normalize the case of a pathname.  Trivial in Posix, string.lower on Mac.
 | 
| 40 | # On MS-DOS this may also turn slashes into backslashes; however, other
 | 
| 41 | # normalizations (such as optimizing '../' away) are not allowed
 | 
| 42 | # (another function should be defined to do that).
 | 
| 43 | 
 | 
def normcase(s):
    """Return the pathname s with its case normalized.

    The argument is returned unchanged; no case folding is performed
    here.
    """
    return s
 | 
| 47 | 
 | 
| 48 | 
 | 
| 49 | # Return whether a path is absolute.
 | 
| 50 | # Trivial in Posix, harder on the Mac or MS-DOS.
 | 
| 51 | 
 | 
def isabs(s):
    """Return True if the pathname s is absolute, i.e. begins with '/'."""
    root = '/'
    return s.startswith(root)
 | 
| 55 | 
 | 
| 56 | 
 | 
| 57 | # Join pathnames.
 | 
| 58 | # Ignore the previous parts if a part is absolute.
 | 
| 59 | # Insert a '/' unless the first part is empty or already ends in '/'.
 | 
| 60 | 
 | 
def join(a, *p):
    """Concatenate pathname components, adding '/' between them as needed.

    A component that starts with '/' is absolute: it replaces everything
    accumulated so far.  No separator is inserted when the accumulated
    path is empty or already ends in '/'.  An empty final component
    therefore yields a result that ends with a separator.
    """
    result = a
    for part in p:
        if part.startswith('/'):
            # Absolute component: discard what came before it.
            result = part
        else:
            if result != '' and not result.endswith('/'):
                result += '/'
            result += part
    return result
 | 
| 75 | 
 | 
| 76 | 
 | 
# Split a path into head (everything up to the last '/') and tail (the
# rest).  If the path ends in '/', tail will be empty.  If there is no
# '/' in the path, head will be empty.
# Trailing '/'es are stripped from head unless it is the root.
 | 
| 81 | 
 | 
def split(p):
    """Split the pathname p into a "(head, tail)" tuple.

    tail is whatever follows the final '/', and head is everything up to
    and including it, with trailing slashes removed unless head consists
    of slashes only.  Either part may be empty.
    """
    before, slash, tail = p.rpartition('/')
    head = before + slash
    trimmed = head.rstrip('/')
    if trimmed:
        # head holds more than just slashes, so drop the trailing ones.
        head = trimmed
    return head, tail
 | 
| 90 | 
 | 
| 91 | 
 | 
| 92 | # Split a path in root and extension.
 | 
| 93 | # The extension is everything starting at the last dot in the last
 | 
| 94 | # pathname component; the root is everything before that.
 | 
| 95 | # It is always true that root + ext == p.
 | 
| 96 | 
 | 
def splitext(p):
    # Delegate to the shared implementation, passing this module's
    # separator constants.
    shared_splitext = genericpath._splitext
    return shared_splitext(p, sep, altsep, extsep)
# Reuse the shared implementation's docstring for this wrapper.
splitext.__doc__ = genericpath._splitext.__doc__
 | 
| 100 | 
 | 
| 101 | # Split a pathname into a drive specification and the rest of the
 | 
| 102 | # path.  Useful on DOS/Windows/NT; on Unix, the drive is always empty.
 | 
| 103 | 
 | 
def splitdrive(p):
    """Split the pathname p into a "(drive, path)" tuple.

    The drive part is always the empty string here, so the second item
    is p itself.
    """
    drive = ''
    return drive, p
 | 
| 108 | 
 | 
| 109 | 
 | 
| 110 | # Return the tail (basename) part of a path, same as split(path)[1].
 | 
| 111 | 
 | 
def basename(p):
    """Return the final component of the pathname p.

    This is everything after the last '/', or all of p when it contains
    no '/'.  The result is empty when p ends in '/'.
    """
    return p.rpartition('/')[2]
 | 
| 116 | 
 | 
| 117 | 
 | 
| 118 | # Return the head (dirname) part of a path, same as split(path)[0].
 | 
| 119 | 
 | 
def dirname(p):
    """Return the directory component of the pathname p.

    This is everything up to the last '/', with trailing slashes removed
    unless the result consists of slashes only.  The result is empty
    when p contains no '/'.
    """
    head = p[:p.rfind('/') + 1]
    trimmed = head.rstrip('/')
    # An empty trimmed value means head was '' or nothing but slashes;
    # in both cases head is returned untouched.
    return trimmed or head
 | 
| 127 | 
 | 
| 128 | 
 | 
| 129 | # Is a path a symbolic link?
 | 
| 130 | # This will always return false on systems where os.lstat doesn't exist.
 | 
| 131 | 
 | 
def islink(path):
    """Return whether path refers to a symbolic link.

    False is returned when os.lstat fails on path, or when os.lstat is
    not available at all.
    """
    try:
        info = os.lstat(path)
    except (os.error, AttributeError):
        return False
    else:
        return stat.S_ISLNK(info.st_mode)
 | 
| 139 | 
 | 
| 140 | # Being true for dangling symbolic links is also useful.
 | 
| 141 | 
 | 
def lexists(path):
    """Return whether path exists, without following a final symlink.

    Because os.lstat is used, a broken symbolic link still counts as
    existing.
    """
    try:
        os.lstat(path)
    except os.error:
        return False
    else:
        return True
 | 
| 149 | 
 | 
| 150 | 
 | 
| 151 | # Are two filenames really pointing to the same file?
 | 
| 152 | 
 | 
def samefile(f1, f2):
    """Return whether the pathnames f1 and f2 refer to the same file.

    Both paths are examined with os.stat and the results compared with
    samestat.  Errors raised by os.stat are not caught.
    """
    return samestat(os.stat(f1), os.stat(f2))
 | 
| 158 | 
 | 
| 159 | 
 | 
| 160 | # Are two open files really referencing the same file?
 | 
| 161 | # (Not necessarily the same file descriptor!)
 | 
| 162 | 
 | 
def sameopenfile(fp1, fp2):
    """Return whether fp1 and fp2 refer to the same open file.

    Both arguments are passed straight to os.fstat, so they must be
    values that os.fstat accepts (file descriptors).  The two stat
    results are compared with samestat.
    """
    return samestat(os.fstat(fp1), os.fstat(fp2))
 | 
| 168 | 
 | 
| 169 | 
 | 
| 170 | # Are two stat buffers (obtained from stat, fstat or lstat)
 | 
| 171 | # describing the same file?
 | 
| 172 | 
 | 
def samestat(s1, s2):
    """Return whether the stat results s1 and s2 describe the same file.

    Two results match when both their inode numbers (st_ino) and their
    device numbers (st_dev) are equal.
    """
    if s1.st_ino != s2.st_ino:
        return False
    return s1.st_dev == s2.st_dev
 | 
| 177 | 
 | 
| 178 | 
 | 
| 179 | # Is a path a mount point?
 | 
| 180 | # (Does this work for all UNIXes?  Is it even guaranteed to work by Posix?)
 | 
| 181 | 
 | 
def ismount(path):
    """Return whether path is a mount point.

    A symbolic link is never reported as a mount point, and neither is
    a path that cannot be examined with os.lstat.  Otherwise path is a
    mount point when its parent directory is on a different device, or
    when path and its parent are the same i-node.
    """
    if islink(path):
        return False
    try:
        here = os.lstat(path)
        parent = os.lstat(realpath(join(path, '..')))
    except os.error:
        # path (or its parent) does not exist, so it is not a mount point.
        return False
    if here.st_dev != parent.st_dev:
        # path/.. is on a different device than path.
        return True
    # path/.. being the same i-node as path also marks a mount point.
    return here.st_ino == parent.st_ino
 | 
| 201 | 
 | 
| 202 | 
 | 
| 203 | # Directory tree walk.
 | 
| 204 | # For each directory under top (including top itself, but excluding
 | 
| 205 | # '.' and '..'), func(arg, dirname, filenames) is called, where
 | 
| 206 | # dirname is the name of the directory and filenames is the list
 | 
| 207 | # of files (and subdirectories etc.) in the directory.
 | 
| 208 | # The func may modify the filenames list, to implement a filter,
 | 
| 209 | # or to impose a different order of visiting.
 | 
| 210 | 
 | 
def walk(top, func, arg):
    """Walk a directory tree, invoking a callback for every directory.

    For top and each directory below it, func(arg, dirname, fnames) is
    called, where dirname is the directory's name and fnames is the list
    returned by os.listdir for it.  func may edit fnames in place (e.g.
    with del or slice assignment); only the names still in the list are
    then considered for recursion, in list order, which allows filtering
    or reordering the walk.  arg is handed to func untouched; it may be
    a filename pattern, a mutable accumulator, or simply None.

    Directories that cannot be listed and entries that cannot be
    examined with os.lstat are skipped silently.  Because os.lstat is
    used, symbolic links to directories are not descended into.
    """
    warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.",
                      stacklevel=2)
    try:
        entries = os.listdir(top)
    except os.error:
        return
    func(arg, top, entries)
    # Iterate over the same list object that func was given, so that
    # in-place edits made by func take effect.
    for entry in entries:
        child = join(top, entry)
        try:
            mode = os.lstat(child).st_mode
        except os.error:
            continue
        if stat.S_ISDIR(mode):
            walk(child, func, arg)
 | 
| 240 | 
 | 
| 241 | 
 | 
| 242 | # Expand paths beginning with '~' or '~user'.
 | 
| 243 | # '~' means $HOME; '~user' means that user's home directory.
 | 
| 244 | # If the path doesn't begin with '~', or if the user or $HOME is unknown,
 | 
| 245 | # the path is returned unchanged (leaving error reporting to whatever
 | 
| 246 | # function is called with the expanded path as argument).
 | 
| 247 | # See also module 'glob' for expansion of *, ? and [...] in pathnames.
 | 
| 248 | # (A function should also be defined to do full *sh-style environment
 | 
| 249 | # variable expansion.)
 | 
| 250 | 
 | 
def expanduser(path):
    """Expand an initial ~ or ~user component of path.

    A leading '~' is replaced by the value of $HOME or, when HOME is not
    set, by the current user's home directory from the password
    database.  A leading '~user' is replaced by that user's home
    directory from the password database.  Trailing slashes are stripped
    from the home directory before the rest of path is appended.

    The path is returned unchanged when it does not start with '~', or
    when the home directory cannot be determined: the user is unknown,
    the current uid has no password database entry, or the pwd module
    is not available.
    """
    if not path.startswith('~'):
        return path
    # i marks the end of the '~' or '~user' prefix.
    i = path.find('/', 1)
    if i < 0:
        i = len(path)
    if i == 1:
        # Plain '~': prefer $HOME, then fall back to the password database.
        if 'HOME' in os.environ:
            userhome = os.environ['HOME']
        else:
            try:
                import pwd
            except ImportError:
                # No password database available.
                return path
            try:
                userhome = pwd.getpwuid(os.getuid()).pw_dir
            except KeyError:
                # The current uid has no password database entry.
                return path
    else:
        try:
            import pwd
        except ImportError:
            return path
        try:
            pwent = pwd.getpwnam(path[1:i])
        except KeyError:
            return path
        userhome = pwent.pw_dir
    userhome = userhome.rstrip('/')
    # A home directory of '/' strips down to ''; fall back to '/' so that
    # a bare '~' does not expand to an empty string.
    return (userhome + path[i:]) or '/'
 | 
| 274 | 
 | 
| 275 | 
 | 
| 276 | # Expand paths containing shell variable substitutions.
 | 
| 277 | # This expands the forms $variable and ${variable} only.
 | 
| 278 | # Non-existent variables are left unchanged.
 | 
| 279 | 
 | 
| 280 | _varprog = None
 | 
| 281 | _uvarprog = None
 | 
| 282 | 
 | 
| 283 | def expandvars(path):
 | 
| 284 |     """Expand shell variables of form $var and ${var}.  Unknown variables
 | 
| 285 |     are left unchanged."""
 | 
| 286 |     global _varprog, _uvarprog
 | 
| 287 |     if '$' not in path:
 | 
| 288 |         return path
 | 
| 289 |     if isinstance(path, _unicode):
 | 
| 290 |         if not _uvarprog:
 | 
| 291 |             import re
 | 
| 292 |             _uvarprog = re.compile(ur'\$(\w+|\{[^}]*\})', re.UNICODE)
 | 
| 293 |         varprog = _uvarprog
 | 
| 294 |         encoding = sys.getfilesystemencoding()
 | 
| 295 |     else:
 | 
| 296 |         if not _varprog:
 | 
| 297 |             import re
 | 
| 298 |             _varprog = re.compile(r'\$(\w+|\{[^}]*\})')
 | 
| 299 |         varprog = _varprog
 | 
| 300 |         encoding = None
 | 
| 301 |     i = 0
 | 
| 302 |     while True:
 | 
| 303 |         m = varprog.search(path, i)
 | 
| 304 |         if not m:
 | 
| 305 |             break
 | 
| 306 |         i, j = m.span(0)
 | 
| 307 |         name = m.group(1)
 | 
| 308 |         if name.startswith('{') and name.endswith('}'):
 | 
| 309 |             name = name[1:-1]
 | 
| 310 |         if encoding:
 | 
| 311 |             name = name.encode(encoding)
 | 
| 312 |         if name in os.environ:
 | 
| 313 |             tail = path[j:]
 | 
| 314 |             value = os.environ[name]
 | 
| 315 |             if encoding:
 | 
| 316 |                 value = value.decode(encoding)
 | 
| 317 |             path = path[:i] + value
 | 
| 318 |             i = len(path)
 | 
| 319 |             path += tail
 | 
| 320 |         else:
 | 
| 321 |             i = j
 | 
| 322 |     return path
 | 
| 323 | 
 | 
| 324 | 
 | 
| 325 | # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
 | 
| 326 | # It should be understood that this may change the meaning of the path
 | 
| 327 | # if it contains symbolic links!
 | 
| 328 | 
 | 
def normpath(path):
    """Normalize path by collapsing redundant separators and references.

    Empty components and '.' are dropped, and 'name/..' pairs cancel
    out.  Leading '..' components are kept for relative paths and
    dropped for absolute ones.  An empty result becomes '.'.  The result
    is unicode when path is unicode.
    """
    # Preserve unicode (if path is unicode)
    if isinstance(path, _unicode):
        slash, dot = u'/', u'.'
    else:
        slash, dot = '/', '.'
    if path == '':
        return dot
    # POSIX allows one or two initial slashes, but treats three or more
    # as single slash.
    if path.startswith('//') and not path.startswith('///'):
        leading = 2
    elif path.startswith('/'):
        leading = 1
    else:
        leading = 0
    kept = []
    for piece in path.split('/'):
        if piece == '' or piece == '.':
            continue
        if piece != '..':
            kept.append(piece)
        elif kept and kept[-1] != '..':
            # '..' cancels the preceding ordinary component.
            kept.pop()
        elif not leading or kept:
            # Nothing to cancel in a relative path: keep the '..'.
            kept.append(piece)
        # Otherwise '..' directly under the root is dropped.
    path = slash.join(kept)
    if leading:
        path = slash * leading + path
    return path or dot
 | 
| 356 | 
 | 
| 357 | 
 | 
def abspath(path):
    """Return a normalized, absolute version of path.

    A relative path is joined onto the current working directory first
    (os.getcwdu for unicode paths, os.getcwd otherwise), and the result
    is passed through normpath.
    """
    if isabs(path):
        return normpath(path)
    getcwd = os.getcwdu if isinstance(path, _unicode) else os.getcwd
    return normpath(join(getcwd(), path))
 | 
| 367 | 
 | 
| 368 | 
 | 
| 369 | # Return a canonical path (i.e. the absolute location of a file on the
 | 
| 370 | # filesystem).
 | 
| 371 | 
 | 
def realpath(filename):
    """Return the canonical path of filename.

    Symbolic links encountered along the path are resolved by
    _joinrealpath, and the result is made absolute with abspath.
    """
    resolved, _ = _joinrealpath('', filename, {})
    return abspath(resolved)
 | 
| 377 | 
 | 
| 378 | # Join two paths, normalizing and eliminating any symbolic links
 | 
| 379 | # encountered in the second path.
 | 
def _joinrealpath(path, rest, seen):
    """Join rest onto path, resolving symbolic links found in rest.

    seen maps each symlink path visited so far to its resolved target,
    or to None while that link is still being resolved; it is updated in
    place.  Returns a "(path, ok)" tuple.  ok is False when a symlink
    loop was detected, in which case path is the part resolved so far
    joined with the unprocessed remainder.
    """
    if isabs(rest):
        path, rest = sep, rest[1:]

    while rest:
        component, _, rest = rest.partition(sep)
        if component == curdir or not component:
            # Empty component or '.': stay in the current directory.
            continue
        if component == pardir:
            # '..': step up to the parent directory.
            if not path:
                path = pardir
            else:
                path, last = split(path)
                if last == pardir:
                    # path already ended in '..', so stack another one.
                    path = join(path, pardir, pardir)
            continue
        candidate = join(path, component)
        if not islink(candidate):
            path = candidate
            continue
        # candidate is a symbolic link and must be resolved.
        if candidate in seen:
            path = seen[candidate]
            if path is None:
                # Still unresolved, so this is a symlink loop.  Return
                # the link itself plus the rest of the path unchanged.
                return join(candidate, rest), False
            # Already resolved earlier: use the cached value.
            continue
        seen[candidate] = None  # mark as being resolved
        path, resolved = _joinrealpath(path, os.readlink(candidate), seen)
        if not resolved:
            return join(path, rest), False
        seen[candidate] = path  # cache the resolved target

    return path, True
 | 
| 420 | 
 | 
| 421 | 
 | 
# True only when the running platform is 'darwin'.
supports_unicode_filenames = sys.platform == 'darwin'
 | 
| 423 | 
 | 
def relpath(path, start=curdir):
    """Return path expressed relative to the directory start.

    Both arguments are made absolute with abspath before comparison.
    The result climbs out of start with one '..' per component that is
    not shared with path, then descends into the rest of path.  '.' is
    returned when the two locations are the same.

    Raises ValueError if path is empty.
    """
    if not path:
        raise ValueError("no path specified")

    start_parts = [part for part in abspath(start).split(sep) if part]
    path_parts = [part for part in abspath(path).split(sep) if part]

    # Number of leading components shared by start and path.
    shared = len(commonprefix([start_parts, path_parts]))

    climb = [pardir] * (len(start_parts) - shared)
    rel_parts = climb + path_parts[shared:]
    if rel_parts:
        return join(*rel_parts)
    return curdir
 |