1 | """Common operations on Posix pathnames.
|
2 |
|
3 | Instead of importing this module directly, import os and refer to
|
4 | this module as os.path. The "os.path" name is an alias for this
|
5 | module on Posix systems; on other systems (e.g. Mac, Windows),
|
6 | os.path provides the same operations in a manner specific to that
|
7 | platform, and is an alias to another module (e.g. macpath, ntpath).
|
8 |
|
9 | Some of this can actually be useful on non-Posix systems too, e.g.
|
10 | for manipulation of the pathname component of URLs.
|
11 | """
|
12 |
|
13 | import os
|
14 | import sys
|
15 | import stat
|
16 | import genericpath
|
17 | import warnings
|
18 | from genericpath import *
|
19 | from genericpath import _unicode
|
20 |
|
# Public API: the names exported by a star-import of this module.
# Names listed here that have no definition below (e.g. commonprefix,
# exists, getsize) presumably arrive through "from genericpath import *".
__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
           "basename","dirname","commonprefix","getsize","getmtime",
           "getatime","getctime","islink","exists","lexists","isdir","isfile",
           "ismount","walk","expanduser","expandvars","normpath","abspath",
           "samefile","sameopenfile","samestat",
           "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
           "devnull","realpath","supports_unicode_filenames","relpath"]
|
28 |
|
29 | # strings representing various path-related bits and pieces
|
curdir = '.'        # component naming the current directory
pardir = '..'       # component naming the parent directory
extsep = '.'        # passed to genericpath._splitext as the extension separator
sep = '/'           # separator between pathname components
pathsep = ':'       # presumably the separator between search-path entries; unused in this file
defpath = ':/bin:/usr/bin'  # presumably a default search path; unused in this file
altsep = None       # no alternative component separator is defined here
devnull = '/dev/null'       # pathname of the null device
|
38 |
|
39 | # Normalize the case of a pathname. Trivial in Posix, string.lower on Mac.
|
40 | # On MS-DOS this may also turn slashes into backslashes; however, other
|
41 | # normalizations (such as optimizing '../' away) are not allowed
|
42 | # (another function should be defined to do that).
|
43 |
|
def normcase(s):
    """Return the pathname with its case normalized.

    Case normalization has no effect under Posix, so *s* is handed back
    exactly as it was given.
    """
    return s
|
47 |
|
48 |
|
49 | # Return whether a path is absolute.
|
50 | # Trivial in Posix, harder on the Mac or MS-DOS.
|
51 |
|
def isabs(s):
    """Return True if the pathname is absolute, i.e. begins with a slash."""
    begins_at_root = s.startswith('/')
    return begins_at_root
|
55 |
|
56 |
|
57 | # Join pathnames.
|
58 | # Ignore the previous parts if a part is absolute.
|
59 | # Insert a '/' unless the first part is empty or already ends in '/'.
|
60 |
|
def join(a, *p):
    """Concatenate pathname components, adding '/' between them as needed.

    Components are handled left to right.  A component that starts with
    '/' is absolute and replaces everything accumulated so far.  Any other
    component is appended, preceded by a '/' unless the accumulated path
    is empty or already ends in one.  As a consequence, an empty final
    component produces a result that ends with a separator.
    """
    joined = a
    for component in p:
        if component.startswith('/'):
            # Absolute component: everything gathered before it is dropped.
            joined = component
            continue
        if not (joined == '' or joined.endswith('/')):
            joined += '/'
        joined += component
    return joined
|
75 |
|
76 |
|
77 | # Split a path in head (everything up to the last '/') and tail (the
|
78 | # rest). If the path ends in '/', tail will be empty. If there is no
|
79 | # '/' in the path, head will be empty.
|
80 | # Trailing '/'es are stripped from head unless it is the root.
|
81 |
|
def split(p):
    """Split a pathname at its final slash into a "(head, tail)" pair.

    "tail" is whatever follows the last '/', and is empty when the path
    ends in a slash.  "head" is whatever precedes it, and is empty when
    the path contains no slash.  Trailing slashes are removed from head
    unless head consists of slashes only.
    """
    cut = p.rfind('/') + 1
    head = p[:cut]
    tail = p[cut:]
    trimmed = head.rstrip('/')
    if trimmed:
        # head holds at least one non-slash character, so dropping its
        # trailing slashes cannot reduce a root such as '/' to ''.
        head = trimmed
    return head, tail
|
90 |
|
91 |
|
92 | # Split a path in root and extension.
|
93 | # The extension is everything starting at the last dot in the last
|
94 | # pathname component; the root is everything before that.
|
95 | # It is always true that root + ext == p.
|
96 |
|
def splitext(p):
    # Delegate to the shared genericpath helper, supplying this module's
    # component separator, alternative separator and extension separator.
    return genericpath._splitext(p, sep, altsep, extsep)
# Reuse the helper's docstring as the documentation of the public function.
splitext.__doc__ = genericpath._splitext.__doc__
|
100 |
|
101 | # Split a pathname into a drive specification and the rest of the
|
102 | # path. Useful on DOS/Windows/NT; on Unix, the drive is always empty.
|
103 |
|
def splitdrive(p):
    """Split a pathname into a "(drive, path)" pair.

    On Posix the drive is always the empty string, so the whole of *p* is
    returned as the path part.
    """
    drive = ''
    return drive, p
|
108 |
|
109 |
|
110 | # Return the tail (basename) part of a path, same as split(path)[1].
|
111 |
|
def basename(p):
    """Return the part of a pathname that follows its final slash.

    The result is empty when the pathname ends in a slash, and is the
    whole pathname when it contains no slash at all.
    """
    last_slash = p.rfind('/')
    return p[last_slash + 1:]
|
116 |
|
117 |
|
118 | # Return the head (dirname) part of a path, same as split(path)[0].
|
119 |
|
def dirname(p):
    """Return the part of a pathname that precedes its final component.

    This is everything up to the last '/', with trailing slashes removed
    unless that prefix consists of slashes only.  It is empty when the
    pathname contains no slash.
    """
    prefix = p[:p.rfind('/') + 1]
    trimmed = prefix.rstrip('/')
    # An all-slash (or empty) prefix is kept as it is so a root stays a root.
    return trimmed if trimmed else prefix
|
127 |
|
128 |
|
129 | # Is a path a symbolic link?
|
130 | # This will always return false on systems where os.lstat doesn't exist.
|
131 |
|
def islink(path):
    """Return True if path names a symbolic link, False otherwise.

    False is also returned when os.lstat fails for the path, or when
    os.lstat is not available at all (AttributeError).
    """
    try:
        info = os.lstat(path)
    except (os.error, AttributeError):
        return False
    else:
        return stat.S_ISLNK(info.st_mode)
|
139 |
|
140 | # Being true for dangling symbolic links is also useful.
|
141 |
|
def lexists(path):
    """Return True if path exists, without following a final symlink.

    Because os.lstat is used, a broken (dangling) symbolic link still
    counts as existing.  False is returned when os.lstat fails.
    """
    try:
        os.lstat(path)
    except os.error:
        return False
    else:
        return True
|
149 |
|
150 |
|
151 | # Are two filenames really pointing to the same file?
|
152 |
|
def samefile(f1, f2):
    """Return True if both pathnames refer to the same actual file.

    Each pathname is passed to os.stat (first f1, then f2) and the two
    results are compared with samestat.  Errors raised by os.stat are not
    caught here.
    """
    return samestat(os.stat(f1), os.stat(f2))
|
158 |
|
159 |
|
160 | # Are two open files really referencing the same file?
|
161 | # (Not necessarily the same file descriptor!)
|
162 |
|
def sameopenfile(fp1, fp2):
    """Return True if two open files refer to the same actual file.

    Both arguments are handed directly to os.fstat (first fp1, then fp2)
    and the two results are compared with samestat.  The files need not
    share a file descriptor.
    """
    return samestat(os.fstat(fp1), os.fstat(fp2))
|
168 |
|
169 |
|
170 | # Are two stat buffers (obtained from stat, fstat or lstat)
|
171 | # describing the same file?
|
172 |
|
def samestat(s1, s2):
    """Return True if two stat buffers describe the same file.

    Two buffers match when both their st_ino and their st_dev fields are
    equal.
    """
    same_inode = s1.st_ino == s2.st_ino
    return same_inode and s1.st_dev == s2.st_dev
|
177 |
|
178 |
|
179 | # Is a path a mount point?
|
180 | # (Does this work for all UNIXes? Is it even guaranteed to work by Posix?)
|
181 |
|
def ismount(path):
    """Return True if path is a mount point.

    A symbolic link is never reported as a mount point.  Otherwise path
    and the resolved location of path/.. are both lstat'ed; path is a
    mount point when the two are on different devices, or when they are
    the same i-node.  False is returned if either lstat fails.
    """
    if islink(path):
        # A symlink can never be a mount point
        return False
    try:
        here = os.lstat(path)
        parent = os.lstat(realpath(join(path, '..')))
    except os.error:
        # It doesn't exist -- so not a mount point
        return False
    if here.st_dev != parent.st_dev:
        # path/.. is on a different device from path
        return True
    # path/.. being the same i-node as path also marks a mount point
    return here.st_ino == parent.st_ino
|
201 |
|
202 |
|
203 | # Directory tree walk.
|
204 | # For each directory under top (including top itself, but excluding
|
205 | # '.' and '..'), func(arg, dirname, filenames) is called, where
|
206 | # dirname is the name of the directory and filenames is the list
|
207 | # of files (and subdirectories etc.) in the directory.
|
208 | # The func may modify the filenames list, to implement a filter,
|
209 | # or to impose a different order of visiting.
|
210 |
|
def walk(top, func, arg):
    """Walk a directory tree, invoking a callback for every directory.

    For top and each directory found beneath it, func(arg, dirname, fnames)
    is called, where dirname is the directory's name and fnames is the
    list returned by os.listdir for it.  func may alter fnames in place
    (for example with del or slice assignment); only the names still in
    the list afterwards are examined for recursion, in list order, which
    allows filtering or reordering the walk.  arg is passed through to
    func untouched; None is a common choice.

    Directories that cannot be listed and entries that cannot be lstat'ed
    are skipped silently.  Since os.lstat is used, symbolic links to
    directories are not followed.
    """
    warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.",
                      stacklevel=2)
    try:
        entries = os.listdir(top)
    except os.error:
        return
    func(arg, top, entries)
    # Iterate the very list func received so its in-place edits take effect.
    for entry in entries:
        child = join(top, entry)
        try:
            mode = os.lstat(child).st_mode
        except os.error:
            continue
        if stat.S_ISDIR(mode):
            walk(child, func, arg)
|
240 |
|
241 |
|
242 | # Expand paths beginning with '~' or '~user'.
|
243 | # '~' means $HOME; '~user' means that user's home directory.
|
244 | # If the path doesn't begin with '~', or if the user or $HOME is unknown,
|
245 | # the path is returned unchanged (leaving error reporting to whatever
|
246 | # function is called with the expanded path as argument).
|
247 | # See also module 'glob' for expansion of *, ? and [...] in pathnames.
|
248 | # (A function should also be defined to do full *sh-style environment
|
249 | # variable expansion.)
|
250 |
|
def expanduser(path):
    """Expand an initial "~" or "~user" component to a home directory.

    A bare "~" is replaced by $HOME, falling back to the password-database
    entry of the current user id when HOME is not set.  "~user" is
    replaced by that user's home directory from the password database.
    Trailing slashes are stripped from the home directory before the rest
    of the path is appended, and '/' is returned if the result would
    otherwise be empty.

    The path is returned unchanged when it does not start with "~", or
    when the home directory cannot be determined (the pwd module is
    unavailable, or the password database has no matching entry).
    """
    if not path.startswith('~'):
        return path
    # end marks the end of the "~" / "~user" prefix.
    end = path.find('/', 1)
    if end < 0:
        end = len(path)
    if end == 1:
        # Bare "~": prefer $HOME, fall back to the password database.
        if 'HOME' in os.environ:
            userhome = os.environ['HOME']
        else:
            try:
                import pwd
            except ImportError:
                # No password database on this platform: do nothing.
                return path
            try:
                userhome = pwd.getpwuid(os.getuid()).pw_dir
            except KeyError:
                # The current uid has no password-database entry; honour
                # the documented "do nothing" contract instead of raising.
                return path
    else:
        # "~user": look the named user up in the password database.
        try:
            import pwd
        except ImportError:
            return path
        try:
            pwent = pwd.getpwnam(path[1:end])
        except KeyError:
            return path
        userhome = pwent.pw_dir
    userhome = userhome.rstrip('/')
    # A home directory of '/' strips to '', so fall back to '/'.
    return (userhome + path[end:]) or '/'
|
274 |
|
275 |
|
276 | # Expand paths containing shell variable substitutions.
|
277 | # This expands the forms $variable and ${variable} only.
|
278 | # Non-existent variables are left unchanged.
|
279 |
|
# Caches for the compiled patterns used by expandvars(); each is compiled
# on first use.  _varprog serves non-unicode paths, _uvarprog unicode paths.
_varprog = None
_uvarprog = None

def expandvars(path):
    """Expand shell variables of form $var and ${var}.  Unknown variables
    are left unchanged.

    Values are taken from os.environ.  For a unicode path, variable names
    are encoded with, and values decoded from, the filesystem encoding.
    A substituted value is not scanned again for further variables."""
    global _varprog, _uvarprog
    # Fast path: nothing to expand.
    if '$' not in path:
        return path
    if isinstance(path, _unicode):
        if not _uvarprog:
            import re
            _uvarprog = re.compile(ur'\$(\w+|\{[^}]*\})', re.UNICODE)
        varprog = _uvarprog
        # Environment entries are converted through this encoding below.
        encoding = sys.getfilesystemencoding()
    else:
        if not _varprog:
            import re
            _varprog = re.compile(r'\$(\w+|\{[^}]*\})')
        varprog = _varprog
        encoding = None
    # i is the position from which the next search starts.
    i = 0
    while True:
        m = varprog.search(path, i)
        if not m:
            break
        # i, j delimit the whole "$name" / "${name}" reference.
        i, j = m.span(0)
        name = m.group(1)
        if name.startswith('{') and name.endswith('}'):
            # Strip the braces of the ${name} form.
            name = name[1:-1]
        if encoding:
            name = name.encode(encoding)
        if name in os.environ:
            tail = path[j:]
            value = os.environ[name]
            if encoding:
                value = value.decode(encoding)
            path = path[:i] + value
            # Resume after the inserted value so it is not expanded again.
            i = len(path)
            path += tail
        else:
            # Unknown variable: leave the reference in place and move on.
            i = j
    return path
|
323 |
|
324 |
|
325 | # Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B.
|
326 | # It should be understood that this may change the meaning of the path
|
327 | # if it contains symbolic links!
|
328 |
|
def normpath(path):
    """Normalize a path by collapsing redundant separators and references.

    Empty and '.' components are dropped and 'name/..' pairs are
    cancelled, so A//B, A/./B and A/foo/../B all become A/B.  This is pure
    string manipulation, so the result may refer to a different file if
    the path contains symbolic links.  An empty result is returned as '.'.
    """
    # Preserve unicode (if path is unicode)
    if isinstance(path, _unicode):
        slash, dot = u'/', u'.'
    else:
        slash, dot = '/', '.'
    if path == '':
        return dot
    # POSIX allows one or two initial slashes, but treats three or more
    # as single slash.
    if not path.startswith('/'):
        leading = 0
    elif path.startswith('//') and not path.startswith('///'):
        leading = 2
    else:
        leading = 1
    kept = []
    for part in path.split('/'):
        if part in ('', '.'):
            continue
        if part != '..':
            kept.append(part)
        elif kept:
            if kept[-1] == '..':
                # Nothing left to cancel against: stack another '..'.
                kept.append(part)
            else:
                # 'name/..' cancels out.
                kept.pop()
        elif not leading:
            # A relative path may climb above its starting point.
            kept.append(part)
        # Otherwise '..' directly under the root is dropped.
    result = slash.join(kept)
    if leading:
        result = slash * leading + result
    return result or dot
|
356 |
|
357 |
|
def abspath(path):
    """Return a normalized, absolute version of a path.

    A path that is not already absolute is joined onto the current
    working directory first; the result is then passed through normpath.
    """
    if isabs(path):
        return normpath(path)
    # Fetch the working directory in the same string type as the argument.
    cwd = os.getcwdu() if isinstance(path, _unicode) else os.getcwd()
    return normpath(join(cwd, path))
|
367 |
|
368 |
|
369 | # Return a canonical path (i.e. the absolute location of a file on the
|
370 | # filesystem).
|
371 |
|
def realpath(filename):
    """Return the canonical path of filename with symbolic links resolved.

    Resolution is delegated to _joinrealpath starting from an empty path
    and an empty cache; its success flag is ignored, and the path it
    returns is made absolute with abspath.
    """
    resolved, _resolved_ok = _joinrealpath('', filename, {})
    return abspath(resolved)
|
377 |
|
378 | # Join two paths, normalizing and eliminating any symbolic links
|
379 | # encountered in the second path.
|
def _joinrealpath(path, rest, seen):
    """Join rest onto path, resolving symbolic links met along the way.

    path is the part resolved so far and rest the part still to process.
    seen maps each symlink path visited to its resolved target, or to None
    while that link is still being resolved; it is shared across the
    recursive calls.

    Returns a "(path, ok)" pair.  ok is False when a symlink loop was
    detected, in which case path is the part resolved so far joined with
    the remaining, unresolved components.
    """
    if isabs(rest):
        # An absolute remainder restarts resolution at the root.
        rest = rest[1:]
        path = sep

    while rest:
        # Peel off the next component.
        name, _, rest = rest.partition(sep)
        if not name or name == curdir:
            # current dir
            continue
        if name == pardir:
            # parent dir
            if path:
                path, name = split(path)
                if name == pardir:
                    # The last component was itself '..': keep it and add one more.
                    path = join(path, pardir, pardir)
            else:
                path = pardir
            continue
        newpath = join(path, name)
        if not islink(newpath):
            path = newpath
            continue
        # Resolve the symbolic link
        if newpath in seen:
            # Already seen this path
            path = seen[newpath]
            if path is not None:
                # use cached value
                continue
            # The symlink is not resolved, so we must have a symlink loop.
            # Return already resolved part + rest of the path unchanged.
            return join(newpath, rest), False
        seen[newpath] = None # not resolved symlink
        # Resolve the link target relative to the directory holding the link.
        path, ok = _joinrealpath(path, os.readlink(newpath), seen)
        if not ok:
            return join(path, rest), False
        seen[newpath] = path # resolved symlink

    return path, True
|
420 |
|
421 |
|
# True only when sys.platform is 'darwin'; False on every other platform.
supports_unicode_filenames = (sys.platform == 'darwin')
|
423 |
|
def relpath(path, start=curdir):
    """Return a version of path expressed relative to start.

    Both arguments are first made absolute with abspath.  The result
    climbs out of start with one '..' per component that is not shared
    with path, then descends through path's remaining components.  '.' is
    returned when the two locations coincide.

    Raises ValueError if path is empty.
    """
    if not path:
        raise ValueError("no path specified")

    origin_parts = [part for part in abspath(start).split(sep) if part]
    target_parts = [part for part in abspath(path).split(sep) if part]

    # Number of leading components the two locations have in common.
    shared = len(commonprefix([origin_parts, target_parts]))

    climb = [pardir] * (len(origin_parts) - shared)
    rel_parts = climb + target_parts[shared:]
    return join(*rel_parts) if rel_parts else curdir
|