Working with Paths

Intermediate ~30 min read

File paths look different on Windows (C:\Users\name) and Unix (/home/name). Python provides tools to handle paths in a cross-platform way. The traditional os.path module works on plain strings through functions, while the modern pathlib module (Python 3.4+) treats paths as objects. Understanding both prepares you for any codebase!

The os.path Module

The os.path module has been Python's path-handling solution since the beginning. It provides functions to join, split, normalize, and query paths. The key function is os.path.join() which uses the correct separator for the operating system.

# os.path Module - Traditional Path Handling

import os
import os.path

# 1. Joining paths (cross-platform!)
print("=== os.path.join() ===")
# join() inserts the platform's separator, so none is hard-coded here.
path = os.path.join("folder", "subfolder", "file.txt")
print(f"Joined path: {path}")

# Works on any OS
# expanduser("~") turns the "~" shorthand into the user's home directory.
base = os.path.join(os.path.expanduser("~"), "Documents")
print(f"Home docs: {base}")
print()

# 2. Splitting paths
print("=== Splitting Paths ===")
full_path = "/home/user/documents/report.pdf"

dirname = os.path.dirname(full_path)
basename = os.path.basename(full_path)
print(f"Directory: {dirname}")
print(f"Filename: {basename}")

# Split into (directory, filename)
head, tail = os.path.split(full_path)
print(f"split(): {head} + {tail}")

# Split extension
# The extension keeps its leading dot: (".../report", ".pdf").
name, ext = os.path.splitext(full_path)
print(f"splitext(): {name} + {ext}")
print()

# 3. Path information
print("=== Path Information ===")
test_path = os.path.abspath(".")
print(f"Absolute path: {test_path}")
print(f"Current working directory: {os.getcwd()}")
print(f"Path exists: {os.path.exists(test_path)}")
print(f"Is file: {os.path.isfile(test_path)}")
print(f"Is directory: {os.path.isdir(test_path)}")
print()

# 4. Normalizing paths
print("=== Normalizing Paths ===")
# normpath() collapses "//", "./" and "dir/../" purely textually;
# it never touches the filesystem.
messy = "folder//subfolder/../subfolder/./file.txt"
clean = os.path.normpath(messy)
print(f"Before: {messy}")
print(f"After:  {clean}")
print()

# 5. Getting file info
print("=== File Information ===")
import time

# Inspect this script itself when __file__ is available; some interactive
# runners do not define it (or leave it empty), so fall back to the current
# directory. The target is resolved once and reused below.
sample = globals().get("__file__") or "."
if os.path.exists(sample):
    print(f"Size: {os.path.getsize(sample)} bytes")
    mtime = os.path.getmtime(sample)  # seconds since the epoch
    print(f"Modified: {time.ctime(mtime)}")
print()

# 6. Common os.path functions summary
print("=== Common os.path Functions ===")
print("""
os.path.join(a, b)      - Join paths
os.path.split(path)     - Split into (dir, file)
os.path.splitext(path)  - Split into (name, .ext)
os.path.basename(path)  - Get filename
os.path.dirname(path)   - Get directory
os.path.exists(path)    - Check if exists
os.path.isfile(path)    - Check if file
os.path.isdir(path)     - Check if directory
os.path.abspath(path)   - Get absolute path
os.path.expanduser("~") - Expand ~ to home
os.path.getsize(path)   - Get file size
os.path.getmtime(path)  - Get modification time
""")

Output

Click Run to execute your code

Essential os.path Functions:
os.path.join(a, b) - Join paths with correct separator
os.path.split(path) - Split into (directory, filename)
os.path.splitext(path) - Split into (name, extension)
os.path.exists(path) - Check if path exists
os.path.isfile(path) / isdir(path) - Check type
os.path.abspath(path) - Get absolute path

The pathlib Module (Modern)

The pathlib module, introduced in Python 3.4, represents paths as objects with methods and properties. This is now the recommended approach for new code. Paths can be joined with the / operator, making code more readable and Pythonic.

# pathlib Module - Modern Path Handling (Python 3.4+)

from pathlib import Path

# 1. Creating Path objects
print("=== Creating Paths ===")
p = Path("folder/subfolder/file.txt")
print(f"Path: {p}")
print(f"Type: {type(p)}")

# Path from parts
# The / operator joins components using the platform's separator.
p2 = Path("folder") / "subfolder" / "file.txt"
print(f"Using /: {p2}")

# Current directory
cwd = Path.cwd()
print(f"Current dir: {cwd}")

# Home directory
home = Path.home()
print(f"Home dir: {home}")
print()

# 2. Path components
print("=== Path Components ===")
p = Path("/home/user/documents/report.pdf")
print(f"Full path: {p}")
print(f"Parent: {p.parent}")
print(f"Name: {p.name}")
print(f"Stem: {p.stem}")
print(f"Suffix: {p.suffix}")
print(f"Parts: {p.parts}")
print()

# 3. Multiple parents
print("=== Parent Chain ===")
p = Path("/home/user/docs/project/file.py")
print(f"Path: {p}")
# parents[0] is the immediate parent; the last entry is the root.
for i, parent in enumerate(p.parents):
    print(f"  Parent {i}: {parent}")
print()

# 4. Path properties
print("=== Path Properties ===")
p = Path(".")
# absolute() only anchors the path to the current directory;
# resolve() additionally follows symlinks and removes ".." components.
print(f"Absolute: {p.absolute()}")
print(f"Resolved: {p.resolve()}")
print(f"Exists: {p.exists()}")
print(f"Is file: {p.is_file()}")
print(f"Is dir: {p.is_dir()}")
print()

# 5. Changing path parts
print("=== Modifying Paths ===")
# The with_*() methods return new Path objects; p itself never changes.
p = Path("/home/user/document.txt")
print(f"Original: {p}")
print(f"New name: {p.with_name('report.txt')}")
print(f"New suffix: {p.with_suffix('.pdf')}")
# with_stem() was only added in Python 3.9. On older versions, rebuild the
# name from the new stem plus the existing suffix, which is equivalent.
if hasattr(p, "with_stem"):
    new_stem = p.with_stem('data')
else:
    new_stem = p.with_name('data' + p.suffix)
print(f"New stem: {new_stem}")
print()

# 6. Comparing pathlib to os.path
# Note: resolve() follows symlinks, so its os.path counterpart is realpath();
# abspath() corresponds to absolute().
print("=== pathlib vs os.path ===")
print("""
pathlib                    os.path
----------------------------------------------
Path("a") / "b"            os.path.join("a", "b")
p.parent                   os.path.dirname(p)
p.name                     os.path.basename(p)
p.suffix                   os.path.splitext(p)[1]
p.stem                     os.path.splitext(basename)[0]
p.exists()                 os.path.exists(p)
p.is_file()                os.path.isfile(p)
p.is_dir()                 os.path.isdir(p)
p.absolute()               os.path.abspath(p)
p.resolve()                os.path.realpath(p)
Path.home()                os.path.expanduser("~")
p.stat().st_size           os.path.getsize(p)
""")

Output

Click Run to execute your code

Why Prefer pathlib?
- Object-oriented: methods on path objects, not functions
- Readable: Path("a") / "b" / "c" vs os.path.join("a", "b", "c")
- Convenient: properties like .name, .suffix, .parent
- Powerful: built-in glob(), read_text(), write_text()
- Type hints: better IDE support and static analysis

File and Directory Operations

Beyond just manipulating path strings, you'll often need to create directories, copy files, rename items, and delete things. The pathlib module handles most operations, but shutil is needed for copying files and recursively deleting directories.

# File and Directory Operations

from pathlib import Path
import shutil

# 1. Creating directories
print("=== Creating Directories ===")
# Start clean: a test_dir left behind by an interrupted run would make
# copytree() in step 4 fail with FileExistsError. This demo owns test_dir
# and deletes it in step 7 anyway, so removing it up front is safe.
shutil.rmtree("test_dir", ignore_errors=True)

# Single directory
Path("test_dir").mkdir(exist_ok=True)
print("Created: test_dir")

# Nested directories (like mkdir -p)
Path("test_dir/sub1/sub2").mkdir(parents=True, exist_ok=True)
print("Created: test_dir/sub1/sub2")
print()

# 2. Creating files
print("=== Creating Files ===")
# touch equivalent
Path("test_dir/file1.txt").touch()
print("Created: test_dir/file1.txt")

# Write and read with pathlib
# An explicit encoding keeps the round trip identical on every platform
# instead of depending on the locale's default encoding.
p = Path("test_dir/file2.txt")
p.write_text("Hello from pathlib!", encoding="utf-8")
print(f"Wrote: {p}")
content = p.read_text(encoding="utf-8")
print(f"Read: {content}")
print()

# 3. Renaming and moving
print("=== Renaming Files ===")
src = Path("test_dir/file1.txt")
dst = Path("test_dir/renamed.txt")
# After rename() the file lives at dst; src still holds the old (now
# non-existent) location.
src.rename(dst)
print(f"Renamed: {src} -> {dst}")
print()

# 4. Copying files (use shutil)
print("=== Copying Files ===")
shutil.copy("test_dir/file2.txt", "test_dir/file2_copy.txt")
print("Copied: file2.txt -> file2_copy.txt")

# Copy directory
# copytree() copies recursively and, by default, requires that the
# destination does not exist yet.
shutil.copytree("test_dir/sub1", "test_dir/sub1_copy")
print("Copied directory: sub1 -> sub1_copy")
print()

# 5. Checking existence
print("=== Existence Checks ===")
p = Path("test_dir")
print(f"Exists: {p.exists()}")
print(f"Is file: {p.is_file()}")
print(f"Is directory: {p.is_dir()}")
print(f"Is symlink: {p.is_symlink()}")
print()

# 6. File stats
print("=== File Statistics ===")
p = Path("test_dir/file2.txt")
stat = p.stat()
print(f"Size: {stat.st_size} bytes")
print(f"Mode: {oct(stat.st_mode)}")

from datetime import datetime
# st_mtime is a timestamp in seconds; convert it to a local datetime.
mtime = datetime.fromtimestamp(stat.st_mtime)
print(f"Modified: {mtime}")
print()

# 7. Deleting files and directories
print("=== Deleting ===")
# Delete file
Path("test_dir/renamed.txt").unlink()
print("Deleted: renamed.txt")

# Delete empty directory
# Path("empty_dir").rmdir()  # Only works if empty

# Delete directory with contents
shutil.rmtree("test_dir")
print("Deleted: test_dir and all contents")
print()

# 8. Common operations summary
print("=== Operations Summary ===")
print("""
Path("dir").mkdir()          - Create directory
Path("dir").mkdir(parents=True)  - Create nested dirs
Path("file").touch()         - Create empty file
Path("file").write_text(s)   - Write string to file
Path("file").read_text()     - Read file as string
Path("file").write_bytes(b)  - Write bytes
Path("file").read_bytes()    - Read as bytes
Path("old").rename("new")    - Rename/move
Path("file").unlink()        - Delete file
Path("dir").rmdir()          - Delete empty directory
shutil.copy(src, dst)        - Copy file
shutil.copytree(src, dst)    - Copy directory
shutil.rmtree(path)          - Delete directory tree
""")

Output

Click Run to execute your code

Destructive Operations! Be careful with shutil.rmtree() - it recursively deletes everything without asking for confirmation, and nothing is moved to the trash. Always double-check the path before deleting. Consider using the third-party send2trash package to move files to the trash instead.

Directory Traversal

Finding files in a directory tree is a common task. Use iterdir() for immediate contents, glob() for pattern matching, and os.walk() when you need full control over the traversal. The ** pattern enables recursive searching.

# Directory Traversal and Pattern Matching

from pathlib import Path
import os

# Setup test structure
print("=== Creating Test Structure ===")
base = Path("test_project")
(base / "src").mkdir(parents=True, exist_ok=True)
(base / "tests").mkdir(exist_ok=True)
(base / "docs").mkdir(exist_ok=True)

# Create sample files
(base / "README.md").touch()
(base / "setup.py").touch()
(base / "src" / "main.py").touch()
(base / "src" / "utils.py").touch()
(base / "src" / "config.json").touch()
(base / "tests" / "test_main.py").touch()
(base / "tests" / "test_utils.py").touch()
(base / "docs" / "guide.md").touch()
print("Created test project structure")
print()

# 1. iterdir() - list directory contents
print("=== iterdir() - List Contents ===")
# iterdir() yields only the immediate children, files and folders alike.
for item in base.iterdir():
    item_type = "DIR" if item.is_dir() else "FILE"
    print(f"  [{item_type}] {item.name}")
print()

# 2. glob() - pattern matching
print("=== glob() - Find Files ===")
print("All .py files:")
# Without "**" the pattern only matches directly inside base.
for py_file in base.glob("*.py"):
    print(f"  {py_file}")

print("\nAll .py files (recursive):")
for py_file in base.glob("**/*.py"):
    print(f"  {py_file}")

print("\nAll .md files (recursive):")
for md_file in base.glob("**/*.md"):
    print(f"  {md_file}")
print()

# 3. rglob() - recursive glob shortcut
print("=== rglob() - Recursive Search ===")
print("All test files:")
for test_file in base.rglob("test_*.py"):
    print(f"  {test_file}")
print()

# 4. os.walk() - traverse directory tree
print("=== os.walk() - Walk Directory Tree ===")
for dirpath, dirnames, filenames in os.walk(base):
    # Depth below base = number of separators left once the base prefix
    # is stripped from the directory path; used to indent the tree.
    level = dirpath.replace(str(base), "").count(os.sep)
    indent = "  " * level
    print(f"{indent}{Path(dirpath).name}/")
    for filename in filenames:
        print(f"{indent}  {filename}")
print()

# 5. Filter by criteria
print("=== Custom Filtering ===")
import time

# "Recently modified" means changed within the last 24 hours.
RECENT_SECONDS = 24 * 60 * 60
now = time.time()
print("Files larger than 0 bytes or recently modified:")
for path in base.rglob("*"):
    if not path.is_file():
        continue
    stat = path.stat()
    # Apply the announced criteria instead of printing every file.
    # (The sample files are empty but brand new, so they all qualify.)
    if stat.st_size > 0 or now - stat.st_mtime < RECENT_SECONDS:
        print(f"  {path.name}: {stat.st_size} bytes")
print()

# 6. Practical example: find all Python files
print("=== Find Python Files ===")
def find_python_files(directory):
    """Return every ``*.py`` file below *directory*, searched recursively.

    Args:
        directory: Root folder to search, as a string or ``Path``.

    Returns:
        A sorted list of ``Path`` objects. Directories that merely happen
        to be named ``something.py`` are excluded, and sorting makes the
        result independent of the filesystem's listing order.
    """
    root = Path(directory)
    return sorted(match for match in root.rglob("*.py") if match.is_file())

python_files = find_python_files(base)
print(f"Found {len(python_files)} Python files:")
for f in python_files:
    print(f"  {f.relative_to(base)}")
print()

# 7. Cleanup
import shutil
shutil.rmtree(base)
print("(Cleaned up test structure)")
print()

# 8. Traversal methods summary
print("=== Traversal Summary ===")
print("""
path.iterdir()          - List immediate contents
path.glob("*.txt")      - Match pattern in directory
path.glob("**/*.txt")   - Match pattern recursively
path.rglob("*.txt")     - Shortcut for **/*.txt
os.walk(path)           - Walk entire tree

Common glob patterns:
  *           - Any characters (not /)
  **          - Any path (recursive)
  ?           - Single character
  [abc]       - Character set
  *.py        - Python files
  **/test_*   - All test files anywhere
""")

Output

Click Run to execute your code

Glob Patterns:
* - Match any characters (except path separator)
** - Match any path (recursive, crosses directories)
? - Match single character
[abc] - Match character set
*.py - All Python files in current directory
**/*.py - All Python files recursively

Common Mistakes

1. Hardcoding path separators

# Wrong - hard-coded separators are fragile!
# (Python accepts "/" on Windows, but the result then mixes separators with
# native paths, and a hard-coded "\\" fails outright on Unix.)
path = "folder/subfolder/file.txt"
path = "folder" + "/" + "file.txt"

# Correct - use os.path.join or pathlib
import os
path = os.path.join("folder", "subfolder", "file.txt")  # a str using the native separator

from pathlib import Path
path = Path("folder") / "subfolder" / "file.txt"  # a Path object, not a str

2. Not checking if path exists before operating

# Wrong - crashes if file doesn't exist!
from pathlib import Path
p = Path("maybe_exists.txt")
content = p.read_text()  # FileNotFoundError!

# Correct - check first
# (simple, but the file could still disappear between exists() and read_text())
if p.exists():
    content = p.read_text()
else:
    content = ""

# Or use try/except
# (attempt the read and handle the failure - no gap between check and use)
try:
    content = p.read_text()
except FileNotFoundError:
    content = ""

3. Forgetting parents=True for nested directories

# Wrong - fails if parent doesn't exist!
from pathlib import Path
Path("new/nested/dir").mkdir()  # FileNotFoundError!

# Correct - create parents too
# (this form still raises FileExistsError when the directory is already there)
Path("new/nested/dir").mkdir(parents=True)

# And exist_ok to avoid error if exists
# (safe to call repeatedly, like `mkdir -p`)
Path("new/nested/dir").mkdir(parents=True, exist_ok=True)

4. Using strings instead of Path objects

# Mixing strings and Paths can cause issues
from pathlib import Path

# Wrong - string concatenation
# (the result is a plain str, so .name, .parent, .exists() etc. are gone,
# and the "/" separator is hard-coded)
base = Path("/home/user")
full = str(base) + "/file.txt"  # String, not Path!

# Correct - use / operator
full = base / "file.txt"  # Still a Path object

# Or convert at the end if needed
# (e.g. for an API that only accepts string paths)
path_str = str(base / "file.txt")

5. Forgetting glob returns an iterator

# Wrong - iterator exhausted after first use!
from pathlib import Path
files = Path(".").glob("*.py")  # lazy iterator: each match is produced only once
print(f"Count: {len(list(files))}")  # list() consumes every match here
for f in files:  # Empty! Iterator already consumed
    print(f)

# Correct - convert to list first
# (a list can be measured and looped over as often as needed)
files = list(Path(".").glob("*.py"))
print(f"Count: {len(files)}")
for f in files:
    print(f)

Exercise: File Organizer

Task: Create a function that organizes files into folders by extension.

Requirements:

Use pathlib for all path operations
Find all files in a directory (not subdirectories)
Group them by extension (.py, .txt, etc.)
Move each file to a folder named after its extension

Output

Click Run to execute your code

Show Solution

from pathlib import Path
import shutil

def organize_by_extension(directory):
    """Move each file in *directory* into a subfolder named after its extension.

    Only the immediate children of *directory* are considered; existing
    subdirectories and their contents are left untouched. ``report.txt``
    ends up in ``txt/``, and files without a suffix go to ``no_extension/``.

    Existing data is never overwritten: if the target folder already holds
    a file with the same name, the incoming file is stored as ``name_1.ext``,
    ``name_2.ext``, ... instead. A file is skipped (with a message) when the
    folder it needs cannot be created because a regular file already uses
    that name.

    Args:
        directory: Folder to organize, as a string or ``Path``.

    Raises:
        FileNotFoundError: If *directory* does not exist.
        NotADirectoryError: If *directory* is not a directory.
    """
    base = Path(directory)

    # Snapshot the listing before moving anything, because new folders are
    # created inside base while we work. Extensionless files go first: moving
    # a bare file called e.g. "txt" out of the way frees that name for the
    # "txt/" folder. Sorting by name keeps the run deterministic.
    files = sorted(
        (f for f in base.iterdir() if f.is_file()),
        key=lambda f: (bool(f.suffix), f.name),
    )

    for file in files:
        # Get extension without dot, or 'no_extension'
        ext = file.suffix[1:] if file.suffix else "no_extension"

        # Create extension folder. mkdir(exist_ok=True) only tolerates an
        # existing *directory*, so a same-named regular file must be handled
        # explicitly instead of crashing halfway through.
        ext_folder = base / ext
        if ext_folder.exists() and not ext_folder.is_dir():
            print(f"Skipped: {file.name} ('{ext}' is a file, not a folder)")
            continue
        ext_folder.mkdir(exist_ok=True)

        # Pick a free destination name: rename() would silently replace an
        # existing file on POSIX and raise on Windows.
        dest = ext_folder / file.name
        counter = 1
        while dest.exists():
            dest = ext_folder / f"{file.stem}_{counter}{file.suffix}"
            counter += 1

        # Move file to extension folder
        file.rename(dest)
        print(f"Moved: {file.name} -> {ext}/{dest.name}")


# Test it
test_dir = Path("test_organize")
test_dir.mkdir(exist_ok=True)

try:
    # Create sample files
    (test_dir / "script.py").touch()
    (test_dir / "utils.py").touch()
    (test_dir / "data.txt").touch()
    (test_dir / "notes.txt").touch()
    (test_dir / "image.png").touch()
    (test_dir / "README").touch()

    print("Before organizing:")
    # iterdir() yields entries in arbitrary order; sort for stable output.
    for f in sorted(test_dir.iterdir()):
        print(f"  {f.name}")
    print()

    organize_by_extension(test_dir)
    print()

    print("After organizing:")
    for item in sorted(test_dir.iterdir()):
        if item.is_dir():
            print(f"  {item.name}/")
            for f in sorted(item.iterdir()):
                print(f"    {f.name}")
finally:
    # Cleanup - runs even if organizing fails, so no test folder is left behind
    shutil.rmtree(test_dir)

Summary

os.path.join(): Join paths with correct separator
pathlib.Path: Modern OOP approach to paths
Path / operator: Path("a") / "b" joins paths
Path properties: .name, .suffix, .parent, .stem
Path methods: .exists(), .is_file(), .is_dir()
Create dirs: Path.mkdir(parents=True, exist_ok=True)
File ops: .read_text(), .write_text(), .touch()
Traversal: .iterdir(), .glob(), .rglob()
Copying: Use shutil.copy(), shutil.copytree()
Deleting: .unlink() for files, .rmdir() for empty dirs, shutil.rmtree() for dirs with contents

What's Next?

Now that you can navigate the filesystem, let's learn about CSV files - one of the most common data formats. Python's csv module makes it easy to read and write spreadsheet-like data, and you'll see how to handle headers, different delimiters, and common pitfalls!

Previous Context Managers

Next CSV Files

The os.path Module

The pathlib Module (Modern)

File and Directory Operations

Directory Traversal

Common Mistakes

1. Hardcoding path separators

2. Not checking if path exists before operating

3. Forgetting parents=True for nested directories

4. Using strings instead of Path objects

5. Forgetting glob returns an iterator

Exercise: File Organizer

Summary

What's Next?

Enjoying these tutorials?