##### Setup

In [16]:
from pathlib import Path

p = Path('my_directory')
p.mkdir(exist_ok=True)
fnames = ['a.txt','b.txt','c.txt','d.dat']
for fname in fnames:
    with open(p / fname, 'w') as f:
        f.write("Test\n")
(p / 'e').mkdir(exist_ok=True)
(p / 'z').mkdir(exist_ok=True)
(p / 'y' / 'x').mkdir(parents=True, exist_ok=True)
(p / 'u' / 'v').mkdir(parents=True, exist_ok=True)

### Finding Files

In [17]:
import os
import fnmatch
for entry in os.scandir('my_directory/'):
#     print(entry.name)
    if fnmatch.fnmatch(entry.name
                       , '*.txt'):
        print(entry.name)

c.txt
b.txt
a.txt


In [22]:
import glob
glob.glob('my_directory/*.txt')

['my_directory/c.txt', 'my_directory/b.txt', 'my_directory/a.txt']

In [20]:
%%writefile my_directory/u/v/z.txt
Testing

Writing my_directory/u/v/z.txt


In [23]:
import glob
glob.glob('my_directory/**/*.txt',recursive=True)

['my_directory/c.txt',
 'my_directory/b.txt',
 'my_directory/a.txt',
 'my_directory/u/v/z.txt']

In [24]:
from pathlib import Path
p = Path('my_directory')
for matching_path in p.glob('*.d*'):
    print(matching_path.name)

d.dat


In [25]:
from pathlib import Path
p = Path('my_directory')
for name in p.glob('**/*.txt'):
    print(name)

my_directory/c.txt
my_directory/b.txt
my_directory/a.txt
my_directory/u/v/z.txt


In [26]:
from pathlib import Path
path = Path('my_directory/')
for entry in path.iterdir():
    if entry.is_file():
        print(entry.stem, ':', entry.suffix)

c : .txt
b : .txt
a : .txt
d : .dat


In [30]:
subdir = list((path / "u").iterdir())

[PosixPath('my_directory/u/v')]

In [31]:
e_path = os.path.join("my_directory", "e") # "my_directory/e"

'my_directory/e'

In [32]:
os.path.splitext('d.csv')

('d', '.csv')

In [33]:
p = Path('d.csv.gz')

PosixPath('d.csv.gz')

In [35]:
p.suffix

'.gz'

In [34]:
p.stem, p.suffixes

('d.csv', ['.csv', '.gz'])

In [36]:
for dirpath, dirnames, files in os.walk('my_directory'):
    print(f'Walking directory: {dirpath}')
    for file_name in files:
        print(file_name)
    for dir_name in dirnames:
        print("DIR:", dir_name)

Walking directory: my_directory
c.txt
b.txt
a.txt
d.dat
DIR: u
DIR: z
DIR: e
DIR: y
Walking directory: my_directory/u
DIR: v
Walking directory: my_directory/u/v
z.txt
Walking directory: my_directory/z
Walking directory: my_directory/e
Walking directory: my_directory/y
DIR: x
Walking directory: my_directory/y/x


In [37]:
from tempfile import TemporaryFile
with TemporaryFile('w+t') as fp:
    fp.write('Hello universe!')
    # call function
    fp.seek(0)
    val = fp.read()
    # File is now closed and removed
val

'Hello universe!'

### Removing Files and Directories

In [41]:
%%writefile my_directory/e/f.txt
When will the traffic lights on Lincoln Highway be synced better?

Writing my_directory/e/f.txt


In [42]:
from pathlib import Path
Path('my_directory/e/f.txt').unlink()

In [43]:
for dirpath, dirnames, files in os.walk('my_directory'):
    print(f'Found directory: {dirpath}')
    for file_name in files:
        print(file_name)

Found directory: my_directory
c.txt
b.txt
a.txt
d.dat
Found directory: my_directory/u
Found directory: my_directory/u/v
z.txt
Found directory: my_directory/z
Found directory: my_directory/e
Found directory: my_directory/y
Found directory: my_directory/y/x


In [44]:
os.rmdir('my_directory/e')

In [45]:
from pathlib import Path
path = Path('my_directory/')
for entry in path.iterdir():
    print(entry.stem, ':', entry.suffix)

u : 
z : 
c : .txt
b : .txt
a : .txt
d : .dat
y : 


In [46]:
os.rmdir('my_directory/u')

OSError: [Errno 66] Directory not empty: 'my_directory/u'

In [47]:
import shutil
shutil.rmtree('my_directory/u')

In [48]:
for dirpath, dirnames, files in os.walk('my_directory'):
    print(f'Found directory: {dirpath}')
    for file_name in files:
        print(file_name)

Found directory: my_directory
c.txt
b.txt
a.txt
d.dat
Found directory: my_directory/z
Found directory: my_directory/y
Found directory: my_directory/y/x


### psutil

In [49]:
import psutil

psutil.cpu_count()

14

In [50]:
psutil.cpu_percent()

16.5

In [51]:
m = psutil.virtual_memory()

svmem(total=38654705664, available=10138124288, percent=73.8, used=14122156032, free=423460864, active=9708617728, inactive=9624977408, wired=4413538304)

In [52]:
# physical memory in GB
m.total / 2**30

36.0

In [53]:
psutil.disk_partitions()

[sdiskpart(device='/dev/disk3s1s1', mountpoint='/', fstype='apfs', opts='ro,local,rootfs,dovolfs,journaled,multilabel'),
 sdiskpart(device='/dev/disk3s6', mountpoint='/System/Volumes/VM', fstype='apfs', opts='rw,noexec,local,dovolfs,dontbrowse,journaled,multilabel,noatime'),
 sdiskpart(device='/dev/disk3s2', mountpoint='/System/Volumes/Preboot', fstype='apfs', opts='rw,local,dovolfs,dontbrowse,journaled,multilabel'),
 sdiskpart(device='/dev/disk3s4', mountpoint='/System/Volumes/Update', fstype='apfs', opts='rw,local,dovolfs,dontbrowse,journaled,multilabel'),
 sdiskpart(device='/dev/disk2s2', mountpoint='/System/Volumes/xarts', fstype='apfs', opts='rw,noexec,local,dovolfs,dontbrowse,journaled,multilabel,noatime'),
 sdiskpart(device='/dev/disk2s1', mountpoint='/System/Volumes/iSCPreboot', fstype='apfs', opts='rw,local,dovolfs,dontbrowse,journaled,multilabel'),
 sdiskpart(device='/dev/disk2s3', mountpoint='/System/Volumes/Hardware', fstype='apfs', opts='rw,local,dovolfs,dontbrowse,journal

In [54]:
psutil.pids()

[0,
 1,
 521,
 523,
 525,
 526,
 529,
 532,
 534,
 535,
 539,
 543,
 545,
 549,
 551,
 552,
 556,
 559,
 560,
 562,
 563,
 564,
 565,
 566,
 569,
 571,
 572,
 574,
 578,
 579,
 580,
 583,
 584,
 586,
 587,
 588,
 591,
 593,
 594,
 595,
 596,
 597,
 599,
 600,
 601,
 602,
 604,
 605,
 608,
 610,
 625,
 627,
 634,
 637,
 639,
 642,
 643,
 644,
 647,
 650,
 654,
 667,
 686,
 691,
 692,
 693,
 698,
 700,
 710,
 713,
 714,
 715,
 725,
 732,
 748,
 756,
 763,
 764,
 767,
 770,
 819,
 825,
 826,
 841,
 893,
 918,
 929,
 935,
 954,
 970,
 974,
 977,
 997,
 1002,
 1004,
 1006,
 1007,
 1008,
 1011,
 1015,
 1016,
 1022,
 1023,
 1032,
 1041,
 1045,
 1046,
 1048,
 1053,
 1058,
 1061,
 1069,
 1072,
 1080,
 1088,
 1091,
 1094,
 1097,
 1105,
 1109,
 1114,
 1127,
 1129,
 1133,
 1157,
 1164,
 1166,
 1170,
 1171,
 1184,
 1207,
 1213,
 1235,
 1250,
 1306,
 1321,
 1341,
 1351,
 1375,
 1378,
 1379,
 1381,
 1389,
 1390,
 1391,
 1392,
 1393,
 1396,
 1399,
 1400,
 1403,
 1404,
 1407,
 1410,
 1417,
 1418,
 1421

In [55]:
p = psutil.Process(96764)

psutil.Process(pid=96764, name='gitstatusd-darwin-arm64', status='running', started='2025-10-24 08:53:58')

In [56]:
p.memory_info()

pmem(rss=507904, vms=420684611584, pfaults=1723, pageins=101)

In [57]:
conns = psutil.net_connections(kind='inet')

AccessDenied: (pid=92415)

In [58]:
import getpass

current_user = getpass.getuser()

for proc in psutil.process_iter(['pid', 'name', 'username']):
    try:
        # Check if the process belongs to the current user
        if proc.info['username'] == current_user and 'python3.13' in proc.info['name']:
            print(proc)
    except Exception as e:
        print(e)


psutil.Process(pid=977, name='python3.13', status='running', started='2025-10-24 08:55:55')
psutil.Process(pid=1048, name='python3.13', status='running', started='2025-10-24 08:55:55')
psutil.Process(pid=24441, name='python3.13', status='running', started='2025-10-23 14:15:41')
psutil.Process(pid=29588, name='python3.13', status='running', started='09:12:17')
psutil.Process(pid=30364, name='python3.13', status='running', started='2025-10-19 23:57:02')
psutil.Process(pid=30722, name='python3.13', status='running', started='2025-10-28 13:45:45')
psutil.Process(pid=30766, name='python3.13', status='running', started='2025-10-19 23:57:04')
psutil.Process(pid=33028, name='python3.13', status='running', started='09:13:44')
psutil.Process(pid=33416, name='python3.13', status='running', started='2025-10-28 13:46:59')
psutil.Process(pid=46953, name='python3.13', status='running', started='09:19:55')
psutil.Process(pid=50507, name='python3.13', status='running', started='09:21:29')
psutil.Proces

In [59]:
p = psutil.Process(91482)

psutil.Process(pid=91482, name='python3.13', status='running', started='2025-10-20 15:39:57')

In [60]:
p.memory_info()

pmem(rss=6537216, vms=421514035200, pfaults=1206221, pageins=74)

In [61]:
p.net_connections()

[pconn(fd=14, family=<AddressFamily.AF_INET: 2>, type=<SocketKind.SOCK_STREAM: 1>, laddr=addr(ip='127.0.0.1', port=9012), raddr=(), status='LISTEN'),
 pconn(fd=17, family=<AddressFamily.AF_INET: 2>, type=<SocketKind.SOCK_STREAM: 1>, laddr=addr(ip='127.0.0.1', port=9013), raddr=(), status='LISTEN'),
 pconn(fd=20, family=<AddressFamily.AF_INET: 2>, type=<SocketKind.SOCK_STREAM: 1>, laddr=addr(ip='127.0.0.1', port=9011), raddr=(), status='LISTEN'),
 pconn(fd=25, family=<AddressFamily.AF_INET: 2>, type=<SocketKind.SOCK_STREAM: 1>, laddr=addr(ip='127.0.0.1', port=61083), raddr=addr(ip='127.0.0.1', port=9012), status='ESTABLISHED'),
 pconn(fd=26, family=<AddressFamily.AF_INET: 2>, type=<SocketKind.SOCK_STREAM: 1>, laddr=addr(ip='127.0.0.1', port=9012), raddr=addr(ip='127.0.0.1', port=61083), status='ESTABLISHED'),
 pconn(fd=32, family=<AddressFamily.AF_INET: 2>, type=<SocketKind.SOCK_STREAM: 1>, laddr=addr(ip='127.0.0.1', port=9014), raddr=(), status='LISTEN'),
 pconn(fd=38, family=<AddressF

### Threading

In [2]:
import threading

def printer(num):
    print(num)

for i in range(10):
    t = threading.Thread(target=printer, args=(i,))
    t.start()

01

2
3
4
5
6
7
8
9


In [3]:
my_lock = threading.Lock()

def printer(num):
    with my_lock:
        print(num)

for i in range(5):
    t = threading.Thread(target=printer, args=(i,))
    t.start()

0
2
1
3
4


In [4]:
import concurrent.futures
import time
import random

# my_lock = threading.Lock()
def printer(num):
    # with my_lock:
    time.sleep(random.randint(0,4)/4)
    print(num)
    return num ** 2

with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    results = executor.map(printer, range(10))

1
0
5
2
3
4
6
7
8
9


In [5]:
list(results)

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [6]:
import concurrent.futures

my_lock = threading.Lock()
def printer(num1, num2):
    with my_lock:
        print(num1, num2)
    return num1

with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    results = executor.map(printer, range(5), range(4,-1,-1))

0 4
3 1
2 2
1 3
4 0


In [7]:
list(results)

[0, 1, 2, 3, 4]

### Multiprocessing

In [None]:
# !!! will not run in the notebook !!!

# import multiprocessing

# def printer(num):
#     print(num)
    
# with multiprocessing.Pool() as pool:
#     pool.map(printer, range(5))

In [8]:
%%writefile test-multiprocessing.py
import multiprocessing
import time

def square(num):
    time.sleep(2)
    return num * num
    
if __name__ == '__main__':
    __spec__ = None    
    with multiprocessing.Pool(5) as pool:
        result = pool.map(square, range(5))
        print(result)

Overwriting test-multiprocessing.py


In [9]:
%run test-multiprocessing.py

[0, 1, 4, 9, 16]


In [10]:
import concurrent.futures
import multiprocessing as mp
import time

def dummy(num):
    time.sleep(0.5)
    return num ** 2

start_time = time.time()
with concurrent.futures.ProcessPoolExecutor(max_workers=5, mp_context=mp.get_context('fork')) as executor:
    results = executor.map(dummy, range(10))
print("Total Time:", time.time() - start_time)

Total Time: 1.041306972503662


In [11]:
for r in results:
    print(r)

0
1
4
9
16
25
36
49
64
81


### Comparing single-thread, multi-thread, and asyncio

[Example by J. Anderson](https://realpython.com/python-concurrency/#how-to-speed-up-an-io-bound-program)

In [12]:
# https://realpython.com/python-concurrency/#how-to-speed-up-an-io-bound-program

import requests
import time

def download_site(url, session):
    with session.get(url) as response:
        # print(f"Read {len(response.content)} from {url}")
        pass

def download_all_sites(sites):
    with requests.Session() as session:
        for url in sites:
            download_site(url, session)

if __name__ == "__main__":
    sites = [
        "https://www.jython.org",
        "http://olympus.realpython.org/dice",
    ] * 80
    start_time = time.time()
    download_all_sites(sites)
    duration = time.time() - start_time
    print(f"Downloaded {len(sites)} in {duration} seconds")

Downloaded 160 in 4.613065004348755 seconds


In [13]:
# https://realpython.com/python-concurrency/#how-to-speed-up-an-io-bound-program

import concurrent.futures
import requests
import threading
import time

thread_local = threading.local()

def get_session():
    if not hasattr(thread_local, "session"):
        thread_local.session = requests.Session()
    return thread_local.session

def download_site(url):
    session = get_session()
    with session.get(url) as response:
        # print(f"Read {len(response.content)} from {url}")
        pass

def download_all_sites(sites):
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        executor.map(download_site, sites)

if __name__ == "__main__":
    sites = [
        "https://www.jython.org",
        "http://olympus.realpython.org/dice",
    ] * 80
    start_time = time.time()
    download_all_sites(sites)
    duration = time.time() - start_time
    print(f"Downloaded {len(sites)} in {duration} seconds")

Downloaded 160 in 0.9302489757537842 seconds


In [14]:
# !pip install nest_asyncio
import nest_asyncio

nest_asyncio.apply()

In [15]:
# https://realpython.com/python-concurrency/#how-to-speed-up-an-io-bound-program

import asyncio
import time
import aiohttp

async def download_site(session, url):
    async with session.get(url) as response:
        # print("Read {0} from {1}".format(response.content_length, url))
        pass

async def download_all_sites(sites):
    async with aiohttp.ClientSession() as session:
        tasks = []
        for url in sites:
            task = asyncio.ensure_future(download_site(session, url))
            tasks.append(task)
        await asyncio.gather(*tasks, return_exceptions=True)

if __name__ == "__main__":
    sites = [
        "https://www.jython.org",
        "http://olympus.realpython.org/dice",
    ] * 80
    start_time = time.time()
    asyncio.get_event_loop().run_until_complete(download_all_sites(sites))
    duration = time.time() - start_time
    print(f"Downloaded {len(sites)} sites in {duration} seconds")

Downloaded 160 sites in 0.1838667392730713 seconds


In [16]:
# this is not a function call, creates a coroutine object!
download_site("foo", "bar")

<coroutine object download_site at 0x107c407c0>