## String Comparisons

In [79]:
# strings compare character by character: 'a' == 'a', then 'b' < 'e' decides
'abi' < 'aef'

True

In [80]:
# < is strict, so a string is never less than itself
'a' < 'a'

False

In [81]:
# comparison uses code points, so uppercase ASCII letters sort before lowercase
'Z' < 'a'

True

In [82]:
# the code points that decide the 'Z' < 'a' comparison
ord('Z'), ord('a')

(90, 97)

## Formatted Strings

In [1]:
# formatted string literal (f-string): each {name} is replaced by that variable's value
first_name = "Bob"
last_name = "Smith"
f"My name is {last_name}, {first_name}"

'My name is Smith, Bob'

In [2]:
# .format (empty): bare {} fields are filled left to right from the arguments
"My name is {} {}".format(first_name, last_name)

'My name is Bob Smith'

In [3]:
# .format (position): {0}/{1} index the arguments, and an index may be used more than once
"My name is {1}, {1} {0}".format(last_name, first_name)

'My name is Bob, Bob Smith'

In [4]:
# .format (kwarg): fields are looked up by keyword-argument name
"My name is {first_name} {last_name}".format(first_name=first_name, last_name=last_name)

'My name is Bob Smith'

In [5]:
# no space before positive number (default: only negative numbers get a sign)
print(f'{27}\n{-27}')

27
-27


In [6]:
# space before positive number: the ' ' sign option keeps the digits aligned with negatives
print(f'{27: d}\n{-27: d}')

 27
-27


In [7]:
# plus sign before positive number: the '+' sign option always shows a sign
print(f'{27:+d}\n{-27:+d}')

+27
-27


In [8]:
# '<' left-aligns the value in a field 12 characters wide
f'{"a string":<12s}'

'a string    '

In [9]:
# '>' right-aligns the value in a field 12 characters wide
f'{"a string":>12s}'

'    a string'

In [12]:
# '^' centers the value in the 12-character field
f'{"a string":^12s}'

'  a string  '

In [23]:
# the width is a minimum: a longer value is not truncated
f'{"a really long string":^12s}'

'a really long string'

In [14]:
# slice the value first to cap it at 12 characters (a ".12" precision would also truncate)
f'{"a really long string"[:12]:^12s}'

'a really lon'

In [15]:
# no format spec: the value is inserted as-is
f'{"a string"}'

'a string'

In [16]:
# doubled braces {{ }} produce literal braces; single braces interpolate
num = 34
f'{{bracket}} {num}'

'{bracket} 34'

In [17]:
# .2f: fixed-point notation rounded to two digits after the decimal point
val = 67.2318809238409234
f"{val:.2f}"

'67.23'

In [18]:
# .2e: scientific notation with two digits after the decimal point
val = 67.2318809238409234
f"{val:.2e}"

'6.72e+01'

In [19]:
# formatting never changed val; its repr has fewer digits than the literal that was typed
val

67.23188092384092

In [20]:
# width only: numbers are right-aligned by default
f'[{27:10}]'

'[        27]'

In [21]:
# width only: strings are left-aligned by default
f'[{"27":10}]'

'[27        ]'

In [24]:
# a float with no format spec is rendered the way str() renders it
f"{12.34}"

'12.34'

In [25]:
# very large floats are rendered in scientific notation by default
f"{1.34e123}"

'1.34e+123'

In [26]:
# 'e' forces scientific notation (six digits after the point by default)
f"{12.34:e}"

'1.234000e+01'

In [27]:
# 'f' forces fixed-point notation, so every digit of the huge value is written out
f"{1.34e123:f}"

'1340000000000000125722305548847985105555788496070772977344360517543929764150412311353189102690429605927633722566001385537536.000000'

## Raw Strings

In [28]:
# normal (non-raw) literal: every backslash has to be written doubled
s = '\\n is the way you write a newline, \\\\ for \\.'
s  # echo the repr; an assignment alone displays nothing

'\\n is the way you write a newline, \\\\ for \\.'

In [29]:
# print shows the actual characters: each doubled backslash in the literal is one backslash
print(s)

\n is the way you write a newline, \\ for \.


In [30]:
# with raw strings: backslashes are taken literally, so none need doubling
r"\n is the way you write a newline, \\ for \."

'\\n is the way you write a newline, \\\\ for \\.'

In [None]:
import re
# the raw string lets \d reach the regex engine exactly as written
re.match(r'\d+/\d+/\d+','12/31/2012 is a date.')

## Regular Expressions

In [31]:
import re

In [33]:
# Sample inputs: no full date, a date at the start, a date at the end, and two dates.
s0 = "No full dates here, just 02/15"
s1 = "02/14/2024 is a date"
s2 = "Another date is 12/25/2024"
s3 = "Halloween is 10/31/2024 and Thanksgiving is 11/24/2024"
s3  # echo the last sample; assignments alone display nothing

'Halloween is 10/31/2024 and Thanksgiving is 11/24/2024'

In [34]:
# s0 has no full date, so match returns None and the cell displays nothing
re.match(r'\d+/\d+/\d+',s0)

In [35]:
# s1 begins with a date, so match succeeds
re.match(r'\d+/\d+/\d+',s1)

<re.Match object; span=(0, 10), match='02/14/2024'>

In [38]:
# re.match returns None when the start of the string does not fit the pattern,
# so the None test decides which message is printed.
found = re.match(r'\d+/\d+/\d+',s0)
print("NO DATE" if found is None else "GOT A DATE")

NO DATE


In [39]:
# match only tries at the start of the string; s2's date comes later, so the result is None
re.match(r'\d+/\d+/\d+',s2)

In [40]:
# search scans the whole string and returns the first match
re.search(r'\d+/\d+/\d+',s2)

<re.Match object; span=(16, 26), match='12/25/2024'>

In [44]:
# search also returns None when nothing matches anywhere in the string
re.search(r'\d+/\d+/\d+',s0)

In [45]:
# search stops at the first match even though s3 holds two dates
re.search(r'\d+/\d+/\d+',s3)

<re.Match object; span=(13, 23), match='10/31/2024'>

In [46]:
# findall returns every non-overlapping match as a list of strings
re.findall(r'\d+/\d+/\d+',s3)

['10/31/2024', '11/24/2024']

In [47]:
# finditer yields one Match object per match, so the span of each is available
for m in re.finditer(r'\d+/\d+/\d+',s3):
    print(m)

<re.Match object; span=(13, 23), match='10/31/2024'>
<re.Match object; span=(44, 54), match='11/24/2024'>


### Groups in Regex

In [48]:
# Keep the Match object so it can be inspected afterwards.
match = re.search(r'\d+/\d+/\d+',s3)
match  # echo it; an assignment alone displays nothing

<re.Match object; span=(13, 23), match='10/31/2024'>

In [49]:
# group(0) is the text of the entire match
match.group(0)

'10/31/2024'

In [50]:
# same pattern on a different sample string: both dates are returned
s5 = "Halloween is 10/31/2024 and Thanksgiving is 11/28/2024"
re.findall(r'\d+/\d+/\d+',s5)

['10/31/2024', '11/28/2024']

In [51]:
# group(0) of each Match is the matched text, the same strings findall returns
# (note: the loop rebinds the name `match`)
for match in re.finditer(r'\d+/\d+/\d+',s3):
    print(match.group(0))

10/31/2024
11/24/2024


In [52]:
# the trailing "/11/28" lacks a third number, so it is not reported as a date of its own
s5 = "Halloween is 10/31/2024/11/28 and Thanksgiving is 11/28/2024"
re.findall(r'\d+/\d+/\d+',s5)

['10/31/2024', '11/28/2024']

In [53]:
# parentheses create capture groups; groups() returns them as a tuple of strings
for match in re.finditer(r'(\d+)/(\d+)/(\d+)',s3):
    print(match.groups())

('10', '31', '2024')
('11', '24', '2024')


In [56]:
# group(1) is the first capture group (the month in these m/d/y dates)
for match in re.finditer(r'(\d+)/(\d+)/(\d+)',s3):
    print(match.group(1))

10
11


In [57]:
# group() accepts several indices at once and returns a tuple, which is
# unpacked into print so the two values are separated by a space.
for match in re.finditer(r'(\d+)/(\d+)/(\d+)',s3):
    print(*match.group(1, 3))

10 2024
11 2024


In [58]:
s21 = "Date 3/1/2024 and 10/31/2024"
# Print each m/d/yyyy date as zero-padded yyyy-mm-dd.
for match in re.finditer(r'(\d+)/(\d+)/(\d+)',s21):
    # groups() holds (month, day, year) as strings; ints are needed for the :02d specs
    month_day_year = map(int, match.groups())
    print('{2}-{0:02d}-{1:02d}'.format(*month_day_year))

2024-03-01
2024-10-31


In [None]:
# display the sample string that holds two dates
s3

In [59]:
# replace every match with the same fixed text
re.sub(r'(\d+)/(\d+)/(\d+)',r'a holiday',s3)

'Halloween is a holiday and Thanksgiving is a holiday'

In [60]:
# \1, \2, \3 in the replacement refer to the capture groups; here they are reordered to y-m-d
re.sub(r'(\d+)/(\d+)/(\d+)',r'\3-\1-\2',s3)

'Halloween is 2024-10-31 and Thanksgiving is 2024-11-24'

In [61]:
# backreferences copy the captured text verbatim, so single-digit parts are not zero-padded
s6 = "New Years Day was 1/1/2024"
re.sub(r'(\d+)/(\d+)/(\d+)',r'\3-\1-\2',s6)

'New Years Day was 2024-1-1'

In [63]:
# Pitfall: m.group() returns str, and in a string field "02s" means width 2 with
# fill '0' and the default left alignment, so '1' is padded on the right to '10'.
# NOTE(review): Python < 3.10 is documented to treat a leading 0 as '=' alignment,
# which is rejected for strings -- confirm the target version.
re.sub(r'(\d+)/(\d+)/(\d+)',lambda m: 
       f'{m.group(3)}-{m.group(1):02s}-{m.group(2):02s}',s6)

'New Years Day was 2024-10-10'

In [64]:
# Two working fixes side by side: '>' forces right alignment for the string month,
# while the day is converted to int so the numeric 02d spec applies.
re.sub(r'(\d+)/(\d+)/(\d+)',lambda m: 
       f'{m.group(3)}-{m.group(1):>02}-{int(m.group(2)):02d}',s6)

'New Years Day was 2024-01-01'

In [66]:
# The replacement is a callable: it receives each Match and returns the new text.
# Both captured strings are converted to int so 02d zero-pads them.
re.sub(r'(\d+)/(\d+)/(\d+)',lambda m: 
       f'{m.group(3)}-{int(m.group(1)):02d}-{int(m.group(2)):02d}',s6)

'New Years Day was 2024-01-01'

## Files

In [67]:
# print the whole file
# !cat huck-finn.txt

﻿

The Project Gutenberg EBook of Adventures of Huckleberry Finn, Complete
by Mark Twain (Samuel Clemens)

This eBook is for the use of anyone anywhere at no cost and with almost
no restrictions whatsoever. You may copy it, give it away or re-use
it under the terms of the Project Gutenberg License included with this
eBook or online at www.gutenberg.net



In [None]:
# The context manager closes the file when the block ends, even on error;
# the explicit encoding keeps decoding independent of the platform default.
with open('huck-finn.txt', 'r', encoding='utf-8') as f:
    for line in f:
        if 'Huckleberry' in line:
            # line still ends with '\n', so print() leaves a blank line after it
            print(line)

In [71]:
# Preview the start of the file with surrounding whitespace stripped.
# The context manager closes the file even though the loop exits early.
with open('huck-finn.txt', encoding='utf-8') as f:
    for i, line in enumerate(f):
        print(line.strip())
        if i > 20:  # stop after the line with index 21, i.e. 22 lines
            break

﻿

The Project Gutenberg EBook of Adventures of Huckleberry Finn, Complete
by Mark Twain (Samuel Clemens)

This eBook is for the use of anyone anywhere at no cost and with almost
no restrictions whatsoever. You may copy it, give it away or re-use
it under the terms of the Project Gutenberg License included with this
eBook or online at www.gutenberg.net

Title: Adventures of Huckleberry Finn, Complete

Author: Mark Twain (Samuel Clemens)

Release Date: August 20, 2006 [EBook #76]

Last Updated: April 18, 2015]

Language: English


*** START OF THIS PROJECT GUTENBERG EBOOK HUCKLEBERRY FINN ***


In [70]:
# Same preview, but keep each line's own newline and stop print() from adding another.
# The context manager closes the file even though the loop exits early.
with open('huck-finn.txt', encoding='utf-8') as f:
    for i, line in enumerate(f):
        print(line, end="")
        if i > 20:  # stop after the line with index 21, i.e. 22 lines
            break

﻿

The Project Gutenberg EBook of Adventures of Huckleberry Finn, Complete
by Mark Twain (Samuel Clemens)

This eBook is for the use of anyone anywhere at no cost and with almost
no restrictions whatsoever. You may copy it, give it away or re-use
it under the terms of the Project Gutenberg License included with this
eBook or online at www.gutenberg.net

Title: Adventures of Huckleberry Finn, Complete

Author: Mark Twain (Samuel Clemens)

Release Date: August 20, 2006 [EBook #76]

Last Updated: April 18, 2015]

Language: English


*** START OF THIS PROJECT GUTENBERG EBOOK HUCKLEBERRY FINN ***


In [74]:
# Print every line containing 'Huckleberry'; the file is closed when the block ends.
with open('huck-finn.txt', encoding='utf-8') as f:
    for line in f:
        if 'Huckleberry' in line:
            print(line.strip())

The Project Gutenberg EBook of Adventures of Huckleberry Finn, Complete
Title: Adventures of Huckleberry Finn, Complete
"Don't put your feet up there, Huckleberry;" and "Don't scrunch up
like that, Huckleberry--set up straight;" and pretty soon she would
say, "Don't gap and stretch like that, Huckleberry--why don't you try to
and crossed me off. She says, "Take your hands away, Huckleberry; what
Huckleberry; we'll come down to the village on her."
End of the Project Gutenberg EBook of Adventures of Huckleberry Finn,


In [75]:
# Plain substring test: only 'Huck' followed by a space is found.
# The file is closed when the block ends.
with open('huck-finn.txt', encoding='utf-8') as f:
    for line in f:
        if 'Huck ' in line:
            print(line.strip())

CHAPTER IV. Huck and the Judge.--Superstition.
CHAPTER VI. He Went for Judge Thatcher.--Huck Decided to Leave.--Political
CHAPTER XI. Huck and the Woman.--The Search.--Prevarication.--Going to
CHAPTER XV. Huck Loses the Raft.--In the Fog.--Huck Finds the Raft.--Trash.
CHAPTER XX. Huck Explains.--Laying Out a Campaign.--Working the
Pardon.--Hiding in the Room.--Huck Takes the Money.
Leave.--Huck Parting with Mary Jane.--Mumps.--The Opposition Line.
Question of Handwriting.--Digging up the Corpse.--Huck Escapes.
Huck Stealing Away
Huck Creeps into his Window
Huck and his Father
Huck Stealing Away
Huck Creeps into his Window
Huck and his Father
Huck takes the Money
"Here's Huck Finn, he hain't got no family; what you going to do 'bout
"Oh, she'll do.  That's all right.  Huck can come in."
"How you talk, Huck Finn.  Why, you'd _have_ to come when he rubbed it,
"Shucks, it ain't no use to talk to you, Huck Finn.  You don't seem to
Hookerville, but we don't know who 'twas that killed Huck Fi

In [78]:
# \W matches any non-word character, so 'Huck.', 'Huck,' and "Huck's" are found too.
# Each line keeps its trailing newline, which also counts as \W, so a line that
# ends in 'Huck' matches as well. The file is closed when the block ends.
with open('huck-finn.txt', encoding='utf-8') as f:
    for line in f:
        if re.search(r'Huck\W', line):
            print(line.strip())

CHAPTER I. Civilizing Huck.--Miss Watson.--Tom Sawyer Waits.
CHAPTER IV. Huck and the Judge.--Superstition.
CHAPTER V. Huck's Father.--The Fond Parent.--Reform.
CHAPTER VI. He Went for Judge Thatcher.--Huck Decided to Leave.--Political
CHAPTER XI. Huck and the Woman.--The Search.--Prevarication.--Going to
CHAPTER XV. Huck Loses the Raft.--In the Fog.--Huck Finds the Raft.--Trash.
CHAPTER XX. Huck Explains.--Laying Out a Campaign.--Working the
Pardon.--Hiding in the Room.--Huck Takes the Money.
Huck,--Quick Sales and Small.
Leave.--Huck Parting with Mary Jane.--Mumps.--The Opposition Line.
Question of Handwriting.--Digging up the Corpse.--Huck Escapes.
CHAPTER THE LAST. Out of Bondage.--Paying the Captive.--Yours Truly, Huck
Huck Stealing Away
Huck Creeps into his Window
Huck and his Father
Huck Stealing Away
Huck Creeps into his Window
Huck and his Father
"Here I is, Huck"
Huck takes the Money
The Doctor leads Huck
The King shakes Huck
Aunt Sally talks to Huck
"Here's Huck Finn, he hai