## Splitting and Joining Strings

In [17]:
# Names separated by a comma followed by a space; input for the split examples.
s = "Venkata, Ranjit, Pankaj, Ali, Karthika"
s  # trailing expression so the cell actually displays the string

'Venkata, Ranjit, Pankaj, Ali, Karthika'

In [18]:
# Splitting on the bare comma leaves the space after each comma attached
# to the front of the following name.
names = s.split(',')
names  # trailing expression so the cell displays the resulting list

['Venkata', ' Ranjit', ' Pankaj', ' Ali', ' Karthika']

In [19]:
# Split on the comma, then strip surrounding whitespace from every piece.
names = [n.strip() for n in s.split(',')]
names  # trailing expression so the cell displays the cleaned list

['Venkata', 'Ranjit', 'Pankaj', 'Ali', 'Karthika']

In [20]:
# Use the full two-character delimiter ", " so no stripping is needed.
names = s.split(', ')
names  # trailing expression so the cell displays the resulting list

['Venkata', 'Ranjit', 'Pankaj', 'Ali', 'Karthika']

In [21]:
# Inconsistent delimiters: the last two commas have no space after them,
# so splitting on ", " leaves 'Pankaj,Ali,Karthika' as a single piece.
s = "Venkata, Ranjit, Pankaj,Ali,Karthika"
names = s.split(', ')
names  # trailing expression so the cell displays the resulting list

['Venkata', 'Ranjit', 'Pankaj,Ali,Karthika']

In [22]:
# Splitting on the bare comma and stripping each piece copes with commas
# that are only sometimes followed by a space.
names = [name.strip() for name in s.split(',')]
names  # trailing expression so the cell displays the cleaned list

['Venkata', 'Ranjit', 'Pankaj', 'Ali', 'Karthika']

In [24]:
# split() with no argument splits on any run of whitespace (spaces and
# newlines alike) and produces no empty strings.
s = """Venkata Ranjit    Pankaj
Ali    
 Karthika"""
s.split()

['Venkata', 'Ranjit', 'Pankaj', 'Ali', 'Karthika']

In [25]:
# split(" ") splits on every single space only: consecutive spaces yield
# empty strings and newlines stay inside the pieces.
s = """Venkata Ranjit    Pankaj
Ali    
 Karthika"""
s.split(" ")

['Venkata', 'Ranjit', '', '', '', 'Pankaj\nAli', '', '', '', '\n', 'Karthika']

In [26]:
# Two adjacent commas produce an empty string in the result when an
# explicit delimiter is given.
s = "Venkata, Ranjit,, Pankaj,Ali,Karthika"
names = s.split(',')
names  # trailing expression so the cell displays the resulting list

['Venkata', ' Ranjit', '', ' Pankaj', 'Ali', 'Karthika']

In [27]:
# maxsplit=3 performs at most three splits, counted from the left; whatever
# remains after the third split is kept as one final piece.
s = "Venkata, Ranjit, Pankaj, Ali, Karthika"
s.split(', ', maxsplit=3)

['Venkata', 'Ranjit', 'Pankaj', 'Ali, Karthika']

In [28]:
# rsplit counts its splits from the right, so the unsplit remainder is the
# first element instead of the last.
s.rsplit(', ', maxsplit=3)

['Venkata, Ranjit', 'Pankaj', 'Ali', 'Karthika']

In [29]:
# Rebuild the clean list of names; it is the input for the join example.
s = "Venkata, Ranjit, Pankaj, Ali, Karthika"
names = s.split(', ')
names  # trailing expression so the cell displays the resulting list

['Venkata', 'Ranjit', 'Pankaj', 'Ali', 'Karthika']

In [30]:
# join is called on the separator string and glues the list items back
# together with that separator between them.
', '.join(names)

'Venkata, Ranjit, Pankaj, Ali, Karthika'

## String Comparisons

In [31]:
# Strings compare character by character: the first characters are equal,
# and at the second position 'b' sorts before 'e'.
'abi' < 'aef'

True

In [32]:
# Equal strings are not strictly less than each other.
'a' < 'a'

False

In [33]:
# Comparison is by character code, not alphabetical order ignoring case:
# uppercase 'Z' sorts before lowercase 'a'.
'Z' < 'a'

True

In [34]:
# ord() gives the integer code of a character; 'Z' has a smaller code than 'a'.
ord('Z'), ord('a')

(90, 97)

## Formatted Strings

In [35]:
# formatted string literal (f-string)
# Expressions inside {} are evaluated and inserted into the string.
first_name = "Bob"
last_name = "Smith"
f"My name is {last_name}, {first_name}"

'My name is Smith, Bob'

In [36]:
# .format (empty)
# Empty {} placeholders are filled with the arguments in the order given.
"My name is {} {}".format(first_name, last_name)

'My name is Bob Smith'

In [37]:
# .format (position)
# Numbered placeholders pick arguments by index and may repeat: {1} appears
# twice here, so the second argument (first_name) is inserted twice.
"My name is {1}, {1} {0}".format(last_name, first_name)

'My name is Bob, Bob Smith'

In [38]:
# .format (kwarg)
# Named placeholders are filled from the keyword arguments of the same name.
"My name is {first_name} {last_name}".format(first_name=first_name, last_name=last_name)

'My name is Bob Smith'

In [39]:
# no space before positive number
# Default sign handling: only the negative number gets a sign character.
print(f'{27}\n{-27}')

27
-27


In [42]:
# space before positive number
# The ' ' sign option reserves a leading space for positive numbers so they
# line up with negative ones.
print(f'{27: d}\n{-27: d}')

 27
-27


In [44]:
# plus sign before positive number
# The '+' sign option shows an explicit sign on positive and negative numbers.
print(f'{27:+d}\n{-27:+d}')

+27
-27


In [47]:
# '<' left-aligns the value in a field 12 characters wide (padding on the right).
f'{"a string":<12s}'

'a string    '

In [48]:
# '>' right-aligns the value in a field 12 characters wide (padding on the left).
f'{"a string":>12s}'

'    a string'

In [49]:
# '^' centers the value in a field 12 characters wide.
f'{"a string":^12s}'

'  a string  '

In [51]:
# The width is a minimum: a value longer than 12 characters is not truncated.
f'{"a really long string":^12s}'

'a really long string'

In [52]:
# With no format spec the value is inserted as-is, with no padding.
f'{"a string"}'

'a string'

In [53]:
# .2f formats the float in fixed-point notation with two digits after the point.
val = 67.2318809238409234
f"{val:.2f}"

'67.23'

In [54]:
# .2e formats the float in scientific notation with two digits after the point.
val = 67.2318809238409234
f"{val:.2e}"

'6.72e+01'

In [55]:
# Formatting did not modify val; the displayed float has fewer digits than
# the literal that was typed because a float cannot store all of them.
val

67.23188092384092

In [65]:
# With only a width given, a number is right-aligned in the field.
f'[{27:10}]'

'[        27]'

In [62]:
# With only a width given, a string is left-aligned in the field.
f'[{"27":10}]'

'[27        ]'

## Raw Strings

In [66]:
# normal
# In an ordinary string literal every literal backslash must be written as \\.
s = '\\n is the way you write a newline, \\\\ for \\.'
s  # trailing expression so the cell displays the repr of the string

'\\n is the way you write a newline, \\\\ for \\.'

In [67]:
# print shows the characters the string really contains: each escaped pair
# \\ in the literal is a single backslash.
print(s)

\n is the way you write a newline, \\ for \.


In [68]:
# with raw strings
# The r prefix turns off escape processing, so backslashes are typed once;
# the displayed repr still shows them doubled.
r"\n is the way you write a newline, \\ for \."

'\\n is the way you write a newline, \\\\ for \\.'

In [73]:
# Raw strings keep regex patterns readable: \d reaches the regex engine
# unchanged. This pattern is digits/digits/digits, matched at the start of the text.
import re
re.match(r'\d+/\d+/\d+','12/31/2012 is a date.')

<re.Match object; span=(0, 10), match='12/31/2012'>

## Regular Expressions

In [74]:
import re

In [75]:
# Sample sentences for the regex examples: no full date, a date at the
# start, a date at the end, and two dates in one string.
s0 = "No full dates here, just 02/15"
s1 = "02/14/2024 is a date"
s2 = "Another date is 12/25/2024"
s3 = "Halloween is 10/31/2024 and Thanksgiving is 11/24/2024"
s3  # trailing expression so the cell displays the last sample

'Halloween is 10/31/2024 and Thanksgiving is 11/24/2024'

In [76]:
# re.match returns None when the pattern does not match at the start of the
# string, so this cell displays nothing.
re.match(r'\d+/\d+/\d+',s0)

In [77]:
# s1 begins with a date, so re.match returns a Match object.
re.match(r'\d+/\d+/\d+',s1)

<re.Match object; span=(0, 10), match='02/14/2024'>

In [79]:
# A failed match is None, so comparing against None tells us whether s0
# starts with a date; pick the message to print accordingly.
print("NO DATE" if re.match(r'\d+/\d+/\d+',s0) is None else "GOT A DATE")

NO DATE


In [80]:
# re.match only tries the start of the string; the date in s2 is at the
# end, so the result is None and nothing is displayed.
re.match(r'\d+/\d+/\d+',s2)

In [81]:
# re.search scans the whole string and finds the date wherever it occurs.
re.search(r'\d+/\d+/\d+',s2)

<re.Match object; span=(16, 26), match='12/25/2024'>

In [82]:
# re.search returns only the first match even when the string holds several.
re.search(r'\d+/\d+/\d+',s3)

<re.Match object; span=(13, 23), match='10/31/2024'>

In [83]:
# re.findall returns every match as a list of strings.
re.findall(r'\d+/\d+/\d+',s3)

['10/31/2024', '11/24/2024']

In [84]:
# re.finditer yields a Match object per match, which also carries the
# position (span) of each hit.
for m in re.finditer(r'\d+/\d+/\d+',s3):
    print(m)

<re.Match object; span=(13, 23), match='10/31/2024'>
<re.Match object; span=(44, 54), match='11/24/2024'>


### Groups in Regex

In [85]:
# Keep the first date match from s3 in a variable so it can be inspected.
match = re.search(r'\d+/\d+/\d+',s3)
match  # trailing expression so the cell displays the Match object

<re.Match object; span=(13, 23), match='10/31/2024'>

In [86]:
# group(0) is the full text that the pattern matched.
match.group(0)

'10/31/2024'

In [87]:
# Same search on a second sample string; findall returns both dates.
s5 = "Halloween is 10/31/2024 and Thanksgiving is 11/28/2024"
re.findall(r'\d+/\d+/\d+',s5)

['10/31/2024', '11/28/2024']

In [88]:
# Print just the matched text of each Match. Note the loop variable
# rebinds the name `match`.
for match in re.finditer(r'\d+/\d+/\d+',s3):
    print(match.group(0))

10/31/2024
11/24/2024


In [89]:
# Matches do not overlap: after '10/31/2024' is consumed, the leftover
# '/11/28' has only two numbers and is not reported as a date.
s5 = "Halloween is 10/31/2024/11/28 and Thanksgiving is 11/28/2024"
re.findall(r'\d+/\d+/\d+',s5)

['10/31/2024', '11/28/2024']

In [90]:
# Parentheses create capture groups; groups() returns the captured pieces
# of each match as a tuple of strings.
for match in re.finditer(r'(\d+)/(\d+)/(\d+)',s3):
    print(match.groups())

('10', '31', '2024')
('11', '24', '2024')


In [96]:
# group(1) is the text captured by the first pair of parentheses.
for match in re.finditer(r'(\d+)/(\d+)/(\d+)',s3):
    print(match.group(1))

10
11


In [97]:
# Groups are numbered from 1 in pattern order; pick the first and third here.
for match in re.finditer(r'(\d+)/(\d+)/(\d+)',s3):
    print(match.group(1), match.group(3))

10 2024
11 2024


In [98]:
# Rewrite each m/d/y date as y-mm-dd. The captured strings are converted to
# int so the :02d spec can zero-pad single-digit months and days.
s21 = "Date 3/1/2024 and 10/31/2024"
for match in re.finditer(r'(\d+)/(\d+)/(\d+)',s21):
    print('{2}-{0:02d}-{1:02d}'.format(*map(int, match.groups())))

2024-03-01
2024-10-31


In [99]:
# Show the sample string that the substitution examples operate on.
s3

'Halloween is 10/31/2024 and Thanksgiving is 11/24/2024'

In [100]:
# re.sub replaces every match of the pattern with the replacement text and
# returns the new string.
re.sub(r'(\d+)/(\d+)/(\d+)',r'a holiday',s3)

'Halloween is a holiday and Thanksgiving is a holiday'

In [101]:
# \1, \2, \3 in the replacement refer to the captured groups, so the pieces
# of each date can be reordered.
re.sub(r'(\d+)/(\d+)/(\d+)',r'\3-\1-\2',s3)

'Halloween is 2024-10-31 and Thanksgiving is 2024-11-24'

In [102]:
# Backreferences copy the captured text verbatim, so single-digit parts
# are not zero-padded.
s6 = "New Years Day was 1/1/2024"
re.sub(r'(\d+)/(\d+)/(\d+)',r'\3-\1-\2',s6)

'New Years Day was 2024-1-1'

In [103]:
# The replacement may be a function: it receives each Match and returns the
# replacement text. Both captured parts are converted to int and formatted
# with :02d so they are zero-padded the same way.
re.sub(r'(\d+)/(\d+)/(\d+)',
       lambda m: f'{m.group(3)}-{int(m.group(1)):02d}-{int(m.group(2)):02d}',
       s6)

'New Years Day was 2024-01-01'

In [None]:
# Same function-based replacement applied to s3: each date is rebuilt from
# its captured groups with the first two zero-padded to two digits.
re.sub(r'(\d+)/(\d+)/(\d+)',lambda m: 
       f'{m.group(3)}-{int(m.group(1)):02d}-{int(m.group(2)):02d}',s3)

## Files

In [None]:
# output
# The value of a cell's last expression is displayed as the cell's output.
"hi" + " there"

In [None]:
# stderr
# print() writes to standard output by default; file=sys.stderr sends the
# text to the error stream instead. (The Python 2 statement form
# `print "hi"` is a SyntaxError in Python 3.)
import sys
print("hi", file=sys.stderr)

In [None]:
# print() writes the text to standard output.
print("hi")

In [None]:
# print the whole file
# !cat huck-finn.txt

In [104]:
# Print every line of the file that mentions 'Huckleberry'.
# The with-block closes the file when the loop ends, even on error.
# Each line keeps its trailing newline and print() adds another, which is
# why the output is double-spaced.
with open('huck-finn.txt', 'r') as f:
    for line in f:
        if 'Huckleberry' in line:
            print(line)

The Project Gutenberg EBook of Adventures of Huckleberry Finn, Complete

Title: Adventures of Huckleberry Finn, Complete

"Don't put your feet up there, Huckleberry;" and "Don't scrunch up

like that, Huckleberry--set up straight;" and pretty soon she would

say, "Don't gap and stretch like that, Huckleberry--why don't you try to

and crossed me off. She says, "Take your hands away, Huckleberry; what

Huckleberry; we'll come down to the village on her."

End of the Project Gutenberg EBook of Adventures of Huckleberry Finn,



In [105]:
# Same search, but strip() removes the newline at the end of each line
# (plus any other surrounding whitespace) so the output is single-spaced.
# The with-block closes the file when the loop ends, even on error;
# 'r' (read text) is the default mode, so it can be omitted.
with open('huck-finn.txt') as f:
    for line in f:
        if 'Huckleberry' in line:
            print(line.strip())

The Project Gutenberg EBook of Adventures of Huckleberry Finn, Complete
Title: Adventures of Huckleberry Finn, Complete
"Don't put your feet up there, Huckleberry;" and "Don't scrunch up
like that, Huckleberry--set up straight;" and pretty soon she would
say, "Don't gap and stretch like that, Huckleberry--why don't you try to
and crossed me off. She says, "Take your hands away, Huckleberry; what
Huckleberry; we'll come down to the village on her."
End of the Project Gutenberg EBook of Adventures of Huckleberry Finn,
