## Formatted Strings

In [1]:
# Default formatting: a positive number gets no leading space or sign,
# while a negative number keeps its minus sign.
print(f'{27}\n{-27}')

27
-27


In [2]:
# The ' ' (space) sign option: a leading space before a positive number,
# a minus sign before a negative one, so the digits line up.
print(f'{27: d}\n{-27: d}')

 27
-27


In [3]:
# The '+' sign option: an explicit sign is shown for positive and negative numbers.
print(f'{27:+d}\n{-27:+d}')

+27
-27


In [4]:
# '<' left-aligns the string in a field 12 characters wide (padded with spaces).
f'{"a string":<12s}'

'a string    '

In [5]:
# '>' right-aligns the string in a field 12 characters wide.
f'{"a string":>12s}'

'    a string'

In [6]:
# '^' centers the string in a field 12 characters wide.
f'{"a string":^12s}'

'  a string  '

In [7]:
# The width is only a minimum: a string longer than 12 characters is not truncated.
f'{"a really long string":^12s}'

'a really long string'

In [8]:
# Slice the string to its first 12 characters before formatting so the result
# never exceeds the field width.
f'{"a really long string"[:12]:^12s}'

'a really lon'

In [9]:
# With no format spec, the string is inserted unchanged.
f'{"a string"}'

'a string'

In [10]:
# Doubled braces ({{ and }}) produce literal braces; {num} is still interpolated.
num = 34
f'{{bracket}} {num}'

'{bracket} 34'

In [11]:
# '.2f' formats the float in fixed-point notation with two digits after the decimal point.
val = 67.2318809238409234
f"{val:.2f}"

'67.23'

In [12]:
# '.2e' formats the float in scientific notation with two digits after the decimal point.
val = 67.2318809238409234
f"{val:.2e}"

'6.72e+01'

In [13]:
# Formatting produced new strings; val itself is unchanged. Its displayed value
# has fewer digits than the literal that was typed when it was assigned.
val

67.23188092384092

In [14]:
# With only a width given, a number is right-aligned in the field by default.
f'[{27:10}]'

'[        27]'

In [15]:
# With only a width given, a string is left-aligned in the field by default.
f'[{"27":10}]'

'[27        ]'

In [16]:
# Default float formatting (no format spec).
f"{12.34}"

'12.34'

In [17]:
# With no format spec, a very large float is shown in exponent notation.
f"{1.34e123}"

'1.34e+123'

In [18]:
# 'e' forces scientific notation; with no precision given, six digits follow the decimal point.
f"{12.34:e}"

'1.234000e+01'

In [19]:
# 'f' forces fixed-point notation, so every digit of the huge number is written out
# (followed by six decimal places).
f"{1.34e123:f}"

'1340000000000000125722305548847985105555788496070772977344360517543929764150412311353189102690429605927633722566001385537536.000000'

## Raw Strings

In [20]:
# Normal (non-raw) string literal: every backslash in the text has to be
# written as a doubled backslash.
s = '\\n is the way you write a newline, \\\\ for \\.'
# Echo the value explicitly so its repr is displayed even though the cell
# would otherwise end in an assignment.
s

'\\n is the way you write a newline, \\\\ for \\.'

In [21]:
# print() shows the actual characters of the string rather than its escaped repr.
print(s)

\n is the way you write a newline, \\ for \.


In [22]:
# With a raw string (r"..."), backslashes are taken literally, so the same text
# can be written without doubling each backslash.
r"\n is the way you write a newline, \\ for \."

'\\n is the way you write a newline, \\\\ for \\.'

In [23]:
import re
# Raw strings keep regex escapes such as \d readable (no doubled backslashes).
re.match(r'\d+/\d+/\d+','12/31/2012 is a date.')

<re.Match object; span=(0, 10), match='12/31/2012'>

## Regular Expressions

In [24]:
import re

In [25]:
# Sample strings for the regex examples: no full date (s0), a date at the
# start (s1), a date at the end (s2), and two dates (s3).
s0 = "No full dates here, just 02/15"
s1 = "02/14/2024 is a date"
s2 = "Another date is 12/25/2024"
s3 = "Halloween is 10/31/2024 and Thanksgiving is 11/24/2024"
# Echo the last string explicitly so the cell displays it even though the
# preceding statements are all assignments.
s3

'Halloween is 10/31/2024 and Thanksgiving is 11/24/2024'

In [26]:
# s0 contains no month/day/year date, so re.match returns None (no output is shown).
re.match(r'\d+/\d+/\d+',s0)

In [27]:
# s1 starts with a date, so re.match returns a Match object.
re.match(r'\d+/\d+/\d+',s1)

<re.Match object; span=(0, 10), match='02/14/2024'>

In [30]:
# re.match returns None when the pattern does not match, so compare against
# None to decide which message to print.
if re.match(r'\d+/\d+/\d+',s0) is None:
    print("NO DATE")
else:
    print("GOT A DATE")

NO DATE


In [31]:
# re.match only matches at the beginning of the string; the date in s2 is at
# the end, so the result is None.
re.match(r'\d+/\d+/\d+',s2)

In [32]:
# re.search scans the whole string, so it finds the date at the end of s2.
re.search(r'\d+/\d+/\d+',s2)

<re.Match object; span=(16, 26), match='12/25/2024'>

In [34]:
# Still None for s0: it has no month/day/year date anywhere in it.
re.search(r'\d+/\d+/\d+',s0)

In [35]:
# re.search returns only the first match, even though s3 contains two dates.
re.search(r'\d+/\d+/\d+',s3)

<re.Match object; span=(13, 23), match='10/31/2024'>

In [36]:
# re.findall returns every match in the string as a list of strings.
re.findall(r'\d+/\d+/\d+',s3)

['10/31/2024', '11/24/2024']

In [37]:
# re.finditer yields a Match object (with span and matched text) for each match.
for m in re.finditer(r'\d+/\d+/\d+',s3):
    print(m)

<re.Match object; span=(13, 23), match='10/31/2024'>
<re.Match object; span=(44, 54), match='11/24/2024'>


### Groups in Regex

In [38]:
# Keep the Match object for the first date in s3 so it can be inspected below.
match = re.search(r'\d+/\d+/\d+',s3)
# Echo it explicitly so the cell displays the Match even though it would
# otherwise end in an assignment.
match

<re.Match object; span=(13, 23), match='10/31/2024'>

In [39]:
# group(0) is the entire text matched by the pattern.
match.group(0)

'10/31/2024'

In [40]:
# A second two-date example string; findall again returns both dates.
s5 = "Halloween is 10/31/2024 and Thanksgiving is 11/28/2024"
re.findall(r'\d+/\d+/\d+',s5)

['10/31/2024', '11/28/2024']

In [41]:
# Print just the matched text of every date found.
# NOTE(review): this iterates over s3, not the s5 defined in the cell just
# before it -- confirm that is intended.
for match in re.finditer(r'\d+/\d+/\d+',s3):
    print(match.group(0))

10/31/2024
11/24/2024


In [44]:
# The first date is followed directly by '/11/28'. The pattern consumes
# '10/31/2024'; the leftover '11/28' has only two numbers, so it is not matched.
s5 = "Halloween is 10/31/2024/11/28 and Thanksgiving is 11/28/2024"
re.findall(r'\d+/\d+/\d+',s5)

['10/31/2024', '11/28/2024']

In [45]:
# Parentheses create capture groups; groups() returns them as a tuple of strings
# (here: month, day, year).
for match in re.finditer(r'(\d+)/(\d+)/(\d+)',s3):
    print(match.groups())

('10', '31', '2024')
('11', '24', '2024')


In [47]:
# `match` is still bound to the last Match object from the preceding finditer
# loop, so group(1) is the first captured group of the last date found.
match.group(1)

'11'

In [48]:
# group(1) is the first capture group of each match.
for match in re.finditer(r'(\d+)/(\d+)/(\d+)',s3):
    print(match.group(1))

10
11


In [49]:
# Individual groups can be picked out by number: here the first and third.
for match in re.finditer(r'(\d+)/(\d+)/(\d+)',s3):
    print(match.group(1), match.group(3))

10 2024
11 2024


In [50]:
# Rewrite each month/day/year date as year-month-day. The captured groups are
# strings, so they are converted to int before the zero-padded '02d' format
# is applied to the month and day.
s21 = "Date 3/1/2024 and 10/31/2024"
for match in re.finditer(r'(\d+)/(\d+)/(\d+)',s21):
    print('{2}-{0:02d}-{1:02d}'.format(*map(int, match.groups())))

2024-03-01
2024-10-31


### Regex Splitting

In [64]:
# str.split with the exact separator ', ' misses the commas that are not
# followed by a space, so the last three names stay joined together.
s = "Venkata, Ranjit, Pankaj,Ali,Karthika"
names = s.split(', ')
# Echo the result explicitly so the cell displays it even though it would
# otherwise end in an assignment.
names

['Venkata', 'Ranjit', 'Pankaj,Ali,Karthika']

In [65]:
# Split on a comma followed by any amount of whitespace (including none),
# which separates all five names.
re.split(r',\s*', s)

['Venkata', 'Ranjit', 'Pankaj', 'Ali', 'Karthika']

### Regex Substitution

In [51]:
# Show the two-date string used in the substitution examples.
s3

'Halloween is 10/31/2024 and Thanksgiving is 11/24/2024'

In [52]:
# re.sub replaces every match of the pattern with the replacement text.
re.sub(r'(\d+)/(\d+)/(\d+)',r'a holiday',s3)

'Halloween is a holiday and Thanksgiving is a holiday'

In [53]:
# Backreferences \1, \2, \3 in the replacement insert the captured groups,
# here reordered to year-month-day.
re.sub(r'(\d+)/(\d+)/(\d+)',r'\3-\1-\2',s3)

'Halloween is 2024-10-31 and Thanksgiving is 2024-11-24'

In [54]:
# Backreferences copy the captured text as-is, so single-digit months and
# days are not zero-padded.
s6 = "New Years Day was 1/1/2024"
re.sub(r'(\d+)/(\d+)/(\d+)',r'\3-\1-\2',s6)

'New Years Day was 2024-1-1'

In [57]:
# A function can be used as the replacement; it receives each Match object.
# Pitfall: the groups are strings, and with the '02' spec a string is padded
# with '0' on the right, so '1' becomes '10' instead of '01'.
re.sub(r'(\d+)/(\d+)/(\d+)',lambda m: 
       f'{m.group(3)}-{m.group(1):02}-{m.group(2):02}',s6)

'New Years Day was 2024-10-10'

In [61]:
# Fix 1: add an explicit '>' so the string is right-aligned and the '0' fill
# goes on the left ('1' -> '01').
re.sub(r'(\d+)/(\d+)/(\d+)',lambda m: 
       f'{m.group(3)}-{m.group(1):>02}-{m.group(2):>02}',s6)

'New Years Day was 2024-01-01'

In [60]:
# Fix 2: convert the captured strings to int and use the integer format '02d',
# which zero-pads on the left.
re.sub(r'(\d+)/(\d+)/(\d+)',lambda m: 
       f'{m.group(3)}-{int(m.group(1)):02d}-{int(m.group(2)):02d}',s6)

'New Years Day was 2024-01-01'

## Files

In [None]:
# Uncomment the next line to print the whole file (the output is very long):
# !cat huck-finn.txt

In [68]:
# Peek at the first few lines of the file using the shell's head command.
!head huck-finn.txt

﻿

The Project Gutenberg EBook of Adventures of Huckleberry Finn, Complete
by Mark Twain (Samuel Clemens)

This eBook is for the use of anyone anywhere at no cost and with almost
no restrictions whatsoever. You may copy it, give it away or re-use
it under the terms of the Project Gutenberg License included with this
eBook or online at www.gutenberg.net



In [69]:
# Print every line that mentions 'Huckleberry'. Each line still ends with its
# own newline, so print() produces a blank line after each one.
# The with-block closes the file once the loop finishes.
with open('huck-finn.txt', 'r') as f:
    for line in f:
        if 'Huckleberry' in line:
            print(line)

The Project Gutenberg EBook of Adventures of Huckleberry Finn, Complete

Title: Adventures of Huckleberry Finn, Complete

"Don't put your feet up there, Huckleberry;" and "Don't scrunch up

like that, Huckleberry--set up straight;" and pretty soon she would

say, "Don't gap and stretch like that, Huckleberry--why don't you try to

and crossed me off. She says, "Take your hands away, Huckleberry; what

Huckleberry; we'll come down to the village on her."

End of the Project Gutenberg EBook of Adventures of Huckleberry Finn,



In [71]:
# Print the beginning of the file; rstrip() removes each line's trailing
# newline so print() does not double-space the output.
# The with-block closes the file even though the loop exits early.
with open('huck-finn.txt') as f:
    for i, line in enumerate(f):
        print(line.rstrip())
        if i > 20:
            break  # stop once the line with index 21 has been printed

﻿

The Project Gutenberg EBook of Adventures of Huckleberry Finn, Complete
by Mark Twain (Samuel Clemens)

This eBook is for the use of anyone anywhere at no cost and with almost
no restrictions whatsoever. You may copy it, give it away or re-use
it under the terms of the Project Gutenberg License included with this
eBook or online at www.gutenberg.net

Title: Adventures of Huckleberry Finn, Complete

Author: Mark Twain (Samuel Clemens)

Release Date: August 20, 2006 [EBook #76]

Last Updated: April 18, 2015]

Language: English


*** START OF THIS PROJECT GUTENBERG EBOOK HUCKLEBERRY FINN ***


In [73]:
# Print the beginning of the file with line indices; end="" stops print()
# from adding a second newline after the one already in each line.
# The with-block closes the file even though the loop exits early.
with open('huck-finn.txt') as f:
    for i, line in enumerate(f):
        print(i, line, end="")
        if i > 20:
            break  # stop once the line with index 21 has been printed

0 ﻿
1 
2 The Project Gutenberg EBook of Adventures of Huckleberry Finn, Complete
3 by Mark Twain (Samuel Clemens)
4 
5 This eBook is for the use of anyone anywhere at no cost and with almost
6 no restrictions whatsoever. You may copy it, give it away or re-use
7 it under the terms of the Project Gutenberg License included with this
8 eBook or online at www.gutenberg.net
9 
10 Title: Adventures of Huckleberry Finn, Complete
11 
12 Author: Mark Twain (Samuel Clemens)
13 
14 Release Date: August 20, 2006 [EBook #76]
15 
16 Last Updated: April 18, 2015]
17 
18 Language: English
19 
20 
21 *** START OF THIS PROJECT GUTENBERG EBOOK HUCKLEBERRY FINN ***


In [82]:
# Print every line containing the substring 'witch' (this also matches
# 'witches', 'witched', 'witchcraft', ...).
# The with-block closes the file when the loop is done; the line index was
# never used, so the file is iterated directly.
with open('huck-finn.txt') as f:
    for line in f:
        if 'witch' in line:
            print(line.strip())

up a little lock of my hair with a thread to keep witches away.  But
Afterwards Jim said the witches be witched him and put him in a trance,
always talking about witches in the dark by the kitchen fire; but
Jim would happen in and say, "Hm!  What you know 'bout witches?" and
cure anybody with it and fetch witches whenever he wanted to just by
and been rode by witches.
with witchcraft. I catched a good big catfish, too, and Jim cleaned him
a "divining-rod," "dissipating witch spells," and so on.  By and by he
all tied up in little bunches with thread.  That was to keep witches
off.  He said the witches was pestering him awful these nights, and
strange words and noises, and he didn't believe he was ever witched so
"Oh, it's de dad-blame' witches, sah, en I wisht I was dead, I do.
me; 'kase he say dey _ain't_ no witches.  I jis' wish to goodness he was
witches went for him mostly in the dark, and it was good to have folks
er dem witches jis' wunst--on'y jis' wunst--it's all I'd ast.  But 

In [83]:
# Requiring a trailing space ('witch ') excludes 'witches' etc., but it also
# misses 'witch' followed by punctuation such as a hyphen.
# The with-block closes the file when the loop is done; the line index was
# never used, so the file is iterated directly.
with open('huck-finn.txt') as f:
    for line in f:
        if 'witch ' in line:
            print(line.strip())

a "divining-rod," "dissipating witch spells," and so on.  By and by he
the reason.  You make them a witch pie; that's the thing for _you_ to
"But my lan', Mars Sid, how's I gwyne to make 'm a witch pie?  I doan'
Nat didn't look when we put the witch pie in Jim's pan; and we put the


In [84]:
# \W matches any non-word character, so this finds 'witch' followed by a
# space, punctuation or a hyphen, but not 'witches'.
# The with-block closes the file when the loop is done; the line index was
# never used, so the file is iterated directly.
with open('huck-finn.txt') as f:
    for line in f:
        if re.search(r'witch\W', line):
            print(line.strip())

a "divining-rod," "dissipating witch spells," and so on.  By and by he
the reason.  You make them a witch pie; that's the thing for _you_ to
"But my lan', Mars Sid, how's I gwyne to make 'm a witch pie?  I doan'
all, don't you _handle_ the witch-things."
Nat didn't look when we put the witch pie in Jim's pan; and we put the


In [77]:
# Read the whole file into a list of lines (each keeps its trailing newline).
# The with-block closes the file as soon as it has been read.
with open('huck-finn.txt') as f:
    all_lines = f.readlines()
# The first line starts with '\ufeff', a byte-order mark left in by the default decoding.
all_lines[0:10]

['\ufeff\n',
 '\n',
 'The Project Gutenberg EBook of Adventures of Huckleberry Finn, Complete\n',
 'by Mark Twain (Samuel Clemens)\n',
 '\n',
 'This eBook is for the use of anyone anywhere at no cost and with almost\n',
 'no restrictions whatsoever. You may copy it, give it away or re-use\n',
 'it under the terms of the Project Gutenberg License included with this\n',
 'eBook or online at www.gutenberg.net\n',
 '\n']

In [78]:
# Decoding with 'utf-8-sig' removes the byte-order mark from the start of the file.
# The with-block closes the file as soon as it has been read.
with open('huck-finn.txt', encoding='utf-8-sig') as f:
    all_lines = f.readlines()
all_lines[0:10]

['\n',
 '\n',
 'The Project Gutenberg EBook of Adventures of Huckleberry Finn, Complete\n',
 'by Mark Twain (Samuel Clemens)\n',
 '\n',
 'This eBook is for the use of anyone anywhere at no cost and with almost\n',
 'no restrictions whatsoever. You may copy it, give it away or re-use\n',
 'it under the terms of the Project Gutenberg License included with this\n',
 'eBook or online at www.gutenberg.net\n',
 '\n']

In [85]:
# read(100) returns at most the first 100 characters; with 'utf-8-sig' the
# byte-order mark is not among them.
# The with-block closes the file as soon as it has been read.
with open('huck-finn.txt', encoding='utf-8-sig') as f:
    initial_str = f.read(100)
# Echo the value explicitly so its repr is displayed.
initial_str

'\n\nThe Project Gutenberg EBook of Adventures of Huckleberry Finn, Complete\nby Mark Twain (Samuel Clem'

In [86]:
# With plain 'utf-8' the byte-order mark is kept as the first character
# ('\ufeff'), so it counts toward the 100 characters read.
# The with-block closes the file as soon as it has been read.
with open('huck-finn.txt', encoding='utf-8') as f:
    initial_str = f.read(100)
# Echo the value explicitly so its repr is displayed.
initial_str

'\ufeff\n\nThe Project Gutenberg EBook of Adventures of Huckleberry Finn, Complete\nby Mark Twain (Samuel Cle'

In [87]:
# print() renders the newlines; the leading byte-order mark is invisible here
# but is still part of the string.
print(initial_str)

﻿

The Project Gutenberg EBook of Adventures of Huckleberry Finn, Complete
by Mark Twain (Samuel Cle
