xxxxxxxxxx
import re

# The string to search for the pattern.
test_string = 'Hello greppers!'

# Compile a regular expression; this simple one matches the literal "Hello".
pattern = re.compile(r'Hello')

# finditer() returns an iterator that yields one match object for every
# non-overlapping occurrence of the pattern within test_string.
matches = pattern.finditer(test_string)

# Print all the occurrences; each one is a re.Match object.
# Note: the iterator is exhausted once this loop has finished.
for match in matches:
    print(match)
xxxxxxxxxx
import re

# Regex cheat sheet   : https://www.dataquest.io/blog/regex-cheatsheet/
# Regex Python tester : https://pythex.org/
# re documentation    : https://docs.python.org/3/library/re.html
text = "i like train"
reg = r"[a-c]"  # character class: any single character from 'a' to 'c'

# re.search() scans the whole string for the first match. re.match() only
# tests the very beginning of the string, so it would miss the 'a' in
# "train" and wrongly report that nothing matched.
found = re.search(reg, text)
if found:
    print(text)
else:
    print("Not any match")
xxxxxxxxxx
# Requires: import re
# ^ - Matches the beginning of the line
# $ - Matches the end of the line
# . - Matches any character (except a newline, unless re.DOTALL is set)
# \s - Matches whitespace
# \S - Matches any non-whitespace character
# * - Repeat a character zero or more times
# *? - Repeat a character zero or more times (non-greedy)
# + - Repeat a character one or more times
# +? - Repeat a character one or more times (non-greedy)
# [aeiou] - Matches a single character in the listed set
# [^XYZ] - Matches a single character not in the listed set
# [a-z0-9] - The set of characters can include a range
# ( - Indicates where string extraction is to start
# ) - Indicates where string extraction is to end
1. A fixed string -> abc123
2. Arbitrary repetition -> a*b ( "*" means that you can have an arbitrary
number (possibly 0) of the previous char ) # b, ab, aaab
3. Repeat character at least once -> a+b # ab, aaaab
4. Repeat character at most once -> a?b # b, ab
5. Repeat a character a fixed number of times -> a{5} # aaaaa
6. Repeat a pattern a fixed number of times -> (a*b){3} # baabab, ababaaaab
7. Repeat a character or pattern a variable number of times -> a{2,4} # aa, aaa, aaaa
8. Choice of several characters -> [ab]c # ac, bc
9. Arbitrary mixture of several characters -> [ab]*c # c, aac, abbac
10. Ranges of characters -> [A-H][a-z]* # Aasdfalsd, Hb, G
11. Characters OTHER than particular one -> [^AB] # C, D
12. Choice of several expressions -> Dr|Mr|Ms|Mrs # Dr, Mr, Mrs, Ms
13. Nesting expressions -> ([A-Z][a-z][0-9])* # Ab1, Ab1Cd2 (and the empty string)
14. Start of a line -> ^ab
15. End of a line -> ab$
#Type of pattern
1. Special characters -> \[ # [
2. Any character 'except' newline -> . # a, *, -
3. Nongreedy evaluation -> <.*?> # on <h1></h2 name = "foo"> matches only <h1> (greedy <.*> would match the whole string)
4. Whitespace -> \s
xxxxxxxxxx
import re

# re.search() returns a match object if the pattern is found, else None.
# Note the class is [a-zA-Z]; the range "A-z" would also match the
# punctuation characters that sit between 'Z' and 'a' in ASCII ([ \ ] ^ _ `).
word_pattern = r"[a-zA-Z]+"
txt = "Hello world"
x = re.search(word_pattern, txt)
if x:
    print("YES! We have a match!", x)
else:
    print("No match")
# output YES! We have a match! <re.Match object; span=(0, 5), match='Hello'>

# re.findall() returns a list of all matches found - this regular
# expression finds all vowels in the string.
txt = "This is a test"
x = re.findall(r"[aeiou]", txt)
print(x)
# output ['i', 'i', 'a', 'e']

# re.findall() with a flag - finds "is" or "test" in the string,
# case-insensitively (the "is" inside "This" counts too).
txt = "This iS a Test"
x = re.findall(r"(is|test)", txt, flags=re.IGNORECASE)
print(x)
# output ['is', 'iS', 'Test']

# re.split() splits a string into a list using a regular expression.
txt = "This is a silly string"
x = re.split(r"silly", txt)
print(x)
# output ['This is a ', ' string']

# re.sub() replaces every match - here the repeated "tototo" becomes "to".
txt = "We need tototo run "
x = re.sub(r"(to)+", "to", txt)
print(x)
# output We need to run
xxxxxxxxxx
# A Python program to demonstrate re.search() and capture groups.
import re

# Use a regular expression to match a date string in the form of a
# month name followed by a day number.
regex = r"([a-zA-Z]+) (\d+)"

# re.search() scans the whole string, so the date is found even though it
# is not at the beginning (re.match() would only test position 0).
match = re.search(regex, "I was born on June 24")

if match is not None:
    # We reach here when the expression "([a-zA-Z]+) (\d+)"
    # matches the date string.

    # This will print [14, 21), since the match starts at index 14
    # and ends (exclusively) at index 21.
    print("Match at index %s, %s" % (match.start(), match.end()))

    # We use the group() method to get the full match and the
    # captured groups. The groups contain the matched values.
    # In particular:
    #   match.group(0) always returns the fully matched string
    #   match.group(1), match.group(2), ... return the capture
    #   groups in order from left to right in the pattern
    #   match.group() is equivalent to match.group(0)

    # So this will print "June 24"
    print("Full match: %s" % (match.group(0)))

    # So this will print "June"
    print("Month: %s" % (match.group(1)))

    # So this will print "24"
    print("Day: %s" % (match.group(2)))
else:
    print("The regex pattern does not match.")
xxxxxxxxxx
# Walk-through of a basic regex search:
import re  # the regular-expression module

# Step 1: build a pattern object from your regex. Swap REGEX_GOES_HERE for
# the expression you need and keep ONE of the two assignments below (as
# written, the second assignment wins, so the search is case-sensitive).
regex_obj = re.compile(r'REGEX_GOES_HERE', re.IGNORECASE)  # case-insensitive variant
regex_obj = re.compile(r'REGEX_GOES_HERE')  # case-sensitive variant

# Step 2: the text to look through.
search_txt = "These are oranges and apples and pears"

# Step 3: run the pattern against the text (result discarded here).
regex_obj.findall(search_txt)

# Step 4: the same call, printed. findall() gives back a LIST of every
# match, or an empty list when nothing matches.
all_hits = regex_obj.findall(search_txt)
print(all_hits)
xxxxxxxxxx
# Requires: import re
# ^ - Matches the beginning of the line
# $ - Matches the end of the line
# . - Matches any character (except a newline, unless re.DOTALL is set)
# \s - Matches whitespace
# \S - Matches any non-whitespace character
# * - Repeat a character zero or more times
# *? - Repeat a character zero or more times (non-greedy)
# + - Repeat a character one or more times
# +? - Repeat a character one or more times (non-greedy)
# [aeiou] - Matches a single character in the listed set
# [^XYZ] - Matches a single character not in the listed set
# [a-z0-9] - The set of characters can include a range
# ( - Indicates where string extraction is to start
# ) - Indicates where string extraction is to end
xxxxxxxxxx
import re

# Locate the literal word "portal" inside a sentence and report where the
# match begins and ends (the end index is exclusive).
s = 'GeeksforGeeks: A computer science portal for geeks'
match = re.compile(r'portal').search(s)

for label, position in (('Start Index:', match.start()),
                        ('End Index:', match.end())):
    print(label, position)
xxxxxxxxxx
1. A fixed string -> abc123
2. Arbitrary repetition -> a*b ( "*" means that you can have an arbitrary
number (possibly 0) of the previous char ) # b, ab, aaab
3. Repeat character at least once -> a+b # ab, aaaab
4. Repeat character at most once -> a?b # b, ab
5. Repeat a character a fixed number of times -> a{5} # aaaaa
6. Repeat a pattern a fixed number of times -> (a*b){3} # baabab, ababaaaab
7. Repeat a character or pattern a variable number of times -> a{2,4} # aa, aaa, aaaa
8. Choice of several characters -> [ab]c # ac, bc
9. Arbitrary mixture of several characters -> [ab]*c # c, aac, abbac
10. Ranges of characters -> [A-H][a-z]* # Aasdfalsd, Hb, G
11. Characters OTHER than particular one -> [^AB] # C, D
12. Choice of several expressions -> Dr|Mr|Ms|Mrs # Dr, Mr, Mrs, Ms
13. Nesting expressions -> ([A-Z][a-z][0-9])* # Ab1, Ab1Cd2 (and the empty string)
14. Start of a line -> ^ab
15. End of a line -> ab$
#Type of pattern
1. Special characters -> \[ # [
2. Any character 'except' newline -> . # a, *, -
3. Nongreedy evaluation -> <.*?> # on <h1></h2 name = "foo"> matches only <h1> (greedy <.*> would match the whole string)
4. Whitespace -> \s
xxxxxxxxxx
import re

text = "test1, test2, test3"

# "test" followed by a single digit, so the pattern matches all three items.
# (The literal pattern "test1" would only ever match the first item, which
# contradicts the output documented below.)
regex = re.compile(r"test\d")

# Returns the (start, end) range of the match at the beginning of the string
print(regex.match(text).span())

# Returns the text with all matches replaced by other text
print(regex.sub("replace", text))

# Returns every match as a list of strings
print(regex.findall(text))

# OUT:
#
# (0, 5)
# replace, replace, replace
# ['test1', 'test2', 'test3']