import pandas as pd
# Build a small example frame whose cells mix digits with letters and dots.
dataframe = pd.DataFrame(
    {
        'Col1': ['1a', '2a', '3a'],
        'Col2': ['1a.1b', '2a.2b', '3a.3b'],
        'Col3': ['1a.1b.1c', '2a.2b.2c', '3a.3b.3c'],
    }
)

# Drop every run of non-digit characters from Col2 (e.g. '1a.1b' -> '11'),
# then convert the remaining digit strings to integers.
dataframe['Col2'] = (
    dataframe['Col2']
    .str.replace(pat=r'\D+', repl='', regex=True)
    .astype(int)
)

# Bare expression: displays the frame when run in a notebook/REPL cell.
dataframe
# Col1 Col2 Col3
# 0 1a 11 1a.1b.1c
# 1 2a 22 2a.2b.2c
# 2 3a 33 3a.3b.3c
# Explaining the str.replace() parameters:
# '\D+' is a regular expression, that's why we need to pass 'regex=True'
# the '\D' matches any non-digit (any character other than 0-9)
# the '+' matches 1 or more of the preceding token
# the 'r' prefix tells Python that '\D+' should be interpreted as a raw string
# (so the backslash is passed to the regex engine instead of being treated as an escape)
# to know more about regular expressions visit:
# https://www.rexegg.com/regex-quickstart.html