In [1]:
import re
In [2]:
# Uncomment the next line to browse the full documentation of the re module.
#help(re)

Extract numbers from a string

In [3]:
# Sample rental listing whose price ("860") has no thousands separator.
s1 = "Maison 3 pièce(s) - 68.05 m² - 860 € par mois charges comprises"
In [4]:
# Same listing, but the price ("1 860") uses a space as thousands separator.
s2 = "Maison 3 pièce(s) - 68.05 m² - 1 860 € par mois charges comprises"
In [5]:
# Pattern: one or more digits, an optional ".", then any further digits,
# so both integers ("3") and decimals ("68.05") are matched.
re.findall(r'\d+\.?\d*', s1)
Out[5]:
['3', '68.05', '860']
In [6]:
# Same pattern on s2: the pattern has no provision for the space inside
# "1 860", so the price comes back as two separate matches.
re.findall(r'\d+\.?\d*', s2)
Out[6]:
['3', '68.05', '1', '860']
In [7]:
# Add \b on both sides so a match must start and end at a word boundary;
# for s1 the result is the same as without the boundaries.
re.findall(r'\b\d+\.?\d*\b', s1)
Out[7]:
['3', '68.05', '860']
In [8]:
# Word boundaries alone do not help with s2: "1" and "860" are still
# returned as separate matches.
re.findall(r'\b\d+\.?\d*\b', s2)
Out[8]:
['3', '68.05', '1', '860']
In [9]:
# Allow one optional space after the leading digits so that a thousands group
# can be part of the match. Side effect: the space following a plain number
# is consumed too ("3 ", "860 ").
re.findall(r'\d+ ?\d*\.?\d*', s1)
Out[9]:
['3 ', '68.05', '860 ']
In [10]:
# On s2 the price is now captured as the single token "1 860"; "3 " still
# carries its trailing space.
re.findall(r'\d+ ?\d*\.?\d*', s2)
Out[10]:
['3 ', '68.05', '1 860']
In [11]:
# With \b on both ends the optional space is only kept when a word character
# follows it: "860 " becomes "860" (next character is "€"), while "3 " keeps
# its space (next character is "p").
re.findall(r'\b\d+ ?\d*\.?\d*\b', s1)
Out[11]:
['3 ', '68.05', '860']
In [12]:
# Same pattern on s2: "1 860" is kept as a single token.
re.findall(r'\b\d+ ?\d*\.?\d*\b', s2)
Out[12]:
['3 ', '68.05', '1 860']
In [13]:
# Convert every number found in s2 to a float.
#
# A thousands separator is only accepted when it is followed by exactly three
# digits, and the group may repeat ("1 860 000"). The earlier, looser form
# `\d+ ?\d*` would also glue two unrelated neighbours together
# ("3 68.05" -> 368.05) and stop after a single group.
# Besides the ASCII space, the no-break space (U+00A0) and the narrow no-break
# space (U+202F) are accepted as separators.
#
# "".join(token.split()) removes every whitespace character inside the token
# (str.split() without arguments splits on any Unicode whitespace), leaving a
# string that float() can parse.
[
    float("".join(token.split()))
    for token in re.findall(r'\b\d+(?:[ \u00a0\u202f]\d{3})*\.?\d*\b', s2)
]
Out[13]:
[3.0, 68.05, 1860.0]

Search patterns

In [14]:
# Listing text used by the search examples (same text as s1).
s = "Maison 3 pièce(s) - 68.05 m² - 860 € par mois charges comprises"
In [15]:
# re.search scans the whole string; any truthy result means "Maison" occurs
# somewhere in the listing.
print("Found!" if re.search(r'Maison', s) else "Not found!")
Found!
In [16]:
# "Appartement" does not occur in the listing, so re.search returns None and
# the fallback message is printed.
print("Found!" if re.search(r'Appartement', s) else "Not found!")
Not found!
In [17]:
# Unlike re.search, re.match only tries the pattern at the very beginning of
# the string; it succeeds here because the listing starts with "Maison".
print("Found!" if re.match(r'Maison', s) else "Not found!")
Found!

Search and capture patterns

In [18]:
# Listing text used by the capture examples (same text as s1).
s = "Maison 3 pièce(s) - 68.05 m² - 860 € par mois charges comprises"
In [19]:
# Capture the integer that directly precedes " pièce" (the room count).
# Group 1 holds the digits; m[1] is shorthand for m.group(1).
m = re.search(r'\b(\d+) pièce', s)
print(int(m[1]) if m else "Not found!")
3
In [20]:
# Capture the (possibly decimal) number that directly precedes " m²".
# Group 1 holds the number as text; it is converted to float for display.
m = re.search(r'\b(\d+\.?\d*) m²', s)
print(float(m[1]) if m else "Not found!")
68.05
In [21]:
# Capture the price that directly precedes " €".
#
# Prices may carry thousands separators ("1 860 €"). The capture therefore
# accepts repeated groups of a separator followed by exactly three digits,
# where the separator is a space, a no-break space (U+00A0) or a narrow
# no-break space (U+202F). Without this, "1 860 €" would be reported as 860.0.
m = re.search(r'\b(\d+(?:[ \u00a0\u202f]\d{3})*\.?\d*) €', s)
if m:
    # Drop the separators before converting: float() rejects inner whitespace.
    print(float("".join(m.group(1).split())))
else:
    print("Not found!")
860.0
In [22]:
# Same listing with "pièce" written in mixed case ("PIÈce"), used to try out
# case-insensitive matching.
s = "Maison 3 PIÈce(s) - 68.05 m² - 860 € par mois charges comprises"

Without re.compile()

In [23]:
# One-off search: the flag is handed straight to re.search, so "pièce" is
# matched regardless of letter case.
m = re.search(r'\b(\d+) pièce', s, flags=re.IGNORECASE)
print(int(m[1]) if m else "Not found!")
3

With re.compile()

In [24]:
# Compile the pattern once, with its flag, so the resulting object can be
# reused for several searches.
num_pieces = re.compile(r'\b(\d+) pièce', flags=re.IGNORECASE)

# Group 1 holds the room count; m[1] is shorthand for m.group(1).
m = num_pieces.search(s)
print(int(m[1]) if m else "Not found!")
3