import re
# Sample text: "Hello", a 3-digit number, a 4-digit number, then more words.
content = 'Hello 123 4567 World_This is a Regex Demo'
# The pattern is a raw string so that \s, \d and \w reach the regex engine
# unchanged instead of being parsed as (invalid) Python string escapes.
# ^Hello anchors at the start, \d{3} and \d{4} match the two numbers, and
# \w{10} matches the ten word characters "World_This".
r = re.match(r"^Hello\s\d{3}\s\d{4}\s\w{10}", content)
print(r)          # the match object itself
print(r.group())  # the text that was matched
print(r.span())   # (start, end) indices of the match
import re
# Sample text with a single run of digits to capture.
content = 'Hello 1234567 World_This is a Regex Demo'
# Raw string keeps \s and \d as regex escapes rather than invalid Python
# string escapes. The parentheses form group 1, which captures the digits.
result = re.match(r'^Hello\s(\d+)\sWorld', content)
print(result)
print(result.group())   # whole match
print(result.group(1))  # only the captured digits
print(result.span())    # (start, end) indices of the whole match
import re
# Generic match: ".*" stands in for everything between "Hello" and "Demo".
content = 'Hello 123 4567 World_This is a Regex Demo'
result = re.match(pattern='^Hello.*Demo$', string=content)
# Emit the match object, the matched text and the span, one per line.
print(result, result.group(), result.span(), sep='\n')
运行结果:
<_sre.SRE_Match object; span=(0, 41), match='Hello 123 4567 World_This is a Regex Demo'>
Hello 123 4567 World_This is a Regex Demo
(0, 41)
贪婪与非贪婪
贪婪匹配下,.* 会匹配尽可能多的字符
非贪婪匹配的写法是 .*?,它会匹配尽可能少的字符
import re
# Greedy: the first ".*" consumes as many characters as it can, leaving only
# the last digit for the (\d+) group.
content = 'Hello 123456789 World'
# Raw strings keep \d as a regex escape instead of an invalid string escape.
result = re.match(r'^Hello.*(\d+).*World$', content)
print(result)
print(result.group(1))
# Non-greedy: ".*?" consumes as few characters as it can, so the (\d+) group
# captures the whole run of digits.
result = re.match(r'^Hello.*?(\d+).*World$', content)
print(result)
print(result.group(1))
import re
# Sample text that spans two lines.
content = '''Hello 1234567 World_This
is a Regex Demo
'''
# re.S (DOTALL) lets "." match the newline as well, so ".*?" can run across
# the line break. The raw string keeps \d as a regex escape rather than an
# invalid Python string escape.
result = re.match(r'^He.*?(\d+).*?Demo$', content, re.S)
print(result.group(1))
运行结果:
1234567
如果没有 re.S,. 不能匹配换行符,re.match() 匹配失败并返回 None,此时调用 result.group(1) 会抛出 AttributeError
转义匹配
用\将字符进行转义。
import re
# Text containing regex metacharacters (parentheses and dots) to be matched
# literally.
content = '(百度)www.baidu.com'
# Raw string so the backslashes reach the regex engine, where \( \) and \.
# mean a literal parenthesis and a literal dot.
result = re.match(r'\(百度\)www\.baidu\.com', content)
print(result)
search()
match() 方法是从字符串的开头开始匹配,一旦开头不匹配,那么整个匹配就失败了;而 search() 方法会扫描整个字符串,并返回第一个成功匹配的结果。
import re
# The text does not start with "Hello", so re.match() (which only matches at
# the beginning of the string) finds nothing and returns None.
content = 'Extra stings Hello 1234567 World_This is a Regex Demo Extra stings'
# Raw string keeps \d as a regex escape rather than an invalid string escape.
result = re.match(r'Hello.*?(\d+).*?Demo', content)
print(result)
# Capture the value of the singer attribute (group 1) and the link text
# (group 2) of the first matching <a> element.
pattern = '<a.*?singer="(.*?)">(.*?)</a>'
# re.S lets "." match newlines, so one element may span several lines.
# NOTE(review): `html` is not assigned anywhere in this snippet; it must be
# defined before this code runs.
result = re.search(pattern, html, re.S)
# re.search() returns None when nothing matches, so guard before reading groups.
if result:
    print(result.group(1), result.group(2))
findall()
前面介绍了 search() 方法的用法,它可以返回匹配正则表达式的第一个内容。
如果想要获取匹配正则表达式的所有内容,就需要借助 findall() 方法。
findall() 方法会搜索整个字符串然后返回匹配正则表达式的所有内容
# Capture the value of the singer attribute (group 1) and the link text
# (group 2) of every matching <a> element.
pattern = '<a.*?singer="(.*?)">(.*?)</a>'
# re.S lets "." match newlines, so one element may span several lines.
# NOTE(review): `html` is not assigned anywhere in this snippet; it must be
# defined before this code runs.
results = re.findall(pattern, html, re.S)
print(results)
print(type(results))
# With two groups in the pattern, findall() yields one 2-tuple per match, so
# only indices 0 and 1 exist (index 2 would raise IndexError).
for result in results:
    print(result)
    print(result[0], result[1])