Python re模块详解
.      匹配除换行符以外的任意字符
^      匹配字符串的开始
$      匹配字符串的结束
[]     用来匹配一个指定的字符类别
?      对于前一个字符重复0次到1次
*      对于前一个字符重复0次到无穷次
{m}    对于前一个字符重复m次
{m,n}  对前一个字符重复为m到n次
\d     匹配数字,相当于[0-9]
\D     匹配任何非数字字符,相当于[^0-9]
\s     匹配任意的空白符,相当于[ \t\n\r\f\v]
\S     匹配任何非空白字符,相当于[^ \t\n\r\f\v]
\w     匹配任何字母数字字符,相当于[a-zA-Z0-9_]
\W     匹配任何非字母数字字符,相当于[^a-zA-Z0-9_]
\b     匹配单词的开始或结束
>>> import re
>>> dir(re)
['DEBUG', 'DOTALL', 'I', 'IGNORECASE', 'L', 'LOCALE', 'M', 'MULTILINE', 'S', 'Scanner', 'T', 'TEMPLATE', 'U', 'UNICODE', 'VERBOSE', 'X', '_MAXCACHE', '__all__', '__builtins__', '__doc__', '__file__', '__name__', '__package__', '__version__', '_alphanum', '_cache', '_cache_repl', '_compile', '_compile_repl', '_expand', '_pattern_type', '_pickle', '_subx', 'compile', 'copy_reg', 'error', 'escape', 'findall', 'finditer', 'match', 'purge', 'search', 'split', 'sre_compile', 'sre_parse', 'sub', 'subn', 'sys', 'template']
>>>
>>> help(re.search)
search(pattern, string, flags=0)
>>> name="Hello,My name is kuangl,nice to meet you..."
>>> k=re.search(r'k(uan)gl',name)
>>> if k:
...     print k.group(0),k.group(1)
... else:
...     print "Sorry,not search!"
...
kuangl uan
>>> help(re.match)
match(pattern, string, flags=0)
>>> name="Hello,My name is kuangl,nice to meet you..."
>>> k=re.match(r"(\H....)",name)
>>> if k:
...     print k.group(0),'\n',k.group(1)
... else:
...     print "Sorry,not match!"
...
Hello
Hello
>>>
>>> help(re.findall)
findall(pattern, string, flags=0)
>>> mail='<user01@mail.com> <user02@mail.com> user04@mail.com' #第3个故意没有尖括号
>>> re.findall(r'(\w+@m....[a-z]{3})',mail)
['user01@mail.com', 'user02@mail.com', 'user04@mail.com']
>>> help(re.sub)
sub(pattern, repl, string, count=0)
>>> test="Hi, nice to meet you where are you from?"
>>> re.sub(r'\s','-',test)
'Hi,-nice-to-meet-you-where-are-you-from?'
>>> re.sub(r'\s','-',test,5) #替换至第5个
'Hi,-nice-to-meet-you-where are you from?'
>>>
>>> help(re.split)
split(pattern, string, maxsplit=0)
>>> test="Hi, nice to meet you where are you from?"
>>> re.split(r"\s+",test)
['Hi,', 'nice', 'to', 'meet', 'you', 'where', 'are', 'you', 'from?']
>>> re.split(r"\s+",test,3) #分割前三个
['Hi,', 'nice', 'to', 'meet you where are you from?']
>>>
>>> help(re.compile)
compile(pattern, flags=0)
>>> test="Hi, nice to meet you where are you from?"
>>> k=re.compile(r'\w*o\w*') #匹配带o的字符串
>>> dir(k)
['__copy__', '__deepcopy__', 'findall', 'finditer', 'match', 'scanner', 'search', 'split', 'sub', 'subn']
>>> print k.findall(test) #显示所有包含o的字符串
['to', 'you', 'you', 'from']
>>> print k.sub(lambda m: '['+m.group(0) +']',test) # 将字符串中含有o的单词用[]括起来
Hi, nice [to] meet [you] where are [you] [from]?
>>>
#!/usr/bin/env python
import urllib2
import re
import os
URL='Http://image.baidu.com/channel/wallpaper'
read=urllib2.urlopen(URL).read()
pat =re.compile(r'src="http://.+?.js">')
urls=re.findall(pat,read)
for i in urls:
    url=i.replace('src="','').replace('">','')
    try:
        iread=urllib2.urlopen(url).read()
        name=os.path.basename(url)
        with open(name,'wb') as jsname:
            jsname.write(iread)
    except:
        print url,"url error"
相关文章