# coding=utf-8
"""Interactive line filter for text files.

Reads ``input.txt`` from the current directory and writes ``output.txt``.
The user chooses between filtering lines with a regular expression and
removing duplicate lines. Runs unchanged on Python 2 and Python 3.
"""
import os
import re
import sys

try:
    _read_line = raw_input  # Python 2: returns the typed text without evaluating it
except NameError:
    _read_line = input  # Python 3: input() already behaves like raw_input()


def _strip_newlines(line):
    """Return ``line`` with every carriage return and line feed removed."""
    return line.replace('\r', '').replace('\n', '')


def _write_lines(output_file, lines):
    """Write ``lines`` to ``output_file`` joined by a newline, with no trailing newline."""
    is_first_line = True
    for line in lines:
        # The separator precedes every line except the first, so the output
        # never ends with a dangling newline.
        output_file.write(line if is_first_line else '\n' + line)
        is_first_line = False


def _select_text(pattern, operation_code, line):
    """Return the text to emit for ``line`` under ``operation_code``, or None.

    '1' yields the matched substring, '2' the whole line when it matches,
    and '3' the whole line when it does not match.
    """
    match_obj = pattern.search(line)
    if operation_code == '1':
        return match_obj.group() if match_obj else None
    if operation_code == '2':
        return line if match_obj else None
    return None if match_obj else line


def use_reg(input_file, output_file):
    """Filter ``input_file`` into ``output_file`` with a user-supplied regex.

    Prompts on the console for a regular expression and for an output mode
    (1: matched substrings, 2: matching lines, 3: non-matching lines).
    Empty results are skipped, so blank lines never reach the output.

    Args:
        input_file: iterable of text lines (an open file object).
        output_file: object with a ``write`` method receiving the result.
    """
    print('\n正则表达式示例:匹配 IP 地址:192.168.[0-9]{1,3}.[0-9]{1,3},匹配域名:[^\\s]*\\.test\\.com')

    # Keep asking until the pattern is non-empty and compiles; an invalid
    # pattern would otherwise abort the script with a traceback.
    pattern = None
    while pattern is None:
        reg = _read_line('→ 请输入正则表达式:')
        if reg == '':
            continue
        try:
            pattern = re.compile(reg)
        except re.error as err:
            print('无效的正则表达式:' + str(err) + '\n')

    print('\n→ 把正则表达式【匹配到的字符串】输出到文件,请输入 1')
    print('→ 把正则表达式【匹配到的行】输出到文件,请输入 2')
    print('→ 把正则表达式【匹配不到的行】输出到文件,请输入 3')

    while True:
        operation_code = _read_line('→ 请输入数字:')
        if operation_code in ('1', '2', '3'):
            break
        print('无效字符,请重新输入!\n')

    selected = (
        _select_text(pattern, operation_code, _strip_newlines(line))
        for line in input_file
    )
    # Truthiness test on purpose: drops both None (no result) and ''.
    _write_lines(output_file, (text for text in selected if text))


def delete_duplicate_lines(input_file, output_file):
    """Copy ``input_file`` to ``output_file`` keeping only the first copy of each line.

    Lines are compared after removing CR/LF characters. Output lines are
    separated by a newline and the output has no trailing newline.

    Args:
        input_file: iterable of text lines (an open file object).
        output_file: object with a ``write`` method receiving the result.
    """
    seen_lines = set()

    def unique_lines():
        for raw_line in input_file:
            line = _strip_newlines(raw_line)
            if line not in seen_lines:
                seen_lines.add(line)
                yield line

    _write_lines(output_file, unique_lines())


def main():
    """Run the interactive menu against input.txt and output.txt."""
    print('当前 Python 版本号为:' + str(sys.version) + '\n')

    input_txt = "input.txt"
    output_txt = "output.txt"

    # Remove the output file left over from a previous run.
    if os.path.exists(output_txt):
        os.remove(output_txt)
        print('output.txt 文件已自动删除!\n')

    # The with-statement closes both files even when processing raises.
    with open(output_txt, 'a') as output_file, open(input_txt, 'r') as input_file:
        print('提示:输入文件为 input.txt ,输出文件为 output.txt\n')
        print('→ 使用正则表达式请输入 1')
        print('→ 删除重复的行请输入 2')
        print('→ 直接退出请按 0')

        while True:
            operation_code = _read_line('→ 请输入数字:')
            if operation_code == '1':
                use_reg(input_file, output_file)
                break
            if operation_code == '2':
                delete_duplicate_lines(input_file, output_file)
                break
            if operation_code == '0':
                break
            print('无效字符,请重新输入!\n')

    print('\n已退出' if operation_code == '0' else '\n处理完成,请查看 output.txt 文件!(下次执行该脚本时 output.txt 文件会自动删除)')


if __name__ == "__main__":
    main()