前言

最近碰到一个问题，设备出现40W+的告警信息，涉及300多条规则，如何才能给出一个分析报告呢？或者说我如何把规则匹配到的数据包里的东西展示给对方呢？于是，我开发了这样一个仿Snort的工具。

实现逻辑

它可以直接读取snort的规则，然后跟数据包进行匹配。snort规则中最常用到的两个关键字是pcre和content，而content是无需和数据包匹配的，所以只需把提取出的pcre中的内容与数据包中的内容进行比对即可。

脚本会运行两轮：第一轮直接读取规则中的content，并写入Excel报表；
第二轮读取规则中pcre的内容，与经过URL解码的数据包进行匹配，并将匹配结果追加到Excel报表中。
脚本不会对所有数据包进行匹配，它会首先获取到所有数据包。由于一条规则可能对应很多数据包，所以脚本会自动去重，仅对其中的一个数据包进行匹配！！！并且，可能一个规则中存在多个正则，脚本只会获取第一个正则！！！

局限

content是直接拿来用的，没啥好说的，主要是pcre，脚本直接对数据包做url解码。从解码类型说，支持类型单一；从攻击类型上说，也就对web攻击较好。以后如果工作中遇到更复杂的需求的话，再改吧，现在这个脚本已经能帮我顺利完成工作了。

脚本

你需要同时提供：规则文件、数据包、写入结果的excel表

import logging
import os
import pprint
import re
import urllib.parse

import openpyxl
from scapy.all import *

# Raise the "scapy.runtime" logger threshold to ERROR so that lower-severity
# scapy messages are not printed while the script runs.
logging.getLogger("scapy.runtime").setLevel(logging.ERROR)


def get_tid_content():
    """Collect the ``content`` signatures of every rule, keyed by tid.

    Reads the rule file at the module-level ``rules_path`` (GBK encoded, one
    rule per line). For each line that contains the ``content`` keyword, every
    ``content:"..."`` value is collected, rendered as a comma-separated list
    of quoted strings and stored in the module-level ``all_message`` dict
    under the rule's tid.

    Lines that carry no ``tid:<digits>`` option are skipped, so a signature is
    never filed under the tid of a previous rule. If a line holds several
    tids, the last one is used.

    Returns:
        dict: ``all_message`` itself, mapping tid strings to evidence text.
    """
    with open(rules_path, 'rb') as f:
        for raw_line in f:
            line = raw_line.decode('gbk')
            if 'content' not in line:
                continue

            tids = re.findall(r'tid\x3a(\d+)', line)
            if not tids:
                # No tid on this rule: there is no key to store it under.
                continue

            signatures = re.findall(r'content\x3a\x22(.*?)\x22', line)
            # Same rendering as str(list) without the outer brackets, but
            # brackets that are part of a signature are left untouched.
            rendered = ', '.join(repr(signature) for signature in signatures)
            all_message[tids[-1]] = "存在攻击特征：" + rendered
    return all_message


def content_to_excel():
    """Write the collected ``content`` evidence into the Excel report.

    Opens the workbook at the module-level ``excel_path`` and walks every row
    of the active sheet. When the value in column ``tid_column`` is a key of
    ``all_message``, the corresponding evidence text is written into column
    ``evidence_column`` of the same row. The workbook is then saved back to
    ``excel_path``.
    """
    workbook = openpyxl.load_workbook(excel_path)
    try:
        sheet = workbook.active
        print('[+] 开始写入content匹配的结果，当前活动表是：' + str(sheet))

        for row in range(1, sheet.max_row + 1):
            tid = sheet.cell(row, tid_column).value
            # Numeric cells are returned as int/float, while the keys of
            # all_message are digit strings taken from the rule text;
            # normalise so that both spellings of the same tid compare equal.
            if isinstance(tid, float) and tid.is_integer():
                tid = int(tid)
            evidence = all_message.get(str(tid).strip())
            if evidence is not None:
                sheet.cell(row, evidence_column, evidence)

        workbook.save(excel_path)
    finally:
        # Close the workbook even when saving fails (e.g. the file is locked).
        workbook.close()
    print('[-] content写入结束')


def pcap_parser(filename, keyword):
    """Search one capture file for a rule's regex and record the first hit.

    Every packet with a Raw layer has its payload decoded (default codec),
    stripped and URL-decoded, then searched case-insensitively with
    ``keyword``. Only the first matching packet is used: its matched text is
    printed and appended to ``info[tid]``, where the tid is the part of the
    file's base name after ``Event`` and before the first dot.

    Args:
        filename: Path of the pcap/pcapng file to read.
        keyword: Regular expression taken from the rule's pcre option.

    Packets without a Raw layer, or whose payload cannot be decoded, are
    skipped. An uncompilable ``keyword`` is reported as a failed match instead
    of being silently retried on every packet.
    """
    pcap_name = os.path.basename(filename)
    # Derive the tid from the base name only, so dots or the word "Event" in
    # a directory name cannot corrupt it.
    pcap_tid = pcap_name.split('.', 1)[0].split('Event')[-1]

    try:
        pattern = re.compile(keyword, re.I)
    except re.error as exc:
        print(pcap_name + '\t' + '匹配失败！' + '\t' + '正则无效：' + str(exc))
        return

    matched = False
    for pkt in rdpcap(filename).res:
        raw_layer = pkt.getlayer('Raw')
        if raw_layer is None:
            continue
        try:
            # The payload is what Wireshark shows when following the stream.
            payload = raw_layer.fields['load'].decode().strip()
        except (KeyError, AttributeError, UnicodeDecodeError):
            # Missing or non-text payload: nothing to match in this packet.
            continue

        hit = pattern.search(urllib.parse.unquote(payload))
        if hit is None:
            continue

        match_re = hit.group()
        print(pcap_name + '\t' + '匹配成功：' + '\t' + match_re)
        if pcap_tid in info:
            info[pcap_tid].append(match_re)
        else:
            # Say so explicitly rather than dropping the result unnoticed.
            print(pcap_name + '\t' + '未找到对应的tid：' + '\t' + pcap_tid)
        matched = True
        break

    if not matched:
        print(pcap_name + '\t' + '匹配失败！')


def pcre_to_excel():
    """Append the regex match results stored in ``info`` to the Excel report.

    Opens the workbook at the module-level ``excel_path`` and walks every row
    of the active sheet. A row is updated when the value in column
    ``tid_column`` is a key of ``info`` and that entry has exactly three items
    (its last item being the matched text). The text ``正则匹配：<match>`` is
    written to column ``evidence_column``; if that cell already has a value,
    the new text is concatenated after it. The workbook is then saved back to
    ``excel_path``.
    """
    workbook = openpyxl.load_workbook(excel_path)
    try:
        sheet = workbook.active
        print('====================================================')
        print('[1]：正则匹配数据包结束')
        print('[2]：开始把正则匹配的结果写到Excel，当前活动表是：' + str(sheet))

        for row in range(1, sheet.max_row + 1):
            tid = sheet.cell(row, tid_column).value
            # Numeric cells are returned as int/float, while the keys of info
            # are digit strings taken from the rule text; normalise so that
            # both spellings of the same tid compare equal.
            if isinstance(tid, float) and tid.is_integer():
                tid = int(tid)
            record = info.get(str(tid).strip())
            if record is None or len(record) != 3:
                # Unknown tid, or no regex match was recorded for it.
                continue

            regex_evidence = '正则匹配：' + record[-1]
            existing = sheet.cell(row, evidence_column).value
            if existing is None:
                sheet.cell(row, evidence_column, regex_evidence)
            else:
                # Keep what is already in the cell and append the regex hit.
                sheet.cell(row, evidence_column, str(existing) + regex_evidence)

        workbook.save(excel_path)
    finally:
        # Close the workbook even when saving fails (e.g. the file is locked).
        workbook.close()
    print('[3]：pcre写入结束')


def deal_tid_pcre_pcap():
    """Bind each rule's tid to its first pcre and to one capture file.

    Works on the module-level ``info`` dict in three passes:

    1. Read the rule file at ``rules_path`` (GBK, one rule per line) and store
       ``info[tid] = [first_pcre]`` for every line that has both a tid and a
       pcre option. Lines missing either one are skipped.
    2. Walk ``files_path`` recursively and collect the path of every file
       whose name ends with ``pcap`` or ``pcapng``.
    3. For each tid, append the first collected path whose file name (up to
       its first dot) contains that tid as a whole number, so tid ``123``
       is not bound to ``Event1234.pcap``. A tid with no capture keeps a
       one-item list.
    """
    # Pass 1: tid -> [pcre]. Only the first pcre of a rule is kept.
    with open(rules_path, 'rb') as f:
        for raw_line in f:
            line = raw_line.decode('gbk')
            tids = re.findall(r'tid\x3a(\d+)', line)
            pcres = re.findall(r'pcre\x3a\x22\x2f(.*?)\x2f\w{0,4}\x22\x3b', line)
            if not tids or not pcres:
                continue
            info[tids[0]] = [pcres[0]]

    # Pass 2: paths of all capture files below files_path.
    abs_pcap_filepath = []
    for current_folder, _sub_folders, files in os.walk(files_path):
        for file_name in files:
            if file_name.endswith(('pcap', 'pcapng')):
                # os.path.join uses the separator of the running platform.
                abs_pcap_filepath.append(os.path.join(current_folder, file_name))

    # Pass 3: a tid may have many captures; keep only the first one found.
    for re_tid, record in info.items():
        whole_tid = re.compile(r'(?<!\d)' + re.escape(re_tid) + r'(?!\d)')
        for pcap_path in abs_pcap_filepath:
            stem = os.path.basename(pcap_path).split('.', 1)[0]
            if whole_tid.search(stem):
                record.append(pcap_path)
                break


def work():
    """Drive the whole run: content evidence first, then pcre evidence."""
    print('[+] 程序开始运行')

    # Round 1: copy the rules' content signatures into the Excel report.
    get_tid_content()
    content_to_excel()

    # Round 2: match each rule's pcre against its capture and append the hit.
    print('[+] 开始获取tid、pcre、数据包路径')
    deal_tid_pcre_pcap()

    print('[+] 开始使用正则匹配数据包中的内容')
    for record in info.values():
        if len(record) != 2:
            # Only entries holding exactly [pcre, pcap path] are searched.
            continue
        pcre_text, pcap_file = record
        pcap_parser(pcap_file, pcre_text)
    print('[-] 匹配结束')

    pcre_to_excel()


if __name__ == '__main__':
    # Command-line overrides for the inputs; with no arguments the script
    # runs with the same paths and column numbers as before.
    import argparse

    cli_parser = argparse.ArgumentParser(
        description='Match Snort-style rules against captures and write the '
                    'evidence into an Excel report.')
    cli_parser.add_argument('--excel', default=r'C:\Users\asuka\Desktop\test.xlsx',
                            help='Excel workbook that is read and updated in place')
    cli_parser.add_argument('--rules', default=r'C:\Users\asuka\Desktop\test.rules',
                            help='rule file, one rule per line')
    cli_parser.add_argument('--pcaps', default=r"C:\Users\asuka\Desktop\test",
                            help='folder that is searched recursively for captures')
    cli_parser.add_argument('--tid-column', type=int, default=2,
                            help='1-based column number holding the tid')
    cli_parser.add_argument('--evidence-column', type=int, default=7,
                            help='1-based column number receiving the evidence')
    cli_args = cli_parser.parse_args()

    # Module-level state read by the functions above.
    all_message = {}  # tid -> evidence text built from the content options
    info = {}  # tid -> [pcre, pcap path, matched text]
    tid_column = cli_args.tid_column
    evidence_column = cli_args.evidence_column
    excel_path = cli_args.excel
    rules_path = cli_args.rules
    files_path = cli_args.pcaps
    work()
    print('Done,Enjoy It')