技术宅的结界

 找回密码
 立即注册→加入我们

QQ登录

只需一步,快速开始

搜索
热搜: 下载 VB C 实现 编写
查看: 310|回复: 0
收起左侧

【C】将正则表达式转换为FSA

[复制链接]

9

主题

15

帖子

294

积分

用户组: 中·技术宅

UID
6266
精华
5
威望
37 点
宅币
150 个
贡献
30 次
宅之契约
0 份
在线时间
9 小时
注册时间
2020-9-26
发表于 2020-11-30 21:35:00 | 显示全部楼层 |阅读模式

欢迎访问技术宅的结界,请注册或者登录吧。

您需要 登录 才可以下载或查看,没有帐号?立即注册→加入我们

x
以下程序代码实现了将一个正则表达式转换为FSA(Finite-State Automation 有穷状态自动机)的功能。
本程序需要https://github.com/coshcage/StoneValley库的支持。
程序运行后需要输入一个合法的正则表达式,例如 a(ba|ca)|ac|ab) ,程序的输出是该正则表达式的逆波兰形式和转移表。
注意,输入的正则表达式支持‘(’、‘)’、‘|’和‘*’运算。+和?需要自行转换。
a+ == aa*
a? == a | epsilon
在输出的转移表内‘>’表示开始状态,'*'表示结束状态。(数字)为状态,‘\数字’为输入符号。‘\0’表示空转移。
该程序算法的大致原理是:
1、将正则表达式转换为相对应的逆波兰式。
2、扫描逆波兰式,将输入符号取出后转换为状态+输入符号的图。将图的两个节点压入栈内。
3、将运算符取出,并取出相应的图。在相应的图上完成运算后再将图的两个节点压回栈内。
4、对于连接运算,将栈内存储的图节点取出一一做连接运算后压入栈内。
5、最后从站内取出的节点组合就是该自动机的起始节点和结束节点。
该程序未实现自动机的化简。
如果需要化简自动机,需要自行实现删除空环路、删除空变迁、自动机确定化以及自动机约简四个步骤。
有了转移表,就可以使用表驱动的方法运行自动机,识别正则语言。
因为本人水平有限,不到位之处还请大家指出。
[C] 纯文本查看 复制代码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "svgraph.h"
#include "svstack.h"
#include "svqueue.h"

STACK_L stkOperator, stkOperand;
QUEUE_L queRegexp;
GRAPH_L machine;

struct st_Operator
{
	char name;
	char level;
} operators[] = { '(', -1, ')', -1, '*', 2, '|', 1 };

typedef struct st_Element
{
	union un_Data
	{
		char operator;
		char operand;
	} data;
	BOOL isOperator;
} Element, * P_Element;

typedef struct st_Node
{
	size_t x;
	size_t y;
} Node, * P_Node;

/* Finding info for edges. */
typedef struct _st_FIEDG {
	EDGE     vertex;
	P_NODE_S pnode;
	size_t   bweight; /* TRUE for weighted graph; FALSE for unweighted graph. */
} _FIEDG, * _P_FIEDG;

int cbftvsFindMaxData(void * pitem, size_t param)
{
	if (*(size_t *)((P_TNODE_B)pitem)->pdata > * (size_t *)param)
		*(size_t *)param = *(size_t *)((P_TNODE_B)pitem)->pdata;
	return CBF_CONTINUE;
}

size_t GenerateUniqueState(P_GRAPH_L pgrp)
{
	if (setIsEmptyT(pgrp))
		return 1;
	{
		size_t r = 0;
		setTraverseT(pgrp, cbftvsFindMaxData, (size_t)&r, ETM_LEVELORDER);
		return r + 1;
	}
}

char GetOperator(char o)
{
	int i;
	for (i = 0; i < sizeof operators / sizeof operators[0]; ++i)
		if (operators[i].name == o)
			return operators[i].level;
	return 0;
}

int Splitter(char * pstr)
{
	BOOL turn = FALSE;
	char c;
	Element ele;
	while (*pstr != '\0')
	{
		switch (*pstr)
		{
		case '\\':
			if (turn)
			{
				ele.data.operand = *pstr;
				ele.isOperator = FALSE;
				queInsertL(&queRegexp, &ele, sizeof(Element));
				turn = FALSE;
				break;
			}
			turn = TRUE;
			break;
		case '(':
		case ')':
		case '|':
		case '*':
			if (turn)
			{
				ele.data.operand = *pstr;
				ele.isOperator = FALSE;
				queInsertL(&queRegexp, &ele, sizeof(Element));
			}
			else
			{
				switch (*pstr)
				{
				case '(':
					break;
				case ')':
					if (! stkIsEmptyL(&stkOperator))
					{
						stkPeepL(&c, sizeof(char), &stkOperator);
						while (c != '(')
						{
							stkPopL(&c, sizeof(char), &stkOperator);
							ele.data.operand = c;
							ele.isOperator = TRUE;
							queInsertL(&queRegexp, &ele, sizeof(Element));
							if (stkIsEmptyL(&stkOperator))
								break;
							else
								stkPeepL(&c, sizeof(char), &stkOperator);
						}
						stkPopL(&c, sizeof(char), &stkOperator); /* Drop right brace. */
					}
					else
					{
						printf("Right brace mismatch.\n\n");
						return 1;
					}
					goto Lbl_PassOperator;
				default:
					if (! stkIsEmptyL(&stkOperator))
					{
						stkPeepL(&c, sizeof(char), &stkOperator);
						while (GetOperator(c) >= GetOperator(*pstr))
						{
							stkPopL(&c, sizeof(char), &stkOperator);
							ele.data.operand = c;
							ele.isOperator = TRUE;
							queInsertL(&queRegexp, &ele, sizeof(Element));
							if (stkIsEmptyL(&stkOperator))
								break;
							else
								stkPeepL(&c, sizeof(char), &stkOperator);
						}
					}
					break;
				}
				stkPushL(&stkOperator, pstr, sizeof(char));
			}
		Lbl_PassOperator:
			turn = FALSE;
			break;
		default:
			ele.data.operand = *pstr;
			ele.isOperator = FALSE;
			queInsertL(&queRegexp, &ele, sizeof(Element));
			turn = FALSE;
			break;
		}
		++pstr;
	}
	while (! stkIsEmptyL(&stkOperator))
	{
		stkPopL(&c, sizeof(char), &stkOperator);
		if (c == '(')
		{
			puts("Left brace mismatch.\n");
			return 1;
		}
		ele.data.operand = c;
		ele.isOperator = TRUE;
		queInsertL(&queRegexp, &ele, sizeof(Element));
	}
	if (queIsEmptyL(&queRegexp))
	{
		puts("Invalid Expression.\n");
		return 1;
	}
	return 0;
}

/*
 *  _______          __a__
 *  |     |         |     V
 * (A) a (B) ====> (A)   (B)
 *  |_____|
 */
Node CombineNode5(char transition)
{
	Node node;
	node.x = GenerateUniqueState(&machine);
	grpInsertVertexL(&machine, node.x);
	node.y = GenerateUniqueState(&machine);
	grpInsertVertexL(&machine, node.y);
	grpInsertEdgeL(&machine, node.x, node.y, (size_t)transition);
	return node;
}

/*
 *  ________          __x__  __e__  __y__
 *  |      |         |     V/     V/     V
 * (A) xy (B) ====> (A)   (C)    (D)    (B)
 *  |______|
 */
Node CombineNode1(Node nodeA, Node nodeB)
{
	Node nodeR;
	grpInsertEdgeL(&machine, nodeA.y, nodeB.x, 0); /* Epsilon transition. */
	nodeR.x = nodeA.x;
	nodeR.y = nodeB.y;
	return nodeR;
}

/*
 *  _________          __e__  __x__  __e__
 *  |       |         |     V/     V/     V
 * (A) x|y (B) ====> (A)   (C)    (D)    (B)
 *  |_______|         |      __y___       ^
 *                    \     |      |      |
 *                     \e->(E)    (F)--e-/
 */
Node CombineNode2(Node nodeA, Node nodeB)
{
	Node nodeR;
	nodeR.x = GenerateUniqueState(&machine);
	grpInsertVertexL(&machine, nodeR.x);
	nodeR.y = GenerateUniqueState(&machine);
	grpInsertVertexL(&machine, nodeR.y);

	grpInsertEdgeL(&machine, nodeR.x, nodeA.x, 0); /* Epsilon transition. */
	grpInsertEdgeL(&machine, nodeR.x, nodeB.x, 0);
	grpInsertEdgeL(&machine, nodeA.y, nodeR.y, 0);
	grpInsertEdgeL(&machine, nodeB.y, nodeR.y, 0);

	return nodeR;
}

/*
 *  _________          __e__  __x__  __e__
 *  |       |         |     V/     V/     V
 * (A) x|y (B) ====> (A)   (C)    (D)    (B)
 *  |_______|         ^\_______e____>____/|
 *                    \________e____<_____/
 */
Node CombineNode3(Node nodeA)
{
	Node nodeR;
	nodeR.x = GenerateUniqueState(&machine);
	grpInsertVertexL(&machine, nodeR.x);
	nodeR.y = GenerateUniqueState(&machine);
	grpInsertVertexL(&machine, nodeR.y);

	grpInsertEdgeL(&machine, nodeR.x, nodeA.x, 0);
	grpInsertEdgeL(&machine, nodeA.y, nodeR.y, 0);
	grpInsertEdgeL(&machine, nodeR.x, nodeR.y, 0);
	grpInsertEdgeL(&machine, nodeR.y, nodeR.x, 0);

	return nodeR;
}

int cbftvsPrint(void * pitem, size_t param)
{
	P_Element pele = (P_Element)((P_NODE_S)pitem)->pdata;
	if (! pele->isOperator)
		printf(" (%c)", pele->data.operand);
	else
		printf(" %c", pele->data.operator);
	return CBF_CONTINUE;
}

//int CBFFindEdgeInList(void * pitem, size_t param)
//{
//	_P_FIEDG pd = (_P_FIEDG)param;
//	if (((P_EDGE)((P_NODE_S)pitem)->pdata)->weight == pd->vertex.weight)
//	{
//		pd->vertex.vid = ((P_EDGE)((P_NODE_S)pitem)->pdata)->vid;
//		pd->pnode = (P_NODE_S)pitem;
//		return CBF_TERMINATE;
//	}
//	return CBF_CONTINUE;
//}
//
//extern P_VERTEX_L _grpGetVertexByID(P_GRAPH_L pgrp, size_t vid);
//
//ptrdiff_t FSA_Status(P_GRAPH_L pgrp, size_t vidx, char transition)
//{
//	REGISTER P_VERTEX_L pvtx;
//	if (NULL != (pvtx = _grpGetVertexByID(pgrp, vidx)))
//	{
//		_FIEDG fd;
//		extern int _grpCBFFindEdgeInList(void * pitem, size_t param);
//		fd.pnode = NULL;
//		fd.vertex.weight = transition;
//
//		if (CBF_TERMINATE == strTraverseLinkedListSC_X(pvtx->adjlist, NULL, CBFFindEdgeInList, (size_t)&fd))
//			return fd.vertex.vid; /* Edge already exists. */
//	}
//	return -1; /* Can not find vertex vidx. */
//}

int cbftvsvtxedg(void * pitem, size_t param)
{
	P_NODE_S pnode = (P_NODE_S)pitem;
	P_EDGE pedge = (P_EDGE)pnode->pdata;
	printf("\t\'\\%lu\'\t(%lu)\n", pedge->weight, pedge->vid);
	return CBF_CONTINUE;
}

int cbftvsgrp(void * pitem, size_t param)
{
	P_TNODE_B tnodeb = P2P_TNODE_B(pitem);
	P_VERTEX_L pvtx = (P_VERTEX_L)tnodeb->pdata;
	P_Node pnode = (P_Node)param;
	if (pvtx->vid == pnode->x)
		printf(">(%lu)\n", pvtx->vid); /* Start status. */
	else if (pvtx->vid == pnode->y)
		printf("*(%lu)\n", pvtx->vid); /* End status. */
	else
		printf(" (%lu)\n", pvtx->vid);
	strTraverseLinkedListSC_A(pvtx->adjlist, NULL, cbftvsvtxedg, 0);
	return CBF_CONTINUE;
}

int main(void)
{
	Node node;
	STACK_L operandStack;
	//char exp[] = { "a(a|b)*" };
	char exp[BUFSIZ] = { 0 };
	stkInitL(&stkOperator);
	stkInitL(&stkOperand);
	queInitL(&queRegexp);
	stkInitL(&operandStack);
	grpInitL(&machine);

	printf("Regexp< a(a|d)* >: ");
	scanf_s("%s", exp, BUFSIZ);
	Splitter(exp);
	printf("RPN: ");
	strTraverseLinkedListSC_N(queRegexp.pfront, NULL, cbftvsPrint, 0);
	printf("\n");

	while (!queIsEmptyL(&queRegexp))
	{
		Element ele;
		queRemoveL(&ele, sizeof ele, &queRegexp);
		if (ele.isOperator)
		{
			Node nodeA, nodeB;
			switch (ele.data.operator)
			{
			case '|':
				if (!stkIsEmptyL(&operandStack))
					stkPopL(&nodeA, sizeof nodeA, &operandStack);
				else
				{
					printf("Expression Error!\n");
					return 1;
				}
				if (!stkIsEmptyL(&operandStack))
					stkPopL(&nodeB, sizeof nodeB, &operandStack);
				else
				{
					printf("Expression Error!\n");
					return 2;
				}
				nodeA = CombineNode2(nodeA, nodeB);
				stkPushL(&operandStack, &nodeA, sizeof nodeA);
				break;
			case '*':
				if (!stkIsEmptyL(&operandStack))
					stkPopL(&nodeA, sizeof nodeA, &operandStack);
				else
				{
					printf("Expression Error!\n");
					return 3;
				}
				nodeA = CombineNode3(nodeA);
				stkPushL(&operandStack, &nodeA, sizeof nodeA);
				break;
			}
		}
		else
		{
			node = CombineNode5(ele.data.operand);
			stkPushL(&operandStack, &node, sizeof node);
		}
	}
	/* Deduction: */
	while (!stkIsEmptyL(&operandStack))
	{
		Node nodeA, nodeB;
		stkPopL(&nodeA, sizeof nodeA, &operandStack);
		if (!stkIsEmptyL(&operandStack))
			stkPopL(&nodeB, sizeof nodeB, &operandStack);
		else
		{
			stkPushL(&operandStack, &nodeA, sizeof nodeA);
			break;
		}
		nodeA = CombineNode1(nodeA, nodeB);
		stkPushL(&operandStack, &nodeA, sizeof nodeA);
	}
	/* Pour the last item out of the stack. */
	if (!stkIsEmptyL(&operandStack))
		stkPopL(&node, sizeof node, &operandStack);
	else
	{
		printf("Expression Error!\n");
		return 4;
	}

	printf("Status\tTransition\tStatus\n");
	treTraverseBIn(machine, cbftvsgrp, &node);

	stkFreeL(&stkOperator);
	stkFreeL(&stkOperand);
	queFreeL(&queRegexp);
	stkFreeL(&operandStack);
	grpFreeL(&machine);
	return 0;
}
回复

使用道具 举报

本版积分规则

QQ|申请友链||Archiver|手机版|小黑屋|技术宅的结界 ( 滇ICP备16008837号 )|网站地图  

GMT+8, 2021-1-15 23:12 , Processed in 0.097947 second(s), 32 queries , Gzip On.

Powered by Discuz! X3.4

Copyright © 2001-2020, Tencent Cloud.

快速回复 返回顶部 返回列表