多模字符串匹配算法实现



  • 快速在一个长字符串中查找一组短字符串的位置

    #include <cstdio>
    #include <cstring>
    #define MAX_NODE 256
    #define MAX_CHILDREN_NODE 26
    
    typedef struct _TrieNode
    {
    	struct _TrieNode *aChildPtr[MAX_CHILDREN_NODE], *pFail;
    	int iMark;
    } sTrieNode;
    
    typedef struct
    {
    	sTrieNode TreeBuff[MAX_NODE];
    	int iNodeCount;
    } sTrie;
    
    sTrie tree;
    
    int fnGetIndex(char iCh)
    {
    	return iCh - 'a';
    }
    
    void fnAdd(sTrie *trie, char *szStr, int mark)
    {
    	int iLen = strlen(szStr);
    	sTrieNode *cur = trie->TreeBuff;
    	for (int i = 0; i < iLen; ++i)
    	{
    		int iCh = fnGetIndex(szStr[i]);
    		if (cur->aChildPtr[iCh] == NULL)
    		{
    			cur->aChildPtr[iCh] = trie->TreeBuff + (++trie->iNodeCount);
    			memset(cur->aChildPtr[iCh], 0, sizeof(sTrieNode));
    		}
    		cur = cur->aChildPtr[iCh];
    	}
    	cur->iMark = mark;
    }
    
    void fnBuildFail(sTrie *trie)
    {
    	static sTrieNode *aQueue[MAX_NODE];
    	int iQueHead = 0;
    	int iQueTail = 0;
    	sTrieNode *pCurrent = trie->TreeBuff, *pFailTo;
    	for (int i = 0; i < MAX_CHILDREN_NODE; ++i)
    	{
    		if (pCurrent->aChildPtr[i] != NULL)
    		{
    			aQueue[iQueTail++] = pCurrent->aChildPtr[i];
    			pCurrent->aChildPtr[i]->pFail = pCurrent;
    		}
    	}
    	while (iQueHead < iQueTail)
    	{
    		pCurrent = aQueue[iQueHead++];
    		for (int i = 0; i < MAX_CHILDREN_NODE; ++i)
    		{
    			if (pCurrent->aChildPtr[i] == NULL)
    			{
    				continue;
    			}
    			pFailTo = pCurrent->pFail;
    			while (pFailTo != NULL)
    			{
    				if (pFailTo->aChildPtr[i] != NULL)
    				{
    					pCurrent->aChildPtr[i]->pFail = pFailTo->aChildPtr[i];
    					break;
    				}
    				pFailTo = pFailTo->pFail;
    			}
    			if (pFailTo == NULL)
    			{
    				pCurrent->aChildPtr[i]->pFail = trie->TreeBuff;
    			}
    			aQueue[iQueTail++] = pCurrent->aChildPtr[i];
    		}
    	}
    }
    
    void fnFind(sTrie *trie, char *szStr)
    {
    	int iLen = strlen(szStr);
    	sTrieNode *pCurrent = trie->TreeBuff;
    	for (int i = 0; i < iLen; ++i)
    	{
    		int iCh = fnGetIndex(szStr[i]);
    		while (pCurrent->aChildPtr[iCh] == NULL && pCurrent != trie->TreeBuff)
    		{
    			pCurrent = pCurrent->pFail;
    		}
    		pCurrent = pCurrent->aChildPtr[iCh];
    		if (pCurrent == NULL)
    		{
    			pCurrent = trie->TreeBuff;
    		}
    		if (pCurrent->iMark != 0)
    		{
    			printf("Find %d end at %d\n", pCurrent->iMark, i);
    		}
    	}
    }
    
    char pat[][8] = {"", "sin", "cos", "tan", "ln", "log", "arcsin", "arccos", "arctan", "cot", "arccot", "sinh", "cosh", "sh", "ch", "tanh", "coth"};
    
    int main()
    {
    	for (int i = 1; i <= 5; ++i)
    	{
    		fnAdd(&tree, pat[i], i);
    	}
    	fnBuildFail(&tree);
    	char str[128] = "arcsinx";
    	fnFind(&tree, str);
    	return 0;
    }
    

 

Copyright © 2018 bbs.dian.org.cn All rights reserved.

与 Dian 的连接断开,我们正在尝试重连,请耐心等待