Skip to content

多文件统计字频

Published: at 09:48 AM | 2 min read

假如有60个文件,文件名为 zipin1.txt 到 zipin60.txt。每个文件的格式如下(词语是汉字串,词频是数字):

词语 词频
词语 词频
.
.
.

所有文件中的词语及其顺序都是一样的,只是词频不一样。现在要把所有文件中相同词语的词频加起来,以相同的格式写到 main.txt 文件中。

#include <windows.h>
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
using namespace std;

// Capacity, in chars, of the buffer each input line is read into
// (used by Word's constructor and by Word::UpdateNum).
const int g_line = 64;

// Accumulates per-word counts across several input files.
// Words and counts live in two parallel vectors: m_nNum[i] is the count that
// belongs to m_strName[i].
class Word
{
public:
	// Reads the words and their initial counts from the file at srcPath.
	// An empty srcPath reads nothing.
	Word(const string srcPath);
	~Word();
	// Adds the counts found in the file at srcPath to the stored counts.
	// Lines are matched to words by position, not by comparing the words.
	// Returns a bool status.
	bool UpdateNum(const string srcPath);
	// Writes every stored word and its count to destPath, one pair per line,
	// with a single space between word and count.
	void Output(const string destPath) const;

private:
	vector<string> m_strName;	// words, in the order they were read
	vector<int> m_nNum;		// running count for the word at the same index
};

// Loads the word list and the initial counts from srcPath.
//
// Every non-blank line is expected to hold a word followed by its count.
// The two fields may be separated by '-' (the separator this reader has
// always accepted) or by blanks (the separator Word::Output writes), so a
// file in the output format can be read back in.
//
//  - An empty srcPath, or a file that cannot be opened, leaves the object
//    without any words.
//  - Blank lines are skipped.
//  - A line that has a word but no count is stored with a count of 0, so the
//    word keeps its position relative to the other files.
//  - Only the first two fields of a line are used.
//
// srcPath: path of the file that defines the words and their order.
Word::Word(const string srcPath)
{
	if (srcPath.empty())
	{
		return;
	}

	// Characters that separate the two fields. '\r' is included so a stray
	// carriage return can never become part of a word.
	const char *const szDelims = " \t\r-";

	ifstream is(srcPath.c_str());
	string strLine;
	strLine.reserve(g_line);

	// std::getline grows the string as required. Reading into a fixed char
	// array instead would put the stream into a failed state on the first
	// over-long line and silently drop the rest of the file.
	while (getline(is, strLine))
	{
		const string::size_type nNameBegin = strLine.find_first_not_of(szDelims);
		if (nNameBegin == string::npos)
		{
			continue;	// blank line: nothing to record
		}

		string::size_type nNameEnd = strLine.find_first_of(szDelims, nNameBegin);
		if (nNameEnd == string::npos)
		{
			nNameEnd = strLine.size();
		}
		m_strName.push_back(strLine.substr(nNameBegin, nNameEnd - nNameBegin));

		// atoi stops at the first non-digit, so anything after the count is ignored.
		const string::size_type nNumBegin = strLine.find_first_not_of(szDelims, nNameEnd);
		m_nNum.push_back(nNumBegin == string::npos ? 0 : atoi(strLine.c_str() + nNumBegin));
	}
	// The ifstream destructor closes the file.
}

// No explicit cleanup: the two vector members are destroyed automatically.
Word::~Word() {}

bool Word::UpdateNum(const string srcPath)
{	
	if (!(srcPath.empty()))
	{
		char	szLine[g_line] = {0};
		char	*pNum = NULL;
		int		nNum = 0;
		int		i = 0;
		ifstream is(srcPath.c_str());
		while (is.getline(szLine, g_line))
		{
			strtok(szLine, "-");
			pNum = strtok(NULL, "-");
			nNum = atoi(pNum);
			m_nNum.at(i++) += nNum;
		}
		is.close();
	}


	return true;
}

// Writes every stored word and its count to destPath, one "word count" pair
// per line with a single space between the two fields.
//
// destPath: path of the file to create or overwrite.
void Word::Output(const string destPath) const
{
	ofstream os(destPath.c_str());

	const vector<string>::size_type nCount = m_strName.size();
	for (vector<string>::size_type i = 0; i < nCount; ++i)
	{
		// '\n' instead of endl: endl would flush the stream after every
		// single line. The bytes written are the same.
		os << m_strName[i] << " " << m_nNum[i] << '\n';
	}
	// The ofstream destructor flushes and closes the file.
}

// Sums the counts of zipin1.txt .. zipin60.txt, writes the totals to
// main.txt, and prints the GetTickCount difference measured around the work.
int main(void)
{
	DWORD dwStart = ::GetTickCount();

	// zipin1.txt supplies the word list; the remaining files only add counts.
	Word word("zipin1.txt");
	char szPath[_MAX_PATH];
	for (int i = 2; i <= 60; i++)
	{
		// sprintf NUL-terminates its output, so the buffer needs no clearing first.
		sprintf(szPath, "zipin%d.txt", i);
		if (!word.UpdateNum(szPath))
		{
			// Carry on so the other files are still merged, but make the
			// incomplete total visible instead of dropping the status.
			cerr << "warning: could not merge " << szPath << endl;
		}
	}
	word.Output("main.txt");

	DWORD dwEnd = ::GetTickCount();
	cout << "elapse time: " << dwEnd - dwStart << endl;

	system("pause");
	return 0;
}

2011-08-15