【Practice】白名单过滤程序

时间:2021-08-20 13:16:17

背景:要求根据largeW文件里的数字过滤largeT文件,如果不在largeW中就存入result.txt文件中。largeW和largeT文件里的数据都有一百万。从《算法4》的官网上得到。

问题:largeW无序,有重复项,要进行二分查找。要求利用有随机访问迭代器的容器。选用vector序列容器。因为deque没有体现出前端和中部插入为固定时间的优势,反而有随机访问慢的劣势。

#include<fstream>
#include<iostream>
#include <vector>
#include<algorithm>
#include<iterator>
using namespace std;

bool BinarySearch(int key,vector<int>& dict)
{
	vector<int>::iterator begin = dict.begin();
	int lo=0;
	int hi=dict.size()-1;
	int mid;
	while(lo<=hi)
	{
		mid = (lo+hi)/2;
		if(key<dict[mid]) 
			hi=mid-1;
		else if(key>dict[mid])
			lo = mid+1;
		else
			return true;
	}
	return false;
}
#include<windows.h>
void main()
{
	ifstream fwhite;
	ifstream fnumber;
	int number;
	vector<int> white_list;
	fnumber.open("largeT.txt");
	fwhite.open("largeW.txt");
	if(!fnumber.is_open() || !fwhite.is_open())
	{//or use .good .fail or directly use ! to judge if the file has been opened successfully
		cout<<"can't open file list"<<endl;
		exit(EXIT_FAILURE);
	}
	while(!fwhite.eof())
	{
		fwhite>>number;
		white_list.push_back(number);
	}
	sort(white_list.begin(),white_list.end());
	white_list.erase(unique(white_list.begin(),white_list.end()),white_list.end());
	ofstream fout("sort_white.txt",ios::trunc);
	copy(white_list.begin(),white_list.end(),ostream_iterator<int,char> (fout,"\n"));


	ofstream fresult("result.txt");
	while(!fnumber.eof())
	{
		fnumber>>number;
		if(!BinarySearch(number,white_list))
		{
			fresult<<number<<endl;
		}
	}
	exit(EXIT_SUCCESS);
};