Ijumaa, 15 Agosti 2014

Intersection_Posting_lists

The file "utility.h" should contain the following lines:

#include<iostream>
#include<fstream>
#include<vector>
#include<algorithm>
#include<string>
#include<map>
---------------------------------------------------------------------------------------------
"yangu.txt" holds the documents, each one terminated by a standalone "#". The sample below is only an example; any text in this format will work:

MY MOTHER love me too much #
i love my mother too #
do you love me too #
or only you care about your mother #
-----------------------------------------------------------------------------------------------





#include"utility.h"
using namespace std;
// Forward declarations of the helpers defined after main().

// Lower-cases lowercase_term in place and also returns the result by value.
string Lower(string& lowercase_term);
// Removes every non-alphabetic character from s in place; returns the result by value.
string removepunc(string& s);
// Counts characters of s while scanning for a digit, '&' or '-'; see the definition.
int digitcheck(string& s);
// Reads "yangu.txt" and fills terms (the dictionary) and documents (one word list per document).
void dictionery(vector<string> &terms,vector<vector<string> > &documents);
// Collects the IDs of documents containing "mother" into take and writes them to "posting.txt".
void post1(vector<string> &terms,vector<vector<string> > &documents, vector<int> &take,int
&docID);
// Collects the IDs of documents containing "love" into take1 and appends them to "posting.txt".
void post2(vector<string> &terms,vector<vector<string> > &documents, vector<int> &take1,int
&docID);
// Appends the intersection of the two posting lists to "posting.txt".
void inter2_post(vector<int> &take,vector<int> &take1,vector<vector<string> > &documents);
/*
 * Program entry point.
 *
 * Builds the dictionary and per-document word lists from "yangu.txt", creates
 * the posting lists for "mother" (post1) and "love" (post2), and finally
 * writes their intersection. All results go to "posting.txt".
 */
int main()
{
    vector<string> v;            // dictionary terms filled by dictionery
    vector<int> tk;              // posting list filled by post1
    vector<int> tk1;             // posting list filled by post2
    vector<vector<string> > vv;  // one word list per document
    // Initialised because it is handed by reference to post1/post2, which
    // must never observe an indeterminate value.
    int ID=0;

    cout<<"THE OUTPUT WILL BE WRITTEN IN TEXT FILE NAMED posting "<<endl;
    dictionery(v,vv);        // build dictionary and documents
    post1(v,vv,tk,ID);       // first posting list
    post2(v,vv,tk1,ID);      // second posting list
    inter2_post(tk,tk1,vv);  // intersection of the two lists
    system("pause");         // hold the console open ("pause" is a Windows shell command)
    return 0;
}
void dictionery(vector<string> &terms,vector<vector<string> > &documents)//function for
creating dictionery
{
ifstream mem("yangu.txt");
if(!mem)
{
cout<<"The file for documents not created"<<endl;
}
else
{
while(!mem.eof())
{
string word;
vector<string> words;
while(mem>>word && word!="#")
{
Lower(word);
digitcheck(word);
removepunc(word);
words.push_back(word);
terms.push_back(word);
sort(terms.begin(),terms.end());//sorting the terms
terms.erase(unique(terms.begin(),terms.end()),terms.end());//remove term
repeatation
}
if(!words.empty())
{
documents.push_back(words);
}
}
}
}
/*
 * Builds the posting list for the term "mother" and writes it to
 * "posting.txt", replacing any previous content of that file.
 *
 * terms     - unused; kept so existing callers keep working.
 * documents - word lists, one per document; document i has ID i+1.
 * take      - receives the IDs of the documents that contain "mother",
 *             in ascending order.
 * docID     - set to the ID of the last matching document; left unchanged
 *             when no document matches.
 */
void post1(vector<string> &terms,vector<vector<string> > &documents,vector<int> &take,int &docID)
{
    for(vector<vector<string> >::size_type i=0;i<documents.size();i++)
    {
        vector<string>::const_iterator p=find(documents[i].begin(),documents[i].end(),"mother");
        if(p!=documents[i].end())
        {
            docID=static_cast<int>(i)+1;
            // Record the ID only for a real match, so stale or
            // uninitialised values never enter the posting list.
            take.push_back(docID);
        }
    }
    // Collapses adjacent duplicates, which can only come from content that
    // was already in take before this call.
    take.erase(unique(take.begin(),take.end()),take.end());

    ofstream outputFile("posting.txt");
    outputFile<<"The first posting list[mother]:"<<endl;
    outputFile<<"==============================="<<endl;
    for(vector<int>::size_type k=0;k<take.size();k++)
    {
        outputFile<<take[k]<<" ";
    }
    outputFile<<endl;
}
/*
 * Builds the posting list for the term "love" and appends it to
 * "posting.txt".
 *
 * terms     - unused; kept so existing callers keep working.
 * documents - word lists, one per document; document i has ID i+1.
 * take1     - receives the IDs of the documents that contain "love",
 *             in ascending order.
 * docID     - set to the ID of the last matching document; left unchanged
 *             when no document matches.
 */
void post2(vector<string> &terms,vector<vector<string> > &documents,vector<int> &take1,int &docID)
{
    for(vector<vector<string> >::size_type i=0;i<documents.size();i++)
    {
        vector<string>::const_iterator p=find(documents[i].begin(),documents[i].end(),"love");
        if(p!=documents[i].end())
        {
            docID=static_cast<int>(i)+1;
            // Record the ID only for a real match, so stale or
            // uninitialised values never enter the posting list.
            take1.push_back(docID);
        }
    }
    // Collapses adjacent duplicates, which can only come from content that
    // was already in take1 before this call. Done once, after the scan.
    take1.erase(unique(take1.begin(),take1.end()),take1.end());

    ofstream outputFile("posting.txt",ios::app);
    outputFile<<"The second posting list[love]:"<<endl;
    outputFile<<"==============================="<<endl;
    for(vector<int>::size_type k=0;k<take1.size();k++)
    {
        outputFile<<take1[k]<<" ";
    }
    outputFile<<endl;
}
/*
 * Appends the intersection of two posting lists to "posting.txt".
 *
 * take, take1 - posting lists of document IDs. Both must be sorted in
 *               ascending order; the merge below relies on it. Neither
 *               list is modified.
 * documents   - unused; kept so existing callers keep working.
 *
 * The common IDs are written on one line, each followed by a space. Empty
 * lists are handled and simply produce an empty line.
 */
void inter2_post(vector<int> &take,vector<int> &take1,vector<vector<string> > &documents)
{
    ofstream outputFile("posting.txt",ios::app);
    outputFile<<"The intersection of the two posting lists"<<"[mother AND love]:"<<endl;
    outputFile<<"==============================================================="<<endl;

    // Two-pointer merge: advance whichever list currently holds the smaller
    // ID, and emit an ID when both lists agree on it.
    vector<int>::size_type i=0;
    vector<int>::size_type j=0;
    while(i<take.size() && j<take1.size())
    {
        if(take[i]==take1[j])
        {
            outputFile<<take[i]<<" ";
            ++i;
            ++j;
        }
        else if(take[i]<take1[j])
        {
            ++i;
        }
        else
        {
            ++j;
        }
    }
    outputFile<<endl;
}
/*
 * Converts lowercase_term to lower case in place.
 *
 * lowercase_term - string to convert; modified by the call.
 * Returns a copy of the converted string.
 */
string Lower(string& lowercase_term)
{
    for(string::size_type i=0;i<lowercase_term.length();i++)
    {
        // tolower() requires a value representable as unsigned char (or EOF);
        // a plain char may be negative for non-ASCII bytes.
        const unsigned char ch=static_cast<unsigned char>(lowercase_term[i]);
        lowercase_term[i]=static_cast<char>(tolower(ch));
    }
    return lowercase_term;
}
/*
 * Strips every non-alphabetic character from s in place. Digits and
 * whitespace are removed along with punctuation, because only characters
 * accepted by isalpha() are kept.
 *
 * s - string to clean; modified by the call.
 * Returns a copy of the cleaned string.
 */
string removepunc(string& s)
{
    string holder;  // collects the characters that are kept
    for(string::size_type i=0;i<s.length();i++)
    {
        // isalpha() requires a value representable as unsigned char (or EOF);
        // a plain char may be negative for non-ASCII bytes.
        if(isalpha(static_cast<unsigned char>(s[i]))!=0)
        {
            holder.push_back(s[i]);
        }
    }
    s=holder;
    return s;
}
/*
 * Counts the characters of s that precede the first digit, '&' or '-'.
 *
 * s - string to inspect; not modified.
 * Returns the number of leading characters before the first digit, '&' or
 * '-'. This equals the length of s when none is present, and is 0 when s is
 * empty or starts with one of them.
 */
int digitcheck(string& s)
{
    int c=0;
    for(string::size_type i=0;i<s.length();i++)
    {
        // Each character is tested on its own. isdigit() needs a value
        // representable as unsigned char, hence the cast.
        if(isdigit(static_cast<unsigned char>(s[i]))||s[i]=='&'||s[i]=='-')
        {
            break;
        }
        ++c;
    }
    return c;
}




================================================
The output written to "posting.txt" should look like:

The first posting list[mother]:
===============================
1          2          4        
The second posting list[love]:
===============================
1          2          3        
The intersection of the two posting lists[mother AND love]:
===============================================================
1     2