// Parser.h
// Declares the Parser class, which reads an HTML document and hands back
// the strings, links and description found in it.

#ifndef Parser_H
#define Parser_H

#include "cs240.h"
#include "openconn.h"
#include "linkslist.h"
#include "database.h"
#include "stopword.h"
#include <string>

/**
 * Parser reads an HTML document through a Connection object and exposes its
 * contents piece by piece: the next string, the next link, and the
 * document's description.
 *
 * All string types are spelled std::string so that this header does not
 * depend on a using-directive leaking in from another header.
 */
class Parser : public CS240 {
public:
	/**
	 * Constructor.
	 * @param db            database object the parser works with (required).
	 * @param stopwordFile  name of the stopword file; defaults to "stopword.txt".
	 */
	Parser( DataBase &db, std::string stopwordFile = "stopword.txt" );

	/** Destructor. */
	~Parser();

	/** Debugging routine. */
	void testData();

	/**
	 * Opens a new file for reading.
	 * @param address  what to open (format is decided by the implementation).
	 * @return 1 if successful.
	 */
	int open( std::string address );

	/** Closes the input file. */
	void close();

	/** @return the next string from the file, or "" if there are no more strings. */
	std::string nextString();

	/** @return the next link from the links object. */
	std::string nextLink();

	/** @return the description of the current document. */
	std::string getDescription();

	/** Sets the domain that links are limited to. */
	void setDomain( std::string domain );

	/** @return readError: 1 if there was an error on the last read, 0 otherwise. */
	int getReadError();

	/**
	 * Return constant indicating that the document lacks a body section.
	 * This is a non-static const member, so its value is supplied by the
	 * constructor's initialiser list (not visible in this header).
	 */
	const int NO_BODY;

private:
	// ---- helper routines -------------------------------------------------

	/**
	 * Retrieves a new section of the document from the connection object.
	 * NOTE(review): the meaning of the int result is not visible from this
	 * header - confirm against the implementation.
	 */
	int getInput();

	/**
	 * Replaces all instances of `from` inside `target` with `to`.
	 * NOTE(review): presumably returns the number of replacements - confirm.
	 */
	int replaceAll( std::string &target, std::string from, std::string to );

	/** Converts the passed string to all lower case letters (in place). */
	static void convertCase( std::string &text );

	/** Strips everything inside of tags from the passed string (used for cLine). */
	void removeTags( std::string &text );

	/**
	 * Pulls all links out of the passed string. The argument is taken by
	 * value, so the caller's copy is left untouched.
	 * NOTE(review): presumably the links end up in `links` - confirm.
	 */
	void extractLinks( std::string text );

	// ---- state -----------------------------------------------------------
	// Keep these in this order: members are initialised in declaration order.

	LinksList links;            // data structure for containing links
	Connection connection;      // object for reading HTML documents
	std::string URLaddress;     // current address being read
	std::string linksLimit;     // limiting domain
	DataBase *database;         // pointer to database for parser to add things to
	int readError;              // 1 if there was an error on the last read, 0 otherwise
	StopWord stops;             // stopword object
	std::string cLine;          // section of document currently being parsed
	std::string leftover;       // carries incomplete strings over to the next getInput call
	std::string description;    // holds the description of the current document
	bool firstrun;              // whether tags still need to be stripped from cLine
};

#endif
