/*
* word_cnt.c
* This program reads a file and counts the
words in it.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "tree.h"
#define MAX_WORD 200
/* A structure
for storing a word along with its number of occurrences: */
struct _WordCount
{
char *word;
int count;
};
typedef struct
_WordCount WordCount;
/* A comparison
function for two WordCount pointers: */
int comp_word (const void *p1, const void *p2)
{
const
WordCount *p_wc1;
const
WordCount *p_wc2;
/* Convert pointers
and compare the words they contain. */
p_wc1 = ((const
WordCount *)p1);
p_wc2 = ((const
WordCount *)p2);
return
(strcmp (p_wc2->word, p_wc1->word));
}
/* Enumeration
for a state: */
enum _State
{
OUTSIDE_WORD,
INSIDE_WORD
};
typedef enum
_State State;
/* Prototypes:
*/
static void
update_word (char *word, Tree *p_tree);
/*
------------------------------------------------------------------------
* The main:
*/
int main (int argc, char
**argv)
{
const char *delims = " \t\n\r`~!@#$%^&*()-=+|\\[]{};:\",.<>/?";
/* All characters that may be delimiters. */
FILE
*p_file; /* The input file. */
char word [MAX_WORD + 1]; /* The current word. */
int l_word; /* Its
length. */
State
state = OUTSIDE_WORD; /* Are we inside or
outside a word. */
int c; /*
The current character read. */
int is_delim; /* Is
this character a delimiter. */
Tree
*p_tree; /* The tree containing the word count. */
const Node
*p_node; /* The current tree node we scan. */
int n_total; /*
Total occurrences of all words. */
/* Open the input
file. */
if (argc <
2)
{
printf ("Usage:
%s <input file>\n", argv[0]);
return (1);
}
p_file = fopen (argv[1], "r");
if (p_file ==
NULL)
{
printf ("Failed
to open '%s'.\n", argv[1]);
return (1);
}
/* Initialize the
tree. */
p_tree = tree_init (comp_word, 1);
/* Read the input
file, character by character. */
while ((c =
fgetc (p_file)) != EOF)
{
/* Check whether
the current character is a delimiter. */
is_delim = (strchr (delims, c) != NULL);
/* Act according
to the current state. */
if (state
== OUTSIDE_WORD)
{
/* We are
outside a word: Ignore any delimiters, but pay attention
to the case that a new word has just
started. */
if (!
is_delim)
{
word[0]
= c;
l_word = 1;
state = INSIDE_WORD;
}
}
else
{
/* We are
inside a word. */
if (!
is_delim)
{
/* Current
character is not a delimiter - append it to the word. */
word[l_word] = c;
l_word++;
}
else
{
/* We reached
the end of the word. */
state = OUTSIDE_WORD;
/* Create a
new WordCount object and update the tree. */
if
(l_word > 0)
{
word[l_word] = '\0';
update_word (word, p_tree);
}
}
}
}
/* Close the input
file. */
fclose (p_file);
/* Go over all
words in the tree and print their number of occurrences. */
n_total = 0;
for (p_node =
tree_min (p_tree);
p_node != NULL;
p_node = tree_next (p_node))
{
WordCount
*p_wc = (WordCount *)(p_node->p_object);
printf ("%s :
%d\n", p_wc->word, p_wc->count);
n_total += p_wc->count;
}
/* Print the
summary. */
printf ("\n****************
Summary: ****************\n");
printf ("Found
%d unique words,\nwith %d total occurences.\n",
p_tree->n_objects,
n_total);
printf ("******************************************\n");
/* Clear the tree
and free memory. */
tree_reset (p_tree);
free (p_tree);
return (0);
}
/*
------------------------------------------------------------------------
* Function: update_word
* Purpose:
Update the number of occurrences of the word.
* Input:
word - The word to update.
* In/Out:
p_tree - The binary tree of WordCount objects.
* Returns:
Nothing.
*/
static void
update_word (char *word, Tree *p_tree)
{
int l_word; /* The
length of the word. */
WordCount
wc_query; /* An object used to query the tree. */
const
Node *p_node; /* The node
that contains the desired word. */
int i;
/* Change the word
to lower case, in order to be case insensitive. */
l_word = strlen (word);
for (i = 0; i < l_word; i++)
word[i] = tolower (word[i]);
/* Create a
temporary WordCount object and use it to query the tree
and find the word in it. */
wc_query.word = word;
p_node = tree_find (p_tree, &wc_query);
if (p_node !=
NULL)
{
/* The word
exists in the tree - increment its counter. */
WordCount
*p_existing_wc;
p_existing_wc = (WordCount
*)p_node->p_object;
p_existing_wc->count++;
}
else
{
/* Create a new
WordCount object and insert it to the tree. */
WordCount
*p_new_wc;
p_new_wc = (WordCount *) malloc (sizeof(WordCount));
p_new_wc->word = (char *) malloc (sizeof(char) * (strlen(word) + 1));
strcpy (p_new_wc->word, word);
p_new_wc->count = 1;
tree_insert (p_tree, p_new_wc);
}
return;
}