/*
 * word_cnt.c
 * This program reads a file and counts the words in it.
 */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "tree.h"

#define MAX_WORD  200

/* A structure for storing a word along with its number of occurrences: */
struct _WordCount
{
  char  *word;
  int   count;
};

typedef struct _WordCount WordCount;

/* A comparison function for two WordCount pointers: */
int comp_word (const void *p1, const void *p2)
{
  const WordCount   *p_wc1;
  const WordCount   *p_wc2;

  /* Convert pointers and compare the words they contain. */
  p_wc1 = ((const WordCount *)p1);
  p_wc2 = ((const WordCount *)p2);

  return (strcmp (p_wc2->word, p_wc1->word));
}

/* Enumeration for a state: */
enum _State
{
  OUTSIDE_WORD,
  INSIDE_WORD
};

typedef enum _State State;

/* Prototypes: */
static void update_word (char *word, Tree *p_tree);

/* ------------------------------------------------------------------------
 * The main:
 */
int main (int argc, char **argv)
{
  const char *delims = " \t\n\r`~!@#$%^&*()-=+|\\[]{};:\",.<>/?";
                                   /* All characters that may be delimiters. */
  FILE       *p_file;              /* The input file. */
  char       word [MAX_WORD + 1];  /* The current word. */
  int        l_word;               /* Its length. */
  State      state = OUTSIDE_WORD; /* Are we inside or outside a word. */
  int        c;                    /* The current character read. */
  int        is_delim;             /* Is this character a delimiter. */
  Tree       *p_tree;              /* The tree containing the word count. */
  const Node *p_node;              /* The current tree node we scan. */
  int        n_total;              /* Total occurrences of all words. */

  /* Open the input file. */
  if (argc < 2)
  {
    printf ("Usage: %s <input file>\n", argv[0]);
    return (1);
  }

  p_file = fopen (argv[1], "r");

  if (p_file == NULL)
  {
    printf ("Failed to open '%s'.\n", argv[1]);
    return (1);
  }

  /* Initialize the tree. */
  p_tree = tree_init (comp_word, 1);

  /* Read the input file, character by character. */
  while ((c = fgetc (p_file)) != EOF)
  {
    /* Check whether the current character is a delimiter. */
    is_delim = (strchr (delims, c) != NULL);

    /* Act according to the current state. */
    if (state == OUTSIDE_WORD)
    {
      /* We are outside a word: Ignore any delimiters, but pay attention
         to the case that a new word has just started. */
      if (! is_delim)
      {
        word[0] = c;
        l_word = 1;
        state = INSIDE_WORD;
      }
    }
    else
    {
      /* We are inside a word. */
      if (! is_delim)
      {
        /* Current character is not a delimiter - append it to the word. */
        word[l_word] = c;
        l_word++;
      }
      else
      {
        /* We reached the end of the word. */
        state = OUTSIDE_WORD;

        /* Create a new WordCount object and update the tree. */
        if (l_word > 0)
        {
          word[l_word] = '\0';
          update_word (word, p_tree);
        }
      }
    }
  }

  /* Close the input file. */
  fclose (p_file);

  /* Go over all words in the tree and print their number of occurrences. */
  n_total = 0;

  for (p_node = tree_min (p_tree);
       p_node != NULL;
       p_node = tree_next (p_node))
  {
    WordCount  *p_wc = (WordCount *)(p_node->p_object);

    printf ("%s  :  %d\n", p_wc->word, p_wc->count);
    n_total += p_wc->count;
  }

  /* Print the summary. */
  printf ("\n**************** Summary: ****************\n");
  printf ("Found %d unique words,\nwith %d total occurences.\n",
	  p_tree->n_objects, n_total);
  printf ("******************************************\n");

  /* Clear the tree and free memory. */
  tree_reset (p_tree);
  free (p_tree);

  return (0);
}

/* ------------------------------------------------------------------------
 * Function: update_word
 * Purpose:  Update the number of occurrences of the word.
 * Input:    word   - The word to update.
 * In/Out:   p_tree - The binary tree of WordCount objects.
 * Returns:  Nothing.
 */
static void update_word (char *word, Tree *p_tree)
{
  int         l_word;          /* The length of the word. */
  WordCount   wc_query;        /* An object used to query the tree. */
  const Node  *p_node;         /* The node that contains the desired word. */
  int         i;

  /* Change the word to lower case, in order to be case insensitive. */
  l_word = strlen (word);

  for (i = 0; i < l_word; i++)
    word[i] = tolower (word[i]);

  /* Create a temporary WordCount object and use it to query the tree
     and find the word in it. */
  wc_query.word = word;
  p_node = tree_find (p_tree, &wc_query);

  if (p_node != NULL)
  {
    /* The word exists in the tree - increment its counter. */
    WordCount   *p_existing_wc;

    p_existing_wc = (WordCount *)p_node->p_object;
    p_existing_wc->count++;
  }
  else
  {
    /* Create a new WordCount object and insert it to the tree. */
    WordCount   *p_new_wc;

    p_new_wc = (WordCount *) malloc (sizeof(WordCount));
    p_new_wc->word = (char *) malloc (sizeof(char) * (strlen(word) + 1));
    strcpy (p_new_wc->word, word);
    p_new_wc->count = 1;

    tree_insert (p_tree, p_new_wc);
  }

  return;
}

