// Copyright 2000-2006 Adobe Macromedia Software LLC and its licensors. All rights reserved.
// --------------------------------------------------------------------------
//
// Import Word HTML.js
//
// This command is similar to the "Clean Up HTML.js" command, except that it
// is specifically designed to work with HTML documents generated by
// Microsoft Word.
//
// 09/15/2001 ksunder: To run this without a UI, include the same JS files,
//                     and call remoteImportWordHTML(). Optional arg: file dom ptr.
//                     Will use the settings for DEFAULT_WORD_VERSION.
//
// --------------------------------------------------------------------------

// Set to true by receiveArguments() when the user dismisses the file browser
// without choosing a file; initialize() checks it and closes the dialog.
var canceledImport = false;

// Version string getVersion() falls back to when the version drop-down
// cannot be found.
var DEFAULT_WORD_VERSION = "2000";
// DOM of the document being cleaned up.  Assigned by initialize() (dialog
// mode) or remoteImportWordHTML() (no-UI mode).
var DOM;


// Called with the arguments passed to the command.  When the first argument
// is "import", asks the user for a Word HTML file and copies its markup into
// a newly created document.  Records a dismissed file browser in
// canceledImport.
function receiveArguments()
{
   var fileTypes, chosenURL, sourceDOM, targetDOM;

   canceledImport = false;

   // Any other invocation works on the document that is already open.
   if (arguments[0] != "import")
      return;

   // Select the Word file to be imported; don't show images, suppress
   // "not in root" warnings.
   fileTypes = ["Word HTML Files (*.htm; *.html)|*.htm;*.html|TEXT|"];
   chosenURL = browseForFileURL("open", MSG_Word_Import, false, true, fileTypes);  // returns a local filename

   if (!chosenURL)
   {
      canceledImport = true;
      return;
   }

   // The chosen name may not resolve to a document.
   sourceDOM = dw.getDocumentDOM(chosenURL);
   if (!sourceDOM)
      return;

   targetDOM = dw.createDocument();
   if (targetDOM)
      targetDOM.documentElement.outerHTML = sourceDOM.documentElement.outerHTML;
}

//******************* Commands API *******************

// Returns the dialog buttons as a flat list of label / handler-expression
// pairs: OK runs the import, Cancel closes the window, Help shows help.
function commandButtons()
{
   return [
      MM.BTN_OK,     "importWordHTML()",  // main entry point
      MM.BTN_Cancel, "window.close()",
      MM.BTN_Help,   "displayHelp()"
   ];
}

// Returns true only when there is a current document whose parse mode is
// 'html' and the focus is in the document, in an HTML view, or in the
// text view.
function canAcceptCommand()
{
  if (!dw.getDocumentDOM())
    return false;

  if (dw.getDocumentDOM().getParseMode() != 'html')
    return false;

  return (dw.getFocus() == 'document' ||
          dw.getFocus(true) == 'html' ||
          dw.getFocus() == 'textView');
}


//******************* Global Variables *******************

// Help reference for this dialog (presumably consumed by displayHelp(),
// which is not defined in this part of the file -- confirm).
var helpDoc = MM.HELP_cmdCleanUpWordHTML;

// Word version chosen by detectWordVersion(): 97 or 2000 (numeric).
var gWordVersion;
// true when detectWordVersion() found the version in a META tag, false when
// it fell back to the default.
var gFoundVersion;
var CB;               // The checkbox group
var CBTags;           // The Checkboxes in the dialog

// Logging vars
// Counters reported by finish() when the log is shown.  Each one is logged
// only when greater than zero.
var gRemoveMetaTags = 0;
var gRemoveWordXML = 0;
var gRemoveConditionals = 0;
var gRemoveEmptyParas = 0;
var gRemoveMargins = 0;
var gRemoveInlineCSS = 0;
var gRemovemsoStyle = 0;
var gRemoveNonCSS = 0;
var gRemoveTableCSS = 0;
var gRemoveUnusedCSS = 0;

// Incremented by convertFontSizeHandler() for every FONT tag it changes.
var gFontsConverted = 0;

var gNestingFixed = 0;
// Logged by finish() when it is not the empty string.
var gBackgroundSet = "";
// Set to 1 by PostProcess() after source formatting has been applied.
var gSourceFormatted = 0;


//************** Main functions *********************

/////////////////////////////////////////////////////////////////////////////
// Function
//    importWordHTML
//
// Purpose
//    This is the "main" function that the dialog calls when the user
//    clicks OK.  It saves the dialog settings, configures logging, runs the
//    version-specific cleanup followed by the general and post-processing
//    passes, and finally calls finish().
//
// Notes
//    The busy cursor is cleared even when one of the processing passes
//    throws; the error itself still propagates to the caller.
//
function importWordHTML()
{
   // Local on purpose: the original assigned an undeclared (global) variable.
   var version;

   T.finish(); //ensure Tabs are through getting input

   // Save the settings first.  That way if something crashes or
   // goes wrong during the processing, the user doesn't need to reset
   // all of the options again.
   if (doSaveSettings())
      saveSettings();

   // Set up logging particulars
   if (doShowLog())
   {
      MM_enableLogging();
      MM_clearLog();
   }
   else
   {
      MM_disableLogging();
   }

   version = getVersion();

   MM.setBusyCursor();

   try
   {
      switch (version)
      {
         case "2000":
            ProcessWord2000();
            break;

         case "97":
            ProcessWord97();
            break;
      }

      // Do some processing that needs to be done no matter the version.
      GeneralProcessing();

      // Cleanup
      PostProcess();
   }
   finally
   {
      MM.clearBusyCursor();
   }

   // Show the log, if they said to.
   finish();
}


// remoteImportWordHTML provides a way to run Clean Up Word remotely, without
// the UI.  You can include this file (plus Source Formatting.js and DOM.js)
// and call this function with a dom for any document.  It always runs the
// Word 2000 cleanup, with every option reported as checked (see HeadlessUI).
//
// Arguments
//    dom - optional.  DOM of the document to clean.  When omitted, the
//          current document (dw.getDocumentDOM('document')) is used, the
//          same lookup initialize() performs for the dialog.
function remoteImportWordHTML(dom) {
  // The argument is documented as optional; without this fallback DOM would
  // be left undefined and PostProcess() would fail on DOM.documentElement.
  DOM = dom ? dom : dw.getDocumentDOM('document');
  CB = new HeadlessUI();
  ProcessWord2000();  //if no UI, default is Word2000
  GeneralProcessing();
  PostProcess();
}
// Constructor for the stand-in assigned to CB by remoteImportWordHTML().
// It only supplies isChecked(), which is the one CB method the option
// check functions (doRemove...(), doApply...(), ...) call.
function HeadlessUI()
{
  this.isChecked = HeadlessUI_isChecked;
}

// isChecked() implementation for HeadlessUI: every option counts as checked.
function HeadlessUI_isChecked()
{
  return true;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    GeneralProcessing
//
// Purpose
//    Processing that is run regardless of the Word version.  Currently this
//    removes the meta/link tags when that option is enabled.
//
function GeneralProcessing()
{
   if (!doRemoveMetaLink())
      return;

   removeMetaLink();
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    PostProcess
//
// Purpose
//    Final pass that runs after all of the cleaning, whatever options are
//    turned on: strips blank style attributes, removes empty tags, and
//    applies source formatting when that option is checked.
//
function PostProcess()
{
   var docRoot = DOM.documentElement;

   // Remove the blank style="" (or style='') attributes, together with the
   // whitespace in front of them.
   docRoot.outerHTML = docRoot.outerHTML.replace(/\s*style=(""|'')/g, "");

   RemoveEmptyTags();

   if (!doApplySourceFormatting())
      return;

   // Included from "Source Formatting.js".
   formatSource(DOM);

   // Recorded so finish() can mention it in the log.
   gSourceFormatted = 1;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    initialize
//
// Purpose
//    This is called from body onLoad to initialize the dialog: it hooks up
//    the checkboxes, picks the tab artwork for the current platform, builds
//    the tab control and finally detects and applies the Word version.
//    When the import was canceled in receiveArguments() it just closes the
//    dialog.
//
function initialize()
{
   var tab0, tab1, osxBackdrop;

   // Points the first tabBg.gif and the first tabBgSel.gif reference in each
   // tab layer at the platform specific image (tabBg<suffix>.gif and
   // tabBgSel<suffix>.gif).  RegExp.multiline is switched on for the
   // duration and restored afterwards.
   function swapTabImages(suffix)
   {
      var savedMultiline = RegExp.multiline;
      var plainTab = /tabBg\.gif/;
      var selectedTab = /tabBgSel\.gif/;

      RegExp.multiline = true;
      tab0.innerHTML = tab0.innerHTML.replace(plainTab, "tabBg" + suffix + ".gif");
      tab1.innerHTML = tab1.innerHTML.replace(plainTab, "tabBg" + suffix + ".gif");
      tab0.innerHTML = tab0.innerHTML.replace(selectedTab, "tabBgSel" + suffix + ".gif");
      tab1.innerHTML = tab1.innerHTML.replace(selectedTab, "tabBgSel" + suffix + ".gif");
      RegExp.multiline = savedMultiline;
   }

   DOM = dw.getDocumentDOM('document');

   if (canceledImport)
   {
      window.close();
      return;
   }

   // Find the checkboxes, hook them up, then give them their initial values.
   getCheckboxes();
   initCheckboxes();
   setCheckboxStates();

   tab0 = dwscripts.findDOMObject("Tab0");
   tab1 = dwscripts.findDOMObject("Tab1");

   if (dw.isOSX())
   {
      // Use appropriate background & tabs for Mac OS X.
      dwscripts.findDOMObject("tabBgWin").src = "../Shared/MM/Images/tabBgOSX335x350.gif";
      swapTabImages("OSX");
      osxBackdrop = findObject("tabBgWin");
      osxBackdrop.width = LABEL_OSX_BG_WIDTH;
      window.resizeToContents();
   }
   else if (dw.isXPThemed())
   {
      // Use appropriate background & tabs for WinXP with themes.
      dwscripts.findDOMObject("tabBgWin").src = "../Shared/MM/Images/tabBgWinXP335x290.gif";
      swapTabImages("XP");
   }
   else
   {
      // Use standard background.
      findObject("tabBgWin").src = "../Shared/MM/Images/tabBgWin335x290.gif";
   }

   // Initialize the TabControl.  (Pass in the prefix used for the tab layers)
   T = new TabControl('Tab');

   // Add tab pages.   (Pass the layer name, and the page object)
   T.addPage('basic', new Pg1(LABEL_Basic));
   T.addPage('detailWord2000', new Pg2(LABEL_Detailed));
   T.addPage('detailWord97', new Pg3(LABEL_Detailed));

   T.addGroup("group2000", new Array("basic", "detailWord2000"));
   T.addGroup("group97", new Array("basic", "detailWord97"));

   // Group shown until setWordVersion() below switches to the detected one.
   T.showGroup("group97");

   // Initialize and display the tabs.  (Could pass the name of a page to start on)
   T.start();

   // Determine what version of Word this thing came from, then show the
   // matching options.
   detectWordVersion();
   setWordVersion();
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    setDropDownStates
//
// Purpose
//    Retrieve the settings for the font drop downs (menuSize1..menuSize7)
//    from the MetaFile and select the stored option in each one.
//
// Notes
//    A stored value is applied only when it is a plain decimal integer that
//    is a valid index into fontValues; anything else leaves the drop down
//    as it was.  The drop downs are initialized to default settings in
//    initDropDowns(), so if there are no keys in the metafile they keep
//    something appropriate.
//
function setDropDownStates()
{
   var path = document.URL;
   var metaFile = MMNotes.open(path, false);
   var i, name, font, value, valueStr;

   if (metaFile == 0)
      return;

   // We have some stored settings.  Set the drop downs based on them.
   for (i = 1; i <= 7; i++)
   {
      name = "menuSize" + i;
      font = dwscripts.findDOMObject(name);
      valueStr = MMNotes.get(metaFile, name);

      if (font != null && valueStr)
      {
         value = parseInt(valueStr, 10);

         // Round-trip through toString() so values such as "2x" are
         // rejected.  (The original compared against the toString function
         // itself, which never matched, so nothing was ever restored.)
         if ((valueStr == value.toString()) && (value >= 0) && (value < fontValues.length))
            font.selectedIndex = value;
      }
   }

   // We are done with the file, close it.
   MMNotes.close(metaFile);
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    initDropDowns
//
// Purpose
//    Fill each of the convert font size drop downs (menuSize1..menuSize7)
//    with the fontValues array that is defined in the .htm file, and
//    pre-select a default entry in each.
//
function initDropDowns()
{
   // defaultChoice[n] is the option index pre-selected for font size n.
   // Slot 0 is a placeholder so the sizes can be used as indexes directly.
   var defaultChoice = [null, 6, 7, 3, 2, 1, 0, 0];
   var sizeNum;
   var menu;

   for (sizeNum = 1; sizeNum < defaultChoice.length; sizeNum++)
   {
      menu = dwscripts.findDOMObject("menuSize" + sizeNum);

      // Drop downs that are missing from the dialog are skipped.
      if (menu != null)
         loadSelectList(menu, fontValues, true, defaultChoice[sizeNum]);
   }
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    getCheckboxes
//
// Purpose
//    Search through our dialog and find all of our "checkboxes": the INPUT
//    tags whose onClick handler mentions CB.clicked.  This way, if we add
//    or delete them, we don't have to do as much maintenance.
//
// Returns
//    Always true.  The matching tags are stored in the global CBTags, in
//    reverse document order (last INPUT first).
//
function getCheckboxes()
{
   var inputArr = document.getElementsByTagName("INPUT");
   var idx, tag, click;

   CBTags = new Array();

   // Walk from the end so CBTags keeps the order the original pop() loop
   // produced, without emptying (or requiring pop() on) the tag list.
   for (idx = inputArr.length - 1; idx >= 0; idx--)
   {
      tag = inputArr[idx];
      click = tag.getAttribute("onClick");

      if (click != null && click.match(/CB\.clicked/))
      {
         // This is a checkbox; add it to the list.
         CBTags.push(tag);
      }
   }

   return true;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    initCheckboxes
//
// Purpose
//    Create and hook up the hierarchical checkboxes.  Note, the hierarchy is
//    stored in the "parents" attribute on the checkbox in the HTML.  This
//    way, the HTML file specifies the relationship and the javascript
//    doesn't care.  Saves us maintenance time when changing the checkboxes.
//
// Notes
//    Replaces the global CB with a new CheckboxSet and registers every tag
//    in CBTags with it.
//
function initCheckboxes()
{
   var i;
   var parents;

   CB = new CheckboxSet();

   // Add the "parent" checkboxes first because they need to exist
   // for the child checkboxes to name them as a parent.  Parent
   // checkboxes have no "parents" attribute (or an empty one).
   for (i = 0; i < CBTags.length; i++)
   {
      parents = CBTags[i].getAttribute("parents");

      if (parents == null || parents == "")
         CB.addCheckbox(CBTags[i].getAttribute("name"));
   }

   // Now that all of the parent checkboxes have been registered,
   // we can register the child checkboxes.
   for (i = 0; i < CBTags.length; i++)
   {
      parents = CBTags[i].getAttribute("parents");

      if (parents != null && parents != "")
         CB.addCheckbox(CBTags[i].getAttribute("name"), parents);
   }
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    setCheckboxStates
//
// Purpose
//    Set the checkbox states based on the saved defaults, or if we don't
//    have any saved defaults, set the checkboxes to our hard coded
//    defaults (every option turned on).
//
function setCheckboxStates()
{
   var i;

   if (setCheckboxStatesFromSavedDefaults())
      return;

   // Default settings.  Turn all options on by default.
   for (i = 0; i < CBTags.length; i++)
      CB.check(CBTags[i].getAttribute("name"), true);
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    setCheckboxStatesFromSavedDefaults
//
// Purpose
//    Check every checkbox whose name appears as a key in the saved
//    settings metafile for this dialog.
//
// Returns
//    true if saved settings were found and applied.  false if the metafile
//    could not be opened or holds no keys, in which case the caller should
//    set some defaults itself.
//
function setCheckboxStatesFromSavedDefaults()
{
   var notesHandle = MMNotes.open(document.URL, false);
   var savedKeys;
   var keyIdx, tagIdx;
   var boxName;

   // No settings to read.
   if (notesHandle == 0)
      return false;

   savedKeys = MMNotes.getKeys(notesHandle);

   // If there aren't any keys in the metafile, use the default settings.
   if (savedKeys.length == 0)
   {
      MMNotes.close(notesHandle);
      return false;
   }

   // A key only has to be present to turn its checkbox on.
   for (keyIdx = 0; keyIdx < savedKeys.length; keyIdx++)
   {
      for (tagIdx = 0; tagIdx < CBTags.length; tagIdx++)
      {
         boxName = CBTags[tagIdx].getAttribute("name");

         if (savedKeys[keyIdx] == boxName)
         {
            CB.check(boxName, true);
            break;
         }
      }
   }

   // We are done with the file, close it.
   MMNotes.close(notesHandle);

   return true;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    finish
//
// Purpose
//    We are done.  When the show-log option is on, write a summary line for
//    every counter that recorded work (or a single "did nothing" line when
//    none did) and show the log.  The dialog window is closed either way.
//
function finish()
{
   // Number of summary lines written after the header.
   var notesWritten = 0;

   if (!doShowLog())
   {
      window.close();
      return;
   }

   MM_note(MSG_TrcSummaryHeader);

   if (gRemoveMetaTags > 0)
   {
      MM_note(MSG_TrcRemoveMetaTags, gRemoveMetaTags);
      notesWritten++;
   }

   if (gRemoveWordXML > 0)
   {
      MM_note(MSG_TrcRemoveWordXML, gRemoveWordXML);
      notesWritten++;
   }

   if (gRemoveConditionals > 0)
   {
      MM_note(MSG_TrcRemoveConditionals, gRemoveConditionals);
      notesWritten++;
   }

   if (gRemoveEmptyParas > 0)
   {
      MM_note(MSG_TrcRemoveEmptyParas, gRemoveEmptyParas);
      notesWritten++;
   }

   if (gRemoveMargins > 0)
   {
      MM_note(MSG_TrcRemoveMargins, gRemoveMargins);
      notesWritten++;
   }

   if (gRemoveInlineCSS > 0)
   {
      MM_note(MSG_TrcRemoveInlineCSS, gRemoveInlineCSS);
      notesWritten++;
   }

   if (gRemovemsoStyle > 0)
   {
      MM_note(MSG_TrcRemovemsoStyle, gRemovemsoStyle);
      notesWritten++;
   }

   if (gRemoveNonCSS > 0)
   {
      MM_note(MSG_TrcRemoveNonCSS, gRemoveNonCSS);
      notesWritten++;
   }

   if (gRemoveTableCSS > 0)
   {
      MM_note(MSG_TrcRemoveTableCSS, gRemoveTableCSS);
      notesWritten++;
   }

   if (gRemoveUnusedCSS > 0)
   {
      MM_note(MSG_TrcRemoveUnusedCSS, gRemoveUnusedCSS);
      notesWritten++;
   }

   if (gFontsConverted > 0)
   {
      MM_note(MSG_TrcFontsConverted, gFontsConverted);
      notesWritten++;
   }

   if (gNestingFixed > 0)
   {
      MM_note(MSG_TrcNestingFixed, gNestingFixed);
      notesWritten++;
   }

   if (gBackgroundSet != "")
   {
      MM_note(MSG_TrcBackgroundSet, gBackgroundSet);
      notesWritten++;
   }

   // Source formatting is a flag, so its message takes no count.
   if (gSourceFormatted > 0)
   {
      MM_note(MSG_TrcSourceFormatted);
      notesWritten++;
   }

   if (notesWritten == 0)
      MM_note(MSG_TrcDidNothing);

   MM_showLog();

   window.close();
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    saveSettings
//
// Purpose
//    Save the options that the user has selected so that the next time
//    they use this dialog, it will have their last settings.
//
// Notes
//    Each checked checkbox is stored as a key with the value "1"; unchecked
//    ones are simply absent.  The selected index of every font size drop
//    down that exists is stored under its menuSizeN name.  If the metafile
//    cannot be opened an alert is shown and nothing is saved.
//
function saveSettings()
{
   var path = document.URL;
   var metaFile = MMNotes.open(path, true);
   var i, name, font;

   if (metaFile == 0)
   {
      alert(wrapTextForAlert(MSG_MetaFileError, 80));
      return;
   }

   // Make sure the meta file does not contain stale information.
   clearMetaFile(metaFile);

   // Now set a key for each option that is on.
   for (i = 0; i < CBTags.length; i++)
   {
      name = CBTags[i].getAttribute("name");
      if (CB.isChecked(name))
         MMNotes.set(metaFile, name, "1");
   }

   // Now, save the state of the "convert fonts" things.
   for (i = 1; i <= 7; i++)
   {
      name = "menuSize" + i;
      font = dwscripts.findDOMObject(name);

      if (font != null)
         MMNotes.set(metaFile, name, font.selectedIndex + "");
   }

   MMNotes.close(metaFile);
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    clearMetaFile
//
// Purpose
//    Remove every key from the metafile so that we don't have stale info
//    in there.  Does nothing when the handle is 0.
//
// Arguments
//    metaFile - handle returned by MMNotes.open().
//
function clearMetaFile(metaFile)
{
   var keys;
   var i;

   if (metaFile == 0)
      return;

   keys = MMNotes.getKeys(metaFile);

   for (i = 0; i < keys.length; i++)
      MMNotes.remove(metaFile, keys[i]);
}

// ----------- Autodetection routines ----------------

/////////////////////////////////////////////////////////////////////////////
// Function
//    detectWordVersion
//
// Purpose
//    Find out what version of Word the document was generated by.  We do
//    this so that the user doesn't need to worry about it.  The result is
//    stored in gWordVersion, gFoundVersion records whether it was really
//    detected, and the version drop down is set to match.
//
// Returns
//    The detected version (97 or 2000), or 2000 when nothing was detected.
//
function detectWordVersion()
{
   var versionMenu;
   var i;

   // Init gWordVersion
   gWordVersion = -1;

   // This will set 'gWordVersion' if it finds anything
   findVersionInMetaTag();

   if (gWordVersion == -1)
   {
      // We could not determine the version of Word used to generate this
      // document.  Default to Word 2000.
      gWordVersion = 2000;
      gFoundVersion = false;
   }
   else
   {
      gFoundVersion = true;
   }

   // Swap the "detecting" placeholder for the version drop down.
   dwscripts.findDOMObject("detecting").visibility = 'hidden';
   dwscripts.findDOMObject("wordVersion").visibility = 'visible';

   // Set the dropdown to have what we have detected.  Option values are
   // strings while gWordVersion is a number, hence the loose comparison.
   versionMenu = dwscripts.findDOMObject("selectWordVersion");
   for (i = 0; i < versionMenu.options.length; i++)
   {
      if (versionMenu.options[i].value == gWordVersion)
      {
         versionMenu.selectedIndex = i;
         break;
      }
   }

   return gWordVersion;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    findVersionInMetaTag
//
// Purpose
//    This function is the meat of finding the version of Word used to
//    generate the HTML.  We look at the GENERATOR / ORIGINATOR meta tags
//    and map the Word version named in their content onto one of the two
//    cleanup flavours: "Word 97", "Word 81" and "Word 98" select 97,
//    "Word 9" and "Word 10" select 2000.
//
// Returns
//    false as soon as a version was recognized (gWordVersion is then set),
//    true when no meta tag identified a version (gWordVersion untouched).
//
function findVersionInMetaTag()
{
   var metaArr = DOM.getElementsByTagName("META");
   var i, tag, name, upperName, content;

   for (i = 0; i < metaArr.length; i++)
   {
      tag = metaArr[i];

      name = tag.getAttribute("NAME");
      if (!name)
         continue;

      upperName = name.toUpperCase();
      if (upperName != "GENERATOR" && upperName != "ORIGINATOR")
         continue;

      content = tag.getAttribute("CONTENT");
      if (content == null)
         continue;

      // The 97-family patterns must be tried first: "word 9" would also
      // match "word 97" and "word 98".
      if (content.search(/word (97|81|98)/i) >= 0)
      {
         gWordVersion = 97;
         return false;
      }

      if (content.search(/word (9|10)/i) >= 0)
      {
         gWordVersion = 2000;
         return false;
      }
   }

   return true;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    setWordVersion
//
// Purpose
//    This gets called when the user selects a different version of Word
//    from the drop down list.  This grabs the selected value from the
//    drop down and shows the appropriate options by hiding or showing
//    the different layers.
//
function setWordVersion()
{
   version = getVersion();

   if(version != gWordVersion && gFoundVersion)
   {
      // The user is trying to select the options for a version of
      // word that does not match what we think it is.  Allow them
      // to to do this, but warn them that in doing so the import
      // may not work since the algorithms for the different versions
      // are different.

      alert(wrapTextForAlert(MSG_DiffWordVersion, 80));
   }

   switch(version)
   {
      case "2000":
         T.showGroup("group2000");
         break;

      case "97":
         T.showGroup("group97");
         break;

      default:
         alert(wrapTextForAlert(MSG_Error, 80));
         break;
   }
   T.refresh();
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    getVersion
//
// Purpose
//    Returns the value of the option currently selected in the Word
//    version drop down, or DEFAULT_WORD_VERSION when the drop down cannot
//    be found (for example when running without the dialog).
//
function getVersion()
{
  var versionMenu = dwscripts.findDOMObject("selectWordVersion");

  if (!versionMenu)
    return DEFAULT_WORD_VERSION;

  return versionMenu.options[versionMenu.selectedIndex].value;
}



// ----- Check functions ----------------------------------------------------

/////////////////////////////////////////////////////////////////////////////
// Returns the state of the "remove meta/link" option that belongs to the
// currently selected Word version, or false for an unrecognized version.
function doRemoveMetaLink()
{
   var wordVersion = getVersion();

   if (wordVersion === "2000")
      return CB.isChecked("removeMetaLink2000_detail");

   if (wordVersion === "97")
      return CB.isChecked("removeMetaLink97_detail");

   return false;
}


/////////////////////////////////////////////////////////////////////////////
// Returns the state of the "convert size N" checkbox for the given font
// size.  size must be one of the strings "1" through "7"; any other value
// (including numbers) yields false without consulting the checkboxes.
function doConvertSize(size)
{
   var isSingleDigit = (typeof size == "string" && size.length == 1);

   if (!isSingleDigit || size < "1" || size > "7")
      return false;

   return CB.isChecked("convertSize" + size + "_detail");
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    getDesiredFontSize
//
// Purpose
//    Given a size, find out what the user has specified to change that
//    size to.  This queries the dropdown box (menuSize<size>) associated
//    with the given size.
//
// Returns
//    The value of the option selected in that dropdown, or "-1" when size
//    is not a single character in the range '1'..'7' or the dropdown does
//    not exist.
//
function getDesiredFontSize(size)
{
   var menu;

   if (size.length != 1 || size[0] < '1' || size[0] > '7')
      return "-1";

   menu = dwscripts.findDOMObject("menuSize" + size);

   if (menu == null || menu == '')
      return "-1";

   return menu.options[menu.selectedIndex].value;
}


/////////////////////////////////////////////////////////////////////////////
// Option checks.  Each function reports whether one cleanup option is on by
// asking the checkbox group CB for the state of the named checkbox.
/////////////////////////////////////////////////////////////////////////////

// State of the removeXMLHTML2000_detail checkbox.
function doRemoveXMLFromHTML()
{
   return CB.isChecked("removeXMLHTML2000_detail");
}

// State of the removeXMLmarkup2000_detail checkbox.
function doRemoveXMLMarkup()
{
   return CB.isChecked("removeXMLmarkup2000_detail");
}

// State of the removeIf2000_detail checkbox.
function doRemoveIfs()
{
   return CB.isChecked("removeIf2000_detail");
}

// State of the removeEmptyPara2000_detail checkbox.
function doRemoveEmptyParas()
{
   return CB.isChecked("removeEmptyPara2000_detail");
}

// State of the removeInlineCSS2000_detail checkbox.
function doRemoveInlineCSS()
{
   return CB.isChecked("removeInlineCSS2000_detail");
}

// State of the removemsoStyle2000_detail checkbox.
function doRemoveMSOStyleAttr()
{
   return CB.isChecked("removemsoStyle2000_detail");
}

// State of the removeNonCSS2000_detail checkbox.
function doRemoveNonCSSDeclaration()
{
   return CB.isChecked("removeNonCSS2000_detail");
}

// State of the removeCSSTable2000_detail checkbox.
function doRemoveCSSFromTables()
{
   return CB.isChecked("removeCSSTable2000_detail");
}

// Removing unused styles is switched off: this always answers false and
// never consults the removeUnusedCSS2000_detail checkbox.
function doRemoveUnusedStyles()
{
   return false;
}

// State of the fixInvalidNest_basic checkbox.
function doFixInvalidNesting()
{
   return CB.isChecked("fixInvalidNest_basic");
}

// State of the applyFormatting_basic checkbox.
function doApplySourceFormatting()
{
   return CB.isChecked("applyFormatting_basic");
}

// State of the showLog_basic checkbox.
function doShowLog()
{
   return CB.isChecked("showLog_basic");
}

// Settings are always saved; there is no checkbox for this.
function doSaveSettings()
{
   return true;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    ProcessWord2000
//
// Purpose
//    This is the main function for doing Word 2000 processing on the
//    document.  Every cleanup step runs only when its option is enabled,
//    always in the order listed here; formatCSS() runs unconditionally at
//    the end.
//
function ProcessWord2000()
{
   // Word-specific XML and conditional markup.
   if (doRemoveXMLFromHTML()) {
      removeXMLFromHTML();
   }
   if (doRemoveXMLMarkup()) {
      removeXMLMarkup();
   }
   if (doRemoveIfs()) {
      removeIfs();
   }

   // Style attributes and empty paragraphs.
   if (doRemoveMSOStyleAttr()) {
      removeMSOStyleAttr();
   }
   if (doRemoveEmptyParas()) {
      removeEmptyParas();
   }

   // CSS cleanup.
   if (doRemoveCSSFromTables()) {
      removeCSSFromTables();
   }
   if (doRemoveNonCSSDeclaration()) {
      removeNonCSSDeclaration();
   }
   if (doRemoveInlineCSS()) {
      removeInlineCSS();
   }
   if (doRemoveUnusedStyles()) {
      removeUnusedStyles();
   }

   // We are done.  Do some general cleanup.
   formatCSS();
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    RemoveEmptyTags
//
// Purpose
//    Strip tags that no longer carry anything useful:
//      1. the tags collected by findEmptyTags() inside the body are
//         replaced by their contents;
//      2. SPAN, then B, tags whose only child is a <br> are replaced by
//         that <br>;
//      3. SPAN tags without content are removed;
//      4. B tags without content are removed.
//
function RemoveEmptyTags()
{
   var emptyTags = new Array();
   var tag;
   var spans, btags;
   var i;

   // Replaces every tag named parentTagName (upper case) whose single
   // child is a <br> with its contents.  The <br> list is fetched again on
   // each call because the previous call may have rewritten the document.
   function unwrapLoneBreakParents(parentTagName)
   {
      var brTags = DOM.getElementsByTagName('br');
      var k, wrapper;

      for (k = brTags.length - 1; k >= 0; k--)
      {
         wrapper = brTags[k].parentNode;
         if (wrapper.tagName == parentTagName && wrapper.childNodes.length == 1)
            wrapper.outerHTML = wrapper.innerHTML;
      }
   }

   // First find all of the tags that are empty inside the body.
   traverse(DOM.body, findEmptyTags, null, null, emptyTags);

   // Now remove them (a tag may already be gone if an enclosing tag was
   // rewritten, hence the nodeExists check).
   while ((tag = emptyTags.pop()) != null)
   {
      if (dw.nodeExists(tag))
         tag.outerHTML = tag.innerHTML;
   }

   // Now deal with tags that might have attributes but no contents, or
   // that are wrapped around nothing but something unexpected, like a <br>.
   unwrapLoneBreakParents('SPAN');
   unwrapLoneBreakParents('B');

   // Find all the SPAN tags that may have style attributes but
   // no content, and remove them.
   spans = DOM.getElementsByTagName('span');
   for (i = spans.length - 1; i >= 0; i--)
   {
      if (spans[i].innerHTML == "")
      {
         spans[i].outerHTML = spans[i].innerHTML;
      }
      else if (!spans[i].innerHTML.match(/[^<][\w]*/))
      {
         // The original used "==" here, a comparison whose result was
         // discarded, so such spans were never removed.
         spans[i].outerHTML = "";
      }
   }

   // Word usually nests SPAN tags inside B tags, so once
   // all the empty SPAN tags are removed, the B tags may
   // also have no content.  Remove those.
   btags = DOM.getElementsByTagName('b');
   for (i = btags.length - 1; i >= 0; i--)
   {
      if (btags[i].innerHTML == "")
         btags[i].outerHTML = btags[i].innerHTML;
   }
}

/////////////////////////////////////////////////////////////////////////////
// Function
//    findEmptyTags
//
// Purpose
//    Callback handed to traverse() by RemoveEmptyTags().  Appends tag to
//    emptyTags when it is a DIV, SPAN or FONT whose opening tag carries no
//    attributes, or a FONT whose contents are empty or a single space.
//
// Returns
//    Always true.
//
function findEmptyTags(tag, emptyTags)
{
   var upperName = tag.tagName.toUpperCase();
   var bareOpenTag;

   // Add new empty tags to be removed here.
   if (upperName == "DIV" || upperName == "SPAN" || upperName == "FONT")
   {
      // The markup has to START with the attribute-less opening tag; an
      // attribute-less tag nested further inside must not count.
      bareOpenTag = new RegExp("^<" + upperName + ">", "i");

      if (bareOpenTag.test(tag.outerHTML))
      {
         emptyTags.push(tag);

         // Return immediately so tag does not get added to list
         // twice, then deleted twice, which is an internal error.
         return true;
      }
   }

   if (upperName == "FONT" && (!tag.innerHTML || tag.innerHTML == " "))
      emptyTags.push(tag);

   return true;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    ProcessWord97
//
// Purpose
//    This is the main function for doing Word 97 processing on the
//    document: optional meta/link removal, font size conversion (always),
//    then optional repair of invalid nesting.
//
function ProcessWord97()
{
   if (doRemoveMetaLink()) {
      removeMetaLink();
   }

   // Each size is checked against its own option inside the conversion.
   convertFontSizes();

   if (doFixInvalidNesting()) {
      fixInvalidNesting();
   }
}


// Runs convertFontSizeHandler over the whole document.  Empty <font> tags
// that are left behind get stripped later by the post processing.
function convertFontSizes()
{
   var docRoot = DOM.documentElement;

   traverse(docRoot, convertFontSizeHandler);
}


//////////////////////////////////////////////////////////////////////////////
// Function
//    convertFontSizeHandler
//
// Purpose
//    Callback that searches for font tags to convert.  For a FONT tag whose
//    size is enabled for conversion, the target chosen by the user is
//    applied:
//       "-1"        leave the tag alone
//       "0"         drop the size attribute (use the default size)
//       "1".."7"    set the size attribute to that value
//       "h1".."h6"  drop the size attribute and wrap the tag in that
//                   heading, unless the tag already sits inside a
//                   paragraph or heading
//    gFontsConverted is incremented for every tag that was changed.
//
// Returns
//    Always true, so the traversal keeps going.
//
function convertFontSizeHandler(tag)
{
   var size;
   var desiredSize;
   var html;   // local on purpose: the original leaked this as a global

   if (tag.tagName.toUpperCase() != "FONT")
      return true;

   size = tag.getAttribute("size");
   if (size == null || !doConvertSize(size))
      return true;

   desiredSize = getDesiredFontSize(size);
   switch (desiredSize)
   {
      case "-1":  // don't change anything
         break;

      case "0":  // use default size
         tag.removeAttribute("size");
         gFontsConverted++;
         break;

      case "1":
      case "2":
      case "3":
      case "4":
      case "5":
      case "6":
      case "7":
         tag.setAttribute("size", desiredSize);
         gFontsConverted++;
         break;

      case "h1":
      case "h2":
      case "h3":
      case "h4":
      case "h5":
      case "h6":
         // If this font tag is not contained within another block tag,
         // we can convert it to a header.  If the font is contained
         // within a block tag, it is an inline font size change.  We
         // don't want to convert those to headers since headers create
         // vertical white space.
         if (!isInsideTag(tag, "p,h1,h2,h3,h4,h5,h6"))
         {
            // We remove the size attribute from the <font> tag
            // and wrap the font tag and all its content with
            // the appropriate heading.
            tag.removeAttribute("size");

            html = tag.outerHTML;

            // Strip any internal <p>'s that we might have.  We don't
            // need them since we are converting this to a header.
            // The \b keeps other tags that merely start with "p"
            // (<pre>, <param>, ...) intact.
            html = html.replace(/<\/?p\b[^>]*>/ig, "");

            html = "<" + desiredSize + ">" + html + "</" + desiredSize + ">";
            tag.outerHTML = html;

            gFontsConverted++;

            // Note, we could be leaving behind an empty <font> tag.
            // But, this is OK.  The general post processing will
            // clean these up.
         }
         break;
   }

   // Keep traversing...
   return true;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    fixInvalidNesting
//
// Purpose
//    Word 97 has no clue about HTML structure.  Most HTML documents that
//    it generates have overlapped tags, and invalid nesting structures.
//    This function aims to clean up that mess.
//
//    Note!  This is a very specialized case for Word 97.  This will
//    not fix all general cases of invalid nesting.
//
function fixInvalidNesting()
{
   // Two full passes over the document, in this order:
   //   1. fixHandler       - rebuilds inline formatting inside block tags
   //                         and flags the wrappers it copied.
   //   2. removeMarkedTags - unwraps every tag that was flagged.
   var passes = [fixHandler, removeMarkedTags];

   for(var p = 0; p < passes.length; p++)
      traverse(DOM.documentElement, passes[p]);
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    fixHandler
//
// Purpose
//    Callback for fixing up invalidly nested tags.  This is very specific
//    to how Word 97 generates its HTML.  This will NOT fix any general
//    case of invalid HTML (that problem is actually quite difficult).
//
// Arguments
//    tag          - element currently visited by traverse().
//    nestingFixes - unused; kept so the callback signature is unchanged.
//
// Returns
//    true, so that traverse() keeps walking the document.
//
function fixHandler(tag, nestingFixes)
{
   var tagName = tag.tagName.toUpperCase();

   // Only <p>, <li> and <h1>..<h6> need their content rebuilt.  The
   // heading test is anchored so that only real heading names qualify.
   if(tagName != "P" && tagName != "LI" && !/^H[1-6]$/.test(tagName))
      return true;

   var parent = tag.parentNode;
   var innerMostHTML = tag.innerHTML;
   var html;
   var startTag;   // declared locally; it used to leak into the global scope

   // Walk up through every ancestor.  Each FONT/B/I found on the way is
   // re-created *inside* the block tag and flagged for later removal.
   while(parent != null)
   {
      if(parent.tagName)
      {
         switch(parent.tagName.toUpperCase())
         {
            case "FONT":
            case "B":
            case "I":
               // Drop the marker before reading outerHTML so it is not
               // copied into the start tag we are about to reuse.
               parent.removeAttribute("TO_BE_DELETED");
               html = parent.outerHTML;
               parent.setAttribute("TO_BE_DELETED",true);

               // We use match here to make sure we maintain any tag attributes.
               startTag = html.match(/<[^>]*>/);

               if(startTag != null)
               {
                  innerMostHTML = startTag[0] + innerMostHTML +
                     "</" + parent.tagName + ">";
               }
               break;
         }
      }

      parent = parent.parentNode;
   }

   tag.innerHTML = innerMostHTML;  //actually change the internal tag

   return true;
}


// traverse() callback: replaces any tag carrying the TO_BE_DELETED marker
// with its own content.  Always returns true so the walk continues.
function removeMarkedTags(tag) {
  var flagged = tag.getAttribute("TO_BE_DELETED");

  if (!flagged) {
    return true;
  }

  // Unwrap: keep the children, drop the surrounding tag.
  tag.outerHTML = tag.innerHTML;
  return true;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    removeXMLFromHTML
//
// Purpose
//    Word puts some useless XML markup in the start <html> tag, nuke it.
//
// Notes
//    Every attribute of the start <html> tag is dropped.  The xmlns
//    attribute is put back afterwards, but only if the document had one
//    to begin with (otherwise we would write a bogus xmlns value).
//
function removeXMLFromHTML()
{
   var root = DOM.documentElement;
   var xmlnsVal = root.getAttribute("xmlns");
   var html = root.outerHTML;

   // We have 2 submatches, "<html", everything after "<html" to the ending
   // ">".  We want to throw out everything between "<html" and the end.
   // So, we will just keep $1. Search is case-insensitive to match both
   // <HTML and <html.
   html = html.replace(/(<html)([^>]*)/i, "$1");

   root.outerHTML = html;

   // we don't want to lose the xmlns attribute -- but don't invent one
   if(xmlnsVal != null && xmlnsVal != "")
      root.setAttribute("xmlns", xmlnsVal);
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    removeXMLMarkup
//
// Purpose
//    Word puts some random, useless XML markup in the body.  Strip it.
//
function removeXMLMarkup()
{
   var docRoot = DOM.documentElement;
   var markup = docRoot.outerHTML;

   if(doShowLog())
   {
      // Tally what is about to be stripped.  Only start tags are counted;
      // the matching end tags are removed below but not logged.
      var countedTags = [
         /<o:p>/g,
         /<o:SmartTagType[^>]*>/g,
         /<st1:[\w\s"=]*>/gi
      ];

      for(var c = 0; c < countedTags.length; c++)
      {
         var hits = markup.match(countedTags[c]);
         gRemoveWordXML += (hits == null ? 0 : hits.length);
      }
   }

   // An address smart tag followed by a paragraph break means the next
   // paragraph is the rest of the address: join the two with a <br>
   // before the <st1:> tags themselves are stripped.
   markup = markup.replace(/<\/st1:address>(<\/st1:\w*>)?<\/p>[\n\r\s]*<p[\s\w="']*>/gi, "<br>");

   // <o:p>, </o:p> and <o:SmartTagType ...>
   markup = markup.replace(/<o:p>/g, "");
   markup = markup.replace(/<\/o:p>/g, "");
   markup = markup.replace(/<o:SmartTagType[^>]*>/g, "");

   // <st1:whatever> and </st1:whatever>
   markup = markup.replace(/<st1:[\w\s"=]*>/gi, "");
   markup = markup.replace(/<\/st1:\w*>/gi, "");

   docRoot.outerHTML = markup;

   // Finally, strip the Excel-specific attributes inside every table.
   var tableTags = DOM.getElementsByTagName('table');
   var t = 0;
   while(t < tableTags.length)
   {
      traverse(tableTags[t], removeXattrsInTables);
      t++;
   }

   // If we find any other instances of XML markup, we can add it here.
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    removeIfs
//
// Purpose
//    Word uses many <![if...]> style comments for its own internal
//    purposes, which are useless in HTML.  This function strips those.
//
function removeIfs()
{
   // First blank out the text of every conditional comment...
   traverse(DOM.documentElement, null, null, ifHandler);

   // ...then delete the now-empty comment shells ("<!>", "<!---->", ...)
   // from the serialized document.
   var docRoot = DOM.documentElement;
   var emptyComment = /<!-*>/g;

   docRoot.outerHTML = docRoot.outerHTML.replace(emptyComment, "");
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    ifHandler
//
// Purpose
//    Find those pesky "if" conditionals that Word 2000 puts in the HTML
//    and nuke'em.
//
function ifHandler(comment)
{
   var text = comment.data;

   // A comment is one of Word's conditionals when it contains either
   // "[if " or "[endif".
   var isConditional = (text.search(/\[if /) != -1) ||
                       (text.search(/\[endif/) != -1);

   if(isConditional)
   {
      // Empty the comment and log it.
      comment.data = "";
      gRemoveConditionals++;
   }

   // Keep traversing.
   return true;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    removeMSOStyleAttr
//
// Purpose
//    Microsoft Word uses many custom CSS attributes.  This function hunts
//    them down and removes them.
//
// Notes
//    When logging is on, gRemovemsoStyle is increased by the number of
//    removals.  Each pattern is counted against the HTML as it stands
//    right before that pattern is removed, with the very same regular
//    expression, so the logged number equals what was really stripped.
//
function removeMSOStyleAttr()
{
   var root = DOM.documentElement;
   var html = root.outerHTML;
   var oldMulti = RegExp.multiline;
   var logging;
   var found;

   // Applied in this order; the order matters because the first pattern
   // is a more specific form of the second one.
   var patterns = [
      // mso-*:"SomeStuff"; style attributes
      /mso-[^:]*:"[^"]*";/g,
      // the other mso-* style attributes
      /mso-[^;'"]*;*(\n|\r)*/g,
      // some other Word-only css style attributes
      / style=['"]tab-interval:[^'"]*['"]/g,
      /behavior:[^;'"]*;*(\n|\r)*/g
   ];

   RegExp.multiline = true;
   logging = doShowLog();

   for(var i = 0; i < patterns.length; i++)
   {
      if(logging)
      {
         // replace() cannot report how many substitutions it made, so we
         // count with match() first.
         found = html.match(patterns[i]);
         if(found)
            gRemovemsoStyle += found.length;
      }

      html = html.replace(patterns[i], "");
   }

   root.outerHTML = html;

   RegExp.multiline = oldMulti;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    removeEmptyParas
//
// Purpose
//    Word sets paragraph bottom margins to zero, then inserts empty
//    (containing &nbsp;) paragraphs to maintain the vertical spacing
//    expected.  This is solely for its own purpose and is redundant
//    for HTML.  This function removes margin definitions and removes
//    those pesky empty paragraphs.
//
function removeEmptyParas()
{
   var root = DOM.documentElement;
   var sheets = findStyles();

   // Step 1: drop every margin declaration from each <style> block.
   if(sheets != null)
   {
      while(sheets.length > 0)
      {
         var sheet = sheets.shift();
         var css = sheet.innerHTML;

         sheet.innerHTML = css.replace(/margin[^:]*:[^};\n\r]*;/g, "");
      }
   }

   // Step 2: drop the inline margins on individual tags.
   traverse(root, stripMargins);

   // Step 3: delete the paragraphs that are now empty.
   traverse(root, paraHandler);
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    stripMargins
//
// Purpose
//    Word uses a lot of CSS margin settings in attempt to make the HTML
//    version look exactly like the native Word version.  In general, this
//    is unwanted, so lets remove all this stuff.
//
function stripMargins(tag)
{
   var inline = tag.getAttribute("style");

   // Nothing to do for tags without a style attribute.
   if(inline == null)
      return true;

   // The declarations we throw away: margins, text indents and tab stops.
   var unwanted = [
      /margin[^"';]*;?/g,
      /text-indent[^"';]*;?/g,
      /tab-stops:[^'";]*;?/g
   ];
   var k;

   if(doShowLog())
   {
      // replace() does not say how many substitutions it made, so the
      // occurrences are counted up front with match().
      for(k = 0; k < unwanted.length; k++)
      {
         var hits = inline.match(unwanted[k]);
         gRemoveMargins += (hits == null ? 0 : hits.length);
      }
   }

   for(k = 0; k < unwanted.length; k++)
      inline = inline.replace(unwanted[k], "");

   // Drop the attribute entirely when only whitespace (or nothing) is left.
   if(inline == "" || inline.search(/[^\s]/) == -1)
      tag.removeAttribute("style");
   else
      tag.setAttribute("style", inline);

   return true;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    paraHandler
//
// Purpose
//    Callback that looks for empty <p>'s and deletes them.  After
//    doing some processing removing stuff, we can easily end up
//    with empty paragraphs.  This just cleans up after ourselves.
//
// Arguments
//    tag - element currently visited by traverse().
//
// Returns
//    true, so that traverse() keeps walking the document.
//
function paraHandler(tag)
{
   // Both of these used to be assigned without "var" and therefore
   // leaked into (and could clobber) the global scope.
   var tagName = tag.tagName;
   var text;

   if(tagName.toUpperCase() == "P")
   {
      text = tag.innerHTML;

      // Make sure there are not any content generating HTML tags, this
      // prevents us from removing say, <img> in the next step.
      if(containsContentTags(text))
         return true; // Keep searching in the traverse

      // Ok, we don't have any content tags.  We are safe to strip any
      // other tags (font, b, etc).
      text = text.replace(/<[^>]*>/g, "");

      // Strip whitespace
      text = text.replace(/\s/g, "");

      // Strip &nbsp;s
      text = text.replace(/&nbsp;/g, "");

      // After doing all that, if there is nothing left, this paragraph is empty.
      if(text == "" || text == null)
      {
         gRemoveEmptyParas++;
         tag.outerHTML = "";
      }
   }

   return true;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    containsContentTags
//
// Purpose
//    Given a string that contains some HTML, check to see if we have
//    any tags that generate visible content.
//
// Returns
//    zero (false) if no content generating tags are found.  Non-zero
//    value if anything is found.
//
function containsContentTags(text)
{
   // Start tags of the elements that produce visible content by themselves.
   var contentTags = [
      /<hr/i,
      /<img/i,
      /<input/i,
      /<object/i,
      /<table/i,
      /<textarea/i,
      /<embed/i
   ];
   var found = 0;

   // search() yields -1 on a miss, so "+ 1" turns a miss into 0 and any
   // hit into a positive number; OR-ing them together keeps the result
   // at zero only when nothing matched at all.
   for(var p = 0; p < contentTags.length; p++)
      found |= text.search(contentTags[p]) + 1;

   return found;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    formatCSS
//
// Purpose
//    When we modify or remove stuff from the <style> block we leave it in
//    a not so pretty state.  This will clean it up so it looks nice.
//
// Notes
//    Works on every <style> block returned by findStyles().  The
//    line-anchored expressions carry an explicit "m" flag; the legacy
//    static RegExp.multiline switch is still set and restored as before.
//
function formatCSS()
{
   var styles = findStyles();
   var style;
   var html;

   var oldMulti = RegExp.multiline;
   while (styles != null && styles.length > 0)
   {
      style = styles.shift();
      html = style.innerHTML;
      // We need multiline turned on for this.
      RegExp.multiline = true;

      // Lets just get rid of those comments that Word puts in there.
      // The match is non-greedy: with two comments on the same line a
      // greedy ".*" would also swallow the rules between them.
      html = html.replace(/\/\*.*?\*\//g, "");

      // Clean up the whitespace between the start and end brackets.
      html = html.replace(/\s*\}/g, "}");
      html = html.replace(/\{\s*/g, "{");

      // Make sure anything that is indented is indented only one tab.
      html = html.replace(/^\t+/gm, "\t");

      // Make sure the style names are on their own line.
      html = html.replace(/\}/g, "}\n");

      // This will delete blank lines in the style declaration
      html = html.replace(/^[ \t]*(\r|\n)+/gm, "");

      style.innerHTML = html;
   }
   // Set it back
   RegExp.multiline = oldMulti;
}



/////////////////////////////////////////////////////////////////////////////
// Function
//    removeCSSFromTables
//
// Purpose
//    Word tends to go overboard with CSS with tables.  Almost all of it
//    is used to maintain the "Word appearance" and is generally undesirable
//    for use with HTML.  So, this function just strips it all.
//
function removeCSSFromTables()
{
  // Walk each <table> subtree and let convertCSSInTables clean up the
  // table, row and cell tags it finds.
  var tableTags = DOM.getElementsByTagName('table');
  var t = 0;

  while (t < tableTags.length) {
    traverse(tableTags[t], convertCSSInTables);
    t++;
  }
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    convertCSSInTables
//
// Purpose
//    We want to strip the CSS applied to tables and their cells.
//    However, some of these styles can be converted into HTML
//    attributes.  This function converts any styles we can to
//    HTML attributes and then removes the style attribute.
//    Also, remove class attributes from tables, rows & cells.
//
function convertCSSInTables(tag)
{
   var upperName = tag.tagName.toUpperCase();
   var inlineCSS;
   var found;

   // Part 1: the style attribute.  TABLE and TD each have one style
   // that is carried over into an HTML attribute; TR has none.
   switch(upperName)
   {
      case "TABLE":
         inlineCSS = tag.getAttribute("style");
         if(inlineCSS != null && inlineCSS != "")
         {
            // border-color -> bordercolor
            found = inlineCSS.match(/border-color: *([^;'"]*)/);
            if(found != null)
               tag.setAttribute("bordercolor", found[1]);

            tag.removeAttribute("style");
            gRemoveTableCSS++;
         }
         break;

      case "TR":
         if(tag.getAttribute("style"))
         {
            tag.removeAttribute("style");
            gRemoveTableCSS++;
         }
         break;

      case "TD":
         inlineCSS = tag.getAttribute("style");
         if(inlineCSS != null && inlineCSS != "")
         {
            // background -> bgcolor
            found = inlineCSS.match(/background: *([^;'"]*)/);
            if(found != null)
               tag.setAttribute("bgcolor", found[1]);

            tag.removeAttribute("style");
            gRemoveTableCSS++;
         }
         break;
   }

   // Part 2: the class attribute, for tables, rows and both cell types.
   switch(upperName)
   {
      case "TABLE":
      case "TR":
      case "TD":
      case "TH":
         var cssClass = tag.getAttribute("class");
         if(cssClass != null && cssClass != "")
         {
            tag.removeAttribute("class");
            gRemoveTableCSS++;
         }
         break;
   }

   return true;
}



/////////////////////////////////////////////////////////////////////////////
// Function
//    removeXattrsInTables
//
// Purpose
//    Remove excel attribs from table, rows & cells.
//
function removeXattrsInTables(tag)
{
   var upperName = tag.tagName.toUpperCase();

   // Only tables, rows and cells are of interest.
   if(upperName != "TABLE" && upperName != "TR" &&
      upperName != "TD" && upperName != "TH")
      return true;

   // The Excel attributes to strip, checked in this order.
   var excelAttrs = ["x:num", "x:str"];

   for(var a = 0; a < excelAttrs.length; a++)
   {
      var value = tag.getAttribute(excelAttrs[a]);

      // Only non-empty attributes are removed (and logged).
      if(value != null && value != "")
      {
         tag.removeAttribute(excelAttrs[a]);
         gRemoveWordXML++;
      }
   }

   return true;
}

/////////////////////////////////////////////////////////////////////////////
// Function
//    combineLists
//
// Purpose
//    Joins two arrays, either of which may be null.
//
// Returns
//    list2 when list1 is null, list1 when list2 is null, otherwise a new
//    array holding the elements of list1 followed by those of list2.
//
function combineLists(list1, list2)
{
   if (list1 == null)
      return list2;
   // This must be a comparison.  The former "list2 = null" assignment was
   // always false and made concat() append a null element to the result.
   if (list2 == null)
      return list1;
   return list1.concat(list2);
}

/////////////////////////////////////////////////////////////////////////////
// Function
//    removeNonCSSDeclaration
//
// Purpose
//    Word puts a number of non-standard CSS style declarations in the
//    style block.  This will strip them, and remove any references to
//    them.
//    
//    While we're here, we're also going to remove any empty style 
//    declarations, and the references to them.
//
function removeNonCSSDeclaration()
{
   // invalidList     - selector names collected so far; declared once, so
   //                   it keeps growing across all <style> blocks.
   // fullInvalidList - what is later used to strip class= references from
   //                   the document.
   var invalidList = new Array();
   var fullInvalidList = new Array();
   var styles = findStyles();
   var style;
   var html;
   var htmlLeft, htmlRight;
   var result;

   // The "^" anchored expressions below have no "m" flag; they depend on
   // the static RegExp.multiline switch to match at line starts.
   var oldMulti = RegExp.multiline;
   while (styles != null && styles.length > 0)
   {
      RegExp.multiline = true;
      style = styles.shift();
      html = style.innerHTML;
      // First, we need to get a list of all the invalid style names.  This
      // way, we can go through the file and remove the references to them.

      // The first type of invalid style selector looks like this: @foo bar
      var partialInvalidList = html.match(/^[\s]*@[^\s]*\s\w*/g);
      var atMatches = partialInvalidList != null;
      invalidList = combineLists(invalidList, partialInvalidList);

      // now look for selectors like "v\:*"
      partialInvalidList = html.match(/^.\\:[^\s]*\s\w*/g);
      var charSlashMatches = partialInvalidList != null;
      invalidList = combineLists(invalidList, partialInvalidList);
      
      // now "MsoNormal" selectors
      // (only the fact that one exists is used; a single "MsoNormal"
      // entry is recorded no matter how many selectors matched)
      partialInvalidList = html.match(/\s*\w*\.MsoNormal([A-Za-z]+)?/g);
      var msoNormMatches = false;
      if (partialInvalidList != null)
      {
         msoNormMatches = true;
         invalidList.push("MsoNormal");
      }

      // now empty rules
      // (these are not added to invalidList here; only the flag is kept)
      partialInvalidList =  html.match(/.*\{[\s]*}/g);
      var emptyMatches = partialInvalidList != null;

      if(invalidList != null)
      {
         // NOTE(review): the entries are copied into fullInvalidList before
         // the loop below normalizes them, so fullInvalidList appears to keep
         // the raw matches (e.g. including the "@xxx " prefix) -- confirm
         // that this is what the class= clean-up at the end expects.
         // NOTE(review): invalidList is never reset per <style> block, so
         // entries from earlier blocks look like they are added and counted
         // again here -- confirm.
         fullInvalidList = combineLists(fullInvalidList, invalidList);
         // Log the number of invalid CSS styles we find.
         gRemoveNonCSS += invalidList.length;

         // Reduce each raw match to a bare name: drop the "@xxx " prefix,
         // line breaks, a leading dot and any curly braces.
         for(var i = 0; i < invalidList.length; i++)
         {
            invalidList[i] = invalidList[i].replace(/@\w* /g, "");
            invalidList[i] = invalidList[i].replace(/(\r|\n)*/g, "");
			invalidList[i] = invalidList[i].replace(/^\./,"");
			invalidList[i] = invalidList[i].replace(/\{/g, "");
			invalidList[i] = invalidList[i].replace(/}/g, "");
         }
      }

      // This removes the invalid "@" CSS declarations, if any
      // [vm 9/06] note: we remove informative at-rules, with style-declaration blocks,
      // such as @page and @list, but leave declarative at-rules such as @import.
      if (atMatches)
         html = html.replace(/^[\s]*@[^;]*\{[^\}]*\}/g, "");

      // remove the ones like "v\:*", if any
      if (charSlashMatches)
         html = html.replace(/^.\\:[^\}]*\}/g, "");

      // remove the MsoNormal rules, if any
      if (msoNormMatches)
         html = html.replace(/\w*\.MsoNormal[^\}]*\}/g, "");

      // remove empty rules, if any, including their selectors
      if (emptyMatches)
         html = html.replace(/(\}|\;)*\s*.*\s*\{[\s]*}/g, "$1");

      // Now we need to go and clean out everything that referenced the
      // invalid styles.  First lets finish cleaning the style block.
      if(invalidList != null)
      {
         var regx = new RegExp();
         for(var j = 0; j < invalidList.length; j++)
         {
            // Find stuff of the form "div.Section1 ... { ... }"
            // NOTE(review): the name is inserted into the pattern without
            // escaping, so regex metacharacters in it are interpreted.
            regx.compile("^.*\\."+invalidList[j]+"[^\\}]*\\}", "g");
            // NOTE(review): html shrinks inside this loop while the global
            // regex keeps its lastIndex, so a match that directly follows
            // a removed one might be skipped -- confirm.
            while((result = regx.exec(html)) != null)
            {
               htmlLeft = html.substring(0, result.index);
               htmlRight = html.substring(result.index + result[0].length);
               // Remove the match
               html = htmlLeft + htmlRight;
            }
         }

         style.innerHTML = html;
         // Now remove any empty style blocks
         RegExp.multiline = true; // have to set it yet again
         regx.compile("\\.?\\w*\\.([^\\{]*)\\{([^\\}]*)\\}", "g");

         // result[1] is the text after the dot, result[2] the rule body;
         // a rule whose body is only whitespace is cut out of html.
         while((result = regx.exec(html)) != null){
           htmlLeft = "";
           htmlRight = "";
           result[1] = result[1].replace(/[\r\n\s]*/g,"");
           result[2] = result[2].replace(/[\r\n\s]*/g,"");
           if (result[2] == ""){
             htmlLeft = html.substring(0, result.index);
             htmlRight = html.substring(result.index + result[0].length);
             // Added after fullInvalidList was extended for this block.
             invalidList.push(result[1]);           

             html = htmlLeft + htmlRight;
           }
         }
         style.innerHTML = html;
      }
   }

   // restore it
   RegExp.multiline = oldMulti;
  
   // OK, we cleaned up the style block, now we just need to go
   // through the rest of the document and remove any references
   // to the invalid CSS classes.
   if(fullInvalidList != null)
   {
      var root = DOM.documentElement;
      var html = root.outerHTML;
      var regx = new RegExp();

      for(var k = 0; k < fullInvalidList.length; k++)
      {
         // Matches class=Name and class="Name" (with an optional leading
         // space); \1 requires the closing quote to mirror the opening one.
         regx.compile(' ?class=((")?)'+fullInvalidList[k]+'\\1', 'g');
         while((result = regx.exec(html)) != null)
         {
            htmlLeft = html.substring(0, result.index);
            htmlRight = html.substring(result.index + result[0].length);

            html = htmlLeft + htmlRight;
         }
      }

      root.outerHTML = html;
   }
}



/////////////////////////////////////////////////////////////////////////////
// Function
//    removeMetaLink
//
// Purpose
//    Removes those nasty Microsoft-only Meta tags.
//
function removeMetaLink()
{
   // rel values of the <link> tags that only mean something to Word.
   var wordOnlyRels = ["File-List", "Edit-Time-Data", "themeData", "colorSchemeMapping"];
   var nodes;
   var node;
   var relValue;
   var r;

   // META tags: anything mentioning Word, Microsoft or MSHTML goes.
   // Tags are taken from the end of the list, so the last one is
   // handled first.
   nodes = DOM.getElementsByTagName("META");
   while (nodes.length > 0)
   {
      node = nodes.pop();

      if(node.outerHTML.match(/(word|microsoft|mshtml)/i) == null)
         continue;

      node.outerHTML = "";
      gRemoveMetaTags++;
   }

   // LINK tags: removed when their rel is one of the Word-only values.
   // Same back-to-front order as above.
   nodes = DOM.getElementsByTagName("LINK");
   while (nodes.length > 0)
   {
      node = nodes.pop();
      relValue = node.getAttribute("rel");

      for (r = 0; r < wordOnlyRels.length; r++)
      {
         if (relValue == wordOnlyRels[r])
         {
            node.outerHTML = "";
            gRemoveMetaTags++;
            break;
         }
      }
   }

   return true;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    removeInlineCSS
//
// Purpose
//    Word 2000 loves to declare "normal" styles and then apply them to
//    every block in the document.  Let's just set it on the body and
//    remove it from everything else.
//    vm - we now just remove MsoNormal styles in removeNonCSSDeclaration(),
//    and don't set Normal on the body.
//    This doesn't really remove "inline css" after all.
//
// Status
//    Deliberately a no-op.  The function already returned on its first
//    line; the unreachable code that used to follow has been removed.
//    That disabled code collapsed the *.MsoNormal rules into a single
//    ".Normal" rule, stripped class=MsoNormal from the body, and put
//    class="Normal" on the body and (via setTDStyles) on every table
//    cell.  It never removed real inline styles.
//
function removeInlineCSS()
{
   // someday should fix this to really remove inline styles, but it never really did that
   return;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    setTDStyles
//
// Purpose
//    We have removed the styles from individual paragraphs and the body
//    style does not filter down into the table cells, so we need to set
//    the style on the table cells too.
//
function setTDStyles(tag)
{
   // Table cells get the "Normal" class; every other tag is left alone.
   var isCell = (tag.tagName.toUpperCase() == "TD");

   if(isCell)
      tag.setAttribute("class", "Normal");

   // Keep traversing.
   return true;
}


/////////////////////////////////////////////////////////////////////////////
// Function
//    removeUnusedStyles
//
// Purpose
//    After we have done all of our house cleaning, some styles defined
//    in the head may no longer be used anywhere.  If they are no longer
//    used, we will blow them away.
//    vm - this has many bugs, so is now never called. We're removing
//    too much.
//
// Notes
//    For every <style> block: collects the ".class { ... }" rules, looks
//    for a class= reference to each class name in the <body> markup, and
//    rewrites the block with only the rules that are referenced.  Each
//    dropped rule is added to gRemoveUnusedCSS.  If the document has no
//    <body>, nothing is changed.
//
function removeUnusedStyles()
{
   var styles = findStyles();
   var style;
   var html;
   var classes;
   var classNames;
   var regx;
   // These used to be implicit globals.
   var body;
   var bodyhtml;
   var i;

   while (styles != null && styles.length > 0)
   {
      style = styles.shift();
      html = style.innerHTML;

      // Put each style class in an array.
      // LMH: Removed ? after \. to prevent us from considering
      // redefined HTML tags as classes (redefined HTML tags
      // aren't called from anywhere, but they're still used).
      classes = html.match(/\.\w*\s*\{[^\}]*\}/g);

      if(classes == null)
         continue;

      classNames = new Array(classes.length);

      // Clean up the matches so we only have the class name.
      for(i = 0; i < classes.length; i++)
         classNames[i] = classes[i].replace(/^\s*\.?(\w*)\s*\{[^\}]*\}/g, "$1");

      body = findTag("body",DOM.documentElement);

      // Without a body there is nothing to compare against; leave the
      // styles alone rather than fail or wipe them all.
      if(body == null)
         return;

      bodyhtml = body.outerHTML;

      // Now search in the body to see if we use them anywhere.
      for(i = 0; i < classes.length; i++)
      {
         // A fresh expression per class name (the names are \w* only, so
         // they need no escaping).
         regx = new RegExp("class=['\"]?" + classNames[i]);

         if(!regx.test(bodyhtml))
         {
            // this style is not used.  Nuke it.
            classes[i] = "";

            gRemoveUnusedCSS++;
         }
      }

      // Now reconstruct the style block
      html = "\n<!--\n";

      for(i = 0; i < classes.length; i++)
         html += classes[i];

      html += "\n-->\n";

      style.innerHTML = html;
   }
}

// look in the doc head for any number of style tags
// Returns whatever getElementsByTagName("style") yields for the document's
// <head>, or null when the document has no <head>.
function findStyles() {
   var headTag = findTag("head",DOM.documentElement);

   if (headTag == null)
      return null;

   return headTag.getElementsByTagName("style");
}


//*************** Pg1 Class *****************

//This is an example of a page class to be used with the TabControl.
//Uncomment the alert() calls to display the various events as they occur.

// Accessor installed on Pg1.prototype: the label given to the constructor.
function Pg1_getTabLabel() {
  var label = this.tabLabel;
  return label;
}

// Page object for the TabControl; it only remembers its tab label.
function Pg1(theTabLabel) {
  this.tabLabel = theTabLabel;
}

Pg1.prototype.getTabLabel = Pg1_getTabLabel;

//***************** End of Pg1 Class ******************
//*************** Pg2 Class *****************

//This is an example of a page class to be used with the TabControl.
//Uncomment the alert() calls to display the various events as they occur.

// Accessor installed on Pg2.prototype: the label given to the constructor.
function Pg2_getTabLabel() {
  var label = this.tabLabel;
  return label;
}

// Page object for the TabControl; it only remembers its tab label.
function Pg2(theTabLabel) {
  this.tabLabel = theTabLabel;
}

Pg2.prototype.getTabLabel = Pg2_getTabLabel;

//***************** End of Pg2 Class ******************
//*************** Pg3 Class *****************

//This is an example of a page class to be used with the TabControl.
//Uncomment the alert() calls to display the various events as they occur.

// Page object for the TabControl.  Besides its label it tracks whether the
// drop-downs on the page have been set up yet.
function Pg3(theTabLabel) {
  this.loaded = false;
  this.tabLabel = theTabLabel;
}

// The label given to the constructor.
function Pg3_getTabLabel() {
  var label = this.tabLabel;
  return label;
}

// Sets up the drop-downs the first time it is called; later calls do
// nothing.  Always answers true.
function Pg3_canLoad() {
  if (this.loaded) {
    return true;
  }

  initDropDowns();
  setDropDownStates();
  this.loaded = true;
  return true;
}

// Flips the visibility of T.obj to "hidden" and straight back to
// "visible".  Always answers true.
function Pg3_unload() {
  var states = ["hidden", "visible"];

  for (var s = 0; s < states.length; s++) {
    T.obj.visibility = states[s];
  }
  return true;
}

// Runs canLoad() so the drop-downs are initialized before the final work.
function Pg3_lastUnload() {
  var ready = this.canLoad();
  return ready;
}

Pg3.prototype.getTabLabel = Pg3_getTabLabel;
Pg3.prototype.canLoad = Pg3_canLoad;
Pg3.prototype.unload = Pg3_unload;
Pg3.prototype.lastUnload = Pg3_lastUnload;
//***************** End of Pg3 Class ******************
