Advertisement
Guest User

patch to make xsl-webhelpindexer.jar standalone

a guest
Feb 11th, 2011
319
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Diff 38.17 KB | None
  1. Index: xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerTask.java
  2. ===================================================================
  3. --- xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerTask.java  (revision 8967)
  4. +++ xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerTask.java  (working copy)
  5. @@ -1,3 +1,4 @@
  6. +/*
  7.  package com.nexwave.nquindexer;
  8.  
  9.  import java.io.File;
  10. @@ -18,6 +19,7 @@
  11.  import com.nexwave.nsidita.DirList;
  12.  import com.nexwave.nsidita.DocFileInfo;
  13.  
  14. +*/
  15.  /**
  16.   * Indexer ant task.
  17.   *
  18. @@ -25,7 +27,8 @@
  19.   *
  20.   * @author N. Quaine
  21.   * @author Kasun Gajasinghe <http://kasunbg.blogspot.com>
  22. - */
  23. + *//*
  24. +
  25.  public class IndexerTask extends Task {
  26.  
  27.     // messages
  28. @@ -36,7 +39,7 @@
  29.     private String txt_no_relative_files_found= "No relative html files calculated.";
  30.     private String txt_no_words_gathered= "No words have been indexed in";
  31.     private String txt_no_html_files="No HTML Files found in";
  32. -   private String txt_no_args="No argument given: you must provide an htmldir to the IndexerTask";
  33. +   private String txt_no_args="No argument given: you must provide an htmlDir to the IndexerTask";
  34.    
  35.     //working directories
  36.     private String searchdir = "search";
  37. @@ -45,7 +48,7 @@
  38.     private String projectDir = null;
  39.  
  40.     // ANT parameters
  41. -   private String htmldir=null;
  42. +   private String htmlDir=null;
  43.      public static String indexerLanguage="en";
  44.  
  45.      //supported languages: add new additions to this. don't include country codes to the end such as en_US or en_UK,
  46. @@ -64,18 +67,22 @@
  47.     public IndexerTask() {
  48.         super();
  49.     }
  50. -   /** The setter for the "htmldir" attribute (parameter of the task)
  51. +   */
  52. +/** The setter for the "htmlDir" attribute (parameter of the task)
  53.      * @param htmldir
  54.      * @throws InterruptedException
  55. -    */
  56. -    public void setHtmldir(String htmldir) {
  57. -        this.htmldir = htmldir;
  58. +    *//*
  59. +
  60. +    public void setHtmlDir(String htmlDir) {
  61. +        this.htmlDir = htmlDir;
  62.      }
  63.  
  64. -     /**
  65. +     */
  66. +/**
  67.       * Set the extension in which html files are generated
  68.       * @param htmlExtension The extension in wich html files are generated
  69. -     */
  70. +     *//*
  71. +
  72.      public void setHtmlextension(String htmlExtension) {
  73.         this.htmlExtension = htmlExtension;
  74.         //Trim the starting "."
  75. @@ -84,11 +91,13 @@
  76.         }
  77.     }
  78.  
  79. -    /**
  80. +    */
  81. +/**
  82.       * setter for "indexerLanguage" attribute from ANT
  83.       * @param indexerLanguage language for the search indexer. Used to differerentiate which stemmer to be used.
  84.       * @throws InterruptedException for ant
  85. -     */
  86. +     *//*
  87. +
  88.      public void setIndexerLanguage(String indexerLanguage){
  89.          if(indexerLanguage !=null && !"".equals(indexerLanguage)) {
  90.              int temp = indexerLanguage.indexOf('_');
  91. @@ -114,9 +123,11 @@
  92.          }
  93.      }
  94.    
  95. -   /**
  96. +   */
  97. +/**
  98.      * Implementation of the execute function (Task interface)
  99. -    */
  100. +    *//*
  101. +
  102.     public void execute() throws BuildException {
  103.          try{
  104.              //Use Xerces as the parser. Does not support Saxon6.5.5 parser
  105. @@ -146,12 +157,12 @@
  106.         //timing
  107.         Date dateStart = new Date();
  108.        
  109. -       if (htmldir == null) {
  110. +       if (htmlDir == null) {
  111.             System.out.println(txt_no_args + ".");
  112.             return;
  113.         }
  114.         // Init input directory
  115. -       inputDir = new File(htmldir);
  116. +       inputDir = new File(htmlDir);
  117.  
  118.         // Begin of init
  119.         // check if inputdir initialized
  120. @@ -252,7 +263,9 @@
  121.                
  122.                 filesDescription.add(docFileInfoTemp);
  123.             }
  124. -           /*remove empty strings from the map*/
  125. +           */
  126. +/*remove empty strings from the map*//*
  127. +
  128.             if (tempDico.containsKey("")) {
  129.                 tempDico.remove("");
  130.             }
  131. @@ -281,9 +294,11 @@
  132.         }
  133.     }
  134.    
  135. -   /**
  136. +   */
  137. +/**
  138.       * Prints the usage information for this class to <code>System.out</code>.
  139. -     */
  140. +     *//*
  141. +
  142.      private static void DisplayHelp() {
  143.         String lSep = System.getProperty("line.separator");
  144.          StringBuffer msg = new StringBuffer();
  145. @@ -354,3 +369,4 @@
  146.      }
  147.  
  148.  }
  149. +*/
  150. Index: xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerMain.java
  151. ===================================================================
  152. --- xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerMain.java  (revision 0)
  153. +++ xsl-webhelpindexer/src/com/nexwave/nquindexer/IndexerMain.java  (revision 0)
  154. @@ -0,0 +1,404 @@
  155. +/**
  156. + * Licensed under the Apache License, Version 2.0 (the "License");
  157. + * you may not use this file except in compliance with the License.
  158. + * You may obtain a copy of the License at
  159. + *
  160. + * http://www.apache.org/licenses/LICENSE-2.0
  161. + *
  162. + * Unless required by applicable law or agreed to in writing,
  163. + * software distributed under the License is distributed on an
  164. + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  165. + * KIND, either express or implied. See the License for the
  166. + * specific language governing permissions and limitations
  167. + * under the License.
  168. + */
  169. +
  170. +package com.nexwave.nquindexer;
  171. +
  172. +import com.nexwave.nsidita.DirList;
  173. +import com.nexwave.nsidita.DocFileInfo;
  174. +
  175. +import java.io.File;
  176. +import java.io.FileInputStream;
  177. +import java.io.IOException;
  178. +import java.util.*;
  179. +
  180. +/**
  181. + * User: Kasun Gajasinghe, University of Moratuwa, http://kasunbg.blogspot.com
  182. + * Date: Feb 10, 2011
  183. + */
  184. +
  185. +public class IndexerMain {
  186. +
  187. +    // messages
  188. +    private String txt_no_inputdir = "Input directory not found:";
  189. +    private String txt_cannot_create_outputdir = "Cannot create output search directory.";
  190. +    private String txt_no_files_found = "No html files found.";
  191. +    private String txt_wrong_dita_basedir = "ERROR: Parser initialization failed. Wrong dita base dir";
  192. +    private String txt_no_relative_files_found = "No relative html files calculated.";
  193. +    private String txt_no_words_gathered = "No words have been indexed in";
  194. +    private String txt_no_html_files = "No HTML Files found in";
  195. +    private String txt_no_args = "No argument given: you must provide an htmlDir to the IndexerMain";
  196. +    
  197. +    private static String txt_no_lang_specified ="Language of the content is not specified. Defaults to English.";
  198. +
  199. +    //working directories
  200. +    private String searchdir = "search";
  201. +    private File inputDir = null;
  202. +    private String outputDir = null;
  203. +    private String projectDir = null;
  204. +
  205. +    // ANT parameters
  206. +    public String htmlDir = null;
  207. +    public String indexerLanguage = "en";
  208. +
  209. +    //supported languages: add new entries to this list. Don't append country codes such as en_US or en_UK,
  210. +    // as the stemmers don't distinguish between them.
  211. +    private String[] supportedLanguages = {"en", "de", "fr", "zh", "ja", "ko"}; //currently extended support available for
  212. +    // English, German, French and CJK (Chinese [zh], Japanese [ja], Korean [ko]) languages only.
  213. +
  214. +    // Indexing features: words to remove
  215. +    private ArrayList<String> cleanUpStrings = null;
  216. +    private ArrayList<String> cleanUpChars = null;
  217. +
  218. +    //Html extension
  219. +    private String htmlExtension = "html";
  220. +
  221. +    // Constructors
  222. +    public IndexerMain(String htmlDir, String indexerLanguage) {
  223. +        super();
  224. +        setHtmlDir(htmlDir);
  225. +        setIndexerLanguage(indexerLanguage);
  226. +    }
  227. +
  228. +    /**
  229. +     * The content language defaults to English "en"
  230. +     * @param htmlDir The directory where html files resides.
  231. +     */
  232. +    public IndexerMain(String htmlDir) {
  233. +        super();
  234. +        setHtmlDir(htmlDir);
  235. +        setIndexerLanguage("en");
  236. +    }
  237. +
  238. +    /**
  239. +     * The setter for the "htmlDir" attribute (parameter of the task)
  240. +     *
  241. +     * @param htmlDir
  242. +     */
  243. +    public void setHtmlDir(String htmlDir) {
  244. +        this.htmlDir = htmlDir;
  245. +    }
  246. +
  247. +    /**
  248. +     * Set the extension in which html files are generated
  249. +     *
  250. +     * @param htmlExtension The extension in which html files are generated
  251. +     */
  252. +    public void setHtmlextension(String htmlExtension) {
  253. +        this.htmlExtension = htmlExtension;
  254. +        //Trim the starting "."
  255. +        if (this.htmlExtension.startsWith(".")) {
  256. +            this.htmlExtension = this.htmlExtension.substring(1);
  257. +        }
  258. +    }
  259. +
  260. +    /**
  261. +     * setter for "indexerLanguage" attribute from ANT
  262. +     *
  263. +     * @param indexerLanguage language for the search indexer. Used to differentiate which stemmer to be used.
  264. +     */
  265. +    public void setIndexerLanguage(String indexerLanguage) {
  266. +        if (indexerLanguage != null && !"".equals(indexerLanguage)) {
  267. +            int temp = indexerLanguage.indexOf('_');
  268. +            if (temp != -1) {
  269. +                indexerLanguage = indexerLanguage.substring(0, temp);
  270. +            }
  271. +            int i = 0;
  272. +            for (; i < supportedLanguages.length; i++) {
  273. +                if (indexerLanguage.equals(supportedLanguages[i])) {
  274. +                    this.indexerLanguage = supportedLanguages[i];
  275. +                    break;
  276. +                }
  277. +            }
  278. +
  279. +            //if not in supported language list,
  280. +            if (i >= supportedLanguages.length) {
  281. +//                System.out.println("The given language, \""+indexerLanguage+"\", does not have extensive support for " +
  282. +//                        "searching. Check documentation for details. ");
  283. +                this.indexerLanguage = indexerLanguage;
  284. +            }
  285. +        } else {
  286. +            this.indexerLanguage = "@@"; //fail-safe mechanism, This vm should not reach this point.
  287. +        }
  288. +    }
  289. +
  290. +    /**
  291. +     * com.nexwave.nquindexer.IndexerMain
  292. +     * The main class without Ant dependencies.
  293. +     * This can be used as a standalone jar.
  294. +     *
  295. +     * @param args one or two elements: htmlDirectory and, optionally, indexerLanguage.
  296. +     *             If only htmlDirectory is given, indexerLanguage defaults to English ("en").
  297. +     */
  298. +    public static void main(String[] args) {
  299. +
  300. +        IndexerMain indexer;
  301. +        if (args.length == 1) {
  302. +            System.out.println(txt_no_lang_specified);
  303. +            indexer = new IndexerMain(args[0]);
  304. +        } else if (args.length >= 2) {
  305. +
  306. +            indexer = new IndexerMain(args[0], args[1]);                        
  307. +        } else {
  308. +            throw new ArrayIndexOutOfBoundsException("Please specify the parameters htmlDirectory and (optional) " +
  309. +                    "indexerLanguage");
  310. +        }
  311. +
  312. +        indexer.execute();
  313. +
  314. +    }
  315. +
  316. +
  317. +    /**
  318. +     * Runs the indexing; counterpart of IndexerTask's execute function, without the Ant Task interface.
  319. +     */
  320. +    public void execute() {
  321. +        try {
  322. +            //Use Xerces as the parser. Does not support Saxon6.5.5 parser
  323. +            System.setProperty("org.xml.sax.driver", "org.apache.xerces.parsers.SAXParser");
  324. +            System.setProperty("javax.xml.parsers.SAXParserFactory", "org.apache.xerces.jaxp.SAXParserFactoryImpl");
  325. +//           System.setProperty("org.xml.sax.driver", "com.icl.saxon.aelfred.SAXDriver");
  326. +//           System.setProperty("javax.xml.parsers.SAXParserFactory", "com.icl.saxon.aelfred.SAXParserFactoryImpl");
  327. +        } catch (SecurityException se) {
  328. +            System.out.println("[WARNING] Default parser is not set to Xerces. Make sure Saxon6.5.5 " +
  329. +                    "is not in your CLASSPATH.");
  330. +        } catch (Exception e) {
  331. +            System.out.println("[WARNING] Default parser is not set to Xerces. Make sure Saxon6.5.5 " +
  332. +                    "is not in your CLASSPATH");
  333. +        }
  334. +
  335. +        ArrayList<DocFileInfo> filesDescription = null; // list of information about the topic files
  336. +        ArrayList<File> htmlFiles = null; // topic files listed in the given directory
  337. +        ArrayList<String> htmlFilesPathRel = null;
  338. +        Map<String, String> tempDico = new HashMap<String, String>();
  339. +        Iterator it;
  340. +
  341. +        //File name initialization
  342. +        String htmlList = "htmlFileList.js";
  343. +        String htmlInfoList = "htmlFileInfoList.js";
  344. +        String indexName = ".js";
  345. +
  346. +        //timing
  347. +        Date dateStart = new Date();
  348. +
  349. +        if (htmlDir == null) {
  350. +            System.out.println(txt_no_args + ".");
  351. +            return;
  352. +        }
  353. +        // Init input directory
  354. +        inputDir = new File(htmlDir);
  355. +
  356. +        // Begin of init
  357. +        // check if inputdir initialized
  358. +        if (inputDir == null) {
  359. +            DisplayHelp();
  360. +            return;
  361. +        }
  362. +
  363. +        // check if inputdir exists
  364. +        if (!inputDir.exists()) {
  365. +            System.out.println(txt_no_inputdir + " " + inputDir + ".");
  366. +            return;
  367. +        }
  368. +
  369. +        // check if outputdir defined
  370. +        if (outputDir == null) {
  371. +            //set the output directory: path= {inputDir}/search
  372. +            outputDir = inputDir.getPath().concat(File.separator).concat(searchdir);
  373. +        }
  374. +
  375. +        // check if outputdir exists
  376. +        File tempfile = new File(outputDir);
  377. +        if (!tempfile.exists()) {
  378. +            boolean b = (new File(outputDir)).mkdir();
  379. +            if (!b) {
  380. +                System.out.println(txt_cannot_create_outputdir + " " + outputDir + ".");
  381. +                return;
  382. +            }
  383. +        }
  384. +
  385. +        // check if projdir is defined
  386. +        if (projectDir == null) {
  387. +            projectDir = inputDir.getPath();
  388. +        }
  389. +        //end of init
  390. +
  391. +
  392. +        // Get the list of all html files but the tocs, covers and indexes
  393. +        DirList nsiDoc = new DirList(inputDir, "^.*\\." + htmlExtension + "?$", 1);
  394. +        htmlFiles = nsiDoc.getListFiles();
  395. +        // Check if found html files
  396. +        // Check whether any html files were found
  397. +            System.out.println(txt_no_html_files + " " + inputDir + ".");
  398. +            return;
  399. +        }
  400. +        // Get the list of all html files with relative paths
  401. +        htmlFilesPathRel = nsiDoc.getListFilesRelTo(projectDir);
  402. +
  403. +        if (htmlFiles == null) {
  404. +            System.out.println(txt_no_files_found);
  405. +            return;
  406. +        } else if (htmlFilesPathRel == null) {
  407. +            System.out.println(txt_no_relative_files_found);
  408. +            return;
  409. +        }
  410. +
  411. +        // Create the list of the existing html files (index starts at 0)
  412. +        WriteJSFiles.WriteHTMLList(outputDir.concat(File.separator).concat(htmlList), htmlFilesPathRel);
  413. +
  414. +        // Parse each html file to retrieve the words:
  415. +        // ------------------------------------------
  416. +
  417. +        // Retrieve the clean-up properties for indexing
  418. +        RetrieveCleanUpProps();
  419. +        // System.out.print("clean"+" " +cleanUpStrings);
  420. +
  421. +        //create a default handler
  422. +        //SaxHTMLIndex spe = new SaxHTMLIndex (); // do not use clean-up props files
  423. +        //SaxHTMLIndex spe = new SaxHTMLIndex (cleanUpStrings); // use clean-up props files
  424. +        SaxHTMLIndex spe = new SaxHTMLIndex(cleanUpStrings, cleanUpChars); // use clean-up props files
  425. +
  426. +        if (spe.init(tempDico) == 0) {
  427. +
  428. +            //create a html file description list
  429. +            filesDescription = new ArrayList<DocFileInfo>();
  430. +
  431. +            it = htmlFiles.iterator();
  432. +
  433. +            // parse each html files
  434. +            // parse each html file
  435. +                File ftemp = (File) it.next();
  436. +                //tempMap.put(key, value);
  437. +                //The HTML file information are added in the list of FileInfoObject
  438. +                DocFileInfo docFileInfoTemp = new DocFileInfo(spe.runExtractData(ftemp, this.indexerLanguage));
  439. +
  440. +                ftemp = docFileInfoTemp.getFullpath();
  441. +                String stemp = ftemp.toString();
  442. +                int i = stemp.indexOf(projectDir);
  443. +                if (i != 0) {
  444. +                    System.out.println("the documentation root does not match with the documentation input!");
  445. +                    return;
  446. +                }
  447. +                int ad = 1;
  448. +                if (stemp.equals(projectDir)) ad = 0;
  449. +                stemp = stemp.substring(i + projectDir.length() + ad);  //i is redundant (i==0 always)
  450. +                ftemp = new File(stemp);
  451. +                docFileInfoTemp.setFullpath(ftemp);
  452. +
  453. +                filesDescription.add(docFileInfoTemp);
  454. +            }
  455. +            /*remove empty strings from the map*/
  456. +            if (tempDico.containsKey("")) {
  457. +                tempDico.remove("");
  458. +            }
  459. +            // write the index files
  460. +            if (tempDico.isEmpty()) {
  461. +                System.out.println(txt_no_words_gathered + " " + inputDir + ".");
  462. +                return;
  463. +            }
  464. +
  465. +//            WriteJSFiles.WriteIndex(outputDir.concat(File.separator).concat(indexName), tempDico);
  466. +            WriteJSFiles.WriteIndex(outputDir.concat(File.separator).concat(indexName), tempDico, indexerLanguage);
  467. +
  468. +            // write the html list file with title and shortdesc
  469. +            //create the list of the existing html files (index starts at 0)
  470. +            WriteJSFiles.WriteHTMLInfoList(outputDir.concat(File.separator).concat(htmlInfoList), filesDescription);
  471. +
  472. +            //perf measurement
  473. +            Date dateEnd = new Date();
  474. +            long diff = dateEnd.getTime() - dateStart.getTime();
  475. +            if (diff < 1000)
  476. +                System.out.println("Delay = " + diff + " milliseconds");
  477. +            else
  478. +                System.out.println("Delay = " + diff / 1000 + " seconds");
  479. +        } else {
  480. +            System.out.println(txt_wrong_dita_basedir);
  481. +            return;
  482. +        }
  483. +    }
  484. +
  485. +    /**
  486. +     * Prints the usage information for this class to <code>System.out</code>.
  487. +     */
  488. +    private static void DisplayHelp() {
  489. +        String lSep = System.getProperty("line.separator");
  490. +        StringBuffer msg = new StringBuffer();
  491. +        msg.append("USAGE:" + lSep);
  492. +        msg.append("   java -classpath TesterIndexer inputDir outputDir projectDir" + lSep);
  493. +        msg.append("with:" + lSep);
  494. +        msg.append("   inputDir (mandatory) :  specify the html files ' directory to index" + lSep);
  495. +        msg.append("   outputDir (optional) : specify where to output the index files" + lSep);
  496. +        msg.append("   projectDir (optional) : specify the root of the documentation directory" + lSep);
  497. +        msg.append("Example:" + lSep);
  498. +        msg.append("   java -classpath TesterIndexer /home/$USER/DITA/doc" + lSep);
  499. +        msg.append("Example 2:" + lSep);
  500. +        msg.append("   java -classpath TesterIndexer /home/$USER/DITA/doc/customer/concepts /home/$USER/temp/search /home/$USER/DITA/doc/" + lSep);
  501. +        System.out.println(msg.toString());
  502. +    }
  503. +
  504. +    private int RetrieveCleanUpProps() {
  505. +
  506. +        // Files for punctuation (only one for now)
  507. +        String[] punctuationFiles = new String[]{"punctuation.props"};
  508. +        FileInputStream input;
  509. +        String tempStr;
  510. +        File ftemp;
  511. +        Collection c = new ArrayList<String>();
  512. +
  513. +        // Get the list of the props file containing the words to remove (not the punctuation)
  514. +        DirList props = new DirList(inputDir, "^(?!(punctuation)).*\\.props$", 1);
  515. +        ArrayList<File> wordsList = props.getListFiles();
  516. +//     System.out.println("props files:"+wordsList);
  517. +        //TODO: all properties are collected into a single ArrayList. Is that OK?
  518. +        Properties enProps = new Properties();
  519. +        String propsDir = inputDir.getPath().concat(File.separator).concat(searchdir);
  520. +
  521. +        // Init the lists which will contain the words and chars to remove
  522. +        cleanUpStrings = new ArrayList<String>();
  523. +        cleanUpChars = new ArrayList<String>();
  524. +
  525. +        try {
  526. +            // Retrieve words to remove
  527. +            for (File aWordsList : wordsList) {
  528. +                ftemp = aWordsList;
  529. +                if (ftemp.exists()) {
  530. +                    enProps.load(input = new FileInputStream(ftemp.getAbsolutePath()));
  531. +                    input.close();
  532. +                    c = enProps.values();
  533. +                    cleanUpStrings.addAll(c);
  534. +                    enProps.clear();
  535. +                }
  536. +            }
  537. +
  538. +            // Retrieve char to remove (punctuation for ex.)
  539. +            for (String punctuationFile : punctuationFiles) {
  540. +                tempStr = propsDir.concat(File.separator).concat(punctuationFile);
  541. +                ftemp = new File(tempStr);
  542. +                if (ftemp.exists()) {
  543. +                    enProps.load(input = new FileInputStream(tempStr));
  544. +                    input.close();
  545. +                    c = enProps.values();
  546. +                    cleanUpChars.addAll(c);
  547. +                    enProps.clear();
  548. +                }
  549. +            }
  550. +        }
  551. +        catch (IOException e) {
  552. +            e.printStackTrace();
  553. +            return 1;
  554. +        }
  555. +        return 0;
  556. +    }
  557. +
  558. +}
  559. Index: xsl-webhelpindexer/src/com/nexwave/nquindexer/TesterIndexer.java
  560. ===================================================================
  561. --- xsl-webhelpindexer/src/com/nexwave/nquindexer/TesterIndexer.java    (revision 8967)
  562. +++ xsl-webhelpindexer/src/com/nexwave/nquindexer/TesterIndexer.java    (working copy)
  563. @@ -1,5 +1,7 @@
  564. +/*
  565.  package com.nexwave.nquindexer;
  566.  
  567. +*/
  568.  /**
  569.   * For running tests with the indexertask.
  570.   *
  571. @@ -7,17 +9,20 @@
  572.   *
  573.   * @author N. Quaine
  574.   * @author Kasun Gajasinghe
  575. - */
  576. + *//*
  577. +
  578.      public class TesterIndexer {
  579.     public static IndexerTask IT = null;
  580. -   /**
  581. +   */
  582. +/**
  583.      * @param args
  584.      * @throws InterruptedException
  585. -    */
  586. +    *//*
  587. +
  588.     public static void main(String[] args) throws InterruptedException {
  589.          if (args.length != 0) {
  590.              IT = new IndexerTask();
  591. -            IT.setHtmldir(args[0]);
  592. +            IT.setHtmlDir(args[0]);
  593.              IT.setIndexerLanguage(args[1]);
  594.              IT.execute();
  595.          } else {
  596. @@ -27,7 +32,7 @@
  597.              String dir = "../doc/content";
  598.              String lang = "en";
  599.              IT = new IndexerTask();
  600. -            IT.setHtmldir(dir);
  601. +            IT.setHtmlDir(dir);
  602.              IT.setIndexerLanguage(lang);
  603.              IT.execute();
  604.          }
  605. @@ -36,3 +41,4 @@
  606.    
  607.  }
  608.  
  609. +*/
  610. Index: xsl-webhelpindexer/src/com/nexwave/nquindexer/WriteJSFiles.java
  611. ===================================================================
  612. --- xsl-webhelpindexer/src/com/nexwave/nquindexer/WriteJSFiles.java (revision 8967)
  613. +++ xsl-webhelpindexer/src/com/nexwave/nquindexer/WriteJSFiles.java (working copy)
  614. @@ -13,163 +13,247 @@
  615.  import java.util.TreeSet;
  616.  
  617.  import com.nexwave.nsidita.DocFileInfo;
  618. +
  619.  /**
  620.   * Outputs the js files with:
  621.   * - the list of html files and their description
  622.   * - the words retrieved from the html files and their location
  623. - *
  624. - * @version 2.0 2010-08-13
  625. - *
  626. + *
  627.   * @author N. Quaine
  628.   * @author Kasun Gajasinghe
  629. + * @version 2.0 2010-08-13
  630.   */
  631.  public class WriteJSFiles {
  632. -  
  633. -   private static String txt_VM_encoding_not_supported = "This VM does not support the specified encoding.";
  634. -   private static String txt_indices_location = "The created index files are located in ";
  635. -  
  636. -   /** Create a javascript array listing the html files with their paths relative to the project root
  637. -    * @param fileO path and name of the file in which to output the list of html files  
  638. -    * @param list of the html files, relative to the doc root directory  
  639. -    */
  640. -   public static void WriteHTMLList (String fileO,ArrayList<String> list) {
  641. -       int i = 0;
  642. -       Iterator it;
  643. -      
  644. -       if (list == null) {
  645. -           return;
  646. -       }
  647. -       if (fileO == null) {
  648. -           return;
  649. -       }
  650. -       it = list.iterator ( ) ;
  651. -      
  652. -       try {
  653. -           // open a outputstream, here a file
  654. -           OutputStream fOut= new FileOutputStream(fileO);
  655. -           OutputStream bout= new BufferedOutputStream(fOut);
  656. -           OutputStreamWriter out  = new OutputStreamWriter(bout, "UTF-8");
  657. -          
  658. -           /*fl : file list*/
  659. -           out.write("//List of files which are indexed.\n");
  660. -           out.write("fl = new Array();\n");
  661. -           String temp;
  662. -           while ( it.hasNext ( ) ) {
  663. -               temp = (String)it.next();
  664. -               //System.out.println("temp : "+File.separatorChar+" "+temp.replace(File.separatorChar, '/'));
  665. -              out.write("fl[\""+i+"\"]"+"= \""+temp.replace(File.separatorChar, '/')+"\";\n");
  666. -              i++;
  667. -           }
  668. -          
  669. -           out.flush();  // Don't forget to flush!
  670. -           out.close();
  671. +
  672. +    private static String txt_VM_encoding_not_supported = "This VM does not support the specified encoding.";
  673. +    private static String txt_indices_location = "The created index files are located in ";
  674. +
  675. +    /**
  676. +     * Create a javascript array listing the html files with their paths relative to the project root
  677. +     *
  678. +     * @param fileO path and name of the file in which to output the list of html files
  679. +     * @param list  of the html files, relative to the doc root directory
  680. +     */
  681. +    public static void WriteHTMLList(String fileO, ArrayList<String> list) {
  682. +        int i = 0;
  683. +        Iterator it;
  684. +
  685. +        if (list == null) {
  686. +            return;
  687. +        }
  688. +        if (fileO == null) {
  689. +            return;
  690. +        }
  691. +        it = list.iterator();
  692. +
  693. +        try {
  694. +            // open a outputstream, here a file
  695. +            // open an output stream, here a file
  696. +            OutputStream bout = new BufferedOutputStream(fOut);
  697. +            OutputStreamWriter out = new OutputStreamWriter(bout, "UTF-8");
  698. +
  699. +            /*fl : file list*/
  700. +            out.write("//List of files which are indexed.\n");
  701. +            out.write("fl = new Array();\n");
  702. +            String temp;
  703. +            while (it.hasNext()) {
  704. +                temp = (String) it.next();
  705. +                //System.out.println("temp : "+File.separatorChar+" "+temp.replace(File.separatorChar, '/'));
  706. +                out.write("fl[\"" + i + "\"]" + "= \"" + temp.replace(File.separatorChar, '/') + "\";\n");
  707. +                i++;
  708. +            }
  709. +
  710. +            out.flush();  // Don't forget to flush!
  711. +            out.close();
  712.  //         System.out.println("the array of html is in " + fileO);
  713.  
  714. -       }
  715. -       catch (UnsupportedEncodingException e) {
  716. -             System.out.println(txt_VM_encoding_not_supported);
  717. -           }
  718. -           catch (IOException e) {
  719. -             System.out.println(e.getMessage());        
  720. -       }
  721. -              
  722. -   }
  723. +        }
  724. +        catch (UnsupportedEncodingException e) {
  725. +            System.out.println(txt_VM_encoding_not_supported);
  726. +        }
  727. +        catch (IOException e) {
  728. +            System.out.println(e.getMessage());
  729. +        }
  730.  
  731. -   /** Create a javascript array listing the html files with
  732. -    * their paths relative to project root, their titles and shortdescs
  733. -    * @param fileO path and name of the file in which to output the list of html files  
  734. -    * @param list of the html files, relative to the doc root directory  
  735. -    */
  736. -   public static void WriteHTMLInfoList (String fileO,ArrayList<DocFileInfo> list) {
  737. -       int i = 0;
  738. -       Iterator it = null;
  739. -      
  740. -       if (list == null) {
  741. -           return;
  742. -       }
  743. -       if (fileO == null) {
  744. -           return;
  745. -       }
  746. -       it = list.iterator ( ) ;
  747. -       try {
  748. -           // open a outputstream, here a file
  749. -           OutputStream fOut= new FileOutputStream(fileO);
  750. -           // open a buffer output stream
  751. -           OutputStream bout= new BufferedOutputStream(fOut);
  752. -           OutputStreamWriter out
  753. -            = new OutputStreamWriter(bout, "UTF-8");
  754. -          
  755. -           /*fil : file list*/
  756. -           out.write("fil = new Array();\n");
  757. -          
  758. -           DocFileInfo tempInfo;
  759. -           String tempPath;
  760. -           String tempTitle;
  761. -           String tempShortdesc;
  762. -           while ( it.hasNext ( ) ) {
  763. -               // Retrieve file information: path, title and shortdesc.
  764. -               tempInfo = (DocFileInfo)it.next();
  765. -               tempPath = tempInfo.getFullpath().toString().replace(File.separatorChar, '/');
  766. -               tempTitle = tempInfo.getTitle();
  767. -               tempShortdesc = tempInfo.getShortdesc();
  768. -               //Remove unwanted white char
  769. -               if (tempTitle != null ) {
  770. -                   tempTitle = tempTitle.replaceAll("\\s+", " ");
  771. -                   tempTitle = tempTitle.replaceAll("['�\"]", " ");
  772. -               }
  773. -               if (tempShortdesc != null ) {
  774. -                   tempShortdesc = tempShortdesc.replaceAll("\\s+", " ");
  775. -                   tempShortdesc = tempShortdesc.replaceAll("['�\"]", " ");
  776. -               }
  777. -               //System.out.println("temp : "+File.separatorChar+" "+tempShortdesc);
  778. -              out.write("fil[\""+i+"\"]"+"= \""+tempPath+"@@@"+tempTitle+"@@@"+tempShortdesc+"\";\n");
  779. -              i++;
  780. -           }
  781. -          
  782. -           out.flush();  // Don't forget to flush!
  783. -           out.close();
  784. +    }
  785.  
  786. -       }
  787. -       catch (UnsupportedEncodingException e) {
  788. -             System.out.println(txt_VM_encoding_not_supported);
  789. -           }
  790. -           catch (IOException e) {
  791. -             System.out.println(e.getMessage());        
  792. -       }
  793. -              
  794. -   }
  795. +    /**
  796. +     * Create a javascript array listing the html files with
  797. +     * their paths relative to project root, their titles and shortdescs
  798. +     *
  799. +     * @param fileO path and name of the file in which to output the list of html files
  800. +     * @param list  of the html files, relative to the doc root directory
  801. +     */
  802. +    public static void WriteHTMLInfoList(String fileO, ArrayList<DocFileInfo> list) {
  803. +        int i = 0;
  804. +        Iterator it = null;
  805.  
  806. -   /** Create javascript index files alphabetically.
  807. -    * @param fileOutStr contains the path and the suffix of the index files to create.
  808. -    * The first letter of the key is added to the given suffix. For example: e.g. a.js, b.js etc...  
  809. -    * @param indexMap its keys are the indexed words and
  810. -    *  its values are the list of the files which contain the word.  
  811. -    */
  812. -   public static void WriteIndex (String fileOutStr, Map<String, ?> indexMap) {
  813. -       OutputStreamWriter out;
  814. -       OutputStream bout;
  815. -       OutputStream fOut;
  816. -       String tstr;       
  817. -      
  818. -       // check arguments
  819. -       if (indexMap == null || fileOutStr ==null) {
  820. -           return;
  821. -       }
  822. +        if (list == null) {
  823. +            return;
  824. +        }
  825. +        if (fileO == null) {
  826. +            return;
  827. +        }
  828. +        it = list.iterator();
  829. +        try {
  830. +            // open an output stream, here a file
  831. +            OutputStream fOut = new FileOutputStream(fileO);
  832. +            // open a buffer output stream
  833. +            OutputStream bout = new BufferedOutputStream(fOut);
  834. +            OutputStreamWriter out
  835. +                    = new OutputStreamWriter(bout, "UTF-8");
  836.  
  837. -       // Collect the key of the index map
  838. -       TreeSet<String> sortedKeys = new TreeSet<String>();
  839. -       sortedKeys.addAll(indexMap.keySet());
  840. -       Iterator keyIt = sortedKeys.iterator();
  841. -       tstr = (String)keyIt.next();
  842. -      
  843. -       File fileOut= new File(fileOutStr);
  844. +            /*fil : file list*/
  845. +            out.write("fil = new Array();\n");
  846.  
  847. +            DocFileInfo tempInfo;
  848. +            String tempPath;
  849. +            String tempTitle;
  850. +            String tempShortdesc;
  851. +            while (it.hasNext()) {
  852. +                // Retrieve file information: path, title and shortdesc.
  853. +                tempInfo = (DocFileInfo) it.next();
  854. +                tempPath = tempInfo.getFullpath().toString().replace(File.separatorChar, '/');
  855. +                tempTitle = tempInfo.getTitle();
  856. +                tempShortdesc = tempInfo.getShortdesc();
  857. +                //Collapse whitespace runs and replace quote characters with a space
  858. +                if (tempTitle != null) {
  859. +                    tempTitle = tempTitle.replaceAll("\\s+", " ");
  860. +                    tempTitle = tempTitle.replaceAll("['�\"]", " ");
  861. +                }
  862. +                if (tempShortdesc != null) {
  863. +                    tempShortdesc = tempShortdesc.replaceAll("\\s+", " ");
  864. +                    tempShortdesc = tempShortdesc.replaceAll("['�\"]", " ");
  865. +                }
  866. +                //System.out.println("temp : "+File.separatorChar+" "+tempShortdesc);
  867. +                out.write("fil[\"" + i + "\"]" + "= \"" + tempPath + "@@@" + tempTitle + "@@@" + tempShortdesc + "\";\n");
  868. +                i++;
  869. +            }
  870. +
  871. +            out.flush();  // Don't forget to flush!
  872. +            out.close();
  873. +
  874. +        }
  875. +        catch (UnsupportedEncodingException e) {
  876. +            System.out.println(txt_VM_encoding_not_supported);
  877. +        }
  878. +        catch (IOException e) {
  879. +            System.out.println(e.getMessage());
  880. +        }
  881. +
  882. +    }
  883. +
  884. +    /**
  885. +     * Create the javascript index files, with the indexed words written in sorted order.
  886. +     *
  887. +     * @param fileOutStr      contains the path and the suffix of the index files to create.
  888. +     *                        "index-1", "index-2" and "index-3" are prepended to the suffix. For example: index-1.js, index-2.js, index-3.js
  889. +     * @param indexMap        its keys are the indexed words and
  890. +     *                        its values are the list of the files which contain the word.
  891. +     * @param indexerLanguage The language of the content that gets indexed
  892. +     */
  893. +    public static void WriteIndex(String fileOutStr, Map<String, ?> indexMap, String indexerLanguage) {
  894. +        OutputStreamWriter out;
  895. +        OutputStream bout;
  896. +        OutputStream fOut;
  897. +        String tstr;
  898. +
  899. +        // check arguments
  900. +        if (indexMap == null || fileOutStr == null) {
  901. +            return;
  902. +        }
  903. +
  904. +        // Collect the key of the index map
  905. +        TreeSet<String> sortedKeys = new TreeSet<String>();
  906. +        sortedKeys.addAll(indexMap.keySet());
  907. +        Iterator keyIt = sortedKeys.iterator();
  908. +        tstr = (String) keyIt.next();
  909. +
  910. +        File fileOut = new File(fileOutStr);
  911. +
  912.          /* Writes the index to Three JS files, namely: index-1.js, index-2.js, index-3.js
  913. +        * Index will be distributed evenly in these three files.
  914. +        * tstr is the current key
  915. +        * keyIt is the iterator of the key set
  916. +        * */
  917. +        int indexSize = sortedKeys.size();
  918. +        for (int i = 1; i <= 3; i++) {
  919. +            try {
  920. +                // open an output stream, here a file
  921. +                fOut = new FileOutputStream(fileOut.getParent() + File.separator + "index-" + i + fileOut.getName());
  922. +                bout = new BufferedOutputStream(fOut);
  923. +                out = new OutputStreamWriter(bout, "UTF-8");
  924. +
  925. +                try {
  926. +                    /* Populate a javascript hashmap:
  927. +                      The key is a word to look for in the index,
  928. +                      The value is the numbers of the files in which the word exists.
  929. +                      Example: w["key"]="file1,file2,file3";*/
  930. +                    int count = 0;
  931. +                    if (i == 1)
  932. +                        out.write("var indexerLanguage=\"" + indexerLanguage + "\";\n");
  933. +                    out.write("//Auto generated index for searching.\n");
  934. +                    while (keyIt.hasNext()) {        //&& (tempLetter == tstr.charAt(0))
  935. +                        out.write("w[\"" + tstr + "\"]" + "=\"" + indexMap.get(tstr) + "\";\n");
  936. +                        tstr = (String) keyIt.next();
  937. +                        count++;
  938. +                        if (indexSize / count < 3) {
  939. +                            break;
  940. +                        }
  941. +                    }
  942. +                    out.write("\n");
  943. +                    out.flush();  // Don't forget to flush!
  944. +                    out.close();
  945. +                }
  946. +                catch (UnsupportedEncodingException e) {
  947. +                    System.out.println(txt_VM_encoding_not_supported);
  948. +                }
  949. +            }
  950. +            catch (IOException e) {
  951. +                System.out.println(e.getMessage());
  952. +            }
  953. +        }
  954. +        System.out.println(txt_indices_location + fileOutStr);
  955. +    }
  956. +
  957. +
  958. +    /**
  959. +     * Create javascript index files, with the indexed words written in sorted order.
  960. +     *
  961. +     * @deprecated replaced by {@link #WriteIndex(String, Map, String)}
  962. +     *
  963. +     * @param fileOutStr contains the path and the suffix of the index files to create.
  964. +     *                   The index is spread over three files. For example: index-1.js, index-2.js, index-3.js
  965. +     * @param indexMap   its keys are the indexed words and
  966. +     *                   its values are the list of the files which contain the word.
  967. +     */
  968. +
  969. +
  970. +    public static void WriteIndex(String fileOutStr, Map<String, ?> indexMap) {
  971. +        OutputStreamWriter out;
  972. +        OutputStream bout;
  973. +        OutputStream fOut;
  974. +        String tstr;
  975. +
  976. +        // check arguments
  977. +        if (indexMap == null || fileOutStr == null) {
  978. +            return;
  979. +        }
  980. +
  981. +        // Collect the key of the index map
  982. +        TreeSet<String> sortedKeys = new TreeSet<String>();
  983. +        sortedKeys.addAll(indexMap.keySet());
  984. +        Iterator keyIt = sortedKeys.iterator();
  985. +        tstr = (String) keyIt.next();
  986. +
  987. +        File fileOut = new File(fileOutStr);
  988. +
  989. +        /* Writes the index to Three JS files, namely: index-1.js, index-2.js, index-3.js
  990.          * Index will be distributed evenly in these three files.
  991.          * tstr is the current key
  992.          * keyIt is the iterator of the key set
  993.          * */
  994. -        int indexSize = sortedKeys.size();
  995. +        int indexSize = sortedKeys.size();
  996.          for (int i = 1; i <= 3; i++) {
  997.              try {
  998.                  // open a outputstream, here a file
  999. @@ -183,17 +267,17 @@
  1000.                        The value is the numbers of the files in which the word exists.
  1001.                        Example: w["key"]="file1,file2,file3";*/
  1002.                      int count = 0;
  1003. -                    if(i==1)
  1004. -                        out.write("var indexerLanguage=\""+IndexerTask.indexerLanguage+"\";\n");
  1005. +//                    if (i == 1)
  1006. +//                        out.write("var indexerLanguage=\"" + IndexerTask.indexerLanguage + "\";\n");
  1007.                      out.write("//Auto generated index for searching.\n");
  1008.                      while (keyIt.hasNext()) {        //&& (tempLetter == tstr.charAt(0))
  1009.                          out.write("w[\"" + tstr + "\"]" + "=\"" + indexMap.get(tstr) + "\";\n");
  1010.                          tstr = (String) keyIt.next();
  1011.                          count++;
  1012. -                        if (indexSize / count < 3){
  1013. +                        if (indexSize / count < 3) {
  1014.                              break;
  1015.                          }
  1016. -                    }
  1017. +                    }
  1018.                      out.write("\n");
  1019.                      out.flush();  // Don't forget to flush!
  1020.                      out.close();
  1021. @@ -205,7 +289,7 @@
  1022.              catch (IOException e) {
  1023.                  System.out.println(e.getMessage());
  1024.              }
  1025. -        }
  1026. -       System.out.println(txt_indices_location + fileOutStr);
  1027. -   }
  1028. +        }
  1029. +        System.out.println(txt_indices_location + fileOutStr);
  1030. +    }
  1031.  }
Advertisement
RAW Paste Data Copied
Advertisement