Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import org.apache.commons.digester.Digester;
- import org.xml.sax.SAXException;
- import org.apache.lucene.util.Version;
- import java.util.ArrayList;
- import java.util.List;
- import java.io.File;
- import java.io.IOException;
- import java.io.FileNotFoundException;
- import java.io.BufferedWriter;
- import java.io.FileWriter;
- import java.io.Writer;
- import java.io.OutputStreamWriter;
- import java.io.FileOutputStream;
- /**
- * Generates centroids given docs list in a XML file
- *
- */
- public class CentroidGenerator
- {
- List<Centroid> centroids;
- public List<Centroid> getCentroids() {
- for (int i=0; i<centroids.size(); i++) {
- centroids.get(i).averageTime();
- centroids.get(i).averageLat();
- centroids.get(i).averageLng();
- }
- return centroids;
- }
- public void nextItem(FlickrDoc flickrDoc)
- {
- if (centroids == null) {
- centroids = new ArrayList<Centroid>();
- Centroid centroid = new Centroid(flickrDoc.getTitle(), flickrDoc.getDescription(), flickrDoc.getTime(), flickrDoc.getTags(), flickrDoc.getEvent());
- if ( (flickrDoc.getLatitude() != null) && (flickrDoc.getLongitude() != null) ) {
- centroid.setLat(flickrDoc.getLatitude());
- centroid.setLng(flickrDoc.getLongitude());
- }
- centroids.add(centroid);
- } else {
- String existing;
- Boolean merged = false;
- for (int i=0; i<centroids.size(); i++) {
- if (flickrDoc.getEvent().equals(centroids.get(i).getEvent())) {
- existing = centroids.get(i).getTitle();
- centroids.get(i).appendTitle(flickrDoc.getTitle());
- existing = centroids.get(i).getDescription();
- centroids.get(i).appendDescription(flickrDoc.getDescription());
- existing = centroids.get(i).getTime();
- centroids.get(i).appendTime(flickrDoc.getTime());
- existing = centroids.get(i).getTags();
- centroids.get(i).appendTags(flickrDoc.getTags());
- if ( (flickrDoc.getLatitude() != null) && (flickrDoc.getLongitude() != null) ) {
- centroids.get(i).appendLat(flickrDoc.getLatitude());
- centroids.get(i).appendLng(flickrDoc.getLongitude());
- }
- merged = true;
- break;
- }
- }
- if (!merged) {
- Centroid centroid = new Centroid(flickrDoc.getTitle(), flickrDoc.getDescription(), flickrDoc.getTime(), flickrDoc.getTags(), flickrDoc.getEvent());
- if ( (flickrDoc.getLatitude() != null) && (flickrDoc.getLongitude() != null) ) {
- centroid.setLat(flickrDoc.getLatitude());
- centroid.setLng(flickrDoc.getLongitude());
- }
- centroids.add(centroid);
- }
- }
- }
- /**
- * Configures Digester rules and actions, parses the XML file specified
- * as the first argument.
- *
- * @param args command line arguments
- */
- public static void main(String[] args) throws IOException, SAXException
- {
- System.out.println("Centroid Generator...");
- // instantiate Digester and disable XML validation
- Digester digester = new Digester();
- digester.setValidating(false);
- // instantiate CollectionIndexer class
- digester.addObjectCreate("collection", CentroidGenerator.class );
- // instantiate Document class
- digester.addObjectCreate("collection/doc", FlickrDoc.class );
- // set type property of Document instance when 'type' attribute is found
- //digester.addSetProperties("collection/doc", "type", "type" );
- // set different properties of Document instance using specified methods
- digester.addCallMethod("collection/doc/id", "setId", 0);
- digester.addCallMethod("collection/doc/title", "setTitle", 0);
- digester.addCallMethod("collection/doc/description", "setDescription", 0);
- digester.addCallMethod("collection/doc/time", "setTime", 0);
- digester.addCallMethod("collection/doc/tags", "setTags", 0);
- digester.addCallMethod("collection/doc/geo/latitude", "setLatitude", 0);
- digester.addCallMethod("collection/doc/geo/longitude", "setLongitude", 0);
- digester.addCallMethod("collection/doc/event", "setEvent", 0);
- // call 'addDocument' method when the next 'collection/document' pattern is seen
- digester.addSetNext("collection/doc", "nextItem" );
- // now that rules and actions are configured, start the parsing process
- CentroidGenerator abp = (CentroidGenerator) digester.parse(new File(args[0]));
- StringBuffer contents = new StringBuffer();
- contents.append("<?xml version='1.0' encoding='utf-8'?>" + System.getProperty("line.separator"));
- contents.append("<collection>").append(System.getProperty("line.separator"));
- List<Centroid> cent = abp.getCentroids();
- for (int i=0; i<cent.size(); i++) {
- contents.append("<doc>" + System.getProperty("line.separator"));
- contents.append("<id>");
- contents.append(i);
- contents.append("</id>" + System.getProperty("line.separator"));
- contents.append("<title>");
- contents.append(cent.get(i).getTitle());
- contents.append("</title>" + System.getProperty("line.separator"));
- contents.append("<description>");
- contents.append(cent.get(i).getDescription());
- contents.append("</description>" + System.getProperty("line.separator"));
- contents.append("<time>");
- contents.append(cent.get(i).getTime());
- contents.append("</time>" + System.getProperty("line.separator"));
- contents.append("<tags>");
- contents.append(cent.get(i).getTags());
- contents.append("</tags>" + System.getProperty("line.separator"));
- contents.append("<geo>" + System.getProperty("line.separator"));
- contents.append("<lat>");
- if (cent.get(i).getLat() != null) contents.append(cent.get(i).getLat());
- contents.append("</lat>" + System.getProperty("line.separator"));
- contents.append("<lng>");
- if (cent.get(i).getLng() != null) contents.append(cent.get(i).getLng());
- contents.append("</lng>" + System.getProperty("line.separator"));
- contents.append("</geo>" + System.getProperty("line.separator"));
- contents.append("<event>");
- contents.append(cent.get(i).getEvent());
- contents.append("</event>" + System.getProperty("line.separator"));
- contents.append("</doc>" + System.getProperty("line.separator"));
- }
- contents.append("</collection>" + System.getProperty("line.separator") + System.getProperty("line.separator"));
- Writer writer = null;
- try {
- //File fileOutput = new File("output.trectext");
- File fileOutput = new File(args[1]);
- writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileOutput), "UTF8"));
- writer.write(contents.toString());
- } catch (FileNotFoundException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- } finally {
- try {
- if (writer != null) {
- writer.close();
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
- }
- /**
- * Flickr structured document.
- *
- */
- public static class FlickrDoc
- {
- //private String type;
- private String id;
- private String title;
- private String description;
- private String time;
- private String tags;
- private String latitude;
- private String longitude;
- private String event;
- public void setId(String newId)
- {
- id = newId;
- }
- public String getId()
- {
- return id;
- }
- public void setTitle(String newTitle)
- {
- title = newTitle;
- }
- public String getTitle()
- {
- return title;
- }
- public void setDescription(String newDescription)
- {
- description = newDescription;
- }
- public String getDescription()
- {
- return description;
- }
- public void setTime(String newTime)
- {
- time = newTime;
- }
- public String getTime()
- {
- return time;
- }
- public void setTags(String newTags)
- {
- tags = newTags;
- }
- public String getTags()
- {
- return tags;
- }
- public void setLatitude(String newLatitude)
- {
- latitude = newLatitude;
- }
- public String getLatitude()
- {
- return latitude;
- }
- public void setLongitude(String newLongitude)
- {
- longitude = newLongitude;
- }
- public String getLongitude()
- {
- return longitude;
- }
- public void setEvent(String newEvent)
- {
- event = newEvent;
- }
- public String getEvent()
- {
- return event;
- }
- }
- public static class Centroid
- {
- private String title;
- private String description;
- private String time;
- private String tags;
- private String latitude;
- private String longitude;
- private String event;
- public Centroid(String newTitle, String newDescription, String newTime, String newTags, String newEvent) {
- title = newTitle;
- description = newDescription;
- tags = newTags;
- time = newTime;
- event = newEvent;
- }
- public void setTitle(String newTitle)
- {
- title = newTitle;
- }
- public void appendTitle(String newTitle)
- {
- title = title + " " + newTitle;
- }
- public String getTitle()
- {
- return title;
- }
- public void setDescription(String newDescription)
- {
- description = newDescription;
- }
- public void appendDescription(String newDescription)
- {
- description = description + " " + newDescription;
- }
- public String getDescription()
- {
- return description;
- }
- public void setTime(String newTime)
- {
- time = newTime;
- }
- public void appendTime(String newTime)
- {
- time = time + " " + newTime;
- }
- public void averageTime()
- {
- String[] timeValues = time.split(" ");
- int sum = 0;
- for (int i = 0; i < timeValues.length; i++) {
- sum = sum + Integer.parseInt(timeValues[i]);
- }
- time = Integer.toString(sum / timeValues.length);
- }
- public String getTime()
- {
- return time;
- }
- public void setTags(String newTags)
- {
- tags = newTags;
- }
- public void appendTags(String newTags)
- {
- tags = tags + " " + newTags;
- }
- public String getTags()
- {
- return tags;
- }
- public void setLat(String newLatitude)
- {
- latitude = newLatitude;
- }
- public void appendLat(String newLatitude)
- {
- latitude = latitude + " " + newLatitude;
- }
- public void averageLat()
- {
- if (latitude!=null) {
- if (!latitude.equals("null")) {
- String[] latValues = latitude.split(" ");
- float sum = 0;
- int counter = 0;
- for (int i = 0; i < latValues.length; i++) {
- if (!latValues[i].equals("null")) {
- sum = sum + Float.valueOf(latValues[i].trim()).floatValue();
- counter = counter +1;
- }
- }
- latitude = Float.toString(sum / (float) counter);
- }
- }
- }
- public String getLat()
- {
- return latitude;
- }
- public void setLng(String newLongitude)
- {
- longitude = newLongitude;
- }
- public void appendLng(String newLongitude)
- {
- longitude = longitude + " " + newLongitude;
- }
- public void averageLng()
- {
- if (longitude!=null) {
- if (!longitude.equals("null")) {
- String[] lngValues = longitude.split(" ");
- float sum = 0;
- int counter = 0;
- for (int i = 0; i < lngValues.length; i++) {
- if (!lngValues[i].equals("null")) {
- sum = sum + Float.valueOf(lngValues[i].trim()).floatValue();
- counter = counter +1;
- }
- }
- longitude = Float.toString(sum / (float) counter);
- }
- }
- }
- public String getLng()
- {
- return longitude;
- }
- public void setEvent(String newEvent)
- {
- event = newEvent;
- }
- public String getEvent()
- {
- return event;
- }
- }
- }
Add Comment
Please, Sign In to add comment