Guest User

Untitled

a guest
Jan 23rd, 2018
69
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.30 KB | None | 0 0
  1. import org.apache.commons.digester.Digester;
  2. import org.xml.sax.SAXException;
  3.  
  4. import org.apache.lucene.util.Version;
  5. import java.util.ArrayList;
  6. import java.util.List;
  7.  
  8. import java.io.File;
  9. import java.io.IOException;
  10. import java.io.FileNotFoundException;
  11.  
  12. import java.io.BufferedWriter;
  13. import java.io.FileWriter;
  14. import java.io.Writer;
  15.  
  16. import java.io.OutputStreamWriter;
  17. import java.io.FileOutputStream;
  18.  
  19.  
  20. /**
  21. * Generates centroids from a list of docs given in an XML file
  22. *
  23. */
  24. public class CentroidGenerator
  25. {
  26.  
  27. List<Centroid> centroids;
  28.  
  29. public List<Centroid> getCentroids() {
  30.  
  31. for (int i=0; i<centroids.size(); i++) {
  32. centroids.get(i).averageTime();
  33. centroids.get(i).averageLat();
  34. centroids.get(i).averageLng();
  35. }
  36.  
  37. return centroids;
  38. }
  39.  
  40. public void nextItem(FlickrDoc flickrDoc)
  41. {
  42.  
  43. if (centroids == null) {
  44.  
  45. centroids = new ArrayList<Centroid>();
  46.  
  47. Centroid centroid = new Centroid(flickrDoc.getTitle(), flickrDoc.getDescription(), flickrDoc.getTime(), flickrDoc.getTags(), flickrDoc.getEvent());
  48.  
  49. if ( (flickrDoc.getLatitude() != null) && (flickrDoc.getLongitude() != null) ) {
  50. centroid.setLat(flickrDoc.getLatitude());
  51. centroid.setLng(flickrDoc.getLongitude());
  52. }
  53.  
  54. centroids.add(centroid);
  55.  
  56. } else {
  57.  
  58. String existing;
  59. Boolean merged = false;
  60. for (int i=0; i<centroids.size(); i++) {
  61.  
  62. if (flickrDoc.getEvent().equals(centroids.get(i).getEvent())) {
  63.  
  64. existing = centroids.get(i).getTitle();
  65. centroids.get(i).appendTitle(flickrDoc.getTitle());
  66.  
  67. existing = centroids.get(i).getDescription();
  68. centroids.get(i).appendDescription(flickrDoc.getDescription());
  69.  
  70. existing = centroids.get(i).getTime();
  71. centroids.get(i).appendTime(flickrDoc.getTime());
  72.  
  73. existing = centroids.get(i).getTags();
  74. centroids.get(i).appendTags(flickrDoc.getTags());
  75.  
  76. if ( (flickrDoc.getLatitude() != null) && (flickrDoc.getLongitude() != null) ) {
  77. centroids.get(i).appendLat(flickrDoc.getLatitude());
  78. centroids.get(i).appendLng(flickrDoc.getLongitude());
  79. }
  80.  
  81. merged = true;
  82. break;
  83. }
  84. }
  85.  
  86. if (!merged) {
  87. Centroid centroid = new Centroid(flickrDoc.getTitle(), flickrDoc.getDescription(), flickrDoc.getTime(), flickrDoc.getTags(), flickrDoc.getEvent());
  88.  
  89. if ( (flickrDoc.getLatitude() != null) && (flickrDoc.getLongitude() != null) ) {
  90. centroid.setLat(flickrDoc.getLatitude());
  91. centroid.setLng(flickrDoc.getLongitude());
  92. }
  93.  
  94. centroids.add(centroid);
  95. }
  96. }
  97.  
  98. }
  99.  
  100. /**
  101. * Configures Digester rules and actions, parses the XML file specified
  102. * as the first argument.
  103. *
  104. * @param args command line arguments
  105. */
  106. public static void main(String[] args) throws IOException, SAXException
  107. {
  108.  
  109. System.out.println("Centroid Generator...");
  110.  
  111. // instantiate Digester and disable XML validation
  112. Digester digester = new Digester();
  113. digester.setValidating(false);
  114.  
  115. // instantiate CollectionIndexer class
  116. digester.addObjectCreate("collection", CentroidGenerator.class );
  117. // instantiate Document class
  118. digester.addObjectCreate("collection/doc", FlickrDoc.class );
  119.  
  120. // set type property of Document instance when 'type' attribute is found
  121. //digester.addSetProperties("collection/doc", "type", "type" );
  122.  
  123. // set different properties of Document instance using specified methods
  124. digester.addCallMethod("collection/doc/id", "setId", 0);
  125. digester.addCallMethod("collection/doc/title", "setTitle", 0);
  126. digester.addCallMethod("collection/doc/description", "setDescription", 0);
  127. digester.addCallMethod("collection/doc/time", "setTime", 0);
  128. digester.addCallMethod("collection/doc/tags", "setTags", 0);
  129. digester.addCallMethod("collection/doc/geo/latitude", "setLatitude", 0);
  130. digester.addCallMethod("collection/doc/geo/longitude", "setLongitude", 0);
  131. digester.addCallMethod("collection/doc/event", "setEvent", 0);
  132.  
  133. // call 'addDocument' method when the next 'collection/document' pattern is seen
  134. digester.addSetNext("collection/doc", "nextItem" );
  135.  
  136. // now that rules and actions are configured, start the parsing process
  137. CentroidGenerator abp = (CentroidGenerator) digester.parse(new File(args[0]));
  138.  
  139. StringBuffer contents = new StringBuffer();
  140. contents.append("<?xml version='1.0' encoding='utf-8'?>" + System.getProperty("line.separator"));
  141. contents.append("<collection>").append(System.getProperty("line.separator"));
  142.  
  143.  
  144. List<Centroid> cent = abp.getCentroids();
  145.  
  146. for (int i=0; i<cent.size(); i++) {
  147.  
  148. contents.append("<doc>" + System.getProperty("line.separator"));
  149.  
  150. contents.append("<id>");
  151. contents.append(i);
  152. contents.append("</id>" + System.getProperty("line.separator"));
  153.  
  154. contents.append("<title>");
  155. contents.append(cent.get(i).getTitle());
  156. contents.append("</title>" + System.getProperty("line.separator"));
  157.  
  158. contents.append("<description>");
  159. contents.append(cent.get(i).getDescription());
  160. contents.append("</description>" + System.getProperty("line.separator"));
  161.  
  162. contents.append("<time>");
  163. contents.append(cent.get(i).getTime());
  164. contents.append("</time>" + System.getProperty("line.separator"));
  165.  
  166. contents.append("<tags>");
  167. contents.append(cent.get(i).getTags());
  168. contents.append("</tags>" + System.getProperty("line.separator"));
  169.  
  170. contents.append("<geo>" + System.getProperty("line.separator"));
  171.  
  172. contents.append("<lat>");
  173. if (cent.get(i).getLat() != null) contents.append(cent.get(i).getLat());
  174. contents.append("</lat>" + System.getProperty("line.separator"));
  175.  
  176. contents.append("<lng>");
  177. if (cent.get(i).getLng() != null) contents.append(cent.get(i).getLng());
  178. contents.append("</lng>" + System.getProperty("line.separator"));
  179.  
  180. contents.append("</geo>" + System.getProperty("line.separator"));
  181.  
  182. contents.append("<event>");
  183. contents.append(cent.get(i).getEvent());
  184. contents.append("</event>" + System.getProperty("line.separator"));
  185.  
  186. contents.append("</doc>" + System.getProperty("line.separator"));
  187. }
  188. contents.append("</collection>" + System.getProperty("line.separator") + System.getProperty("line.separator"));
  189.  
  190.  
  191. Writer writer = null;
  192.  
  193. try {
  194. //File fileOutput = new File("output.trectext");
  195. File fileOutput = new File(args[1]);
  196. writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileOutput), "UTF8"));
  197. writer.write(contents.toString());
  198. } catch (FileNotFoundException e) {
  199. e.printStackTrace();
  200. } catch (IOException e) {
  201. e.printStackTrace();
  202. } finally {
  203. try {
  204. if (writer != null) {
  205. writer.close();
  206. }
  207. } catch (IOException e) {
  208. e.printStackTrace();
  209. }
  210. }
  211.  
  212.  
  213.  
  214. }
  215.  
  216. /**
  217. * Flickr structured document.
  218. *
  219. */
  220. public static class FlickrDoc
  221. {
  222. //private String type;
  223. private String id;
  224. private String title;
  225. private String description;
  226. private String time;
  227. private String tags;
  228. private String latitude;
  229. private String longitude;
  230. private String event;
  231.  
  232. public void setId(String newId)
  233. {
  234. id = newId;
  235. }
  236. public String getId()
  237. {
  238. return id;
  239. }
  240.  
  241. public void setTitle(String newTitle)
  242. {
  243. title = newTitle;
  244. }
  245. public String getTitle()
  246. {
  247. return title;
  248. }
  249.  
  250. public void setDescription(String newDescription)
  251. {
  252. description = newDescription;
  253. }
  254. public String getDescription()
  255. {
  256. return description;
  257. }
  258.  
  259. public void setTime(String newTime)
  260. {
  261. time = newTime;
  262. }
  263. public String getTime()
  264. {
  265. return time;
  266. }
  267.  
  268. public void setTags(String newTags)
  269. {
  270. tags = newTags;
  271. }
  272. public String getTags()
  273. {
  274. return tags;
  275. }
  276.  
  277. public void setLatitude(String newLatitude)
  278. {
  279. latitude = newLatitude;
  280. }
  281. public String getLatitude()
  282. {
  283. return latitude;
  284. }
  285.  
  286. public void setLongitude(String newLongitude)
  287. {
  288. longitude = newLongitude;
  289. }
  290. public String getLongitude()
  291. {
  292. return longitude;
  293. }
  294.  
  295. public void setEvent(String newEvent)
  296. {
  297. event = newEvent;
  298. }
  299. public String getEvent()
  300. {
  301. return event;
  302. }
  303. }
  304.  
  305. public static class Centroid
  306. {
  307. private String title;
  308. private String description;
  309. private String time;
  310. private String tags;
  311. private String latitude;
  312. private String longitude;
  313. private String event;
  314.  
  315.  
  316. public Centroid(String newTitle, String newDescription, String newTime, String newTags, String newEvent) {
  317. title = newTitle;
  318. description = newDescription;
  319. tags = newTags;
  320. time = newTime;
  321. event = newEvent;
  322. }
  323.  
  324. public void setTitle(String newTitle)
  325. {
  326. title = newTitle;
  327. }
  328.  
  329. public void appendTitle(String newTitle)
  330. {
  331. title = title + " " + newTitle;
  332. }
  333.  
  334. public String getTitle()
  335. {
  336. return title;
  337. }
  338.  
  339. public void setDescription(String newDescription)
  340. {
  341. description = newDescription;
  342. }
  343.  
  344. public void appendDescription(String newDescription)
  345. {
  346. description = description + " " + newDescription;
  347. }
  348.  
  349. public String getDescription()
  350. {
  351. return description;
  352. }
  353.  
  354. public void setTime(String newTime)
  355. {
  356. time = newTime;
  357. }
  358.  
  359. public void appendTime(String newTime)
  360. {
  361. time = time + " " + newTime;
  362. }
  363.  
  364. public void averageTime()
  365. {
  366. String[] timeValues = time.split(" ");
  367. int sum = 0;
  368. for (int i = 0; i < timeValues.length; i++) {
  369. sum = sum + Integer.parseInt(timeValues[i]);
  370. }
  371. time = Integer.toString(sum / timeValues.length);
  372. }
  373.  
  374. public String getTime()
  375. {
  376. return time;
  377. }
  378.  
  379. public void setTags(String newTags)
  380. {
  381. tags = newTags;
  382. }
  383.  
  384. public void appendTags(String newTags)
  385. {
  386. tags = tags + " " + newTags;
  387. }
  388.  
  389. public String getTags()
  390. {
  391. return tags;
  392. }
  393.  
  394. public void setLat(String newLatitude)
  395. {
  396. latitude = newLatitude;
  397. }
  398.  
  399. public void appendLat(String newLatitude)
  400. {
  401. latitude = latitude + " " + newLatitude;
  402. }
  403.  
  404. public void averageLat()
  405. {
  406. if (latitude!=null) {
  407. if (!latitude.equals("null")) {
  408. String[] latValues = latitude.split(" ");
  409. float sum = 0;
  410. int counter = 0;
  411. for (int i = 0; i < latValues.length; i++) {
  412. if (!latValues[i].equals("null")) {
  413. sum = sum + Float.valueOf(latValues[i].trim()).floatValue();
  414. counter = counter +1;
  415. }
  416. }
  417. latitude = Float.toString(sum / (float) counter);
  418.  
  419. }
  420. }
  421. }
  422.  
  423. public String getLat()
  424. {
  425. return latitude;
  426. }
  427.  
  428. public void setLng(String newLongitude)
  429. {
  430. longitude = newLongitude;
  431. }
  432.  
  433. public void appendLng(String newLongitude)
  434. {
  435. longitude = longitude + " " + newLongitude;
  436. }
  437.  
  438. public void averageLng()
  439. {
  440. if (longitude!=null) {
  441. if (!longitude.equals("null")) {
  442. String[] lngValues = longitude.split(" ");
  443. float sum = 0;
  444. int counter = 0;
  445. for (int i = 0; i < lngValues.length; i++) {
  446. if (!lngValues[i].equals("null")) {
  447. sum = sum + Float.valueOf(lngValues[i].trim()).floatValue();
  448. counter = counter +1;
  449. }
  450. }
  451. longitude = Float.toString(sum / (float) counter);
  452. }
  453. }
  454. }
  455.  
  456. public String getLng()
  457. {
  458. return longitude;
  459. }
  460.  
  461. public void setEvent(String newEvent)
  462. {
  463. event = newEvent;
  464. }
  465.  
  466. public String getEvent()
  467. {
  468. return event;
  469. }
  470. }
  471. }
Add Comment
Please, Sign In to add comment