Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package imdb;
- //72169 == actresses -> roles est une date ? apparement oui...
- // RE INIT LES DONNEES A CHAQUE TOUR DE BOUCLE !!!
- //actors.list : ligne 6956365 , 20595967 -> changer le { en espace car mauvais encodage
- //bug episode si le nom est une année ? ligne 131925 de actor.list
- //INSERT NULL POUR TOUS LES TRUCS 0
- /*
- export CLASSPATH=$CLASSPATH:/usr/share/java/mysql-connector-java.jar
- */ //!!!!!!!!!!!!!!!!!
- import java.util.*;
- import java.util.Date;
- import java.io.BufferedReader;
- import java.sql.*;
- import java.io.FileReader;
- import java.io.IOException;
- import java.util.regex.*;
- public class Main{
- public static void main(String[] args) {
- final String JDBC_DRIVER = "com.mysql.jdbc.Driver";
- final String DB_URL = "jdbc:mysql://localhost/maindb";
- final String USER = "root";
- final String PASS = "";
- Connection conn = null;
- PreparedStatement stmt = null;
- try{
- Class.forName("com.mysql.jdbc.Driver");
- System.out.println("Connecting to database...");
- conn = DriverManager.getConnection(DB_URL,USER,PASS);
- conn.setAutoCommit(false); // !!!
- }
- catch(Exception e){ e.printStackTrace();}
- getRatings
- /*getMoviesInfo("movies.list",stmt,conn);
- System.out.println("MOVIE DONE");
- getMoviesInfo("language.list",stmt,conn);
- System.out.println("LANGUE DONE");
- getMoviesInfo("genres.list",stmt,conn);
- System.out.println("GENRE DONE");
- getMoviesInfo("countries.list",stmt,conn);
- System.out.println("COUNTRY DONE");*/
- //getPersonInfo("actors.list",stmt,conn);
- /*getPersonInfo("actors.list",stmt,conn);
- getPersonInfo("actresses.list",stmt,conn);
- getPersonInfo("writers.list",stmt,conn);
- getPersonInfo("directors.list",stmt,conn);*/
- }
- static void getMoviesInfo(String file,PreparedStatement stmt,Connection conn){
- /* récupère toutes les infos concernant une oeuvre (nom de l'oeuvre,nom
- * et saison/numéro de l'épisode si il y en a, la langue et le pays dans
- * lequel l'oeuvre a été tournée */
- try {
- BufferedReader br = new BufferedReader(new FileReader((file)));
- String info = "", line = "", sql_query="",identifiant="", table="";
- int beginYear=-1, endYear =-1, workYear=-1,index = 0, i=0,beginLine=0,endLine=0;
- String ep[] = new String[2], name[] = new String[3];
- ResultSet rs =null;
- PreparedStatement stmtA =null,stmtB =null,stmtC =null,stmtD =null, stmtE=null, stmtF=null;
- sql_query = "REPLACE INTO Oeuvre(identifiant,titre,annee,note) VALUES(?,?,?,?)";
- stmtA = conn.prepareStatement(sql_query);
- sql_query = "REPLACE INTO Episode(identifiant, numeroSaison, numeroEpisode, date, titreEpisode) VALUES(?,?,?,?,?)";
- stmtB = conn.prepareStatement(sql_query);
- sql_query = "REPLACE INTO Serie(identifiant,anneeFin) VALUES(?,?)";
- stmtC = conn.prepareStatement(sql_query);
- sql_query = "REPLACE INTO Film(identifiant) VALUES(?)";
- stmtD = conn.prepareStatement(sql_query);
- if( file.equals("countries.list") )
- table="Pays";
- else if(file.equals("language.list"))
- table="Langue";
- else if(file.equals("genres.list"))
- table="Genre";
- sql_query = "REPLACE INTO "+table+"(nom) VALUES(?)";
- stmtE = conn.prepareStatement(sql_query);
- sql_query = "INSERT INTO Oeuvre"+table+"(identifiant,nom) VALUES(?,?)";
- stmtF = conn.prepareStatement(sql_query);
- if(file.equals("genres.list")){
- beginLine = 383;endLine=2490750;
- }
- else if(file.equals("movies.list")){
- beginLine = 15;endLine=4270540;
- }
- else{
- beginLine = 14; endLine = 2018390;
- if(file.equals("countries.list"))
- endLine = 2050240;
- }
- for(i=0; i<beginLine;++i)
- br.readLine();
- while ((line = br.readLine()) != null && i<endLine) {
- index = line.length()-1;
- while(line.charAt(index) != ' ' && line.charAt(index) != '\t'){
- info = line.charAt(index)+info;
- index--;
- }
- if(file.equals("movies.list") ){
- if(isInteger(info.substring(0, 4)))
- beginYear = Integer.parseInt(info.substring(0, 4));
- if( info.length() == 9){
- workYear=beginYear;
- if(isInteger(info.substring(5,9)))
- endYear = Integer.parseInt(info.substring(5, 9));
- else
- endYear=0;
- }
- }
- name = getName(line); //name[0] = nom, name[1] = série ou film
- if(name[1].equals("0")) // si ="0" c'est une série
- ep = getEpisode(line, index);
- else
- workYear=beginYear;
- // name = titre, ep[0] = titre ep, ep[1]= saison ep.
- if( file.equals("movies.list") && (1999 < workYear) && (workYear < 2011) && beginYear<2011 && beginYear>1999){
- //c'est une série
- if(name[1].equals("0") && !(ep[0].equals("SUSPENDED"))){
- identifiant=name[0]+name[3]+ep[0]+ep[1];
- stmtA.setString(1, identifiant);
- stmtA.setString(2, name[0]);
- stmtA.setInt(3,workYear);
- stmtA.setObject(4, null);
- stmtA.addBatch();
- if(((!ep[0].equals("") || !ep[1].equals(" ")))){
- stmtB.setString(1,identifiant);
- if(!ep[1].equals(" ")){
- String[] tmp = ep[1].split(" ");
- stmtB.setInt(2,Integer.parseInt(tmp[0])); // numéro saison
- stmtB.setInt(3,Integer.parseInt(tmp[1])); // numéro episode
- }
- else{
- stmtB.setObject(2,null);
- stmtB.setObject(3,null); // numéro episode
- }
- if(beginYear!=-1)
- stmtB.setInt(4,beginYear); // date de l'épisode
- else
- stmtB.setObject(4,null);
- if(!ep[0].equals(""))
- stmtB.setString(5,ep[0]);
- else
- stmtB.setObject(5, null);
- stmtB.addBatch();
- }
- stmtC.setString(1,identifiant);
- if(endYear!=-1){
- stmtC.setInt(2,endYear);
- }
- else {
- stmtC.setObject(2,null);
- }
- stmtC.addBatch();
- }
- // c'est un film
- else{
- identifiant=name[0]+name[3];
- stmtA.setString(1, identifiant);
- stmtA.setString(2, name[0]);
- stmtA.setInt(3,workYear);
- stmtA.setObject(4, null);
- stmtA.addBatch();
- stmtD.setString(1,identifiant);
- stmtD.addBatch();
- }
- }
- else if(!file.equals("movies.list") && isInteger(name[2]) && ( Integer.parseInt(name[2]) > 1999 ) && ( Integer.parseInt(name[2])<2011 ) && !(ep[0].equals("SUSPENDED")) ){
- if(name[1].equals("0"))
- identifiant=name[0]+name[3]+ep[0]+ep[1];
- else
- identifiant=name[0]+name[3]+ep[0]+ep[1];
- insertCouLangGen(info,name[1],identifiant,stmtE,stmtF);
- }
- info = "";
- beginYear=-1;endYear=-1;
- i++;
- if(i%25000==0){
- if(file.equals("movies.list")){
- stmtA.executeBatch();
- stmtB.executeBatch();
- stmtC.executeBatch();
- stmtD.executeBatch();
- }
- else{
- stmtE.executeBatch();
- stmtF.executeBatch();
- }
- conn.commit( );
- System.out.println(i);
- }
- }
- br.close();
- conn.commit( );
- }
- catch (IOException e){e.printStackTrace();}
- catch(Exception e){ e.printStackTrace();}
- }
- static String[] getName(String line){
- String name ="", num = "", idYear="";
- String isMovie = "1";
- int index=0;
- while(!isInteger(num) && index<line.length()-1 && !num.equals("????")){
- while((line.charAt(index)!= '(' && index<line.length()-1) || index==1){
- name+= line.charAt(index);
- index++;
- }
- num = line.substring(index+1, index+5);
- index++;
- }
- if(line.charAt(index+4)!=')'){
- index+=4;
- while(line.charAt(index)!=')'){
- num+=line.charAt(index);
- index++;
- }
- }
- idYear=num;
- if(name.charAt(0)== '\"'){
- isMovie = "0";
- name = name.substring(1, name.length()-2);
- }
- String res[] = new String[4];
- res[0] = name;
- res[1] = isMovie;
- res[2]= num;
- res[3]= idYear;
- return res;
- }
- static String[] getEpisode(String line, int index){
- String episode="", numEpisode="", saison ="", ep="";
- String res[] = new String[2];
- while(line.charAt(index) != '{' && index>0){
- index--;
- }
- //on a un épisode
- if(index != 0){
- index++;
- while(line.charAt(index) != '}'){
- episode += line.charAt(index);
- index++;
- }
- //on a un numSaison/NumEpisode
- if(episode.indexOf('#') != -1){
- Pattern p = Pattern.compile("(^.*)?(?=\\(#)(\\(#([0-9]*).([0-9]*)\\))");
- Matcher m = p.matcher(episode);
- if(m.matches()){ // GROUPE 1 = NOM, groupe 2 = tout le (#...), groupe 3 = saison, groupe 4 = episode;
- numEpisode = m.group(2);
- saison = m.group(3);
- ep = m.group(4);
- }
- //on enleve le numSaison/numEp de nom de l'épisode
- episode = episode.substring(0, episode.length()-numEpisode.length());
- }
- }
- res[0] = episode;
- res[1] = saison+ " "+ ep;
- return res;
- }
- static void insertCouLangGen(String info, String isMovie, String identifiant, PreparedStatement stmtA, PreparedStatement stmtB){
- try{
- stmtA.setString(1, info);
- stmtA.addBatch();
- stmtB.setString(1, identifiant);
- stmtB.setString(2, info);
- stmtB.addBatch();
- stmtB.executeBatch();
- }
- catch(SQLException se){se.printStackTrace();}
- }
- static void getPersonInfo(String file,PreparedStatement stmt,Connection conn){
- try {
- String work[]={}, name[]={};
- BufferedReader br = new BufferedReader(new FileReader((file)));
- int beginLine=0, i=0, endLine=0, identifiant=0;
- String line="", info="", sql_query="";
- ResultSet rs = null;
- if(file.equals("directors.list")){
- beginLine = 235;endLine=3280382;
- }
- else if(file.equals("actors.list")){
- beginLine = 239;endLine=21562311;
- }
- else if(file.equals("actresses.list")){
- beginLine=13022040; endLine=13022047;
- }
- else if(file.equals("writers.list")){
- beginLine=303;endLine=5224334;
- }
- for(i=0; i<beginLine;++i)
- br.readLine();
- sql_query="INSERT INTO Personne(numero,nom,prenom,genre) VALUES(?,?,?,?)";
- stmt = conn.prepareStatement(sql_query);
- PreparedStatement stmtA =null;
- sql_query="INSERT INTO Acteur(numero,nom,prenom) VALUES(?,?,?)";
- stmtA = conn.prepareStatement(sql_query);
- PreparedStatement stmtB=null;
- sql_query="SELECT MAX(numero) FROM Personne";
- stmtB = conn.prepareStatement(sql_query);
- PreparedStatement stmtC=null;
- sql_query = "REPLACE INTO OeuvreActeur(identifiant,nom,prenom,numero) SELECT e.identifiant,?,?,? "
- +"FROM Episode AS e, Oeuvre AS o WHERE e.titreEpisode= ? AND e.date = ?";
- stmtC = conn.prepareStatement(sql_query);
- // work[0] = nom oeuvre
- // work[1] = nom episode
- // work[2] = saison/num
- // work[3] = année de début
- // work[4] = si c un film/série
- // name[0] = nom
- // name[1] = prénom
- Date date = new Date();
- System.out.println(date.toString());
- while ((line = br.readLine()) != null && i<endLine) {
- //System.out.println(i);
- if(line != null && !line.isEmpty() && line.charAt(0)!=' ' && line.charAt(0)!= '\t' && line.charAt(0)!= '0' ){
- name = getPersonName(line);
- identifiant++;
- //insert dans Personne
- stmt.setInt(1, identifiant);
- stmt.setString(2,name[0]);
- stmt.setString(3,name[1]);
- if(file.equals("actors.list"))
- stmt.setString(4, "men");
- else
- stmt.setString(4, "woman");
- stmt.addBatch();
- // insert dans acteur
- stmtA.setInt(1,identifiant);
- stmtA.setString(2, name[0]);
- stmtA.setString(3, name[1]);
- stmtA.addBatch();
- }
- if(line != null && !line.isEmpty()){
- work = getWork(line, i);
- if((file.equals("actors.list") || file.equals("actresses.list")) && isInteger(work[3]) && Integer.parseInt(work[3])>1999&& Integer.parseInt(work[3])<2011){
- info = getRole(line);
- //c'est une série
- if(work[4]=="0"){
- stmtC.setString(1,name[0]);
- stmtC.setString(2,name[1]);
- stmtC.setInt(3,identifiant);
- stmtC.setString(4,work[1]);
- if(work[3].equals("????"))
- stmtC.setObject(5, null);
- else
- stmtC.setInt(5,Integer.parseInt(work[3]));
- stmtC.addBatch();
- }
- }
- // A VERIF POUR PAS RAJOUTER UN WORK AU SUIVANT QUI N'EN AURAIT PAS
- // + P-E QU'IL FAUT SAVOIR SI C UN FILM/SERIE
- }
- i++;
- System.out.println(i);
- if(i%50000==0){
- date = new Date();
- System.out.println(i+ " "+date.toString());
- stmt.executeBatch();
- stmtA.executeBatch();
- stmtC.executeBatch();
- conn.commit();
- }
- }
- stmt.executeBatch();
- stmtA.executeBatch();
- stmtC.executeBatch();
- conn.commit();
- br.close();
- conn.close();
- }
- catch (IOException e) {e.printStackTrace();}
- catch(Exception e){ e.printStackTrace();}
- }
- static String getRole(String line){
- int index = line.length()-1;
- String role = "";
- while(index>0){
- if(line.charAt(index)==']'){
- index--;
- while(line.charAt(index)!='[' && index>0){
- role = line.charAt(index) + role;
- index--;
- }
- if(index==0)
- role="";
- return role;
- }
- index--;
- }
- return role;
- }
- static String[] getWork(String line, int i){
- String workEp="", numEp="";
- if(line.charAt(0) != '\t'){ // on est sur une ligne où il y a le nom/prénom
- int index=0;
- while(line.charAt(index)!='\t')
- index++;
- line = line.substring(index+1, line.length());
- }
- line = line.replaceAll("^\\s+", ""); // on enlève les tabs de gauche
- String name[] = getName(line);
- if(name[1].equals("0")){
- String ep[] = getEpisode(line, line.length()-1);
- workEp = ep[0];
- numEp= ep[1];
- }
- else
- name[0]= name[0].substring(0, name[0].length()-1);
- String res[] = new String[5];
- res[0]= name[0]; // nom oeuvre
- res[1]= workEp; // nom episode
- res[2]= numEp; // saison/num
- res[3]= name[2]; // année de début
- res[4]=name[1]; // si c'est un film/série
- return res;
- }
- static String[] getPersonName(String line){
- String firstname ="", name="";
- int index =0;
- while(true){
- if(line.charAt(index)==',' || line.charAt(index)=='\t')
- break;
- name+= line.charAt(index);
- index++;
- }
- if(line.charAt(index)==','){
- index+=2;
- while(line.charAt(index)!=' ' && line.charAt(index)!= '\t'){
- firstname+= line.charAt(index);
- index++;
- }
- }
- String res[] = new String[2];
- res[0]= name;
- res[1]= firstname;
- return res;
- }
- static void getRatingInfo(String file,PreparedStatement stmt,Connection conn){
- try {
- BufferedReader br = new BufferedReader(new FileReader((file)));
- int beginLine = 30, i=0, votes=0, year = 0;
- float score=0.0f;
- String line="", title="", production="";
- for(i=0; i<beginLine;++i)
- br.readLine();
- while ((line = br.readLine()) != null) {
- if(line.length()!=0 && line.charAt(0) == ' '){
- Pattern p = Pattern.compile("\\s*([0-9\\.*]{10})\\s*([0-9]*)(\\s*[0-9]+\\.[0-9]\\s*)([^\\(]*)?(\\(([^\\(]*)\\))?(\\s\\{(.*)\\})?");
- Matcher m = p.matcher(line);
- if(m.matches()){
- title = m.group(4);
- score=Float.parseFloat(m.group(3));
- votes =Integer.parseInt(m.group(2));
- production=m.group(1);
- // fonctionne pas car il y 2016/I... a voir si il faut le prendre
- //year = Integer.parseInt(m.group(6));
- System.out.println(i + " || " + score + " || " +title + " || " +votes);
- }
- }
- i++;
- }
- }
- catch (IOException e){e.printStackTrace();}
- }
- static boolean isInteger( String input ){
- try{
- Integer.parseInt( input );
- return true;
- }
- catch( Exception e){
- return false;
- }
- }
- };
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement