Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #############################################################################
- #
- # Andrew G. West - notes.txt - Notes about the WP:MED stats findings
- #
- #############################################################################
- Top ten WP's by views:
- en English
- es Spanish
- ru Russian
- ja Japanese
- de German
- fr French
- pt Portuguese
- it Italian
- pl Polish
- zh Chinese
- #############################
- mysql> SELECT SUBSTRING(PAGE,1,2),COUNT(*),SUM(year) FROM wp_med_monthly
- GROUP BY SUBSTRING(PAGE,1,2) ORDER BY COUNT(*) DESC;
- +---------------------+----------+------------+
- | SUBSTRING(PAGE,1,2) | COUNT(*) | SUM(year) |
- +---------------------+----------+------------+
- | en | 29072 | 2277403987 |
- | de | 7761 | 348355902 |
- | fr | 6390 | 218532955 |
- | es | 6367 | 658556909 |
- | pl | 5999 | 147488961 |
- | it | 5677 | 147115245 |
- | pt | 5269 | 212465002 |
- | ru | 4865 | 150162878 |
- | nl | 4391 | 75543607 |
- | ja | 4303 | 253740601 |
- | ar | 4220 | 33228397 |
- | sv | 3661 | 30557682 |
- | zh | 3426 | 36225111 |
- | ca | 2992 | 4764627 |
- | fa | 2843 | 17695775 |
- | fi | 2825 | 22876930 |
- | he | 2389 | 16288412 |
- | cs | 2360 | 21955957 |
- | uk | 2291 | 11050301 |
- | no | 2186 | 12113834 |
- | tr | 1822 | 33904861 |
- | sr | 1775 | 2665627 |
- | ko | 1692 | 6543526 |
- | si | 1600 | 4251231 |
- | hu | 1528 | 12119560 |
- | da | 1433 | 6702468 |
- | id | 1431 | 22732356 |
- | hr | 1396 | 8679378 |
- | th | 1278 | 11120603 |
- | bg | 1242 | 7098983 |
- | ro | 1225 | 7805024 |
- | sl | 1191 | 3063597 |
- | gl | 1173 | 614948 |
- | lt | 1005 | 4123914 |
- | sk | 1000 | 4279143 | [snip!]
- 203 rows in set (0.17 sec)
- #############################
- mysql> SELECT SUM(year) FROM wp_med_monthly;
- +------------+
- | SUM(year) |
- +------------+
- | 4,880,133,770 |
- +------------+
- 1 row in set (0.11 sec)
- #############################
- mysql> SELECT PAGE,COUNT(*)+1 FROM wp_med_map m, wp_med_monthly p
- WHERE m.en_id=p.id GROUP BY en_id ORDER BY COUNT(*) DESC LIMIT 100;
- ADD 1 TO ACCOUNT FOR ENGLISH ENTRY NOT IN MAP TABLE!!!
- +------------------------------------------------------+------------+
- | PAGE | COUNT(*)+1 |
- +------------------------------------------------------+------------+
- | en Leonardo_da_Vinci | 180 |
- | en Human | 165 |
- | en Human_biology | 165 |
- | en Medicine | 148 |
- | en Medical_practice | 148 |
- | en Blood | 137 |
- | en Fungus | 134 |
- | en HIV/AIDS | 128 |
- | en Brain_cell | 127 |
- | en Brain_function | 127 |
- | en Bacteria | 125 |
- | en Metastatic_tuberculous_ulcer | 123 |
- | en Tuberculosis | 123 |
- | en Tubercular_scar | 123 |
- | en Louis_Pasteur | 122 |
- | en Cancer_patient | 119 |
- | en Cancer | 119 |
- | en Anatomy | 117 |
- | en Mosquito_bite | 115 |
- | en World_Health_Organization | 115 |
- | en Mosquito | 115 |
- | en Diabetes_mellitus | 113 |
- | en Disorder_(medicine) | 113 |
- | en Disease | 113 |
- | en Illness | 113 |
- | en Head | 111 |
- | en Cerebral_malaria | 111 |
- | en Virus | 111 |
- | en Malaria | 111 |
- | en Bromine | 110 |
- | en Death | 110 |
- | en Health | 109 |
- | en Cholera | 105 |
- | en Neck | 103 |
- | en Influenza | 103 |
- | en Full_term | 101 |
- | en Pregnancy | 101 |
- | en Biochemistry | 101 |
- | en Avicenna | 101 |
- | en Angelina_Jolie_cancer_treatment | 98 |
- | en Abortion | 97 |
- | en Sexual_intercourse | 96 |
- | en Hippocrates | 96 |
- | en Hospital | 95 |
- | en International_Red_Cross_and_Red_Crescent_Movement | 94 |
- | en X-Ray | 92 |
- | en Genetics | 92 |
- | en X-ray | 92 |
- | en Suicide | 91 |
- | en Alzheimer's_disease | 90 |
- | en Pneumonia | 90 |
- | en Microbiology | 89 |
- | en Lip | 88 |
- | en Hormone | 88 |
- | en Sleep | 88 |
- | en Physiology | 88 |
- | en Obesity | 87 |
- | en Smallpox | 87 |
- | en Variola_major | 87 |
- | en Pharmaceutical_drug | 86 |
- | en Condom | 86 |
- [snip]!
- #############################
- mysql> CREATE TEMPORARY TABLE IF NOT EXISTS wp_med_temp AS
- (SELECT PAGE,COUNT(*)+1 as A FROM wp_med_map m, wp_med_monthly p
- WHERE m.en_id=p.id GROUP BY en_id ORDER BY COUNT(*));
- mysql> SELECT A,COUNT(*) FROM wp_med_temp GROUP BY A ORDER BY A DESC;
- +-----+----------+
- | A | COUNT(*) |
- +-----+----------+
- | 180 | 1 |
- | 165 | 2 |
- | 148 | 2 |
- | 137 | 1 |
- | 134 | 1 |
- | 128 | 1 |
- | 127 | 2 |
- | 125 | 1 |
- | 123 | 3 |
- | 122 | 1 |
- | 119 | 2 |
- | 117 | 1 |
- | 115 | 3 |
- | 113 | 4 |
- | 111 | 4 |
- | 110 | 2 |
- | 109 | 1 |
- | 105 | 1 |
- | 103 | 2 |
- | 101 | 4 |
- | 98 | 1 |
- | 97 | 1 |
- | 96 | 2 |
- | 95 | 1 |
- | 94 | 1 |
- | 92 | 3 |
- | 91 | 1 |
- | 90 | 2 |
- | 89 | 1 |
- | 88 | 4 |
- | 87 | 3 |
- | 86 | 2 |
- | 85 | 3 |
- | 84 | 6 |
- | 83 | 3 |
- | 82 | 8 |
- | 81 | 5 |
- | 80 | 5 |
- | 79 | 2 |
- | 78 | 5 |
- | 77 | 7 |
- | 76 | 6 |
- | 75 | 6 |
- | 74 | 6 |
- | 73 | 3 |
- | 72 | 6 |
- | 71 | 9 |
- | 70 | 5 |
- | 69 | 5 |
- | 68 | 11 |
- | 67 | 6 |
- | 66 | 10 |
- | 65 | 9 |
- | 64 | 11 |
- | 63 | 13 |
- | 62 | 11 |
- | 61 | 13 |
- | 60 | 4 |
- | 59 | 11 |
- | 58 | 15 |
- | 57 | 17 |
- | 56 | 16 |
- | 55 | 8 |
- | 54 | 18 |
- | 53 | 22 |
- | 52 | 17 |
- | 51 | 22 |
- | 50 | 23 |
- | 49 | 15 |
- | 48 | 20 |
- | 47 | 27 |
- | 46 | 35 |
- | 45 | 32 |
- | 44 | 36 |
- | 43 | 36 |
- | 42 | 25 |
- | 41 | 24 |
- | 40 | 33 |
- | 39 | 25 |
- | 38 | 38 |
- | 37 | 28 |
- | 36 | 59 |
- | 35 | 35 |
- | 34 | 51 |
- | 33 | 78 |
- | 32 | 44 |
- | 31 | 65 |
- | 30 | 46 |
- | 29 | 69 |
- | 28 | 76 |
- | 27 | 79 |
- | 26 | 96 |
- | 25 | 90 |
- | 24 | 77 |
- | 23 | 126 |
- | 22 | 132 |
- | 21 | 166 |
- | 20 | 164 |
- | 19 | 230 |
- | 18 | 186 |
- | 17 | 209 |
- | 16 | 251 |
- | 15 | 261 |
- | 14 | 341 |
- | 13 | 292 |
- | 12 | 346 |
- | 11 | 402 |
- | 10 | 429 |
- | 9 | 505 |
- | 8 | 553 |
- | 7 | 677 |
- | 6 | 872 |
- | 5 | 1113 |
- | 4 | 1529 |
- | 3 | 2361 |
- | 2 | 4254 |
- +-----+----------+
- 116 rows in set (0.01 sec)
- REMEMBER THAT PAGES THAT EXIST ONLY IN ENGLISH WILL NOT APPEAR IN THIS LIST!!!
- #############################
- Corresponding to 1/1/2013
- ID=1007
- UNIX_DAY=15706 -- 16070
- Need to convert this to hours = 376944 -- 385703
- And this is the starting point for summing 2013 views BY LANG/PROJECT
- mysql> CREATE TABLE wp_med_projects (PROJECT varchar(127), VIEWS bigint unsigned not null);
- mysql> INSERT INTO wp_med_projects SELECT PROJECT,SUM(HITS) FROM projectcount
- WHERE UNIX_HOUR>=376944 AND UNIX_HOUR<=385703 AND PROJECT NOT LIKE "%.%" GROUP BY PROJECT;
- 581 rows in this table, and a primary key is problematic
- #############################
- mysql> SELECT * FROM wp_med_projects ORDER BY VIEWS DESC LIMIT 20;
- mysql> SELECT * FROM wp_med_projects ORDER BY VIEWS DESC LIMIT 20;
- +---------+-------------+
- | PROJECT | VIEWS |
- +---------+-------------+
- | en | 91252704491 |
- | es | 14806944198 |
- | ja | 12535042404 |
- | ru | 12072227602 |
- | de | 11067376677 |
- | fr | 8305441200 |
- | it | 5738581496 |
- | pl | 5691502347 |
- | pt | 5266100175 |
- | zh | 3775941714 |
- | nl | 2198783176 |
- | tr | 1646386888 |
- | ar | 1287936214 |
- | sv | 1242519595 |
- | cs | 922966523 |
- | id | 853940379 |
- | ko | 807918557 |
- | uk | 789594746 |
- | fi | 720451053 |
- | th | 713506494 |
- +---------+-------------+
- 20 rows in set (0.00 sec)
- #############################
- mysql> SELECT SUBSTRING(m.PAGE,1,2),SUM(m.year),(SELECT MAX(VIEWS) FROM
- wp_med_projects WHERE PROJECT=SUBSTRING(m.PAGE,1,2)) as PROJ,
- 100*(SUM(m.year)/(SELECT MAX(VIEWS) FROM wp_med_projects WHERE
- PROJECT=SUBSTRING(m.PAGE,1,2))) as PERCENT FROM wp_med_monthly m
- GROUP BY SUBSTRING(m.PAGE,1,2) ORDER BY SUM(m.year) DESC;
- +-----------------------+-------------+-------------+-------------+
- | SUBSTRING(m.PAGE,1,2) | SUM(m.year) | PROJ | PERCENT |
- +-----------------------+-------------+-------------+-------------+
- | en | 2277403987 | 91252704491 | 2.4957 |
- | es | 658556909 | 14806944198 | 4.4476 |
- | de | 348355902 | 11067376677 | 3.1476 |
- | ja | 253740601 | 12535042404 | 2.0243 |
- | fr | 218532955 | 8305441200 | 2.6312 |
- | pt | 212465002 | 5266100175 | 4.0346 |
- | ru | 150162878 | 12072227602 | 1.2439 |
- | pl | 147488961 | 5691502347 | 2.5914 |
- | it | 147115245 | 5738581496 | 2.5636 |
- | nl | 75543607 | 2198783176 | 3.4357 |
- | zh | 36225111 | 3775941714 | 0.9594 |
- | tr | 33904861 | 1646386888 | 2.0593 |
- | ar | 33228397 | 1287936214 | 2.5800 |
- | sv | 30557682 | 1242519595 | 2.4593 |
- | fi | 22876930 | 720451053 | 3.1754 |
- | id | 22732356 | 853940379 | 2.6621 |
- | cs | 21955957 | 922966523 | 2.3788 |
- | fa | 17695775 | 688385541 | 2.5706 |
- | he | 16288412 | 584759276 | 2.7855 |
- | vi | 12449359 | 662259161 | 1.8798 |
- | hu | 12119560 | 634729654 | 1.9094 |
- | no | 12113834 | 596318064 | 2.0314 |
- | th | 11120603 | 713506494 | 1.5586 |
- | uk | 11050301 | 789594746 | 1.3995 |
- | hr | 8679378 | 277338602 | 3.1295 |
- | el | 7860165 | 322117346 | 2.4402 |
- | ro | 7805024 | 394144067 | 1.9802 |
- | bg | 7098983 | 345599276 | 2.0541 |
- | da | 6702468 | 399676672 | 1.6770 |
- | ko | 6543526 | 807918557 | 0.8099 |
- | ca | 4764627 | 298249613 | 1.5975 |
- | sk | 4279143 | 230244607 | 1.8585 [snip!]
- #############################
- SELECT PROJECT,VARIANCE(HITS) FROM projectcount WHERE UNIX_HOUR>=376944
- AND UNIX_HOUR<=385703 AND PROJECT NOT LIKE "%.%" GROUP BY PROJECT;
- SELECT PROJECT,VARIANCE(HITS) FROM projectcount WHERE UNIX_HOUR>=376944
- AND UNIX_HOUR<=385703 AND (PROJECT LIKE "en" OR PROJECT LIKE "it"
- OR PROJECT LIKE "zh" OR PROJECT LIKE "pt" OR PROJECT LIKE "de"
- OR PROJECT LIKE "fr" OR PROJECT LIKE "pl" OR PROJECT LIKE "ru"
- OR PROJECT LIKE "ja" OR PROJECT LIKE "es") GROUP BY PROJECT;
- +---------+--------------------+
- | PROJECT | VARIANCE(HITS) |
- +---------+--------------------+
- | de | 398996828928.1165 | = 3.9 E11
- | en | 4133472736509.7593 | = 4.1 E12
- | es | 750902541698.0205 |
- | fr | 186084980422.9977 |
- | it | 113402184155.7467 |
- | ja | 347401633589.6532 |
- | pl | 52918051447.4765 |
- | pt | 91186214042.1714 |
- | ru | 435896954210.2971 |
- | zh | 22312185282.3384 |
- +---------+--------------------+
- 10 rows in set (12.58 sec)
- Top "en" variance articles per [top_variance.java]:
- 1.1687814713924304E11 en Ricin
- 1.0235129531805556E11 en Tinnitus
- 5.2550090006354164E10 en Rorschach_test
- 4.7889255083020836E10 en Latent_inhibition
- 4.068264651888889E10 en Scientific_consensus
- 3.6692258284354164E10 en Veganism
- 3.6615616211076385E10 en Vegetarianism
- 3.3384832597833332E10 en Lacto_vegetarianism
- 2.8962499269388885E10 en Mastectomy
- 2.4725881447409725E10 en Patient_Protection_and_Affordable_Care_Act
- 2.2501433001805557E10 en Norovirus
- 2.2254987207020832E10 en Onychophagia
- 2.050006175025E10 en Sarin
- 1.7162366044E10 en Conjugate_gaze_palsy
- 1.6797197597333334E10 en Diaper
- 1.5511770852576387E10 en Leonardo_da_Vinci
- 1.2866544604076387E10 en Asperger_syndrome
- 1.1853506504E10 en Pseudofolliculitis_barbae
- 1.1301288210555557E10 en Bronchitis
- 9.5828020621875E9 en Desomorphine
- 8.810964324520834E9 en Blind_spot_(vision)
- 8.644234368354166E9 en Psychosis
- 8.600064082805555E9 en Influenza
- 8.599932074020834E9 en Bipolar_disorder
- 8.566960627888889E9 en Down_syndrome
- 8.061887144743056E9 en Blue_field_entoptic_phenomenon
- #############################
- Other questions to ask of the data:
- * Articles with greatest month to month variance
- * Articles with greatest inter-country variance
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement