Jump to content

Wikipedia:People by year/SQL for table

From Wikipedia, the free encyclopedia
DROP TABLE IF EXISTS temp_peopleyr;

CREATE TABLE temp_peopleyr
SELECT DISTINCT cur_id AS p_id, cur_title AS p_title, '0000' AS y1, '0000' AS y2, '00' AS p_cats, '00000' AS p_age, cur_title AS p_sortkey, 0 AS p_update
FROM categorylinks, cur
WHERE (cl_to LIKE '%deaths' OR cl_to LIKE '%births')
AND cl_from=cur_id
AND cur_namespace=0
AND cur_is_redirect=0
AND cl_sortkey NOT LIKE '*%'
ORDER BY cl_sortkey
LIMIT 100000;

ALTER TABLE temp_peopleyr ADD PRIMARY KEY (p_id);
ALTER TABLE temp_peopleyr ADD COLUMN p_categories VARCHAR(255);

# Adds years

UPDATE temp_peopleyr, categorylinks
SET y1=LEFT(cl_to, 4), p_sortkey=cl_sortkey
WHERE p_id=cl_from  AND cl_to LIKE '%births';

UPDATE temp_peopleyr, categorylinks
SET y2=LEFT(cl_to, 4), p_sortkey=cl_sortkey
WHERE p_id=cl_from  AND cl_to LIKE '%deaths';

UPDATE temp_peopleyr
SET p_age=y2-y1;

UPDATE temp_peopleyr
SET p_age=2004-y1
WHERE y2=0000;

DROP TABLE IF EXISTS temp_peoplecatcount;

CREATE TABLE temp_peoplecatcount
SELECT p_id AS cc_id, Count(*) AS cc_num
FROM temp_peopleyr, categorylinks
WHERE p_id=cl_from
AND cl_to NOT LIKE '%deaths'
AND cl_to NOT LIKE '%births'
# ignore categories added through templates
AND cl_to <> 'People_stubs'
AND cl_to <> 'Writer_stubs'
AND cl_to <> 'Language_stubs'
AND cl_to <> '1911_Britannica'
AND cl_to <> 'NPOV_disputes'
AND cl_to <> 'Unformatted_ice_hockey_player'
AND cl_to <> 'Substubs'
AND cl_to <> 'Articles_to_be_split'
AND cl_to <> 'Cleanup'
AND cl_to <> 'Pages_on_votes_for_deletion'
AND cl_to <> 'Templates_for_deletion'
AND cl_to <> 'Disambiguation'
GROUP BY p_title LIMIT 1000000;

UPDATE temp_peopleyr, temp_peoplecatcount
SET p_cats=cc_num
WHERE p_id=cc_id;

DROP TABLE IF EXISTS temp_peoplecatcount;

Notes

[edit]
  • Missing: Adding names of other categories into "p_categories".
  • Marginally suitable for pre-1000.