import pandas as pd

Here I load the results of the merge completed by Anton - that is why the dataframe has such an unusual name (`df_anton`).

# Load the pre-merged Ukrainian/English page-title table and preview its first 10 rows.
df_anton = pd.read_csv('uk_plus_titles.csv')
df_anton.head(10)
Unnamed: 0 id uk_page_title en_page_title uk_page_id
0 0 NaN !_(альбом_С.К.А.Й.) NaN 829565.0
1 1 4540205.0 !_(альбом) b'! (The Dismemberment Plan album)' 2112530.0
2 2 404000.0 !_(значення) b'! (disambiguation)' 1366003.0
3 3 353153.0 !!_(значення) b'!!' 2141483.0
4 4 371.0 !!! b'!!!' 425480.0
5 5 343686.0 !Action_Pact! b'!Action Pact!' 848226.0
6 6 1622767.0 !T.O.O.H.! b'!T.O.O.H.!' 425157.0
7 7 NaN !ФЕСТ NaN 432740.0
8 8 NaN !Чидро NaN 590602.0
9 9 1384337.0 $_(значення) b'$ (disambiguation)' 1369614.0
# Count the rows that have no uk_page_id.
df_anton['uk_page_id'].isnull().sum()
227639

Here we see that about 22% of the rows (227,639 of 1,021,050) have NaN in the 'uk_page_id' column, but that is still better than what we had before - almost 1M of them, with no clue why.

# Uncomment to inspect the rows whose uk_page_id is missing:
# df_anton[df_anton['uk_page_id'].isnull()]
# 'Unnamed: 0' only repeats the row number in the preview (presumably an index
# saved with the CSV), so it is dropped before merging.
df_anton.drop(columns=['Unnamed: 0'], inplace=True)
df_anton.shape
(1021050, 4)
# Load the per-title translation counts and preview the first two rows.
df_translations = pd.read_csv('uk_translations.csv')
df_translations.head(2)
Unnamed: 0 uk_page_title uk_translations_count
0 0 Всесвіт 103
1 1 Земля 145
# Drop the redundant 'Unnamed: 0' column, then check the table size.
df_translations.drop(columns=['Unnamed: 0'], inplace=True)
df_translations.shape
(1021063, 2)
# Attach the translation count to each title; the left join keeps every df_anton row.
merged1 = df_anton.merge(df_translations, on='uk_page_title', how='left')
merged1.head(10)
id uk_page_title en_page_title uk_page_id uk_translations_count
0 NaN !_(альбом_С.К.А.Й.) NaN 829565.0 NaN
1 4540205.0 !_(альбом) b'! (The Dismemberment Plan album)' 2112530.0 NaN
2 404000.0 !_(значення) b'! (disambiguation)' 1366003.0 NaN
3 353153.0 !!_(значення) b'!!' 2141483.0 NaN
4 371.0 !!! b'!!!' 425480.0 24.0
5 343686.0 !Action_Pact! b'!Action Pact!' 848226.0 NaN
6 1622767.0 !T.O.O.H.! b'!T.O.O.H.!' 425157.0 7.0
7 NaN !ФЕСТ NaN 432740.0 1.0
8 NaN !Чидро NaN 590602.0 0.0
9 1384337.0 $_(значення) b'$ (disambiguation)' 1369614.0 NaN
# Titles with no match in df_translations come back from the left join as NaN;
# we treat them as having zero translations.
# The filled column is assigned back instead of calling fillna(inplace=True) on
# the selected column: that chained in-place form is deprecated in recent
# pandas and leaves the frame unchanged when Copy-on-Write is enabled.
merged1['uk_translations_count'] = merged1['uk_translations_count'].fillna(0)
merged1.head(2)
id uk_page_title en_page_title uk_page_id uk_translations_count
0 NaN !_(альбом_С.К.А.Й.) NaN 829565.0 0.0
1 4540205.0 !_(альбом) b'! (The Dismemberment Plan album)' 2112530.0 0.0
# Load the incoming-link counts, keyed by uk_page_title.
# NOTE(review): 'Unnamed: 0' is not dropped from this table, so it is carried
# into the result of the next merge.
df_uk_incoming_links = pd.read_csv('uk_incoming_links.csv')
df_uk_incoming_links.head(2)
Unnamed: 0 uk_page_title uk_incoming_links
0 0 ! 67
1 1 !! 42
# Left-join the incoming-link counts onto the running table by page title.
merged2 = merged1.merge(df_uk_incoming_links, on='uk_page_title', how='left')
merged2.head()
id uk_page_title en_page_title uk_page_id uk_translations_count Unnamed: 0 uk_incoming_links
0 NaN !_(альбом_С.К.А.Й.) NaN 829565.0 0.0 54.0 31.0
1 4540205.0 !_(альбом) b'! (The Dismemberment Plan album)' 2112530.0 0.0 NaN NaN
2 404000.0 !_(значення) b'! (disambiguation)' 1366003.0 0.0 55.0 1.0
3 353153.0 !!_(значення) b'!!' 2141483.0 0.0 NaN NaN
4 371.0 !!! b'!!!' 425480.0 24.0 2.0 8.0
# Remove the 'Unnamed: 0' column that the incoming-links table brought along.
merged2.drop(columns=['Unnamed: 0'], inplace=True)
# Load the language-link counts, keyed by uk_page_id.
df_uk_langlinks = pd.read_csv('uk_langlinks.csv')
df_uk_langlinks.head(2)
Unnamed: 0 uk_page_id uk_langlinks_count
0 0 1 4
1 1 3 46
# Left-join the language-link counts by page id.
merged3 = merged2.merge(df_uk_langlinks, on='uk_page_id', how='left')
# Load the outgoing-link counts, also keyed by uk_page_id.
df_uk_outcoming_links = pd.read_csv('uk_outcoming_links.csv')
df_uk_outcoming_links.head(2)
Unnamed: 0 uk_page_id outcoming_links
0 0 1 18
1 1 2 1
# Left-join the outgoing-link counts by page id.
merged4 = merged3.merge(df_uk_outcoming_links, on='uk_page_id', how='left')
# Load the revision statistics (first/last edit and revision counts per page id).
df_revisions = pd.read_csv('uk_revisions.csv')
df_revisions.head(2)
Unnamed: 0 uk_page_id uk_first_edit uk_last_edit uk_revisions_count uk_minor_revisions_count uk_deleted_revisions
0 0 0 2008-11-02 13:02:01 2010-10-01 13:33:05 2 0.0 0.0
1 1 1 2003-10-20 12:27:01 2006-10-10 06:46:53 4 0.0 0.0
# Drop the redundant 'Unnamed: 0' column, then check the table size.
df_revisions.drop(columns=['Unnamed: 0'], inplace=True)
df_revisions.shape
(2427871, 6)
# Left-join the revision statistics by page id and preview the result.
merged5 = merged4.merge(df_revisions, on='uk_page_id', how='left')
merged5.head(10)
id uk_page_title en_page_title uk_page_id uk_translations_count uk_incoming_links Unnamed: 0_x uk_langlinks_count Unnamed: 0_y outcoming_links uk_first_edit uk_last_edit uk_revisions_count uk_minor_revisions_count uk_deleted_revisions
0 NaN !_(альбом_С.К.А.Й.) NaN 829565.0 0.0 31.0 NaN NaN 626729.0 40.0 2010-10-14 09:59:01 2017-04-23 15:40:11 43.0 13.0 0.0
1 4540205.0 !_(альбом) b'! (The Dismemberment Plan album)' 2112530.0 0.0 NaN 630796.0 12.0 1662038.0 12.0 2016-01-29 23:11:32 2018-03-06 11:57:03 10.0 1.0 0.0
2 404000.0 !_(значення) b'! (disambiguation)' 1366003.0 0.0 1.0 413086.0 16.0 1076129.0 32.0 2012-09-08 09:20:52 2016-05-18 09:49:18 7.0 5.0 0.0
3 353153.0 !!_(значення) b'!!' 2141483.0 0.0 NaN 639405.0 17.0 1685953.0 18.0 2016-03-11 16:18:49 2016-06-01 20:48:15 5.0 0.0 0.0
4 371.0 !!! b'!!!' 425480.0 24.0 8.0 145622.0 25.0 314423.0 36.0 2009-03-28 07:57:13 2018-04-05 10:46:42 28.0 20.0 0.0
5 343686.0 !Action_Pact! b'!Action Pact!' 848226.0 0.0 4.0 259308.0 7.0 640597.0 32.0 2010-11-05 23:22:33 2017-03-29 18:27:19 25.0 20.0 0.0
6 1622767.0 !T.O.O.H.! b'!T.O.O.H.!' 425157.0 7.0 4.0 145519.0 7.0 314185.0 22.0 2009-03-27 04:59:16 2015-06-26 23:32:41 14.0 10.0 0.0
7 NaN !ФЕСТ NaN 432740.0 1.0 31.0 147527.0 1.0 319919.0 81.0 2009-04-12 13:04:12 2018-04-07 07:10:29 200.0 69.0 0.0
8 NaN !Чидро NaN 590602.0 0.0 4.0 NaN NaN 438278.0 13.0 2009-12-07 02:46:18 2017-09-21 06:10:03 12.0 7.0 0.0
9 1384337.0 $_(значення) b'$ (disambiguation)' 1369614.0 0.0 1.0 413972.0 13.0 1079022.0 10.0 2012-09-14 15:00:11 2014-04-24 16:08:55 5.0 4.0 0.0
# Load the translated-outgoing-links table and check its size.
df_outcoming_links_translated = pd.read_csv('uk_outcoming_links_translated.csv')
df_outcoming_links_translated.shape
(261460, 3)
# Size of the running table before the last merge.
# NOTE(review): df_anton reported 1,021,050 rows but merged5 reports 1,021,092,
# so at least one of the left merges matched a duplicated key on the right
# side - confirm which table and whether the extra rows are wanted.
merged5.shape
(1021092, 15)
# Left-join the translated-outgoing-link counts by page id.
merged6 = merged5.merge(df_outcoming_links_translated, on='uk_page_id', how='left')
merged6.head()
id uk_page_title en_page_title uk_page_id uk_translations_count uk_incoming_links Unnamed: 0_x uk_langlinks_count Unnamed: 0_y outcoming_links uk_first_edit uk_last_edit uk_revisions_count uk_minor_revisions_count uk_deleted_revisions Unnamed: 0 outcoming_links_translated
0 NaN !_(альбом_С.К.А.Й.) NaN 829565.0 0.0 31.0 NaN NaN 626729.0 40.0 2010-10-14 09:59:01 2017-04-23 15:40:11 43.0 13.0 0.0 NaN NaN
1 4540205.0 !_(альбом) b'! (The Dismemberment Plan album)' 2112530.0 0.0 NaN 630796.0 12.0 1662038.0 12.0 2016-01-29 23:11:32 2018-03-06 11:57:03 10.0 1.0 0.0 NaN NaN
2 404000.0 !_(значення) b'! (disambiguation)' 1366003.0 0.0 1.0 413086.0 16.0 1076129.0 32.0 2012-09-08 09:20:52 2016-05-18 09:49:18 7.0 5.0 0.0 NaN NaN
3 353153.0 !!_(значення) b'!!' 2141483.0 0.0 NaN 639405.0 17.0 1685953.0 18.0 2016-03-11 16:18:49 2016-06-01 20:48:15 5.0 0.0 0.0 NaN NaN
4 371.0 !!! b'!!!' 425480.0 24.0 8.0 145622.0 25.0 314423.0 36.0 2009-03-28 07:57:13 2018-04-05 10:46:42 28.0 20.0 0.0 31255.0 36.0
# Remove the three leftover 'Unnamed: 0' columns: the langlinks and
# outgoing-links tables were merged without dropping theirs (hence the _x/_y
# suffixes), and the translated-links table contributed the unsuffixed one.
merged6.drop(columns=['Unnamed: 0_x', 'Unnamed: 0_y', 'Unnamed: 0'], inplace=True)
# Persist the final merged table.
# NOTE(review): to_csv is called without index=False, so the row index is
# written as an extra leading column - confirm whether readers of this file
# expect that before changing it.
merged6.to_csv('final_uk_merge.csv')
def missing_data(data):
    """Summarise the missing values in each column of ``data``.

    Returns a DataFrame indexed by column name with two columns:
    ``Total`` (number of null cells in the column) and ``Percent`` (that
    number as a percentage of the row count). Both series are sorted in
    descending order before being combined.
    """
    null_mask = data.isnull()
    null_counts = null_mask.sum()
    null_share = null_counts / null_mask.count() * 100
    summary_parts = [
        null_counts.sort_values(ascending=False),
        null_share.sort_values(ascending=False),
    ]
    return pd.concat(summary_parts, axis=1, keys=['Total', 'Percent'])
# Report null counts and percentages for every column of the final table.
missing_data(merged6)
Total Percent
outcoming_links_translated 867258 84.934364
en_page_title 439502 43.042351
id 439502 43.042351
uk_langlinks_count 376767 36.898438
uk_incoming_links 258837 25.349038
outcoming_links 227990 22.328057
uk_deleted_revisions 227719 22.301516
uk_minor_revisions_count 227719 22.301516
uk_revisions_count 227719 22.301516
uk_last_edit 227719 22.301516
uk_first_edit 227719 22.301516
uk_page_id 227639 22.293682
uk_page_title 48 0.004701
uk_translations_count 0 0.000000

Here we try to understand why there are so many NaNs in 'outcoming_links_translated'. Firstly, that table has only about 260k rows, compared to the 1M-row dataframe it was joined to. Secondly, some of its rows simply do not appear in the left dataframe (`merged5`) of the left join.

# Show the rows that have no outcoming_links_translated value after the last merge.
merged6[merged6['outcoming_links_translated'].isnull()]
id uk_page_title en_page_title uk_page_id uk_translations_count uk_incoming_links uk_langlinks_count outcoming_links uk_first_edit uk_last_edit uk_revisions_count uk_minor_revisions_count uk_deleted_revisions outcoming_links_translated
0 NaN !_(альбом_С.К.А.Й.) NaN 829565.0 0.0 31.0 NaN 40.0 2010-10-14 09:59:01 2017-04-23 15:40:11 43.0 13.0 0.0 NaN
1 4540205.0 !_(альбом) b'! (The Dismemberment Plan album)' 2112530.0 0.0 NaN 12.0 12.0 2016-01-29 23:11:32 2018-03-06 11:57:03 10.0 1.0 0.0 NaN
2 404000.0 !_(значення) b'! (disambiguation)' 1366003.0 0.0 1.0 16.0 32.0 2012-09-08 09:20:52 2016-05-18 09:49:18 7.0 5.0 0.0 NaN
3 353153.0 !!_(значення) b'!!' 2141483.0 0.0 NaN 17.0 18.0 2016-03-11 16:18:49 2016-06-01 20:48:15 5.0 0.0 0.0 NaN
5 343686.0 !Action_Pact! b'!Action Pact!' 848226.0 0.0 4.0 7.0 32.0 2010-11-05 23:22:33 2017-03-29 18:27:19 25.0 20.0 0.0 NaN
7 NaN !ФЕСТ NaN 432740.0 1.0 31.0 1.0 81.0 2009-04-12 13:04:12 2018-04-07 07:10:29 200.0 69.0 0.0 NaN
8 NaN !Чидро NaN 590602.0 0.0 4.0 NaN 13.0 2009-12-07 02:46:18 2017-09-21 06:10:03 12.0 7.0 0.0 NaN
9 1384337.0 $_(значення) b'$ (disambiguation)' 1369614.0 0.0 1.0 13.0 10.0 2012-09-14 15:00:11 2014-04-24 16:08:55 5.0 4.0 0.0 NaN
10 NaN $uicideboy$ NaN 2610007.0 6.0 1.0 6.0 14.0 2017-12-22 13:51:57 2018-04-25 14:24:08 9.0 4.0 0.0 NaN
11 NaN &RQ NaN 156371.0 2.0 52.0 2.0 114.0 2007-04-16 19:51:51 2015-09-16 11:01:40 22.0 9.0 0.0 NaN
12 149404.0 '03_Bonnie_&_Clyde b"'03 Bonnie & Clyde" 425479.0 0.0 17.0 13.0 116.0 2009-03-28 07:52:16 2017-11-20 18:10:13 20.0 15.0 0.0 NaN
14 4540381.0 '50s_on_5 b"'50s on 5" 1624538.0 0.0 2.0 1.0 35.0 2013-11-25 10:51:43 2016-11-22 08:08:58 19.0 4.0 0.0 NaN
15 4540383.0 '60s_on_6 b"'60s on 6" 1625253.0 0.0 2.0 1.0 47.0 2013-11-27 08:02:22 2018-02-25 22:55:45 13.0 2.0 0.0 NaN
16 425605.0 '74_Jailbreak b"'74 Jailbreak" 226044.0 0.0 49.0 18.0 91.0 2007-11-16 20:47:42 2017-08-20 15:18:27 32.0 22.0 0.0 NaN
17 891050.0 '92_Tour_EP b"'92 Tour EP" 1707500.0 0.0 47.0 6.0 94.0 2014-05-26 12:53:25 2017-01-31 05:44:39 5.0 2.0 0.0 NaN
18 1047737.0 '98_Live_Meltdown b"'98 Live Meltdown" 1702515.0 0.0 2.0 18.0 13.0 2014-05-19 17:01:10 2015-02-07 19:00:00 4.0 3.0 0.0 NaN
19 3612456.0 'Allelujah!_Don't_Bend!_Ascend! b"'Allelujah! Don't Bend! Ascend!" 2144196.0 0.0 9.0 7.0 31.0 2016-03-14 22:34:19 2017-01-12 18:19:01 4.0 1.0 0.0 NaN
20 NaN 'Cause_You_Are_Young NaN 280158.0 0.0 1.0 5.0 28.0 2008-04-09 14:37:28 2018-05-22 06:22:14 26.0 16.0 0.0 NaN
21 NaN 'Cichlasoma' NaN 357578.0 0.0 15.0 NaN 67.0 2008-09-10 08:08:39 2014-06-19 20:40:19 55.0 19.0 0.0 NaN
22 5527115.0 'Cichlasoma'_salvini b"Salvin's cichlid" 1530738.0 0.0 7.0 13.0 44.0 2013-04-12 14:50:08 2014-12-07 23:56:39 9.0 3.0 0.0 NaN
23 NaN 'Lac_Motion NaN 1499746.0 0.0 4.0 1.0 18.0 2013-02-10 23:57:50 2015-12-31 15:31:47 11.0 5.0 0.0 NaN
24 205891.0 'O_sole_mio b'\xe2\x80\x99O sole mio' 1648497.0 0.0 9.0 40.0 40.0 2014-01-28 16:06:19 2018-02-02 16:34:17 9.0 2.0 0.0 NaN
25 4540515.0 'Round_About_Midnight_at_the_Cafe_Bohemia b"'Round About Midnight at the Cafe Bohemia" 2541846.0 0.0 4.0 2.0 28.0 2017-09-21 22:46:21 2017-12-28 13:56:43 6.0 4.0 0.0 NaN
26 4540516.0 'Round_Midnight_(альбом_Бетті_Картер,_1963) b"'Round Midnight (1963 Betty Carter album)" 2036513.0 0.0 3.0 1.0 41.0 2015-10-16 08:51:55 2017-12-07 12:26:29 19.0 4.0 0.0 NaN
27 NaN 'Абд_Аллаг_'Афіф_ад-Дін-Ефенді NaN 2202101.0 0.0 6.0 NaN 15.0 2016-06-12 12:01:30 2017-04-23 10:34:48 9.0 6.0 0.0 NaN
28 399787.0 (_)_(значення) b'( ) (disambiguation)' 1369629.0 0.0 NaN 5.0 5.0 2012-09-14 15:30:52 2016-08-19 18:46:55 7.0 3.0 0.0 NaN
29 NaN (10003)_1971_UD1 NaN 988044.0 0.0 4.0 7.0 111.0 2011-05-24 11:27:53 2013-12-24 11:00:57 16.0 8.0 0.0 NaN
30 NaN (100048)_1991_TE14 NaN 990133.0 0.0 3.0 3.0 110.0 2011-05-27 10:13:41 2013-12-24 15:31:25 21.0 11.0 0.0 NaN
31 1627099.0 (11435)_1931_UB b'(11435) 1931 UB' 987942.0 0.0 4.0 5.0 110.0 2011-05-24 09:03:54 2013-12-24 10:24:30 14.0 8.0 0.0 NaN
32 2221416.0 (11436)_1969_QR b'(11436) 1969 QR' 988039.0 0.0 4.0 5.0 111.0 2011-05-24 11:27:31 2013-12-24 10:54:46 15.0 9.0 0.0 NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1021061 NaN NaN 682416.0 0.0 1.0 NaN 33.0 2010-04-20 18:09:51 2017-11-18 22:41:35 2.0 1.0 0.0 NaN
1021062 NaN NaN 682979.0 0.0 3.0 NaN 36.0 2010-04-21 18:35:11 2017-11-18 22:41:36 4.0 2.0 0.0 NaN
1021063 NaN NaN 682981.0 0.0 3.0 NaN 34.0 2010-04-21 18:48:42 2017-11-18 22:41:36 4.0 2.0 0.0 NaN
1021064 NaN NaN 683009.0 1.0 1.0 1.0 39.0 2010-04-21 20:36:31 2017-11-18 22:41:37 3.0 2.0 0.0 NaN
1021065 NaN NaN 683019.0 1.0 3.0 1.0 34.0 2010-04-21 21:13:14 2017-11-18 22:41:38 3.0 1.0 0.0 NaN
1021066 NaN NaN 683043.0 0.0 1.0 NaN 31.0 2010-04-21 22:51:47 2017-11-18 22:41:39 3.0 1.0 0.0 NaN
1021067 NaN NaN 683048.0 0.0 3.0 NaN 39.0 2010-04-21 23:10:39 2017-11-18 22:41:39 2.0 1.0 0.0 NaN
1021068 NaN NaN 683768.0 0.0 1.0 NaN 33.0 2010-04-22 23:12:22 2017-11-18 22:41:40 3.0 1.0 0.0 NaN
1021069 NaN NaN 683773.0 0.0 2.0 NaN 67.0 2010-04-22 23:51:22 2017-11-18 22:41:41 6.0 2.0 0.0 NaN
1021070 NaN NaN 683779.0 0.0 1.0 NaN 41.0 2010-04-23 00:41:37 2017-11-18 22:41:41 5.0 1.0 0.0 NaN
1021071 NaN NaN 683789.0 0.0 5.0 NaN 65.0 2010-04-23 01:55:53 2017-11-18 22:41:42 14.0 1.0 0.0 NaN
1021072 NaN NaN 683775.0 0.0 2.0 NaN 63.0 2010-04-22 23:58:53 2017-11-18 22:41:42 5.0 2.0 0.0 NaN
1021073 NaN NaN 684460.0 0.0 2.0 NaN 36.0 2010-04-23 10:24:09 2017-11-18 22:41:44 2.0 1.0 0.0 NaN
1021074 NaN NaN 684619.0 0.0 1.0 NaN 49.0 2010-04-23 10:47:16 2017-11-18 22:41:44 2.0 1.0 0.0 NaN
1021075 NaN NaN 684732.0 0.0 2.0 NaN 41.0 2010-04-23 11:37:14 2017-11-18 22:41:45 3.0 1.0 0.0 NaN
1021076 NaN NaN 772886.0 0.0 4.0 NaN 33.0 2010-07-22 03:34:57 2017-11-18 22:41:47 4.0 1.0 0.0 NaN
1021077 NaN NaN 674668.0 0.0 2.0 NaN 33.0 2010-04-11 14:29:29 2017-11-18 22:41:47 5.0 1.0 0.0 NaN
1021078 NaN NaN 772899.0 0.0 4.0 NaN 34.0 2010-07-22 05:43:34 2017-11-18 22:41:49 4.0 1.0 0.0 NaN
1021079 NaN NaN 772923.0 1.0 2.0 1.0 45.0 2010-07-22 07:29:02 2017-11-18 22:41:49 2.0 1.0 0.0 NaN
1021080 NaN NaN 682279.0 0.0 NaN NaN 41.0 2010-04-20 14:30:05 2017-11-18 22:41:50 3.0 2.0 0.0 NaN
1021081 NaN NaN 677307.0 0.0 3.0 NaN 34.0 2010-04-15 18:46:52 2017-11-18 22:41:50 11.0 4.0 0.0 NaN
1021082 NaN NaN 677313.0 0.0 3.0 NaN 35.0 2010-04-15 18:52:54 2017-11-18 22:41:51 10.0 1.0 0.0 NaN
1021083 NaN NaN 772894.0 1.0 4.0 1.0 41.0 2010-07-22 04:46:40 2017-11-18 22:41:52 5.0 1.0 0.0 NaN
1021084 NaN NaN 772935.0 1.0 3.0 1.0 44.0 2010-07-22 07:55:41 2017-11-18 22:41:53 2.0 1.0 0.0 NaN
1021085 NaN NaN 686124.0 0.0 2.0 NaN 33.0 2010-04-24 05:08:29 2017-11-18 22:41:54 3.0 2.0 0.0 NaN
1021086 NaN NaN 677296.0 0.0 3.0 NaN 34.0 2010-04-15 18:38:15 2017-11-18 22:41:55 4.0 1.0 0.0 NaN
1021087 NaN NaN 674415.0 0.0 1.0 NaN 37.0 2010-04-11 06:02:15 2017-11-18 22:41:56 5.0 1.0 0.0 NaN
1021088 NaN NaN 674381.0 0.0 1.0 NaN 34.0 2010-04-10 21:05:11 2017-11-18 22:41:56 9.0 1.0 0.0 NaN
1021089 NaN NaN 772901.0 0.0 1.0 NaN 32.0 2010-07-22 05:48:27 2017-11-18 22:41:57 3.0 1.0 0.0 NaN
1021090 NaN NaN 685289.0 0.0 2.0 NaN 34.0 2010-04-23 13:04:20 2017-11-18 22:41:45 2.0 1.0 0.0 NaN

867258 rows × 14 columns

# Print the uk_page_id values of df_outcoming_links_translated that never occur
# in merged6, i.e. the rows the left join had nothing to attach to.
# One vectorised isin() replaces the original per-element scan of the whole
# merged6 column, which was quadratic in the table sizes.
translated_ids = df_outcoming_links_translated['uk_page_id']
unmatched_ids = translated_ids[~translated_ids.isin(merged6['uk_page_id'])]
for el in unmatched_ids.values:
    # Same output as before: every row carrying this id, under its original index.
    print(unmatched_ids[unmatched_ids == el])
557    1530
Name: uk_page_id, dtype: int64
2526    6399
Name: uk_page_id, dtype: int64
3924    11870
Name: uk_page_id, dtype: int64
3932    11969
Name: uk_page_id, dtype: int64
4167    12830
Name: uk_page_id, dtype: int64
4205    13067
Name: uk_page_id, dtype: int64
4224    13209
Name: uk_page_id, dtype: int64
4225    13211
Name: uk_page_id, dtype: int64
4226    13213
Name: uk_page_id, dtype: int64
4227    13217
Name: uk_page_id, dtype: int64
4228    13219
Name: uk_page_id, dtype: int64
4271    13415
Name: uk_page_id, dtype: int64
4272    13416
Name: uk_page_id, dtype: int64
4273    13418
Name: uk_page_id, dtype: int64
4276    13434
Name: uk_page_id, dtype: int64
4277    13443
Name: uk_page_id, dtype: int64
4278    13444
Name: uk_page_id, dtype: int64
4279    13446
Name: uk_page_id, dtype: int64
4280    13447
Name: uk_page_id, dtype: int64
4281    13448
Name: uk_page_id, dtype: int64
4282    13449
Name: uk_page_id, dtype: int64
4283    13450
Name: uk_page_id, dtype: int64
4284    13451
Name: uk_page_id, dtype: int64
4285    13452
Name: uk_page_id, dtype: int64
4286    13453
Name: uk_page_id, dtype: int64
4287    13454
Name: uk_page_id, dtype: int64
4288    13455
Name: uk_page_id, dtype: int64
4289    13456
Name: uk_page_id, dtype: int64
4290    13457
Name: uk_page_id, dtype: int64
4291    13458
Name: uk_page_id, dtype: int64
4292    13459
Name: uk_page_id, dtype: int64
4293    13460
Name: uk_page_id, dtype: int64
4294    13461
Name: uk_page_id, dtype: int64
4295    13463
Name: uk_page_id, dtype: int64
4296    13464
Name: uk_page_id, dtype: int64
4297    13465
Name: uk_page_id, dtype: int64
4298    13474
Name: uk_page_id, dtype: int64
4299    13485
Name: uk_page_id, dtype: int64
4300    13486
Name: uk_page_id, dtype: int64
4301    13487
Name: uk_page_id, dtype: int64
4302    13488
Name: uk_page_id, dtype: int64
4303    13489
Name: uk_page_id, dtype: int64
4304    13496
Name: uk_page_id, dtype: int64
4305    13504
Name: uk_page_id, dtype: int64
4306    13532
Name: uk_page_id, dtype: int64
4307    13535
Name: uk_page_id, dtype: int64
4309    13537
Name: uk_page_id, dtype: int64
4311    13539
Name: uk_page_id, dtype: int64
4312    13540
Name: uk_page_id, dtype: int64
4313    13542
Name: uk_page_id, dtype: int64
4314    13545
Name: uk_page_id, dtype: int64
4315    13549
Name: uk_page_id, dtype: int64
4316    13557
Name: uk_page_id, dtype: int64
4317    13562
Name: uk_page_id, dtype: int64
4318    13563
Name: uk_page_id, dtype: int64
4319    13566
Name: uk_page_id, dtype: int64
4320    13568
Name: uk_page_id, dtype: int64
4321    13570
Name: uk_page_id, dtype: int64
4322    13575
Name: uk_page_id, dtype: int64
4323    13577
Name: uk_page_id, dtype: int64
4324    13579
Name: uk_page_id, dtype: int64
4326    13640
Name: uk_page_id, dtype: int64
4327    13647
Name: uk_page_id, dtype: int64
4328    13648
Name: uk_page_id, dtype: int64
4329    13649
Name: uk_page_id, dtype: int64
4330    13650
Name: uk_page_id, dtype: int64
4331    13756
Name: uk_page_id, dtype: int64
4332    13757
Name: uk_page_id, dtype: int64
4333    13758
Name: uk_page_id, dtype: int64
4334    13759
Name: uk_page_id, dtype: int64
4335    13760
Name: uk_page_id, dtype: int64
4336    13761
Name: uk_page_id, dtype: int64
4337    13762
Name: uk_page_id, dtype: int64
4338    13763
Name: uk_page_id, dtype: int64
4339    13764
Name: uk_page_id, dtype: int64
4340    13765
Name: uk_page_id, dtype: int64
4341    13766
Name: uk_page_id, dtype: int64
4342    13767
Name: uk_page_id, dtype: int64
4343    13768
Name: uk_page_id, dtype: int64
4344    13769
Name: uk_page_id, dtype: int64
4345    13770
Name: uk_page_id, dtype: int64
4346    13771
Name: uk_page_id, dtype: int64
4347    13772
Name: uk_page_id, dtype: int64
4348    13773
Name: uk_page_id, dtype: int64
4349    13774
Name: uk_page_id, dtype: int64
4350    13775
Name: uk_page_id, dtype: int64
4351    13776
Name: uk_page_id, dtype: int64
4352    13779
Name: uk_page_id, dtype: int64
4353    13803
Name: uk_page_id, dtype: int64
4354    13807
Name: uk_page_id, dtype: int64
4355    13808
Name: uk_page_id, dtype: int64
4356    13809
Name: uk_page_id, dtype: int64
4357    13810
Name: uk_page_id, dtype: int64
4358    13811
Name: uk_page_id, dtype: int64
4359    13812
Name: uk_page_id, dtype: int64
4360    13813
Name: uk_page_id, dtype: int64
4361    13814
Name: uk_page_id, dtype: int64
4362    13815
Name: uk_page_id, dtype: int64
4363    13816
Name: uk_page_id, dtype: int64
4364    13817
Name: uk_page_id, dtype: int64
4365    13818
Name: uk_page_id, dtype: int64
4366    13819
Name: uk_page_id, dtype: int64
4367    13820
Name: uk_page_id, dtype: int64
4368    13821
Name: uk_page_id, dtype: int64
4369    13822
Name: uk_page_id, dtype: int64
4370    13823
Name: uk_page_id, dtype: int64
4371    13824
Name: uk_page_id, dtype: int64
4372    13825
Name: uk_page_id, dtype: int64
4373    13826
Name: uk_page_id, dtype: int64
4374    13827
Name: uk_page_id, dtype: int64
4375    13828
Name: uk_page_id, dtype: int64
4376    13829
Name: uk_page_id, dtype: int64
4377    13830
Name: uk_page_id, dtype: int64
4378    13831
Name: uk_page_id, dtype: int64
4379    13832
Name: uk_page_id, dtype: int64
4380    13833
Name: uk_page_id, dtype: int64
4381    13834
Name: uk_page_id, dtype: int64
4382    13835
Name: uk_page_id, dtype: int64
4383    13836
Name: uk_page_id, dtype: int64
4384    13837
Name: uk_page_id, dtype: int64
4386    13842
Name: uk_page_id, dtype: int64
4387    13844
Name: uk_page_id, dtype: int64
4495    14028
Name: uk_page_id, dtype: int64
4519    14423
Name: uk_page_id, dtype: int64
4520    14424
Name: uk_page_id, dtype: int64
4521    14425
Name: uk_page_id, dtype: int64
4522    14426
Name: uk_page_id, dtype: int64
4530    14538
Name: uk_page_id, dtype: int64
4545    14572
Name: uk_page_id, dtype: int64
4546    14578
Name: uk_page_id, dtype: int64
4547    14579
Name: uk_page_id, dtype: int64
4548    14580
Name: uk_page_id, dtype: int64
4549    14581
Name: uk_page_id, dtype: int64
4551    14593
Name: uk_page_id, dtype: int64
4555    14611
Name: uk_page_id, dtype: int64
4574    14933
Name: uk_page_id, dtype: int64
4581    14996
Name: uk_page_id, dtype: int64
4582    14997
Name: uk_page_id, dtype: int64
4583    14999
Name: uk_page_id, dtype: int64
4585    15001
Name: uk_page_id, dtype: int64
4587    15015
Name: uk_page_id, dtype: int64
4588    15021
Name: uk_page_id, dtype: int64
4589    15022
Name: uk_page_id, dtype: int64
4602    15080
Name: uk_page_id, dtype: int64
4605    15090
Name: uk_page_id, dtype: int64
4606    15094
Name: uk_page_id, dtype: int64
4607    15096
Name: uk_page_id, dtype: int64
4608    15100
Name: uk_page_id, dtype: int64
4609    15102
Name: uk_page_id, dtype: int64
4610    15103
Name: uk_page_id, dtype: int64
4648    15401
Name: uk_page_id, dtype: int64
4708    15725
Name: uk_page_id, dtype: int64
4710    15733
Name: uk_page_id, dtype: int64
4711    15735
Name: uk_page_id, dtype: int64
4712    15740
Name: uk_page_id, dtype: int64
4713    15741
Name: uk_page_id, dtype: int64
4714    15745
Name: uk_page_id, dtype: int64
4720    15812
Name: uk_page_id, dtype: int64
4754    16090
Name: uk_page_id, dtype: int64
4755    16098
Name: uk_page_id, dtype: int64
4756    16101
Name: uk_page_id, dtype: int64
4757    16106
Name: uk_page_id, dtype: int64
4804    16455
Name: uk_page_id, dtype: int64
4829    16748
Name: uk_page_id, dtype: int64
4830    16762
Name: uk_page_id, dtype: int64
4846    16955
Name: uk_page_id, dtype: int64
4849    16971
Name: uk_page_id, dtype: int64
4855    17108
Name: uk_page_id, dtype: int64
4856    17110
Name: uk_page_id, dtype: int64
4874    17274
Name: uk_page_id, dtype: int64
4894    17531
Name: uk_page_id, dtype: int64
4895    17532
Name: uk_page_id, dtype: int64
4896    17534
Name: uk_page_id, dtype: int64
4909    17698
Name: uk_page_id, dtype: int64
4955    18002
Name: uk_page_id, dtype: int64
4964    18049
Name: uk_page_id, dtype: int64
5031    18978
Name: uk_page_id, dtype: int64
5034    19011
Name: uk_page_id, dtype: int64
5038    19041
Name: uk_page_id, dtype: int64
5040    19057
Name: uk_page_id, dtype: int64
5042    19066
Name: uk_page_id, dtype: int64
5043    19067
Name: uk_page_id, dtype: int64
5044    19068
Name: uk_page_id, dtype: int64
5045    19070
Name: uk_page_id, dtype: int64
5046    19071
Name: uk_page_id, dtype: int64
5047    19072
Name: uk_page_id, dtype: int64
5049    19089
Name: uk_page_id, dtype: int64
5050    19106
Name: uk_page_id, dtype: int64
5057    19177
Name: uk_page_id, dtype: int64
5064    19243
Name: uk_page_id, dtype: int64
5076    19290
Name: uk_page_id, dtype: int64
5077    19292
Name: uk_page_id, dtype: int64
5078    19305
Name: uk_page_id, dtype: int64
5157    19822
Name: uk_page_id, dtype: int64
5161    19840
Name: uk_page_id, dtype: int64
5248    20465
Name: uk_page_id, dtype: int64
5251    20489
Name: uk_page_id, dtype: int64
5268    20624
Name: uk_page_id, dtype: int64
5317    20997
Name: uk_page_id, dtype: int64
5354    21268
Name: uk_page_id, dtype: int64
5394    21472
Name: uk_page_id, dtype: int64
5397    21528
Name: uk_page_id, dtype: int64
5402    21578
Name: uk_page_id, dtype: int64
5403    21581
Name: uk_page_id, dtype: int64
5404    21589
Name: uk_page_id, dtype: int64
5405    21592
Name: uk_page_id, dtype: int64
5425    21673
Name: uk_page_id, dtype: int64
5489    22035
Name: uk_page_id, dtype: int64
5500    22121
Name: uk_page_id, dtype: int64
5506    22186
Name: uk_page_id, dtype: int64
5507    22188
Name: uk_page_id, dtype: int64
5517    22213
Name: uk_page_id, dtype: int64
5518    22214
Name: uk_page_id, dtype: int64
5539    22360
Name: uk_page_id, dtype: int64
5548    22458
Name: uk_page_id, dtype: int64
5555    22491
Name: uk_page_id, dtype: int64
5556    22492
Name: uk_page_id, dtype: int64
5569    22576
Name: uk_page_id, dtype: int64
5570    22579
Name: uk_page_id, dtype: int64
5571    22588
Name: uk_page_id, dtype: int64
5573    22593
Name: uk_page_id, dtype: int64
5585    22652
Name: uk_page_id, dtype: int64
5587    22676
Name: uk_page_id, dtype: int64
5588    22684
Name: uk_page_id, dtype: int64
5595    22721
Name: uk_page_id, dtype: int64
5604    22779
Name: uk_page_id, dtype: int64
5605    22805
Name: uk_page_id, dtype: int64
5606    22808
Name: uk_page_id, dtype: int64
5607    22810
Name: uk_page_id, dtype: int64
5621    22953
Name: uk_page_id, dtype: int64
5628    22984
Name: uk_page_id, dtype: int64
5629    22987
Name: uk_page_id, dtype: int64
5630    22988
Name: uk_page_id, dtype: int64
5631    22992
Name: uk_page_id, dtype: int64
5632    23008
Name: uk_page_id, dtype: int64
5633    23017
Name: uk_page_id, dtype: int64
5635    23021
Name: uk_page_id, dtype: int64
5667    23230
Name: uk_page_id, dtype: int64
5668    23231
Name: uk_page_id, dtype: int64
5676    23278
Name: uk_page_id, dtype: int64
5688    23378
Name: uk_page_id, dtype: int64
5695    23404
Name: uk_page_id, dtype: int64
5725    23745
Name: uk_page_id, dtype: int64
5728    23759
Name: uk_page_id, dtype: int64
5732    23782
Name: uk_page_id, dtype: int64
5733    23790
Name: uk_page_id, dtype: int64
5788    24139
Name: uk_page_id, dtype: int64
5794    24180
Name: uk_page_id, dtype: int64
5799    24192
Name: uk_page_id, dtype: int64
5829    24461
Name: uk_page_id, dtype: int64
5831    24465
Name: uk_page_id, dtype: int64
5858    24629
Name: uk_page_id, dtype: int64
5873    24688
Name: uk_page_id, dtype: int64
5888    24790
Name: uk_page_id, dtype: int64
5892    24827
Name: uk_page_id, dtype: int64
5895    24837
Name: uk_page_id, dtype: int64
5905    24886
Name: uk_page_id, dtype: int64
5910    24921
Name: uk_page_id, dtype: int64
5912    24970
Name: uk_page_id, dtype: int64
5930    25037
Name: uk_page_id, dtype: int64
5932    25041
Name: uk_page_id, dtype: int64
5957    25266
Name: uk_page_id, dtype: int64
5959    25305
Name: uk_page_id, dtype: int64
5963    25319
Name: uk_page_id, dtype: int64
5969    25334
Name: uk_page_id, dtype: int64
5979    25387
Name: uk_page_id, dtype: int64
5980    25392
Name: uk_page_id, dtype: int64
6020    25609
Name: uk_page_id, dtype: int64
6021    25614
Name: uk_page_id, dtype: int64
6038    25744
Name: uk_page_id, dtype: int64
6043    25771
Name: uk_page_id, dtype: int64
6048    25806
Name: uk_page_id, dtype: int64
6050    25827
Name: uk_page_id, dtype: int64
6062    25899
Name: uk_page_id, dtype: int64
6072    25931
Name: uk_page_id, dtype: int64
6079    25957
Name: uk_page_id, dtype: int64
6082    25986
Name: uk_page_id, dtype: int64
6086    26004
Name: uk_page_id, dtype: int64
6098    26038
Name: uk_page_id, dtype: int64
6109    26090
Name: uk_page_id, dtype: int64
6131    26217
Name: uk_page_id, dtype: int64
6150    26322
Name: uk_page_id, dtype: int64
6168    26367
Name: uk_page_id, dtype: int64
6173    26382
Name: uk_page_id, dtype: int64
6185    26419
Name: uk_page_id, dtype: int64
6188    26468
Name: uk_page_id, dtype: int64
6197    26521
Name: uk_page_id, dtype: int64
6199    26534
Name: uk_page_id, dtype: int64
6203    26554
Name: uk_page_id, dtype: int64
6205    26566
Name: uk_page_id, dtype: int64
6206    26568
Name: uk_page_id, dtype: int64
6207    26578
Name: uk_page_id, dtype: int64
6215    26657
Name: uk_page_id, dtype: int64
6219    26700
Name: uk_page_id, dtype: int64
6240    27223
Name: uk_page_id, dtype: int64
6244    27290
Name: uk_page_id, dtype: int64
6253    27346
Name: uk_page_id, dtype: int64
6254    27351
Name: uk_page_id, dtype: int64
6255    27362
Name: uk_page_id, dtype: int64
6279    27443
Name: uk_page_id, dtype: int64
6293    27512
Name: uk_page_id, dtype: int64
6298    27531
Name: uk_page_id, dtype: int64
6304    27565
Name: uk_page_id, dtype: int64
6305    27567
Name: uk_page_id, dtype: int64
6306    27568
Name: uk_page_id, dtype: int64
6307    27570
Name: uk_page_id, dtype: int64
6308    27577
Name: uk_page_id, dtype: int64
6313    27605
Name: uk_page_id, dtype: int64
6315    27617
Name: uk_page_id, dtype: int64
6324    27696
Name: uk_page_id, dtype: int64
6359    27946
Name: uk_page_id, dtype: int64
6366    27981
Name: uk_page_id, dtype: int64
6372    28059
Name: uk_page_id, dtype: int64
6373    28061
Name: uk_page_id, dtype: int64
6389    28209
Name: uk_page_id, dtype: int64
6401    28312
Name: uk_page_id, dtype: int64
6432    28495
Name: uk_page_id, dtype: int64
6439    28527
Name: uk_page_id, dtype: int64
6441    28551
Name: uk_page_id, dtype: int64
6443    28610
Name: uk_page_id, dtype: int64
6457    28707
Name: uk_page_id, dtype: int64
6460    28729
Name: uk_page_id, dtype: int64
6477    28825
Name: uk_page_id, dtype: int64
6480    28849
Name: uk_page_id, dtype: int64
6484    28872
Name: uk_page_id, dtype: int64
6486    28894
Name: uk_page_id, dtype: int64
6487    28907
Name: uk_page_id, dtype: int64
6488    28925
Name: uk_page_id, dtype: int64
6491    28959
Name: uk_page_id, dtype: int64
6494    28984
Name: uk_page_id, dtype: int64
6495    28992
Name: uk_page_id, dtype: int64
6498    29025
Name: uk_page_id, dtype: int64
6501    29044
Name: uk_page_id, dtype: int64
6502    29045
Name: uk_page_id, dtype: int64
6513    29085
Name: uk_page_id, dtype: int64
6525    29136
Name: uk_page_id, dtype: int64
6526    29137
Name: uk_page_id, dtype: int64
6530    29155
Name: uk_page_id, dtype: int64
6531    29160
Name: uk_page_id, dtype: int64
6539    29274
Name: uk_page_id, dtype: int64
6541    29286
Name: uk_page_id, dtype: int64
6544    29290
Name: uk_page_id, dtype: int64
6547    29299
Name: uk_page_id, dtype: int64
6564    29506
Name: uk_page_id, dtype: int64
6566    29533
Name: uk_page_id, dtype: int64
6569    29626
Name: uk_page_id, dtype: int64
6572    29665
Name: uk_page_id, dtype: int64
6574    29733
Name: uk_page_id, dtype: int64
6581    29777
Name: uk_page_id, dtype: int64
6583    29782
Name: uk_page_id, dtype: int64
6584    29785
Name: uk_page_id, dtype: int64
6585    29786
Name: uk_page_id, dtype: int64
6586    29793
Name: uk_page_id, dtype: int64
6588    29798
Name: uk_page_id, dtype: int64
6589    29800
Name: uk_page_id, dtype: int64
6593    29850
Name: uk_page_id, dtype: int64
6595    30037
Name: uk_page_id, dtype: int64
6596    30046
Name: uk_page_id, dtype: int64
6599    30082
Name: uk_page_id, dtype: int64
6605    30140
Name: uk_page_id, dtype: int64
6608    30150
Name: uk_page_id, dtype: int64
6609    30153
Name: uk_page_id, dtype: int64
6611    30184
Name: uk_page_id, dtype: int64
6612    30187
Name: uk_page_id, dtype: int64
6613    30190
Name: uk_page_id, dtype: int64
6617    30234
Name: uk_page_id, dtype: int64
6625    30342
Name: uk_page_id, dtype: int64
6627    30362
Name: uk_page_id, dtype: int64
6628    30363
Name: uk_page_id, dtype: int64
6629    30407
Name: uk_page_id, dtype: int64
6630    30409
Name: uk_page_id, dtype: int64
6631    30441
Name: uk_page_id, dtype: int64
6636    30504
Name: uk_page_id, dtype: int64
6643    30537
Name: uk_page_id, dtype: int64
6651    30566
Name: uk_page_id, dtype: int64
6654    30595
Name: uk_page_id, dtype: int64
6657    30613
Name: uk_page_id, dtype: int64
6668    30662
Name: uk_page_id, dtype: int64
6669    30666
Name: uk_page_id, dtype: int64
6670    30691
Name: uk_page_id, dtype: int64
6674    30736
Name: uk_page_id, dtype: int64
6682    30832
Name: uk_page_id, dtype: int64
6684    30850
Name: uk_page_id, dtype: int64
6707    30914
Name: uk_page_id, dtype: int64
6752    31177
Name: uk_page_id, dtype: int64
6763    31227
Name: uk_page_id, dtype: int64
6764    31228
Name: uk_page_id, dtype: int64
6765    31232
Name: uk_page_id, dtype: int64
6772    31271
Name: uk_page_id, dtype: int64
6774    31276
Name: uk_page_id, dtype: int64
6780    31309
Name: uk_page_id, dtype: int64
6790    31339
Name: uk_page_id, dtype: int64
6802    31433
Name: uk_page_id, dtype: int64
6803    31436
Name: uk_page_id, dtype: int64
6804    31446
Name: uk_page_id, dtype: int64
6835    31577
Name: uk_page_id, dtype: int64
6852    31670
Name: uk_page_id, dtype: int64
6875    31922
Name: uk_page_id, dtype: int64
6876    31938
Name: uk_page_id, dtype: int64
6881    32033
Name: uk_page_id, dtype: int64
6882    32101
Name: uk_page_id, dtype: int64
6888    32145
Name: uk_page_id, dtype: int64
6889    32151
Name: uk_page_id, dtype: int64
6891    32159
Name: uk_page_id, dtype: int64
6895    32203
Name: uk_page_id, dtype: int64
6896    32207
Name: uk_page_id, dtype: int64
6906    32334
Name: uk_page_id, dtype: int64
6913    32374
Name: uk_page_id, dtype: int64
6915    32381
Name: uk_page_id, dtype: int64
6919    32392
Name: uk_page_id, dtype: int64
6923    32448
Name: uk_page_id, dtype: int64
6924    32457
Name: uk_page_id, dtype: int64
6937    32494
Name: uk_page_id, dtype: int64
6938    32496
Name: uk_page_id, dtype: int64
6955    32616
Name: uk_page_id, dtype: int64
6958    32625
Name: uk_page_id, dtype: int64
6962    32635
Name: uk_page_id, dtype: int64
6966    32663
Name: uk_page_id, dtype: int64
6970    32742
Name: uk_page_id, dtype: int64
6971    32746
Name: uk_page_id, dtype: int64
6975    32799
Name: uk_page_id, dtype: int64
6979    32833
Name: uk_page_id, dtype: int64
6982    32895
Name: uk_page_id, dtype: int64
6987    32938
Name: uk_page_id, dtype: int64
6989    32947
Name: uk_page_id, dtype: int64
6994    33022
Name: uk_page_id, dtype: int64
6996    33069
Name: uk_page_id, dtype: int64
7015    33400
Name: uk_page_id, dtype: int64
7016    33403
Name: uk_page_id, dtype: int64
7034    33554
Name: uk_page_id, dtype: int64
7036    33566
Name: uk_page_id, dtype: int64
7042    33660
Name: uk_page_id, dtype: int64
7063    33795
Name: uk_page_id, dtype: int64
7066    33814
Name: uk_page_id, dtype: int64
7067    33815
Name: uk_page_id, dtype: int64
7069    33826
Name: uk_page_id, dtype: int64
7071    33841
Name: uk_page_id, dtype: int64
7072    33855
Name: uk_page_id, dtype: int64
7073    33862
Name: uk_page_id, dtype: int64
7074    33874
Name: uk_page_id, dtype: int64
7081    33909
Name: uk_page_id, dtype: int64
7082    33939
Name: uk_page_id, dtype: int64
7084    33942
Name: uk_page_id, dtype: int64
7087    33955
Name: uk_page_id, dtype: int64
7088    33967
Name: uk_page_id, dtype: int64
7089    33975
Name: uk_page_id, dtype: int64
7090    33976
Name: uk_page_id, dtype: int64
7091    33980
Name: uk_page_id, dtype: int64
7101    34045
Name: uk_page_id, dtype: int64
7102    34050
Name: uk_page_id, dtype: int64
7103    34057
Name: uk_page_id, dtype: int64
7120    34181
Name: uk_page_id, dtype: int64
7122    34195
Name: uk_page_id, dtype: int64
7123    34215
Name: uk_page_id, dtype: int64
7124    34228
Name: uk_page_id, dtype: int64
7128    34251
Name: uk_page_id, dtype: int64
7130    34254
Name: uk_page_id, dtype: int64
7141    34392
Name: uk_page_id, dtype: int64
7142    34410
Name: uk_page_id, dtype: int64
7147    34441
Name: uk_page_id, dtype: int64
7153    34506
Name: uk_page_id, dtype: int64
7172    34636
Name: uk_page_id, dtype: int64
7173    34649
Name: uk_page_id, dtype: int64
7175    34687
Name: uk_page_id, dtype: int64
7178    34722
Name: uk_page_id, dtype: int64
7179    34732
Name: uk_page_id, dtype: int64
7187    34782
Name: uk_page_id, dtype: int64
7188    34784
Name: uk_page_id, dtype: int64
7197    34825
Name: uk_page_id, dtype: int64
7198    34837
Name: uk_page_id, dtype: int64
7200    34871
Name: uk_page_id, dtype: int64
7201    34877
Name: uk_page_id, dtype: int64
7202    34878
Name: uk_page_id, dtype: int64
7203    34891
Name: uk_page_id, dtype: int64
7204    34896
Name: uk_page_id, dtype: int64
7205    34907
Name: uk_page_id, dtype: int64
7206    34943
Name: uk_page_id, dtype: int64
7207    34964
Name: uk_page_id, dtype: int64
7208    34972
Name: uk_page_id, dtype: int64
7209    34975
Name: uk_page_id, dtype: int64
7210    34997
Name: uk_page_id, dtype: int64
7211    34998
Name: uk_page_id, dtype: int64
7212    35002
Name: uk_page_id, dtype: int64
7213    35003
Name: uk_page_id, dtype: int64
7214    35023
Name: uk_page_id, dtype: int64
7215    35035
Name: uk_page_id, dtype: int64
7238    35167
Name: uk_page_id, dtype: int64
7240    35171
Name: uk_page_id, dtype: int64
7241    35175
Name: uk_page_id, dtype: int64
7242    35197
Name: uk_page_id, dtype: int64
7249    35272
Name: uk_page_id, dtype: int64
7255    35331
Name: uk_page_id, dtype: int64
7256    35349
Name: uk_page_id, dtype: int64
7257    35350
Name: uk_page_id, dtype: int64
7259    35455
Name: uk_page_id, dtype: int64
7261    35580
Name: uk_page_id, dtype: int64
7263    35606
Name: uk_page_id, dtype: int64
7270    35645
Name: uk_page_id, dtype: int64
7282    35714
Name: uk_page_id, dtype: int64
7288    35800
Name: uk_page_id, dtype: int64
7296    35886
Name: uk_page_id, dtype: int64
7299    35907
Name: uk_page_id, dtype: int64
7308    35958
Name: uk_page_id, dtype: int64
7309    35960
Name: uk_page_id, dtype: int64
7322    36048
Name: uk_page_id, dtype: int64
7327    36135
Name: uk_page_id, dtype: int64
7328    36139
Name: uk_page_id, dtype: int64
7331    36152
Name: uk_page_id, dtype: int64
7338    36187
Name: uk_page_id, dtype: int64
7343    36210
Name: uk_page_id, dtype: int64
7344    36217
Name: uk_page_id, dtype: int64
7350    36228
Name: uk_page_id, dtype: int64
7351    36243
Name: uk_page_id, dtype: int64
7352    36256
Name: uk_page_id, dtype: int64
7358    36296
Name: uk_page_id, dtype: int64
7362    36333
Name: uk_page_id, dtype: int64
7363    36348
Name: uk_page_id, dtype: int64
7364    36351
Name: uk_page_id, dtype: int64
7365    36352
Name: uk_page_id, dtype: int64
7366    36353
Name: uk_page_id, dtype: int64
7369    36364
Name: uk_page_id, dtype: int64
7370    36370
Name: uk_page_id, dtype: int64
7376    36402
Name: uk_page_id, dtype: int64
7399    36582
Name: uk_page_id, dtype: int64
7400    36599
Name: uk_page_id, dtype: int64
7409    36621
Name: uk_page_id, dtype: int64
7413    36635
Name: uk_page_id, dtype: int64
7414    36649
Name: uk_page_id, dtype: int64
7418    36700
Name: uk_page_id, dtype: int64
7422    36732
Name: uk_page_id, dtype: int64
7424    36745
Name: uk_page_id, dtype: int64
7429    36777
Name: uk_page_id, dtype: int64
7430    36790
Name: uk_page_id, dtype: int64
7431    36794
Name: uk_page_id, dtype: int64
7432    36801
Name: uk_page_id, dtype: int64
7433    36802
Name: uk_page_id, dtype: int64
7462    36940
Name: uk_page_id, dtype: int64
7463    36948
Name: uk_page_id, dtype: int64
7464    36950
Name: uk_page_id, dtype: int64
7465    36952
Name: uk_page_id, dtype: int64
7468    36960
Name: uk_page_id, dtype: int64
7470    36975
Name: uk_page_id, dtype: int64
7471    36976
Name: uk_page_id, dtype: int64
7472    36984
Name: uk_page_id, dtype: int64
7474    37008
Name: uk_page_id, dtype: int64
7475    37022
Name: uk_page_id, dtype: int64
7478    37042
Name: uk_page_id, dtype: int64
7479    37047
Name: uk_page_id, dtype: int64
7480    37054
Name: uk_page_id, dtype: int64
7487    37085
Name: uk_page_id, dtype: int64
7489    37091
Name: uk_page_id, dtype: int64
7493    37117
Name: uk_page_id, dtype: int64
7494    37135
Name: uk_page_id, dtype: int64
7506    37178
Name: uk_page_id, dtype: int64
7515    37234
Name: uk_page_id, dtype: int64
7516    37235
Name: uk_page_id, dtype: int64
7517    37251
Name: uk_page_id, dtype: int64
7518    37274
Name: uk_page_id, dtype: int64
7519    37277
Name: uk_page_id, dtype: int64
7524    37349
Name: uk_page_id, dtype: int64
7525    37418
Name: uk_page_id, dtype: int64
7526    37419
Name: uk_page_id, dtype: int64
7527    37433
Name: uk_page_id, dtype: int64
7533    37596
Name: uk_page_id, dtype: int64
7550    37701
Name: uk_page_id, dtype: int64
7551    37703
Name: uk_page_id, dtype: int64
7553    37711
Name: uk_page_id, dtype: int64
7555    37734
Name: uk_page_id, dtype: int64
7556    37740
Name: uk_page_id, dtype: int64
7566    37759
Name: uk_page_id, dtype: int64
7570    37798
Name: uk_page_id, dtype: int64
7576    37821
Name: uk_page_id, dtype: int64
7582    37881
Name: uk_page_id, dtype: int64
7584    37948
Name: uk_page_id, dtype: int64
7595    38071
Name: uk_page_id, dtype: int64
7600    38090
Name: uk_page_id, dtype: int64
7601    38095
Name: uk_page_id, dtype: int64
7603    38118
Name: uk_page_id, dtype: int64
7615    38172
Name: uk_page_id, dtype: int64
7618    38204
Name: uk_page_id, dtype: int64
7619    38214
Name: uk_page_id, dtype: int64
7621    38225
Name: uk_page_id, dtype: int64
7622    38231
Name: uk_page_id, dtype: int64
7624    38241
Name: uk_page_id, dtype: int64
7627    38246
Name: uk_page_id, dtype: int64
7629    38317
Name: uk_page_id, dtype: int64
7634    38352
Name: uk_page_id, dtype: int64
7636    38381
Name: uk_page_id, dtype: int64
7637    38414
Name: uk_page_id, dtype: int64
7646    38483
Name: uk_page_id, dtype: int64
7650    38582
Name: uk_page_id, dtype: int64
7653    38711
Name: uk_page_id, dtype: int64
7655    38719
Name: uk_page_id, dtype: int64
7656    38726
Name: uk_page_id, dtype: int64
7657    38739
Name: uk_page_id, dtype: int64
7658    38745
Name: uk_page_id, dtype: int64
7665    38782
Name: uk_page_id, dtype: int64
7667    38785
Name: uk_page_id, dtype: int64
7669    38792
Name: uk_page_id, dtype: int64
7670    38798
Name: uk_page_id, dtype: int64
7671    38801
Name: uk_page_id, dtype: int64
7678    38854
Name: uk_page_id, dtype: int64
7699    39196
Name: uk_page_id, dtype: int64
7701    39209
Name: uk_page_id, dtype: int64
7713    39345
Name: uk_page_id, dtype: int64
7717    39399
Name: uk_page_id, dtype: int64
7718    39401
Name: uk_page_id, dtype: int64
7719    39405
Name: uk_page_id, dtype: int64
7721    39416
Name: uk_page_id, dtype: int64
7727    39514
Name: uk_page_id, dtype: int64
7729    39531
Name: uk_page_id, dtype: int64
7730    39552
Name: uk_page_id, dtype: int64
7732    39559
Name: uk_page_id, dtype: int64
7733    39560
Name: uk_page_id, dtype: int64
7734    39561
Name: uk_page_id, dtype: int64
7735    39564
Name: uk_page_id, dtype: int64
7736    39565
Name: uk_page_id, dtype: int64
7745    39640
Name: uk_page_id, dtype: int64
7750    39681
Name: uk_page_id, dtype: int64
7764    39738
Name: uk_page_id, dtype: int64
7768    39777
Name: uk_page_id, dtype: int64
7771    39813
Name: uk_page_id, dtype: int64
7772    39824
Name: uk_page_id, dtype: int64
7773    39828
Name: uk_page_id, dtype: int64
7774    39833
Name: uk_page_id, dtype: int64
7780    39888
Name: uk_page_id, dtype: int64
7781    39889
Name: uk_page_id, dtype: int64
7801    40094
Name: uk_page_id, dtype: int64
7811    40149
Name: uk_page_id, dtype: int64
7812    40169
Name: uk_page_id, dtype: int64
7815    40185
Name: uk_page_id, dtype: int64
7833    40316
Name: uk_page_id, dtype: int64
7835    40348
Name: uk_page_id, dtype: int64
7841    40434
Name: uk_page_id, dtype: int64
7852    40484
Name: uk_page_id, dtype: int64
7853    40489
Name: uk_page_id, dtype: int64
7878    40629
Name: uk_page_id, dtype: int64
7885    40689
Name: uk_page_id, dtype: int64
7907    40907
Name: uk_page_id, dtype: int64
7911    40948
Name: uk_page_id, dtype: int64
7915    40980
Name: uk_page_id, dtype: int64
7919    41013
Name: uk_page_id, dtype: int64
7929    41109
Name: uk_page_id, dtype: int64
7944    41213
Name: uk_page_id, dtype: int64
7945    41219
Name: uk_page_id, dtype: int64
7946    41226
Name: uk_page_id, dtype: int64
7950    41244
Name: uk_page_id, dtype: int64
7951    41284
Name: uk_page_id, dtype: int64
7956    41320
Name: uk_page_id, dtype: int64
7965    41445
Name: uk_page_id, dtype: int64
7966    41450
Name: uk_page_id, dtype: int64
7967    41454
Name: uk_page_id, dtype: int64
7968    41461
Name: uk_page_id, dtype: int64
7969    41462
Name: uk_page_id, dtype: int64
7971    41471
Name: uk_page_id, dtype: int64
7972    41474
Name: uk_page_id, dtype: int64
7975    41491
Name: uk_page_id, dtype: int64
7976    41495
Name: uk_page_id, dtype: int64
7977    41499
Name: uk_page_id, dtype: int64
7986    41579
Name: uk_page_id, dtype: int64
7992    41619
Name: uk_page_id, dtype: int64
7993    41623
Name: uk_page_id, dtype: int64
7994    41624
Name: uk_page_id, dtype: int64
7996    41644
Name: uk_page_id, dtype: int64
8008    41679
Name: uk_page_id, dtype: int64
8017    41777
Name: uk_page_id, dtype: int64
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-63-c25df4301885> in <module>()
      1 for el in df_outcoming_links_translated['uk_page_id'].values:
----> 2     if not(el in merged6['uk_page_id'].values):
      3         print(df_outcoming_links_translated[df_outcoming_links_translated['uk_page_id'] == el]['uk_page_id'] )

KeyboardInterrupt: 
# Select the rows of merged5 whose uk_page_id equals 22.
merged5.loc[merged5['uk_page_id'].eq(22)]
id uk_page_title en_page_title uk_page_id uk_translations_count uk_incoming_links Unnamed: 0_x uk_langlinks_count Unnamed: 0_y outcoming_links uk_first_edit uk_last_edit uk_revisions_count uk_minor_revisions_count uk_deleted_revisions
# Load en.csv (en_page_id / en_page_title, per the preview below) and show
# its first five rows.
en_df = pd.read_csv(filepath_or_buffer='en.csv')
en_df.head(n=5)
Unnamed: 0 en_page_id en_page_title
0 0 5878274 b'!'
1 1 3632887 b'!!'
2 2 600744 b'!!!'
3 3 34443176 b'!!!Fuck_You!!!'
4 4 11011780 b'!!!Fuck_You!!!_And_Then_Some'
# Reload the previously saved merge result and discard the 'Unnamed: 0'
# column (presumably the index written out by an earlier to_csv call).
# low_memory=False makes pandas infer each column's dtype from the whole file
# rather than chunk by chunk, which avoids the DtypeWarning about mixed types
# in columns 8 and 9.
final_df = pd.read_csv('final_uk_merge.csv', low_memory=False).drop(columns=['Unnamed: 0'])
/srv/paws/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2785: DtypeWarning: Columns (8,9) have mixed types. Specify dtype option on import or set low_memory=False.
  interactivity=interactivity, compiler=compiler, result=result)
# Re-read en.csv, this time discarding the 'Unnamed: 0' column, then preview
# the first five rows of final_df.
en_df = pd.read_csv("en.csv").drop('Unnamed: 0', axis=1)
final_df.head(n=5)
Unnamed: 0 id uk_page_title uk_page_id uk_translations_count uk_incoming_links uk_langlinks_count outcoming_links uk_first_edit uk_last_edit uk_revisions_count uk_minor_revisions_count uk_deleted_revisions outcoming_links_translated en_page_title_clear
0 0 NaN !_(альбом_С.К.А.Й.) 829565.0 0.0 31.0 NaN 40.0 2010-10-14 09:59:01 2017-04-23 15:40:11 43.0 13.0 0.0 NaN NaN
1 1 4540205.0 !_(альбом) 2112530.0 0.0 NaN 12.0 12.0 2016-01-29 23:11:32 2018-03-06 11:57:03 10.0 1.0 0.0 NaN ! (The Dismemberment Plan album)
2 2 404000.0 !_(значення) 1366003.0 0.0 1.0 16.0 32.0 2012-09-08 09:20:52 2016-05-18 09:49:18 7.0 5.0 0.0 NaN ! (disambiguation)
3 3 353153.0 !!_(значення) 2141483.0 0.0 NaN 17.0 18.0 2016-03-11 16:18:49 2016-06-01 20:48:15 5.0 0.0 0.0 NaN !!
4 4 371.0 !!! 425480.0 24.0 8.0 25.0 36.0 2009-03-28 07:57:13 2018-04-05 10:46:42 28.0 20.0 0.0 36.0 !!!
# Load the title table from en_uk_titles.csv.
uk_titles_df = pd.read_csv(filepath_or_buffer='en_uk_titles.csv')
# Replace spaces with underscores in the Ukrainian titles -- presumably so
# they match the underscore form of uk_page_title used as the merge key later.
uk_titles_df['uk_page_title'] = uk_titles_df['uk_page_title'].str.replace(pat=' ', repl='_')
# Preview the two title columns.
uk_titles_df.loc[:, ['uk_page_title', 'en_page_title_clear']].head(n=5)
uk_page_title en_page_title_clear
0 !_(альбом_С.К.А.Й.) NaN
1 !_(альбом) ! (The Dismemberment Plan album)
2 !_(значення) ! (disambiguation)
3 !!_(значення) !!
4 !!! !!!
# Attach en_page_title_clear from uk_titles_df to final_df, matching on
# uk_page_title; how='left' keeps every final_df row whether or not it matches.
final_new = pd.merge(final_df, uk_titles_df[['uk_page_title', 'en_page_title_clear']], how='left', on='uk_page_title')
# Strip leftover index columns before saving. errors='ignore' keeps this from
# raising KeyError when one of them is already gone (the cell that loads
# final_df drops 'Unnamed: 0').
# NOTE(review): this saves final_df, not the merged final_new -- confirm which
# one is meant to be persisted.
# NOTE(review): to_csv writes the index by default, so the file comes back
# with an 'Unnamed: 0' column on the next read.
final_df.drop(columns=['Unnamed: 0', 'Unnamed: 0.1'], errors='ignore').to_csv('final_uk_merge.csv')