Comparing padding strategies for labor hours

In this notebook, we compare the effect of two session-padding strategies, "const" and "avg", on the estimated monthly labor hours.

For "const", we'll add 430 seconds of padding to every session. For "avg", we'll add 430 seconds to single-edit sessions and, for multi-edit sessions, the average inter-edit time within that session.

# Load the "avg"-strategy totals from the local TSV file. The `with` block
# guarantees the file handle is closed once the rows have been read (the
# original left it open for the lifetime of the kernel).
with open("total_labor_hours_avg_intertime.tsv") as avg_intertime_f:
    # Discard the first line (presumably the TSV header row -- TODO confirm).
    avg_intertime_f.readline()
    # One list of tab-separated fields per remaining line.
    avg_intertime = [line.strip().split("\t") for line in avg_intertime_f]
# Fetch the "const"-strategy totals as TSV with curl. The leading `!` is
# IPython's shell escape; assigning it captures the command's output as a
# list of lines (this line is not valid plain Python).
const_intertime_f = !curl -s http://quarry.wmflabs.org/run/65743/output/0/tsv?download=true
# Split every line into its tab-separated fields; the [1:] slice skips the
# first line (presumably the TSV header row -- TODO confirm).
const_intertime = [line.strip().split("\t") for line in const_intertime_f[1:]]
# Join the two series on their month key rather than pairing rows by
# position. The original zipped the lists and never compared the two month
# fields, so a missing or reordered month in either source would silently
# combine totals from different months.
const_by_month = {month_key: hours for month_key, hours in const_intertime}

# One tuple per month present in BOTH sources, in the order of the "avg" rows:
#   (month, const total, avg total, avg total - const total)
labor_hours_totals = [
    (month_key, const_t, avg_t, avg_t - const_t)
    for month_key, avg_t, const_t in (
        # Parse each total to float exactly once.
        (month_key, float(avg_raw), float(const_by_month[month_key]))
        for month_key, avg_raw in avg_intertime
        if month_key in const_by_month
    )
]
labor_hours_totals
[('200101', 43.7272222221, 43.49839616401972, -0.228826058080287),
 ('200102', 99.6030555554, 96.02202798898222, -3.581027566417788),
 ('200103', 200.7447222222, 200.12607912235916, -0.6186430998408525),
 ('200104', 113.7174999997, 115.19569449939806, 1.4781944996980627),
 ('200105', 214.2611111102, 209.65533947242193, -4.605771637778076),
 ('200106', 170.7058333324, 169.44699085597443, -1.2588424764255706),
 ('200107', 331.8433333332, 334.8247289172197, 2.981395584019708),
 ('200108', 611.2988888883, 626.3391427409272, 15.040253852627188),
 ('200109', 1070.8127777776, 1086.467581069567, 15.6548032919668),
 ('200110', 1650.9505555554, 1666.3242634440844, 15.373707888684521),
 ('200111', 2294.7255555549, 2278.9241549505273, -15.801400604372702),
 ('200112', 3247.2666666658, 3194.031401034806, -53.235265630994036),
 ('200201', 2389.0280555551, 2389.341148289127, 0.3130927340271228),
 ('200202', 2528.6277777773, 2558.447951681542, 29.820173904241983),
 ('200203', 3061.7599999992, 3025.0798388661965, -36.68016113300337),
 ('200204', 2883.5674999999, 2907.319359682602, 23.75185968270216),
 ('200205', 2346.690277777, 2330.704071820244, -15.986205956756294),
 ('200206', 3449.179166666, 3428.6641826657815, -20.514984000218647),
 ('200207', 3940.3791666667, 3906.458301315325, -33.92086535137469),
 ('200208', 6606.8424999998, 6496.0471124740425, -110.79538752575718),
 ('200209', 7715.0786111106, 7583.018513325324, -132.0600977852755),
 ('200210', 8276.67, 8158.161528761147, -118.50847123885342),
 ('200211', 7189.7413888888, 7091.091921563128, -98.64946732567205),
 ('200212', 7871.4427777773, 7776.564779923153, -94.877997854147),
 ('200301', 10944.490555555, 10853.15696652758, -91.3335890274193),
 ('200302', 9955.4522222218, 9863.049237451449, -92.4029847703514),
 ('200303', 10543.9205555554, 10434.249800319752, -109.67075523564745),
 ('200304', 10861.7352777772, 10710.135885799977, -151.5993919772227),
 ('200305', 13690.4913888884, 13524.61374627006, -165.87764261833945),
 ('200306', 14633.0630555548, 14485.25855926625, -147.80449628854876),
 ('200307', 16329.5541666666, 16175.593562395643, -153.96060427095654),
 ('200308', 19884.3949999993, 19659.480924957075, -224.91407504222298),
 ('200309', 18397.9644444444, 18230.290332451543, -167.67411199285561),
 ('200310', 19617.5511111111, 19437.06437490094, -180.48673621016133),
 ('200311', 23831.8824999994, 23536.446379629837, -295.43612036956256),
 ('200312', 24921.2922222219, 24535.14693503914, -386.1452871827569),
 ('200401', 28262.5280555552, 28067.671445076834, -194.85661047836766),
 ('200402', 38030.9602777777, 37562.28965457132, -468.6706232063807),
 ('200403', 51607.3174999993, 50752.22143800767, -855.0960619916295),
 ('200404', 49667.2227777774, 48997.70483304784, -669.5179447295595),
 ('200405', 50712.0927777777, 49968.80660530631, -743.2861724713948),
 ('200406', 53479.8547222213, 52282.49909589732, -1197.3556263239807),
 ('200407', 63662.375555555, 62498.17442147225, -1164.201134082752),
 ('200408', 70630.5180555549, 69479.42441525741, -1151.0936402974912),
 ('200409', 76303.9430555556, 74488.35515713181, -1815.5878984237934),
 ('200410', 83718.2911111104, 81572.19210194283, -2146.0990091675776),
 ('200411', 97902.3813888886, 94817.96496998757, -3084.4164189010335),
 ('200412', 105788.2049999993, 103035.96744896367, -2752.237551035636),
 ('200501', 103720.0488888884, 101705.51749088512, -2014.5313980032806),
 ('200502', 99614.7430555556, 97356.21179477684, -2258.5312607787637),
 ('200503', 126630.8169444437, 123797.94691359233, -2832.8700308513653),
 ('200504', 150483.5805555549, 145942.83403993933, -4540.746515615552),
 ('200505', 160726.9791666662, 155762.67076110092, -4964.308405565273),
 ('200506', 174243.4180555554, 168392.74040923812, -5850.677646317286),
 ('200507', 217503.5738888886, 210797.45979161703, -6706.11409727158),
 ('200508', 241259.3716666664, 234267.09370140685, -6992.277965259564),
 ('200509', 222823.4094444443, 216103.80230635783, -6719.60713808646),
 ('200510', 267085.0644444438, 257236.27515478295, -9848.78928966084),
 ('200511', 274049.2761111111, 263151.2882545303, -10897.987856580818),
 ('200512', 355766.9897222218, 339904.68463158025, -15862.305090641545),
 ('200601', 407848.1836111104, 390499.2701228481, -17348.913488262275),
 ('200602', 385581.7291666662, 368547.2132703631, -17034.515896303114),
 ('200603', 442278.0088888888, 422431.7411340184, -19846.26775487041),
 ('200604', 434804.4927777769, 413983.46515578736, -20821.027621989546),
 ('200605', 493688.3480555548, 468581.0633299756, -25107.2847255792),
 ('200606', 497394.8672222222, 473766.0150688928, -23628.85215332941),
 ('200607', 516355.271111111, 492599.350413467, -23755.920697644004),
 ('200608', 555216.4808333329, 528628.2374660121, -26588.24336732074),
 ('200609', 523538.3930555548, 496196.7431939656, -27341.64986158925),
 ('200610', 563435.9819444444, 531736.3454437958, -31699.636500648572),
 ('200611', 583672.696944444, 549973.6212467575, -33699.07569768652),
 ('200612', 582712.686111111, 550577.8505275298, -32134.835583581124),
 ('200701', 649641.5702777774, 614121.2237348456, -35520.346542931744),
 ('200702', 614661.7180555548, 578100.1120783982, -36561.60597715655),
 ('200703', 675951.9816666666, 634929.2859691939, -41022.69569747266),
 ('200704', 654943.2944444442, 615134.237615674, -39809.056828770204),
 ('200705', 651424.412777777, 611710.3754729445, -39714.037304832484),
 ('200706', 580514.0722222221, 547734.8128789112, -32779.25934331096),
 ('200707', 583049.7708333331, 551198.9669983553, -31850.803834977793),
 ('200708', 574859.7577777778, 544682.4048521894, -30177.352925588493),
 ('200709', 557581.7961111107, 524505.9730177806, -33075.82309333014),
 ('200710', 586774.1449999997, 550610.7014542617, -36163.443545738),
 ('200711', 549292.0644444439, 516768.93470321386, -32523.12974123005),
 ('200712', 523794.319722222, 495023.1084671259, -28771.211255096074),
 ('200801', 586037.0324999999, 552850.5928786511, -33186.43962134875),
 ('200802', 560004.9411111108, 526371.7168748226, -33633.224236288224),
 ('200803', 601731.686111111, 566322.5467349195, -35409.13937619142),
 ('200804', 583275.0847222212, 548509.8280411022, -34765.25668111909),
 ('200805', 568635.6094444436, 534363.0800281186, -34272.52941632504),
 ('200806', 529137.8913888884, 499994.8313856769, -29143.06000321143),
 ('200807', 538937.1374999993, 510910.0592358756, -28027.078264123644),
 ('200808', 537472.4772222221, 509979.2819318251, -27493.195290397038),
 ('200809', 522668.643611111, 492320.9255744572, -30347.718036653765),
 ('200810', 540981.1699999997, 507247.93826071144, -33733.231739288254),
 ('200811', 500769.3936111109, 471018.605630029, -29750.78798108193),
 ('200812', 495557.8958333324, 468457.77200728963, -27100.12382604275),
 ('200901', 552825.5530555547, 522645.09260431497, -30180.46045123978),
 ('200902', 512633.0483333332, 482930.5149817605, -29702.533351572696),
 ('200903', 554556.4672222214, 522592.2539731412, -31964.213249080232),
 ('200904', 518600.7674999992, 490296.9136785998, -28303.85382139939),
 ('200905', 522655.5377777773, 493956.0315627097, -28699.50621506758),
 ('200906', 503036.3322222217, 476761.40567732917, -26274.926544892543),
 ('200907', 500558.1674999994, 476293.1761042407, -24264.991395758698),
 ('200908', 504251.173888888, 480281.43754716264, -23969.736341725336),
 ('200909', 487068.2741666659, 461816.5784359744, -25251.695730691485),
 ('200910', 505313.388611111, 478237.38585998066, -27076.00275113032),
 ('200911', 486447.3166666659, 460508.91698953253, -25938.399677133362),
 ('200912', 469483.2933333332, 445889.96748843056, -23593.32584490266),
 ('201001', 517116.0322222218, 490602.11817108456, -26513.914051137224),
 ('201002', 479779.6336111108, 453753.49016050453, -26026.143450606265),
 ('201003', 508407.9066666658, 480034.583704796, -28373.322961869824),
 ('201004', 485284.915555555, 458230.70470841276, -27054.21084714227),
 ('201005', 487581.181666666, 461405.4466836296, -26175.73498303641),
 ('201006', 452792.3211111104, 430199.0205331129, -22593.300577997463),
 ('201007', 459806.8152777769, 437924.9147592128, -21881.9005185641),
 ('201008', 471826.7791666662, 449676.71319925966, -22150.065967406554),
 ('201009', 453737.9280555553, 430824.68658779, -22913.2414677653),
 ('201010', 461932.3527777777, 437673.1811820192, -24259.171595758526),
 ('201011', 443285.7194444442, 420620.2633058154, -22665.456138628826),
 ('201012', 433967.5419444439, 412955.4098729166, -21012.132071527303),
 ('201101', 486876.0613888883, 463383.37678383733, -23492.68460505095),
 ('201102', 434399.9716666662, 413443.32038268, -20956.651283986226),
 ('201103', 459643.195277777, 438407.238268712, -21235.957009064965),
 ('201104', 430744.116944444, 410680.35357333603, -20063.763371107983),
 ('201105', 435663.9408333333, 415725.0983166215, -19938.842516711797),
 ('201106', 422225.304166666, 402825.91834775463, -19399.385818911367),
 ('201107', 429121.2927777773, 410416.9108703785, -18704.381907398812),
 ('201108', 436751.8488888886, 417734.7118557551, -19017.137033133535),
 ('201109', 413102.7624999997, 394136.81860033126, -18965.943899668462),
 ('201110', 426082.5613888887, 406219.2914409691, -19863.269947919587),
 ('201111', 416036.5855555547, 396430.20530979894, -19606.380245755776),
 ('201112', 412143.6744444443, 393436.4722895308, -18707.20215491351),
 ('201201', 436081.7247222217, 416489.7968094597, -19591.92791276204),
 ('201202', 414983.6108333332, 395764.24670329713, -19219.36413003609),
 ('201203', 426114.4572222218, 406426.39819314796, -19688.059029073862),
 ('201204', 414060.848333333, 395521.4678057388, -18539.380527594185),
 ('201205', 419551.4547222218, 400319.1391022989, -19232.31561992294),
 ('201206', 395066.6905555554, 377615.63858383894, -17451.051971716457),
 ('201207', 414797.6369444436, 397413.9282833397, -17383.708661103912),
 ('201208', 416797.8461111104, 398947.82953969785, -17850.01657141256),
 ('201209', 386694.7466666666, 369447.1072879995, -17247.63937866711),
 ('201210', 407950.7299999997, 389070.28641918086, -18880.44358081883),
 ('201211', 393362.1849999998, 375751.53817471006, -17610.64682528976),
 ('201212', 392492.0927777778, 375816.80401925824, -16675.28875851957),
 ('201301', 427035.5322222218, 409114.60105194326, -17920.93117027852),
 ('201302', 377154.4763888887, 360744.54550795525, -16409.930880933476),
 ('201303', 401351.735277777, 383291.0521242151, -18060.6831535619),
 ('201304', 398380.7794444438, 379650.97859351576, -18729.800850928063),
 ('201305', 404413.0269444443, 384647.7228513861, -19765.304093058163),
 ('201306', 380333.5186111106, 362764.1686745916, -17569.349936518993),
 ('201307', 379908.3141666662, 364979.1790307188, -14929.135135947377),
 ('201308', 370535.1330555548, 356335.6401455926, -14199.492909962195),
 ('201309', 347327.1347222222, 333413.14350788045, -13913.991214341775),
 ('201310', 372922.3005555553, 356053.49589155766, -16868.804663997667),
 ('201311', 361044.3572222219, 343722.1882260444, -17322.168996177497),
 ('201312', 367039.3355555549, 350167.2395378805, -16872.09601767437),
 ('201401', 396085.9752777776, 378401.9933970662, -17683.98188071139),
 ('201402', 355716.9252777778, 339320.3525341568, -16396.572743620956),
 ('201403', 386300.6277777776, 368498.99826546124, -17801.629512316373),
 ('201404', 363431.8386111109, 346869.7163916571, -16562.122219453857),
 ('201405', 365616.409722222, 349109.919425, -16506.490297222044),
 ('201406', 344522.528055555, 329696.4324620858, -14826.095593469217),
 ('201407', 354717.4916666661, 340121.65304540895, -14595.838621257164),
 ('201408', 361725.0580555556, 346738.244361502, -14986.81369405362),
 ('201409', 342144.3552777773, 327486.31760734314, -14658.037670434162),
 ('201410', 359831.5216666663, 344377.1998077579, -15454.321858908399),
 ('201411', 349234.0413888884, 334251.50827523, -14982.53311365837),
 ('201412', 351009.9166666661, 335506.813892438, -15503.102774228086),
 ('201501', 379677.3319444441, 362635.3472465804, -17041.984697863692),
 ('201502', 350283.8472222214, 334083.9071535674, -16199.94006865396),
 ('201503', 385647.5883333329, 367421.32408926805, -18226.264244064863),
 ('201504', 370458.0711111111, 352377.89033636096, -18080.18077475013),
 ('201505', 374868.0916666659, 356849.59712848824, -18018.494538177678),
 ('201506', 362678.68, 344791.52025551035, -17887.159744489647),
 ('201507', 379586.5169444436, 361032.51360525814, -18554.003339185438)]
% matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

# Transpose the per-month rows into one tuple per column.
month, const_hours, avg_hours, diff_hours = (
    tuple(column) for column in zip(*labor_hours_totals)
)
# Integer x positions 0..N-1, one per month; the month labels are unchanged.
i = tuple(range(len(month)))

# Plot the difference relative to the "const" total for each month.
plt.plot(i, np.divide(diff_hours, const_hours))
[<matplotlib.lines.Line2D at 0x7f6121f30f98>]

The plot above shows the monthly proportional difference between the two measures, computed as (avg − const) / const. It looks like the "avg" strategy estimates a labor hour count roughly 5% lower than the "const" strategy. There also appears to be a trend that roughly tracks the total number of labor hours (and active users): the more users/sessions/hours, the larger the gap between the two measures.

Here's the plot of the total "const" labor hours.

# Plot the monthly "const" labor-hour totals against the integer month index `i`.
plt.plot(i, const_hours)
[<matplotlib.lines.Line2D at 0x7f6128f53e10>]
from scipy.stats import pearsonr, spearmanr

# Correlate the proportional difference with the "const" totals using both
# Pearson's r and Spearman's rank correlation; the cell evaluates to a
# (pearson_result, spearman_result) pair.
(
    pearsonr(np.divide(diff_hours, const_hours), const_hours),
    spearmanr(np.divide(diff_hours, const_hours), const_hours),
)
((-0.9418812276652424, 7.1221296831421012e-84),
 SpearmanrResult(correlation=-0.9410322436184505, pvalue=2.4063612862828253e-83))
# Look up which month label corresponds to x position 60 on the plots.
month[60]
'200601'