#from http://pbpython.com/pandas-pivot-table-explained.html
import pandas as pd
import numpy as np
df = pd.read_excel("sales-funnel.xlsx")
df.head()
Account Name Rep Manager Product Quantity Price Status
0 714466 Trantow-Barrows Craig Booker Debra Henley CPU 1 30000 presented
1 714466 Trantow-Barrows Craig Booker Debra Henley Software 1 10000 presented
2 714466 Trantow-Barrows Craig Booker Debra Henley Maintenance 2 5000 pending
3 737550 Fritsch, Russel and Anderson Craig Booker Debra Henley CPU 1 35000 declined
4 146832 Kiehn-Spinka Daniel Hilton Debra Henley CPU 2 65000 won
df["Status"] = df["Status"].astype("category")
df["Status"].cat.set_categories(["won","pending","presented","declined"],inplace=True)
pd.pivot_table(df,index=["Name"])
Account Price Quantity
Name
Barton LLC 740150.0 35000.0 1.000000
Fritsch, Russel and Anderson 737550.0 35000.0 1.000000
Herman LLC 141962.0 65000.0 2.000000
Jerde-Hilpert 412290.0 5000.0 2.000000
Kassulke, Ondricka and Metz 307599.0 7000.0 3.000000
Keeling LLC 688981.0 100000.0 5.000000
Kiehn-Spinka 146832.0 65000.0 2.000000
Koepp Ltd 729833.0 35000.0 2.000000
Kulas Inc 218895.0 25000.0 1.500000
Purdy-Kunde 163416.0 30000.0 1.000000
Stokes LLC 239344.0 7500.0 1.000000
Trantow-Barrows 714466.0 15000.0 1.333333
pd.pivot_table(df,index=["Name","Rep","Manager"])
Account Price Quantity
Name Rep Manager
Barton LLC John Smith Debra Henley 740150.0 35000.0 1.000000
Fritsch, Russel and Anderson Craig Booker Debra Henley 737550.0 35000.0 1.000000
Herman LLC Cedric Moss Fred Anderson 141962.0 65000.0 2.000000
Jerde-Hilpert John Smith Debra Henley 412290.0 5000.0 2.000000
Kassulke, Ondricka and Metz Wendy Yule Fred Anderson 307599.0 7000.0 3.000000
Keeling LLC Wendy Yule Fred Anderson 688981.0 100000.0 5.000000
Kiehn-Spinka Daniel Hilton Debra Henley 146832.0 65000.0 2.000000
Koepp Ltd Wendy Yule Fred Anderson 729833.0 35000.0 2.000000
Kulas Inc Daniel Hilton Debra Henley 218895.0 25000.0 1.500000
Purdy-Kunde Cedric Moss Fred Anderson 163416.0 30000.0 1.000000
Stokes LLC Cedric Moss Fred Anderson 239344.0 7500.0 1.000000
Trantow-Barrows Craig Booker Debra Henley 714466.0 15000.0 1.333333
pd.pivot_table(df, index=["Manager","Rep"])
Account Price Quantity
Manager Rep
Debra Henley Craig Booker 720237.0 20000.000000 1.250000
Daniel Hilton 194874.0 38333.333333 1.666667
John Smith 576220.0 20000.000000 1.500000
Fred Anderson Cedric Moss 196016.5 27500.000000 1.250000
Wendy Yule 614061.5 44250.000000 3.000000
pd.pivot_table(df,index=["Manager","Rep"],values=["Price"])
Price
Manager Rep
Debra Henley Craig Booker 20000
Daniel Hilton 38333
John Smith 20000
Fred Anderson Cedric Moss 27500
Wendy Yule 44250
pd.pivot_table(df,index=["Manager","Rep"],values=["Price"],aggfunc=np.sum)
Price
Manager Rep
Debra Henley Craig Booker 80000
Daniel Hilton 115000
John Smith 40000
Fred Anderson Cedric Moss 110000
Wendy Yule 177000
pd.pivot_table(df,index=["Manager","Rep"],values=["Price"],aggfunc=[np.mean,len])
mean len
Price Price
Manager Rep
Debra Henley Craig Booker 20000 4
Daniel Hilton 38333 3
John Smith 20000 2
Fred Anderson Cedric Moss 27500 4
Wendy Yule 44250 4
pd.pivot_table(df,index=["Manager","Rep"],columns=["Product"],values=["Price"],aggfunc=np.sum)
Price
Product CPU Maintenance Monitor Software
Manager Rep
Debra Henley Craig Booker 65000.0 5000.0 NaN 10000.0
Daniel Hilton 105000.0 NaN NaN 10000.0
John Smith 35000.0 5000.0 NaN NaN
Fred Anderson Cedric Moss 95000.0 5000.0 NaN 10000.0
Wendy Yule 165000.0 7000.0 5000.0 NaN
pd.pivot_table(df,index=["Manager","Rep"],columns=["Product"],values=["Price"],aggfunc=np.sum,fill_value=0)
Price
Product CPU Maintenance Monitor Software
Manager Rep
Debra Henley Craig Booker 65000 5000 0 10000
Daniel Hilton 105000 0 0 10000
John Smith 35000 5000 0 0
Fred Anderson Cedric Moss 95000 5000 0 10000
Wendy Yule 165000 7000 5000 0
pd.pivot_table(df,index=["Manager","Rep"],columns=["Product"],
               values=["Price","Quantity"],
               aggfunc=[np.sum],
               fill_value=0)
sum
Price Quantity
Product CPU Maintenance Monitor Software CPU Maintenance Monitor Software
Manager Rep
Debra Henley Craig Booker 65000 5000 0 10000 2 2 0 1
Daniel Hilton 105000 0 0 10000 4 0 0 1
John Smith 35000 5000 0 0 1 2 0 0
Fred Anderson Cedric Moss 95000 5000 0 10000 3 1 0 1
Wendy Yule 165000 7000 5000 0 7 3 2 0
pd.pivot_table(df,index=["Manager","Rep","Product"],
               values=["Price","Quantity"],
               aggfunc=[np.sum],
               fill_value=0)
sum
Price Quantity
Manager Rep Product
Debra Henley Craig Booker CPU 65000 2
Maintenance 5000 2
Software 10000 1
Daniel Hilton CPU 105000 4
Software 10000 1
John Smith CPU 35000 1
Maintenance 5000 2
Fred Anderson Cedric Moss CPU 95000 3
Maintenance 5000 1
Software 10000 1
Wendy Yule CPU 165000 7
Maintenance 7000 3
Monitor 5000 2
pd.pivot_table(df,index=["Manager","Rep","Product"],
               values=["Price","Quantity"],
               aggfunc=[np.sum,np.mean],
               fill_value=0,
              margins=True)
sum mean
Price Quantity Price Quantity
Manager Rep Product
Debra Henley Craig Booker CPU 65000.0 2.0 32500.000000 1.000000
Maintenance 5000.0 2.0 5000.000000 2.000000
Software 10000.0 1.0 10000.000000 1.000000
Daniel Hilton CPU 105000.0 4.0 52500.000000 2.000000
Software 10000.0 1.0 10000.000000 1.000000
John Smith CPU 35000.0 1.0 35000.000000 1.000000
Maintenance 5000.0 2.0 5000.000000 2.000000
Fred Anderson Cedric Moss CPU 95000.0 3.0 47500.000000 1.500000
Maintenance 5000.0 1.0 5000.000000 1.000000
Software 10000.0 1.0 10000.000000 1.000000
Wendy Yule CPU 165000.0 7.0 82500.000000 3.500000
Maintenance 7000.0 3.0 7000.000000 3.000000
Monitor 5000.0 2.0 5000.000000 2.000000
All 522000.0 30.0 30705.882353 1.764706
#note: I want to use categoricals in some of my data analysis
#that involves text responses. see: http://pandas.pydata.org/pandas-docs/stable/categorical.html
pd.pivot_table(df,index=["Manager","Status"],
               values=["Price"],
               aggfunc=[np.sum],
               fill_value=0,
              margins=True)
sum
Price
Manager Status
Debra Henley won 65000.0
pending 50000.0
presented 50000.0
declined 70000.0
Fred Anderson won 172000.0
pending 5000.0
presented 45000.0
declined 65000.0
All 522000.0
pd.pivot_table(df,index=["Manager","Status"],
               columns=["Product"],
               values=["Quantity","Price"],
                aggfunc={"Quantity":len,"Price":np.sum},
               fill_value=0)
Quantity Price
Product CPU Maintenance Monitor Software CPU Maintenance Monitor Software
Manager Status
Debra Henley won 1 0 0 0 65000 0 0 0
pending 1 2 0 0 40000 10000 0 0
presented 1 0 0 2 30000 0 0 20000
declined 2 0 0 0 70000 0 0 0
Fred Anderson won 2 1 0 0 165000 7000 0 0
pending 0 1 0 0 0 5000 0 0
presented 1 0 1 1 30000 0 5000 10000
declined 1 0 0 0 65000 0 0 0
#now just a simpler example. I want it to be indexed by manager and status, but not by product
pd.pivot_table(df,index=["Manager","Status"],
#                columns=["Product"],
               values=["Quantity",],
                aggfunc={"Quantity":len,},
               fill_value=0)
Quantity
Manager Status
Debra Henley won 1
pending 3
presented 3
declined 2
Fred Anderson won 3
pending 1
presented 3
declined 1