Analysis of Purchases and Profits at a Superstore

Brian Wang

Introduction:

This analysis utilizes the Superstore Dataset from Kaggle, which can be accessed from this link: https://www.kaggle.com/datasets/vivek468/superstore-dataset-final. It can also be downloaded directly from this link: https://www.kaggle.com/datasets/vivek468/superstore-dataset-final/download. The objective of this analysis is to uncover trends in profits and losses, discover areas of high sales and high profits, and determine the most and least profitable items.

In this tutorial, I use the following libraries to help process the data:
pandas
numpy
datetime
re
scipy

I also use the following libraries to help visualize the data:
seaborn
matplotlib
folium
pgeocode

And finally, I use these libraries to get supporting data:
BeautifulSoup
requests

This code snippet installs pgeocode, which is a library that helps convert postal codes to geographic coordinates.

In [1]:
debug = False # prints debug output if True
In [2]:
import sys
!{sys.executable} -m pip install pgeocode # install pgeocode using pip, since this environment doesn't have it by default
Requirement already satisfied: pgeocode in /opt/conda/lib/python3.9/site-packages (0.3.0)
Requirement already satisfied: requests in /opt/conda/lib/python3.9/site-packages (from pgeocode) (2.27.1)
Requirement already satisfied: pandas in /opt/conda/lib/python3.9/site-packages (from pgeocode) (1.4.0)
Requirement already satisfied: numpy in /opt/conda/lib/python3.9/site-packages (from pgeocode) (1.21.5)
Requirement already satisfied: python-dateutil>=2.8.1 in /opt/conda/lib/python3.9/site-packages (from pandas->pgeocode) (2.8.2)
Requirement already satisfied: pytz>=2020.1 in /opt/conda/lib/python3.9/site-packages (from pandas->pgeocode) (2021.3)
Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.9/site-packages (from requests->pgeocode) (2021.10.8)
Requirement already satisfied: charset-normalizer~=2.0.0 in /opt/conda/lib/python3.9/site-packages (from requests->pgeocode) (2.0.10)
Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.9/site-packages (from requests->pgeocode) (3.3)
Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.9/site-packages (from requests->pgeocode) (1.26.8)
Requirement already satisfied: six>=1.5 in /opt/conda/lib/python3.9/site-packages (from python-dateutil>=2.8.1->pandas->pgeocode) (1.16.0)
In [3]:
import pandas as pd
import numpy as np
import folium
import seaborn as sns
import matplotlib.pyplot as plt # pyplot interface, used for the figures below
from datetime import datetime
import pgeocode
import requests
from bs4 import BeautifulSoup
import re
import scipy

In the code block above, I imported all the libraries I will need. In the code block below, I read the CSV file into a pandas DataFrame.

Data Collection and Parsing:
We start by downloading the data as a CSV (comma-separated values) file. Other formats work too, but they require different code to read into Python (a quick sketch follows). We place this file in the same directory as our code.
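For reference, if the data came in a different format, the matching pandas reader would be used instead. The file names below are hypothetical, since the actual Kaggle download is a CSV:

# Hypothetical alternatives, shown only for comparison -- these files don't exist here:
# data = pd.read_excel('superstore.xlsx')       # Excel workbook
# data = pd.read_json('superstore.json')        # JSON records
# data = pd.read_parquet('superstore.parquet')  # Parquet file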

In [4]:
data = pd.read_csv('superstore.csv', sep = ',', encoding='windows-1252')
data.head()
Out[4]:
Row ID Order ID Order Date Ship Date Ship Mode Customer ID Customer Name Segment Country City ... Postal Code Region Product ID Category Sub-Category Product Name Sales Quantity Discount Profit
0 1 CA-2016-152156 11/8/2016 11/11/2016 Second Class CG-12520 Claire Gute Consumer United States Henderson ... 42420 South FUR-BO-10001798 Furniture Bookcases Bush Somerset Collection Bookcase 261.9600 2 0.00 41.9136
1 2 CA-2016-152156 11/8/2016 11/11/2016 Second Class CG-12520 Claire Gute Consumer United States Henderson ... 42420 South FUR-CH-10000454 Furniture Chairs Hon Deluxe Fabric Upholstered Stacking Chairs,... 731.9400 3 0.00 219.5820
2 3 CA-2016-138688 6/12/2016 6/16/2016 Second Class DV-13045 Darrin Van Huff Corporate United States Los Angeles ... 90036 West OFF-LA-10000240 Office Supplies Labels Self-Adhesive Address Labels for Typewriters b... 14.6200 2 0.00 6.8714
3 4 US-2015-108966 10/11/2015 10/18/2015 Standard Class SO-20335 Sean O'Donnell Consumer United States Fort Lauderdale ... 33311 South FUR-TA-10000577 Furniture Tables Bretford CR4500 Series Slim Rectangular Table 957.5775 5 0.45 -383.0310
4 5 US-2015-108966 10/11/2015 10/18/2015 Standard Class SO-20335 Sean O'Donnell Consumer United States Fort Lauderdale ... 33311 South OFF-ST-10000760 Office Supplies Storage Eldon Fold 'N Roll Cart System 22.3680 2 0.20 2.5164

5 rows × 21 columns

Data Management + Representation:
The next step is to process the data. This means handling missing values and converting some datatypes into more useful ones. Here, I will simply drop any row with an NA value, since there isn't really a good default value for any of the columns. In this case, there aren't any missing values. I will also convert the order and ship dates to datetime objects, so they can be sorted and compared if we want to perform a time-based analysis.

In [5]:
data.dropna(inplace = True)
In [6]:
def to_datetime(row, target_col):
    dt = datetime.strptime(row[target_col], '%m/%d/%Y') # expected format
    return dt
data['Order Datetime'] = data.apply(lambda row: to_datetime(row, 'Order Date'), axis=1)
data['Ship Datetime'] = data.apply(lambda row: to_datetime(row, 'Ship Date'), axis=1)
data.drop(['Order Date', 'Ship Date'], axis = 1, inplace = True)
data.head()
Out[6]:
Row ID Order ID Ship Mode Customer ID Customer Name Segment Country City State Postal Code ... Product ID Category Sub-Category Product Name Sales Quantity Discount Profit Order Datetime Ship Datetime
0 1 CA-2016-152156 Second Class CG-12520 Claire Gute Consumer United States Henderson Kentucky 42420 ... FUR-BO-10001798 Furniture Bookcases Bush Somerset Collection Bookcase 261.9600 2 0.00 41.9136 2016-11-08 2016-11-11
1 2 CA-2016-152156 Second Class CG-12520 Claire Gute Consumer United States Henderson Kentucky 42420 ... FUR-CH-10000454 Furniture Chairs Hon Deluxe Fabric Upholstered Stacking Chairs,... 731.9400 3 0.00 219.5820 2016-11-08 2016-11-11
2 3 CA-2016-138688 Second Class DV-13045 Darrin Van Huff Corporate United States Los Angeles California 90036 ... OFF-LA-10000240 Office Supplies Labels Self-Adhesive Address Labels for Typewriters b... 14.6200 2 0.00 6.8714 2016-06-12 2016-06-16
3 4 US-2015-108966 Standard Class SO-20335 Sean O'Donnell Consumer United States Fort Lauderdale Florida 33311 ... FUR-TA-10000577 Furniture Tables Bretford CR4500 Series Slim Rectangular Table 957.5775 5 0.45 -383.0310 2015-10-11 2015-10-18
4 5 US-2015-108966 Standard Class SO-20335 Sean O'Donnell Consumer United States Fort Lauderdale Florida 33311 ... OFF-ST-10000760 Office Supplies Storage Eldon Fold 'N Roll Cart System 22.3680 2 0.20 2.5164 2015-10-11 2015-10-18

5 rows × 21 columns
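As an aside, pandas can do this conversion without a row-wise apply. Here is a sketch of an equivalent vectorized approach, run on a fresh copy of the raw file since the original string columns above have already been dropped:

# Vectorized date parsing -- equivalent to the apply-based conversion above.
raw = pd.read_csv('superstore.csv', sep=',', encoding='windows-1252')
raw['Order Datetime'] = pd.to_datetime(raw['Order Date'], format='%m/%d/%Y')
raw['Ship Datetime'] = pd.to_datetime(raw['Ship Date'], format='%m/%d/%Y')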

With the datetime columns created, we can drop the original date strings, since they are no longer needed. It may also be useful to have each item's base price, discounted sale price, and profit per item sold, so I'll add columns for those, calculated from sales, quantity, and discount.
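As a quick worked example, take row 3 of the preview above (the Bretford table): Sales = 957.5775, Quantity = 5, Discount = 0.45, Profit = -383.031.

# Worked example for a single row (row index 3 in the preview above):
sales, quantity, discount, profit = 957.5775, 5, 0.45, -383.031

base_price = sales / (1 - discount) / quantity  # ≈ 348.21, list price per item before discount
sale_price = sales / quantity                   # ≈ 191.52, discounted price actually paid per item
profit_per_item = profit / quantity             # ≈ -76.61, so each item in this order sold at a loss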

In [7]:
def get_base_price(row):
    return row['Sales'] / (1 - row['Discount']) / row['Quantity'] # if there were ever 100% discounts, we would have a problem here, but there aren't

def get_sale_price(row):
    return row['Sales'] / row['Quantity']

def get_per_item_profit(row):
    return row['Profit'] / row['Quantity']

data['Base Price'] = data.apply(lambda row: get_base_price(row), axis=1)
data['Sale Price'] = data.apply(lambda row: get_sale_price(row), axis=1)
data['Profit Per Item'] = data.apply(lambda row: get_per_item_profit(row), axis=1)

data.sort_values(by = 'Profit Per Item', inplace = True, ascending = False) # we sort for the highest profit per item

data
Out[7]:
Row ID Order ID Ship Mode Customer ID Customer Name Segment Country City State Postal Code ... Product Name Sales Quantity Discount Profit Order Datetime Ship Datetime Base Price Sale Price Profit Per Item
6826 6827 CA-2016-118689 Standard Class TC-20980 Tamara Chand Corporate United States Lafayette Indiana 47905 ... Canon imageCLASS 2200 Advanced Copier 17499.950 5 0.0 8399.9760 2016-10-02 2016-10-09 3499.99 3499.990 1679.9952
4190 4191 CA-2017-166709 Standard Class HL-15040 Hunter Lopez Consumer United States Newark Delaware 19711 ... Canon imageCLASS 2200 Advanced Copier 10499.970 3 0.0 5039.9856 2017-11-17 2017-11-22 3499.99 3499.990 1679.9952
8153 8154 CA-2017-140151 First Class RB-19360 Raymond Buch Consumer United States Seattle Washington 98115 ... Canon imageCLASS 2200 Advanced Copier 13999.960 4 0.0 6719.9808 2017-03-23 2017-03-25 3499.99 3499.990 1679.9952
318 319 CA-2014-164973 Standard Class NM-18445 Nathan Mautz Home Office United States New York City New York 10024 ... Canon imageCLASS MF7460 Monochrome Digital Las... 3991.980 2 0.0 1995.9900 2014-11-04 2014-11-09 1995.99 1995.990 997.9950
2623 2624 CA-2017-127180 First Class TA-21385 Tom Ashbrook Home Office United States New York City New York 10024 ... Canon imageCLASS 2200 Advanced Copier 11199.968 4 0.2 3919.9888 2017-10-22 2017-10-24 3499.99 2799.992 979.9972
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4991 4992 US-2017-122714 Standard Class HG-14965 Henry Goldwyn Corporate United States Chicago Illinois 60653 ... Ibico EPK-21 Electric Binding System 1889.990 5 0.8 -2929.4845 2017-12-07 2017-12-13 1889.99 377.998 -585.8969
3011 3012 CA-2017-134845 Standard Class SR-20425 Sharelle Roach Home Office United States Louisville Colorado 80027 ... Lexmark MX611dhe Monochrome Laser Printer 2549.985 5 0.7 -3399.9800 2017-04-17 2017-04-23 1699.99 509.997 -679.9960
683 684 US-2017-168116 Same Day GT-14635 Grant Thornton Corporate United States Burlington North Carolina 27217 ... Cubify CubeX 3D Printer Triple Head Print 7999.980 4 0.5 -3839.9904 2017-11-04 2017-11-04 3999.99 1999.995 -959.9976
7772 7773 CA-2016-108196 Standard Class CS-12505 Cindy Stewart Consumer United States Lancaster Ohio 43130 ... Cubify CubeX 3D Printer Double Head Print 4499.985 5 0.7 -6599.9780 2016-11-25 2016-12-02 2999.99 899.997 -1319.9956
3151 3152 CA-2015-147830 First Class NF-18385 Natalie Fritzler Consumer United States Newark Ohio 43055 ... Cubify CubeX 3D Printer Double Head Print 1799.994 2 0.7 -2639.9912 2015-12-15 2015-12-18 2999.99 899.997 -1319.9956

9994 rows × 24 columns

Exploratory Data Analysis:
We can now explore the data to find insights. I will start by finding all instances of negative profit, and look for commonalities in the entries.

In [8]:
neg_profit = data[data['Profit'] < 0] # select entries where profit is negative
neg_profit = neg_profit.sort_values(by = 'Profit', ascending = True) # display the biggest losses first
neg_profit
Out[8]:
Row ID Order ID Ship Mode Customer ID Customer Name Segment Country City State Postal Code ... Product Name Sales Quantity Discount Profit Order Datetime Ship Datetime Base Price Sale Price Profit Per Item
7772 7773 CA-2016-108196 Standard Class CS-12505 Cindy Stewart Consumer United States Lancaster Ohio 43130 ... Cubify CubeX 3D Printer Double Head Print 4499.985 5 0.7 -6599.9780 2016-11-25 2016-12-02 2999.99 899.997 -1319.9956
683 684 US-2017-168116 Same Day GT-14635 Grant Thornton Corporate United States Burlington North Carolina 27217 ... Cubify CubeX 3D Printer Triple Head Print 7999.980 4 0.5 -3839.9904 2017-11-04 2017-11-04 3999.99 1999.995 -959.9976
9774 9775 CA-2014-169019 Standard Class LF-17185 Luke Foster Consumer United States San Antonio Texas 78207 ... GBC DocuBind P400 Electric Binding System 2177.584 8 0.8 -3701.8928 2014-07-26 2014-07-30 1360.99 272.198 -462.7366
3011 3012 CA-2017-134845 Standard Class SR-20425 Sharelle Roach Home Office United States Louisville Colorado 80027 ... Lexmark MX611dhe Monochrome Laser Printer 2549.985 5 0.7 -3399.9800 2017-04-17 2017-04-23 1699.99 509.997 -679.9960
4991 4992 US-2017-122714 Standard Class HG-14965 Henry Goldwyn Corporate United States Chicago Illinois 60653 ... Ibico EPK-21 Electric Binding System 1889.990 5 0.8 -2929.4845 2017-12-07 2017-12-13 1889.99 377.998 -585.8969
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4660 4661 CA-2015-112130 Standard Class SV-20785 Stewart Visinsky Consumer United States Philadelphia Pennsylvania 19140 ... SanDisk Cruzer 16 GB USB Flash Drive 27.552 3 0.2 -0.3444 2015-05-03 2015-05-07 11.48 9.184 -0.1148
7413 7414 CA-2017-121125 Standard Class MG-17890 Michael Granlund Home Office United States Tigard Oregon 97224 ... Tensor Brushed Steel Torchiere Floor Lamp 13.592 1 0.2 -0.3398 2017-05-30 2017-06-03 16.99 13.592 -0.3398
1566 1567 CA-2015-129112 First Class AW-10840 Anthony Witt Consumer United States Allen Texas 75002 ... Kingston Digital DataTraveler 16GB USB 2.0 21.480 3 0.2 -0.2685 2015-11-29 2015-11-30 8.95 7.160 -0.0895
1496 1497 CA-2017-152485 Standard Class JD-15790 John Dryer Consumer United States Coppell Texas 75019 ... Acco Perma 3000 Stacking Storage Drawers 16.784 1 0.2 -0.2098 2017-09-04 2017-09-08 20.98 16.784 -0.2098
3228 3229 CA-2014-108189 First Class ES-14080 Erin Smith Corporate United States Tempe Arizona 85281 ... Kingston Digital DataTraveler 16GB USB 2.0 7.160 1 0.2 -0.0895 2014-10-02 2014-10-05 8.95 7.160 -0.0895

1871 rows × 24 columns

We can see that many of the biggest losses involve multiple items sold, which makes sense. To control for this, we can sort by loss per item instead.

In [9]:
neg_profit = neg_profit.sort_values(by = 'Profit Per Item', ascending = True) # display the biggest losses first
neg_profit
Out[9]:
Row ID Order ID Ship Mode Customer ID Customer Name Segment Country City State Postal Code ... Product Name Sales Quantity Discount Profit Order Datetime Ship Datetime Base Price Sale Price Profit Per Item
7772 7773 CA-2016-108196 Standard Class CS-12505 Cindy Stewart Consumer United States Lancaster Ohio 43130 ... Cubify CubeX 3D Printer Double Head Print 4499.985 5 0.7 -6599.9780 2016-11-25 2016-12-02 2999.99 899.997 -1319.9956
3151 3152 CA-2015-147830 First Class NF-18385 Natalie Fritzler Consumer United States Newark Ohio 43055 ... Cubify CubeX 3D Printer Double Head Print 1799.994 2 0.7 -2639.9912 2015-12-15 2015-12-18 2999.99 899.997 -1319.9956
683 684 US-2017-168116 Same Day GT-14635 Grant Thornton Corporate United States Burlington North Carolina 27217 ... Cubify CubeX 3D Printer Triple Head Print 7999.980 4 0.5 -3839.9904 2017-11-04 2017-11-04 3999.99 1999.995 -959.9976
3011 3012 CA-2017-134845 Standard Class SR-20425 Sharelle Roach Home Office United States Louisville Colorado 80027 ... Lexmark MX611dhe Monochrome Laser Printer 2549.985 5 0.7 -3399.9800 2017-04-17 2017-04-23 1699.99 509.997 -679.9960
4991 4992 US-2017-122714 Standard Class HG-14965 Henry Goldwyn Corporate United States Chicago Illinois 60653 ... Ibico EPK-21 Electric Binding System 1889.990 5 0.8 -2929.4845 2017-12-07 2017-12-13 1889.99 377.998 -585.8969
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4660 4661 CA-2015-112130 Standard Class SV-20785 Stewart Visinsky Consumer United States Philadelphia Pennsylvania 19140 ... SanDisk Cruzer 16 GB USB Flash Drive 27.552 3 0.2 -0.3444 2015-05-03 2015-05-07 11.48 9.184 -0.1148
1566 1567 CA-2015-129112 First Class AW-10840 Anthony Witt Consumer United States Allen Texas 75002 ... Kingston Digital DataTraveler 16GB USB 2.0 21.480 3 0.2 -0.2685 2015-11-29 2015-11-30 8.95 7.160 -0.0895
8881 8882 CA-2016-135594 Second Class AH-10120 Adrian Hane Home Office United States Aurora Illinois 60505 ... Kingston Digital DataTraveler 16GB USB 2.0 50.120 7 0.2 -0.6265 2016-06-30 2016-07-03 8.95 7.160 -0.0895
2769 2770 US-2015-122140 Standard Class MO-17950 Michael Oakman Consumer United States Dallas Texas 75220 ... Kingston Digital DataTraveler 16GB USB 2.0 50.120 7 0.2 -0.6265 2015-04-02 2015-04-07 8.95 7.160 -0.0895
3228 3229 CA-2014-108189 First Class ES-14080 Erin Smith Corporate United States Tempe Arizona 85281 ... Kingston Digital DataTraveler 16GB USB 2.0 7.160 1 0.2 -0.0895 2014-10-02 2014-10-05 8.95 7.160 -0.0895

1871 rows × 24 columns

Looking at this, I noticed that the entries at both ends, from the largest losses to the smallest, all have a nonzero discount. I want to check whether any non-discounted items sold at a loss, so I filter the table again.

In [10]:
neg_profit2 = neg_profit[neg_profit['Discount'] == 0] # filter for discount = 0
neg_profit2
Out[10]:
Row ID Order ID Ship Mode Customer ID Customer Name Segment Country City State Postal Code ... Product Name Sales Quantity Discount Profit Order Datetime Ship Datetime Base Price Sale Price Profit Per Item

0 rows × 24 columns

It turns out that no undiscounted item was sold at a loss, which makes sense. Next, I want to look at factors that may affect profits. Since profit per item is the most meaningful measure here, I will graph variables against profit per item rather than overall profit. We can use matplotlib and seaborn for this, since they make it easy to plot different variables against each other.

In [11]:
fig, ax = plt.subplots()

sns.violinplot(x = 'Ship Mode', y = 'Profit Per Item', data=data, ax = ax)
ax.set_xlabel("Ship Mode")
ax.set_ylabel("Profit Per Item")
Out[11]:
Text(0, 0.5, 'Profit Per Item')

This graph looks squished because of outliers. Even so, we can conclude that the vast majority of items across all shipping classes yield very little profit or loss, and the median for each shipping mode is about the same, so shipping mode doesn't seem to affect profits much. To get a closer look at the center of the distribution, we can remove the outliers and replot.

In [12]:
no_outliers = data[data['Profit Per Item'] < 500]
no_outliers = no_outliers[no_outliers['Profit Per Item'] > -500]

fig, ax = plt.subplots()

sns.violinplot(x = 'Ship Mode', y = 'Profit Per Item', data=no_outliers, ax = ax)
ax.set_xlabel('Ship Mode')
ax.set_ylabel('Profit Per Item')
Out[12]:
Text(0, 0.5, 'Profit Per Item')
In [13]:
data[data['Ship Mode'] == 'Standard Class'].hist(column = 'Profit Per Item', bins = 100)
Out[13]:
array([[<AxesSubplot:title={'center':'Profit Per Item'}>]], dtype=object)
In [14]:
data[data['Ship Mode'] == 'First Class'].hist(column = 'Profit Per Item', bins = 100)
Out[14]:
array([[<AxesSubplot:title={'center':'Profit Per Item'}>]], dtype=object)
In [15]:
data[data['Ship Mode'] == 'Second Class'].hist(column = 'Profit Per Item', bins = 100)
Out[15]:
array([[<AxesSubplot:title={'center':'Profit Per Item'}>]], dtype=object)
In [16]:
data[data['Ship Mode'] == 'Same Day'].hist(column = 'Profit Per Item', bins = 100)
Out[16]:
array([[<AxesSubplot:title={'center':'Profit Per Item'}>]], dtype=object)

From these histograms, it looks like shipping mode doesn't have much of an impact on profit per item, and the vast majority of per-item profits sit just above zero, which agrees with the violin plots above.
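To put a rough number on that impression, one option (a quick sketch added here, not a test from the original analysis) is a Kruskal-Wallis test comparing the per-item profit distributions across the four shipping modes; a large p-value would be consistent with ship mode having little effect:

from scipy import stats

# Compare per-item profit distributions across the four shipping modes.
groups = [grp['Profit Per Item'].values for _, grp in data.groupby('Ship Mode')]
stat, p = stats.kruskal(*groups)
print('Kruskal-Wallis H = {:.2f}, p = {:.4f}'.format(stat, p))

This is only a coarse check, since the distributions are heavy-tailed, but it complements the plots.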

It may also be interesting to look at net profit and quantity by product. Since we can't average names, and it's inconvenient and unnecessary to average dates, we drop those columns and anything else we don't need.

In [17]:
pruned_data = data.drop(['Row ID', 'Base Price', 'Sale Price', 'Region', 'Product ID', 'Category', 'Sub-Category', 'Order Datetime', 'Ship Datetime', 'Order ID', 'Ship Mode', 'Customer Name', 'Customer ID', 'Segment', 'Country', 'City', 'State', 'Postal Code'], axis = 1)
grouped_data = pruned_data.groupby('Product Name', as_index = False).agg({'Sales':'sum', 'Quantity':'sum','Profit':'sum', 'Discount':'mean', 'Profit Per Item':'mean'}) # we separate the dataframe into groups by product name and sum the results, but take the mean discount
grouped_data['Profit Per Item'] = grouped_data.apply(lambda row: get_per_item_profit(row), axis=1) # recompute profit per item from the summed totals, since summing the per-row averages doesn't make sense
grouped_data['Profit Percent'] = grouped_data.apply(lambda row: row['Profit'] / row['Sales'] * 100, axis=1)
grouped_data.sort_values(by = 'Profit Percent', inplace = True, ascending = False)

grouped_data
Out[17]:
Product Name Sales Quantity Profit Discount Profit Per Item Profit Percent
109 Adams Telephone Message Book w/Frequently-Call... 223.440 28 111.7200 0.0 3.990000 50.000000
405 Canon imageCLASS MF7460 Monochrome Digital Las... 3991.980 2 1995.9900 0.0 997.995000 50.000000
181 Avery 475 266.400 18 133.2000 0.0 7.400000 50.000000
1673 Xerox 1890 244.700 5 122.3500 0.0 24.470000 50.000000
1567 Tops Green Bar Computer Printout Paper 342.580 7 171.2900 0.0 24.470000 50.000000
... ... ... ... ... ... ... ...
1838 Zebra GK420t Direct Thermal/Thermal Transfer P... 703.710 6 -938.2800 0.7 -156.380000 -133.333333
1215 Okidata B401 Printer 179.991 3 -251.9874 0.7 -83.995800 -140.000000
613 Euro Pro Shark Stick Mini Vacuum 170.744 11 -325.6332 0.6 -29.603018 -190.714286
388 Bush Westfield Collection Bookcases, Dark Cher... 90.882 3 -190.8522 0.7 -63.617400 -210.000000
605 Eureka Disposable Bags for Sanitaire Vibra Gro... 1.624 2 -4.4660 0.8 -2.233000 -275.000000

1850 rows × 7 columns

Here we get aggregate statistics for each product. It is important to note that the Discount column is only approximate, since we average across transactions instead of weighting by the quantity of items sold (a quantity-weighted version is sketched after the next table), but it is a decent approximation. We can immediately see that the highest profit margin is 50%, and most of the items with a 50% margin are sold with no discount.

Next, we sort by quantity; many of the most popular items are sold at a profit, with a few exceptions.

In [18]:
grouped_data.sort_values(by = 'Quantity', inplace = True, ascending = False)

grouped_data
Out[18]:
Product Name Sales Quantity Profit Discount Profit Per Item Profit Percent
1499 Staples 755.470 215 292.2046 0.100000 1.359091 38.678518
1493 Staple envelope 1686.812 170 751.6290 0.075000 4.421347 44.559145
537 Easy-staple paper 2504.192 150 1096.0292 0.060870 7.306861 43.767778
1500 Staples in misc. colors 478.812 86 124.0516 0.063158 1.442460 25.908206
942 KI Adjustable-Height Table 4552.641 74 -248.4822 0.236111 -3.357868 -5.457979
... ... ... ... ... ... ... ...
385 Bush Saratoga Collection 5-Shelf Bookcase, Han... 119.833 1 -12.6882 0.150000 -12.688200 -10.588235
749 Global Enterprise Series Seating Low-Back Swiv... 207.184 1 25.8980 0.200000 25.898000 12.500000
1253 Penpower WorldCard Pro Card Scanner 91.475 1 -1.8295 0.500000 -1.829500 -2.000000
356 Boston 1900 Electric Pencil Sharpener 14.980 1 4.4940 0.000000 4.494000 30.000000
1793 Xerox 20 6.480 1 3.1104 0.000000 3.110400 48.000000

1850 rows × 7 columns
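As noted above, the Discount column in these grouped tables is an average over transactions rather than over items sold. If a quantity-weighted average were needed, it could be computed along these lines (a sketch using the pruned_data frame defined above; the result isn't used elsewhere in this analysis):

# Quantity-weighted average discount per product: weight each transaction's
# discount by the number of items it covers, instead of counting transactions equally.
tmp = pruned_data.copy()
tmp['disc_x_qty'] = tmp['Discount'] * tmp['Quantity']
weighted = tmp.groupby('Product Name')[['disc_x_qty', 'Quantity']].sum()
weighted_discount = weighted['disc_x_qty'] / weighted['Quantity'] # Series indexed by product name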

Finally, we sort by profit to see the most and least profitable items overall.

In [19]:
grouped_data.sort_values(by = 'Profit', inplace = True, ascending = False)

grouped_data
Out[19]:
Product Name Sales Quantity Profit Discount Profit Per Item Profit Percent
404 Canon imageCLASS 2200 Advanced Copier 61599.824 20 25199.9280 0.120000 1259.996400 40.909091
650 Fellowes PB500 Electric Punch Plastic Comb Bin... 27453.384 31 7753.0390 0.240000 250.098032 28.240741
805 Hewlett Packard LaserJet 3310 Copier 18839.686 38 6983.8836 0.200000 183.786411 37.070064
400 Canon PC1060 Personal Laser Copier 11619.834 19 4570.9347 0.150000 240.575511 39.337349
787 HP Designjet T520 Inkjet Large Format Printer ... 18374.895 12 4094.9766 0.166667 341.248050 22.285714
... ... ... ... ... ... ... ...
376 Bush Advantage Collection Racetrack Conference... 9544.725 33 -1934.3976 0.350000 -58.618109 -20.266667
425 Chromcraft Bull-Nose Wood Oval Conference Tabl... 9917.640 27 -2876.1156 0.280000 -106.522800 -29.000000
476 Cubify CubeX 3D Printer Triple Head Print 7999.980 4 -3839.9904 0.500000 -959.997600 -48.000000
985 Lexmark MX611dhe Monochrome Laser Printer 16829.901 18 -4589.9730 0.400000 -254.998500 -27.272727
475 Cubify CubeX 3D Printer Double Head Print 11099.963 9 -8879.9704 0.533333 -986.663378 -80.000000

1850 rows × 7 columns

Next, I will try to visualize where sales are occurring. I will use pgeocode to convert the postal codes in the data to geographic coordinates, and folium to plot sales on a map. Since the data covers only the United States (verified in a code snippet below), I will center and zoom the map on the US from the start.
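Before building the map, here is a quick illustration of what a single pgeocode lookup returns (10024 is one of the postal codes in the data; the exact fields may vary slightly by pgeocode version):

nomi = pgeocode.Nominatim('us')        # lookups against the US postal-code database
loc = nomi.query_postal_code('10024')  # returns a pandas Series with place_name, state_name, latitude, longitude, ...
print(loc['place_name'], loc['latitude'], loc['longitude'])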

In [20]:
pruned_data = data.drop(['Row ID', 'Base Price', 'Sale Price', 'Product Name', 'Region', 'Product ID', 'Category', 'Sub-Category', 'Order Datetime', 'Ship Datetime', 'Order ID', 'Ship Mode', 'Customer Name', 'Customer ID', 'Segment', 'Country', 'City', 'State'], axis = 1)
grouped_data2 = pruned_data.groupby('Postal Code', as_index = False).agg({'Sales':'sum', 'Quantity':'sum','Profit':'sum', 'Discount':'mean', 'Profit Per Item':'mean'}) # we separate the dataframe into groups by postal code
grouped_data2['Profit Per Item'] = grouped_data2.apply(lambda row: get_per_item_profit(row), axis=1) # recompute profit per item from the summed totals, since summing the per-row averages doesn't make sense
grouped_data2['Profit Percent'] = grouped_data2.apply(lambda row: row['Profit'] / row['Sales'] * 100, axis=1)
grouped_data2.sort_values(by = 'Quantity', inplace = True, ascending = False)

grouped_data2
Out[20]:
Postal Code Sales Quantity Profit Discount Profit Per Item Profit Percent
55 10035 77357.885 953 16533.8669 0.047529 17.349283 21.373215
52 10009 54761.496 848 13697.0019 0.054585 16.152125 25.012103
54 10024 78697.182 840 21653.7248 0.064783 25.778244 27.515248
578 94122 52667.467 802 7712.5958 0.072660 9.616703 14.643947
53 10011 45551.598 776 10152.3901 0.059585 13.082977 22.287671
... ... ... ... ... ... ... ...
623 98208 3.856 1 1.3978 0.200000 1.397800 36.250000
462 80122 72.294 1 -98.8018 0.700000 -98.801800 -136.666667
63 11757 55.480 1 26.6304 0.000000 26.630400 48.000000
475 83501 9.584 1 3.3544 0.200000 3.354400 35.000000
612 98002 4.180 1 1.5048 0.000000 1.504800 36.000000

631 rows × 7 columns

In [21]:
print(data['Country'].unique()) # the only country in this dataset is the US
['United States']
In [22]:
def sales_circle_size(sales): # scale the plotted circle radius with the dollar amount of sales (square root keeps large values readable)
    if sales <= 1:
        return 1
    return sales ** 0.5 / 10

def profit_circle_size(profit): # same square-root scaling for profits (and, negated, for losses)
    if profit <= 1:
        return 1
    return profit ** 0.5 / 10
In [23]:
map_osm = folium.Map(location=[36.84, -90.3], zoom_start=4)

code_to_loc = {}
nomi = pgeocode.Nominatim('us') # use us postal codes
for idx, row in grouped_data2.iterrows(): # idx is the dataframe index; row holds the aggregated values for one postal code
    loc = nomi.query_postal_code('{:05}'.format(int(row['Postal Code']))) # zero-pad to five digits before the lookup
    if(np.isnan(loc.latitude)):
        if(debug):
            print(loc)
        continue
    code_to_loc[int(row['Postal Code'])] = [loc.latitude, loc.longitude]
    if row['Sales'] > 0:
        folium.CircleMarker(location = [loc.latitude, loc.longitude], radius = sales_circle_size(row['Sales']), weight = 0.5, color = "blue", fill = True).add_to(map_osm)
    elif debug:
        print("0 sales?")
    if row['Profit'] > 0:
        folium.CircleMarker(location = [loc.latitude, loc.longitude], radius = profit_circle_size(row['Profit']), weight = 2, color = "green", fill = True).add_to(map_osm)
    else:
        folium.CircleMarker(location = [loc.latitude, loc.longitude], radius = profit_circle_size(-1 * row['Profit']), weight = 1, color = "red", fill = True).add_to(map_osm)

map_osm
Out[23]:
(Interactive folium map rendered here: sales, profits, and losses plotted by postal code.)

In this map, blue circles represent sales, green circles represent profits, and red circles represent losses; in each case the circle's area is proportional to the dollar amount. Proximity to major port cities looks like a big factor in whether a location is profitable: locations on the coasts appear more profitable than those in the central US. There are plenty of exceptions, so this certainly doesn't explain everything, but there may still be some correlation, and that is what we will examine more closely.

Hypothesis Testing:
First, we have to decide which ports to count. I found a list of the top 10 US ports here: https://www.icontainers.com/us/2017/05/16/top-10-us-ports/, which is what I decided to use, though the set of ports could be larger or smaller; you could even use proximity to water instead. The latitude and longitude of all US ports can be found here: https://www.gccports.com/ports/latitude-longitude/search. My hypothesis is that there is a correlation between distance from the top 10 ports and profit per item; the null hypothesis is that there is no correlation.

In [24]:
full_port_set = False # toggle for using full set of available US ports
In [25]:
def parse_coord(s):
    s = strip_td(s)
    coords = re.split('°|\'', s) # split into degrees, minutes, and hemisphere letter
    val = float(coords[0]) + float(coords[1]) / 60.0 # convert degrees and minutes to decimal degrees
    if coords[2] == 'W' or coords[2] == 'S': # west and south are negative
        val = val * -1
    return val

def strip_td(s):
    s = str(s)
    s = s[4:len(s) - 5] # remove the surrounding '<td>' and '</td>' tags
    return s

target_ports = ['Miami', 'Oakland', 'Charleston', 'Georgetown', 'Houston', 'Norfolk', 'Seattle', 'Tacoma', 'Savannah', 'Brunswick', 'Newark', 'Brooklyn', 'Long Beach', 'Los Angeles']
target_coords = []
port_data = {}
for page in range(1, 10): # the port table spans 9 pages
    # this website's SSL certificate is expired, so we disable verification rather than handling it manually
    res = requests.get('https://www.gccports.com/ports/latitude-longitude/search/?n=&c=231&cd=&per_page=' + str(page), verify = False)
    root = BeautifulSoup(res.content, 'html.parser') # parse the HTML using Beautiful Soup
    table = root.find('table') # find the table of ports
    ports = table.findAll('td') # grab every data cell in the table
    for i in range(0, len(ports))[::5]: # each row is 5 cells long: position 0 holds the name, positions 3 and 4 the latitude and longitude
        port_data[strip_td(ports[i])] = [parse_coord(ports[i + 3]), parse_coord(ports[i + 4])]
if debug:
    print(port_data)
for port in (port_data.keys() if full_port_set else target_ports):
    if port in port_data.keys():
        target_coords.append(port_data[port])
    else:
        print('Could not find port ' + port)
if debug:
    print(target_coords)
/opt/conda/lib/python3.9/site-packages/urllib3/connectionpool.py:1043: InsecureRequestWarning: Unverified HTTPS request is being made to host 'www.gccports.com'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings
  warnings.warn(
(the same warning is repeated once for each of the 9 pages fetched)
Could not find port Oakland
Could not find port Georgetown
Could not find port Brunswick
Could not find port Newark
Could not find port Brooklyn

A few of the ports mentioned in the article aren't in the scraped table, but that's fine. Before computing distances, here is a quick sanity check that the coordinate parser behaves as expected (the cell values below are made up, in the site's degree/minute format):
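# Sanity check of the coordinate parser on made-up table cells.
print(parse_coord("<td>25°46'N</td>"))  # expected 25 + 46/60 ≈ 25.767
print(parse_coord("<td>80°11'W</td>"))  # expected -(80 + 11/60) ≈ -80.183

With the parser behaving as expected, we can now add a column for the minimum distance from each postal code to one of the target ports.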

In [26]:
def distance(a, b):
    # straight-line distance in degrees of latitude/longitude -- a rough proxy, good enough for ranking nearby points
    return ((a[0] - b[0]) ** 2 + (a[1] - b[1]) ** 2) ** 0.5

def best_distance(loc):
    # return the distance from loc to the closest target port
    best_distance = distance(loc, target_coords[0])
    for target in target_coords:
        dis = distance(loc, target)
        if dis < best_distance:
            best_distance = dis
    return best_distance

grouped_data2['Port Distance'] = grouped_data2.apply(lambda row: best_distance(code_to_loc[int(row['Postal Code'])]), axis=1)
grouped_data2.sort_values(by = 'Port Distance', inplace = True)
grouped_data2
Out[26]:
Postal Code Sales Quantity Profit Discount Profit Per Item Profit Percent Port Distance
620 98105 41838.008 625 8734.6042 0.069091 13.975367 20.877199 0.070542
619 98103 36541.826 539 7117.6170 0.058278 13.205226 19.478000 0.073883
166 33142 3619.421 38 -980.0357 0.381818 -25.790413 -27.077140 0.080095
621 98115 41160.908 426 13303.8755 0.067857 31.229755 32.321628 0.092427
165 33134 64.248 6 22.3578 0.200000 3.726300 34.799216 0.104742
... ... ... ... ... ... ... ... ...
315 57401 25.500 3 6.6300 0.000000 2.210000 26.000000 16.029117
316 57701 152.640 4 44.7684 0.000000 11.192100 29.329403 16.403706
317 58103 919.910 30 230.1497 0.000000 7.671657 25.018719 17.170214
313 56560 543.860 9 101.4106 0.000000 11.267844 18.646453 17.176817
299 54880 1299.730 37 514.0822 0.000000 13.894114 39.552999 17.258858

631 rows × 8 columns
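One caveat before interpreting the numbers: the distance above is a straight Euclidean distance in degrees of latitude and longitude, which is fine for ranking which port is closest but is not a physical distance. If actual distances mattered, a haversine version could be swapped in; a minimal sketch, not used for the results below:

import math

def haversine_km(a, b):
    # great-circle distance in kilometers between two (latitude, longitude) pairs
    lat1, lon1, lat2, lon2 = map(math.radians, [a[0], a[1], b[0], b[1]])
    dlat, dlon = lat2 - lat1, lon2 - lon1
    h = math.sin(dlat / 2) ** 2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon / 2) ** 2
    return 2 * 6371.0 * math.asin(math.sqrt(h))

def best_distance_km(loc):
    # minimum great-circle distance from loc to any target port
    return min(haversine_km(loc, target) for target in target_coords)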

We can then fit a trendline with numpy and plot it, and perform a linear regression with scipy to test for significance.

In [27]:
m = np.polyfit(grouped_data2["Port Distance"], grouped_data2["Profit Per Item"], 1)
p = np.poly1d(m)
grouped_data2["trendline"] = p(grouped_data2.loc[:, "Port Distance"])

ax = grouped_data2.plot(x = "Port Distance", y = "Profit Per Item", kind = "scatter")
dfm2 = grouped_data2.set_index("Port Distance", inplace=False)
dfm2.trendline.sort_index(ascending=False).plot(ax=ax)
Out[27]:
<AxesSubplot:xlabel='Port Distance', ylabel='Profit Per Item'>
In [28]:
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(grouped_data2['Port Distance'], grouped_data2['Profit Per Item'])
p_value
Out[28]:
0.28374378416163254
In [29]:
slope
Out[29]:
0.20621553633367354

We see a slight positive relationship between port distance and profit per item, but our p-value is 0.28, which is greater than the standard alpha of 0.05, so we fail to reject the null hypothesis. The result could reflect a poor choice of ports, and a larger sample might still reveal something. It's an interesting outcome either way: I expected greater distance to increase shipping costs and reduce profits, but that doesn't appear to be the case.

In [30]:
slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(grouped_data2['Port Distance'], grouped_data2['Sales'])
p_value
Out[30]:
0.0010136492863376115
In [31]:
slope
Out[31]:
-246.3898496821685

Here, by contrast, the p-value is about 0.001, so there is a statistically significant negative relationship between distance from the nearest large port and total sales. This could be because people tend to live near ports. We can take a look at the graph here:

In [32]:
ax = grouped_data2.plot(x = "Port Distance", y = "Sales", kind = "scatter")
dfm2 = grouped_data2.set_index("Port Distance", inplace=False)
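Both sales and per-item profits are heavily skewed by a handful of very large orders, so as a robustness check on the linear fits (a sketch added here, not part of the original analysis), a rank-based Spearman correlation can be computed:

from scipy import stats

# Rank-based correlations are less sensitive to the extreme sales/profit outliers.
rho_sales, p_sales = stats.spearmanr(grouped_data2['Port Distance'], grouped_data2['Sales'])
rho_profit, p_profit = stats.spearmanr(grouped_data2['Port Distance'], grouped_data2['Profit Per Item'])
print('Sales vs. distance:           rho = {:.3f}, p = {:.4f}'.format(rho_sales, p_sales))
print('Profit per item vs. distance: rho = {:.3f}, p = {:.4f}'.format(rho_profit, p_profit))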

Insights:
While we failed to reject the null hypothesis that there is no correlation between port distance and average profit per item, this could be due to a variety of factors. I initially thought that proximity to ports would reduce shipping costs, but that may not hold if other factors dominate those costs. My set of ports is also incomplete, and a more complete set of the ports this superstore chain actually uses could make the analysis more accurate. Finally, it is unlikely that this dataset contains every transaction, so there may simply be too little data to demonstrate the relationship.

I've added a toggle in the code (full_port_set) to use every US port from the scraped table. Doing so slightly reduces the p-values for both profit per item and sales, but not enough to make the relationship between port distance and profit per item statistically significant.

Despite this, we did find a statistically significant association between proximity to ports and overall sales, which suggests that locations near ports get more business. This may be because large ports tend to have large cities around them. Conducting further analysis (perhaps with the actual set of ports this unnamed superstore chain uses) could yield further insights.