PADD 1 District¶

No description has been provided for this image
In [1]:
from odin_eia_db.engine.odin_spark import ODINSpark
from eia.utils.aws_athena import AWSAthena
from pyspark.sql import functions as f 
import pandas as pd 
import numpy as np 
import warnings 
from odin_eia_db.utils.constants import INVERSE_US_REGIONS, US_REGIONS, PADD_AREAS
from eia.utils.constants import INVERSE_STATES

import matplotlib.pyplot as plt 
import seaborn as sns 

from typing import List, Dict 
plt.style.use('ggplot')
warnings.filterwarnings('ignore') 
pd.options.display.max_rows = 200 

Constant Vars¶

In [2]:
MONTH_LOOKUP: Dict = { x:pd.to_datetime(f"1900-{x}-01").month_name()  for x in range(1,13) }

SEASON_LOOKUP: Dict = {'Spring': range(3,6),
                       'Summer': range(6,10), 
                       'Fall': range(9,12), 
                       'Winter': range(1,3)
                      }

Helper Funcitons¶

In [3]:
@f.udf
def get_transc_date(transc_date) -> str:
    try:
        return pd.to_datetime(transc_date, format="ISO8601").strftime("%Y-%m-%d") 

    except: 
        return "N/A"

@f.udf 
def get_padd_district(state_name: str): 
    # Get PADD District loc based on state_name 
    return PADD_AREAS.get( INVERSE_US_REGIONS.get( INVERSE_STATES.get(state_name) ) , 'N/A' )

@f.udf 
def get_month_name(month: int): 
    return MONTH_LOOKUP.get(month, np.nan)

@f.udf
def get_state_name(state: str): 
    return INVERSE_STATES.get(state, 'N/A')

ODIN Project Helper¶

In [4]:
odin_athena: AWSAthena = AWSAthena() 
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials

ODIN Athena¶

  • Retrieve gasoline legacy pricing
  • Retrieve current gasoline pricing
In [69]:
query: str = """ 
WITH legacy_pricing AS 
( SELECT 
    ROW_NUMBER() OVER() AS "id",
	col1 AS "gas_station",
	col2 AS "country",
	col3 AS "city",
	col4 AS "address",
	col5 AS "zip_code",
	col6 AS "state",
	col7 AS "regular_gas",
	col8 AS "midgrade_gas",
	col9 AS "premium_gas",
	col10 AS "price_unit",
	col11 AS "star_rating",
	col12 AS "latitude",
	col13 AS "longitude",
	col14 AS "timestamp",
	col15 AS "price",
	col16 AS "review",
	col17 AS "review_date",
	col18 AS "sentiment_score"
    
FROM live_gasoline_prices l )

SELECT 
    l.* 

FROM legacy_pricing l 
WHERE l.id > 1 
""" 
query_execution_id: str = odin_athena.execute_query(db='odin_db', query=query)
s3_path: str = query_execution_id.get('s3_path') 
# consume using ODIN-Spark 

TaskForce ODIN¶

  • Current Gasoline Pricing
In [2]:
query: str = """
SELECT 
    l.gas_station_name AS `gas_station`, 
    l.postedtime AS `timestamp`,
    DATE_FORMAT(l.postedtime , '%Y-%m-%d' ) AS `transc_date`, 
    CONCAT(l.address,', ', l.city, ', ', l.state, ', ', l.zip_code) AS `store_loc`, 
    IFNULL( l.price, 0.0) AS `price`, 
    IFNULL( l.longitude, 0.0) AS `longitude`, 
    IFNULL( l.latitude, 0.0) AS `latitude`,  
    IFNULL( l.star_rating, 0.0) AS `star_rating`,
    l.state, 
    CASE 
        WHEN DATE_FORMAT( CONCAT('2024', '-',  MONTH(l.postedtime), '-', DAY(l.postedtime)), '%Y-%m-%d' ) BETWEEN DATE_FORMAT("2024-06-20", '%Y-%m-%d' ) AND DATE_FORMAT("2024-09-22", '%Y-%m-%d' ) THEN 'Summer' 
        WHEN DATE_FORMAT( CONCAT('2024', '-',  MONTH(l.postedtime), '-', DAY(l.postedtime)), '%Y-%m-%d' ) BETWEEN DATE_FORMAT("2024-09-22", '%Y-%m-%d' ) AND DATE_FORMAT("2024-12-20", '%Y-%m-%d' ) THEN 'Fall' 
        WHEN DATE_FORMAT( CONCAT('2024', '-',  MONTH(l.postedtime), '-', DAY(l.postedtime)), '%Y-%m-%d' ) BETWEEN DATE_FORMAT("2024-12-21", '%Y-%m-%d' ) AND DATE_FORMAT("2024-03-20", '%Y-%m-%d' ) THEN 'Winter' 
        WHEN DATE_FORMAT( CONCAT('2024', '-',  MONTH(l.postedtime), '-', DAY(l.postedtime)), '%Y-%m-%d' ) BETWEEN DATE_FORMAT("2024-03-21", '%Y-%m-%d' ) AND DATE_FORMAT("2024-06-19", '%Y-%m-%d' ) THEN 'Spring' 
        
    END AS `season`

FROM gasoline_pricing l 
WHERE IFNULL(l.postedtime, 'N/A') != 'N/A'

"""
gasoline_pricing: 'ODINSpark' = ODINSpark(app_name='gasoline_pricing').exec_odin_mysql_query(query=query)
gasoline_pricing = gasoline_pricing.drop_duplicates(subset=['timestamp', 'latitude', 'longitude' ] )
gasoline_pricing = gasoline_pricing.withColumn('price', f.col('price').cast('double').alias('price'))
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
DEBUG:odin_eia_db.engine.odin_spark:
        
                  ..`                                 ./sdmds:`                                             
                +dmmds+-`                          -+ymmmmmmmmdo:`                            ./osyyy/`     
              `ommmmmmmmdhso:`                 ./shmmmmmmmmmmmmmmds/.                     ./ohmmmdmdmm/     
              +mmmmmmmmmmmmmmmhy+//:://///++shdmmmmmmmmmmhsdmmmmmmmmmddyso+/::--.-://+syhdmmmmmmmmmmmmh`    
             :dmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm+    
            .hdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmsooodmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd.   
            ommmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmh+osoymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm/   
           :dmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+oohmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmms   
          `hdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdsoo+ososmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd.  
          /dmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmyoo++ooooymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmo  
         `ydmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmhoooooooooodmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd` 
         :mmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+o+ooooosmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm/ 
         ommmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmy+o+o+o+o+oooymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmy 
         ydmmmdddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmhooo+ooo+oo+ooodmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd.
        .dmmmddddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+oo+o+ooo+oosmmmmmmmmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmd:
        -dmmmmddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmsoo+oooo+oo+oooo+hmmmmmmmmmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmdo
        +dmmmmdmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmh+oo+oooooooooooooodmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdy
        sdmmmmddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+ooooo+o+oooooo+smmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdh
        ymmmmddsoosssssyyyyyyyhhhhhhhhhhhhhhhhhs+o+o++o+o++oooooooo+hhhddddddddddddddmdmmmddmdmdmdmdmmmmmmmd
        hmmmmdddyo+++++++++ooo+++o+oooooooooooooo+oosyyyhhhddhdddddyssssoooooooooooooooooooosoooooooymmmmmmd
        hmmmmddmmdhso++++++++o++o+++ooo++++o+++ohhdddmddddddddddddddddddhy+++o++oo+oooooooooo+++++shdmmmmmmh
        ymmmmdddmmmmdyso++++o+o+ooooooooooo+oshdmddddddddmddddddddddddhyso+oooooo++o++o+o++o++oshdmmmmmmmmdh
        ydmmmddmmmmmdmmdyso+o++oooooooo+++o+ddddddmddddmddddddddddddddddhyo+oo+o+++o+++++++oshddmmmmmmmmmmdy
        odmmmdddmdmddmmmmmhyoo++o+o+ooo+o+osddmmdddddddddddddddddddddddhyso++o++++++++++ooydmmmmmdmmmmmmmmdo
        /dmmmdddmddmdmmmmmmmdysoo++++oo+o+oddddmdddddddddddddddddddddddddyo++++o++++++oyddmmmmmmmmmmmmmmmmd/
        .dmmmmdddddmddmmmmmmmmmdyso+++o+ooyddmmdmddddddddmddddddddddddddysoooo++++ooyhdmmdmmmmmmmmmmmmmmmdd`
        `ydmmmdddddmmddmmmmmmmmmmdhso++++sdddmmmmdddddddmmddddddddddddmddyo+++++oyhdmmmmmmmmmmmmmmmmmmmmmds 
         +dmmmdddddddddmmmmmmmmmmmmmdysoohdmdmmmmmdddddddmmdddddddddddddhyo++oyhdmmmmmmmmmmmmmmmmmmmmmmmmd: 
         -dmmmddddddddmmmmmmmmmmmmmmmmdhooyddmdmmmddddmdddddddddddddddddy+oyhdmmmmmmmmmmmmmmmmmmmmmdmmmmdy` 
         `ydmmmdddddddddddmmmmmmmmmmmmmyoohddddmmmdddmmmddmddmdddddddddds+sdmmmmmmmmmmmmmmmmmmmmmmmmmmmmd/  
          :dmmmddddmddddddmmmmmmmmmmmddoo+yddddddmdmmmmmdddddddddddddddddy+ymmmmmmdmmmmmdmmmmmmmmmmmmmmdh`  
           sdmmmddddddmddmmmmmmmmmmmmdso+oosyyyydddmmddddddddddddddddddyos+odmmmmmddmmmmmmmmdmmmmmmmmmmdo   
           -dmmmdddddddmdmmmmmmmmdmmmyoo++o+o++oyddmdddddddddddddddddddh+o++sdmmmmdmmmmmmmmmmmmmmmmmmmmy`   
            odmmdddddddmmmmmmmmmmddmhoooooo+oo++ydddddhhyosssdddddddddso+++o+hmdmmmmmmmmmmmmdmmdmmmmmmd-    
            `hdmmmdddddmdmmmdmmmmdddoo++oo+++o++hddhso+++++++oydddddddyo+++o+oddmmmmmmdmmmmmmmmmmmmmmdo     
             -hdmmdddddmdddddmddmmdy++++oo++++o+hys++ooshyso+++yddddyso++++o++ydmmmmmmmmmdmmmmddmmmmdy`     
              /ddmdddddddmdddmmmmmh+o+++oo+++++++++oshdddddhyoooyhddy++++++++++ddmdmmmdmdmmmmmddmmmdh-      
              `sdmmmdddddddmdmmmmdo++++oo++++++++oydddmdddddddhyoosyo+++++o++o+ydddddddmdmmmmmmmmmmd:       
               `yddmmdddddmmmmmmms++++o++++++ooyddddmmdmdmdddmdddys+++++++o+++oodddmddddddmmddmmmdd+        
                -ydmmmddddddddmmh++++++o++oshddddddmmmdddddddddddddhyoo+++o+++o+sdddmddmmmddddmmmd+         
                 .hdmmmdmddddddds++++++osydmmdddddmmddddddddddddddddddhso++++++oohdmmmddddddmmmddo`         
                  -ydmmmmdddddds++++ooyhdmmdmmmddddmdddmdmddddddmddddddddyso+++++odddmddddddmmddo`          
                   .ydmmmddddddoo+oyhdmmmmmdmmmddddddddmdddmddddddddddddddddyo++++sddmdddddmmdd+`           
                    `sdmmmmmdds+shddmdmmmmmmddmdddddddddddddddddddddddddddddddhs+++hdddddmmmdd+`            
                     `+ddmmmddyhddmmmmmmmmmdddmdddddddddddddddddddddddddddddddmdhsosddddmmmdh/              
                       :hdmmmddddddmmmmdmmmddddddddddddddddddddddddddddddddddddddddddddmmmdh-               
                        .sdmmmdddmmmmmmmdmdmmddddddddddddddddddddddddddddddddddddddddmmmddy.                
                          /hdmmmmmmmmmddddddmddddddddddddddddddddddddddddddddddddddmmmmdh/`                 
                           .sddmmmmddddddddddddddddddddddddddddddddddddddddddddddddddddo.                   
                             /hdmdmmdmddddddmdmddddddddddddddddddddddddddddddddddddddo-                     
                              .odddmdmddddddmdmddddddddddddddddmdddddddddddddmdddddo-                       
                                .+hddmdmddddmdddddddddddddddddddddddddddddddddddh+.                         
                                  `/yddddmddddmddddddddddddddddddmddddddddddddy/`                           
                                    `:sddddddddddddddddddddddddmddddddddmddds:                              
                                       ./ydddddddddddddddddddddddddddddddy/`                                
                                          -+hdddddddddddddddddddddddddy+.                                   
                                            `-ohdddddddddddddddddddy/.                                      
                                                -+yddddddddddddyo-`                                         
                                                   .:oyddddhs/.    
           ▄▄▄▄    ▄▄▄▄▄      ▄▄▄▄▄▄   ▄▄▄   ▄▄              ▄▄▄▄    ▄▄▄▄▄▄       ▄▄     ▄▄▄▄▄▄    ▄▄   ▄▄▄ 
          ██▀▀██   ██▀▀▀██    ▀▀██▀▀   ███   ██            ▄█▀▀▀▀█   ██▀▀▀▀█▄    ████    ██▀▀▀▀██  ██  ██▀  
         ██    ██  ██    ██     ██     ██▀█  ██            ██▄       ██    ██    ████    ██    ██  ██▄██    
         ██    ██  ██    ██     ██     ██ ██ ██             ▀████▄   ██████▀    ██  ██   ███████   █████    
         ██    ██  ██    ██     ██     ██  █▄██   █████         ▀██  ██         ██████   ██  ▀██▄  ██  ██▄  
          ██▄▄██   ██▄▄▄██    ▄▄██▄▄   ██   ███            █▄▄▄▄▄█▀  ██        ▄██  ██▄  ██    ██  ██   ██▄ 
           ▀▀▀▀    ▀▀▀▀▀      ▀▀▀▀▀▀   ▀▀   ▀▀▀             ▀▀▀▀▀    ▀▀        ▀▀    ▀▀  ▀▀    ▀▀▀ ▀▀    ▀▀ 

        [ OK ] Launching ODIN @2024-08-14T19:35:38.302220                                                                   
        
DEBUG:odin_eia_db.engine.odin_spark:[ OK ] Loading Spark Configuration For ODIN-Project...
:: loading settings :: url = jar:file:/opt/spark-3.5.1-bin-hadoop3/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml
Ivy Default Cache set to: /home/ubuntu/.ivy2/cache
The jars for the packages stored in: /home/ubuntu/.ivy2/jars
org.apache.hadoop#hadoop-aws added as a dependency
com.amazonaws#aws-java-sdk-bundle added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-ab2ec321-17ef-4028-8654-4625952ed070;1.0
	confs: [default]
	found org.apache.hadoop#hadoop-aws;3.3.1 in central
	found com.amazonaws#aws-java-sdk-bundle;1.11.901 in central
	found org.wildfly.openssl#wildfly-openssl;1.0.7.Final in central
:: resolution report :: resolve 400ms :: artifacts dl 11ms
	:: modules in use:
	com.amazonaws#aws-java-sdk-bundle;1.11.901 from central in [default]
	org.apache.hadoop#hadoop-aws;3.3.1 from central in [default]
	org.wildfly.openssl#wildfly-openssl;1.0.7.Final from central in [default]
	---------------------------------------------------------------------
	|                  |            modules            ||   artifacts   |
	|       conf       | number| search|dwnlded|evicted|| number|dwnlded|
	---------------------------------------------------------------------
	|      default     |   3   |   0   |   0   |   0   ||   3   |   0   |
	---------------------------------------------------------------------
:: retrieving :: org.apache.spark#spark-submit-parent-ab2ec321-17ef-4028-8654-4625952ed070
	confs: [default]
	0 artifacts copied, 3 already retrieved (0kB/5ms)
24/08/14 19:35:44 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
DEBUG:odin_eia_db.engine.odin_spark:[ OK ] Loading Hadoop Configuration For ODIN-Project...
In [10]:
gasoline_pricing.printSchema() 
root
 |-- gas_station: string (nullable = true)
 |-- timestamp: timestamp (nullable = true)
 |-- transc_date: string (nullable = true)
 |-- store_loc: string (nullable = true)
 |-- price: double (nullable = true)
 |-- longitude: double (nullable = true)
 |-- latitude: double (nullable = true)
 |-- star_rating: double (nullable = true)
 |-- state: string (nullable = true)
 |-- season: string (nullable = true)

In [15]:
gasoline_pricing.show(5) 
[Stage 19:>                                                         (0 + 1) / 1]
+--------------+-------------------+-----------+--------------------+-----+-----------+---------+-----------+-----+------+
|   gas_station|          timestamp|transc_date|           store_loc|price|  longitude| latitude|star_rating|state|season|
+--------------+-------------------+-----------+--------------------+-----+-----------+---------+-----------+-----+------+
|Murphy Express|2024-08-02 00:00:14| 2024-08-02|221 S White Sands...| 2.86|-105.959781|32.888533|        0.0|   NM|Summer|
|Murphy Express|2024-08-02 00:00:16| 2024-08-02|199 Maryport Dr, ...| 2.85| -78.956583|33.635091|        0.0|   SC|Summer|
|Murphy Express|2024-08-02 00:00:16| 2024-08-02|13443 South Hamil...| 3.15|-111.976158|40.506858|        0.0|   UT|Summer|
|    Murphy USA|2024-08-02 00:00:21| 2024-08-02|210 Haynes Street...| 2.82| -86.081775|33.439887|        0.0|   AL|Summer|
|    Murphy USA|2024-08-02 00:00:21| 2024-08-02|1807 E Highland D...| 2.81| -90.685234|35.821236|        0.0|   AR|Summer|
+--------------+-------------------+-----------+--------------------+-----+-----------+---------+-----------+-----+------+
only showing top 5 rows

                                                                                
  • ODIN Legacy Gasoline Pricing
In [3]:
query: str = """
SELECT 
    l.gas_station, 
    l.timestamp,
    DATE_FORMAT(l.timestamp , '%Y-%m-%d' ) AS `transc_date`, 
    CONCAT(l.address,', ', l.city, ', ', l.state, ', ', l.zip_code) AS `store_loc`, 
    IFNULL( l.price, 0.0) AS `price`, 
    IFNULL( l.longitude, 0.0) AS `longitude`, 
    IFNULL( l.latitude, 0.0) AS `latitude`,  
    l.star_rating,
    l.state,

    /* naive season labeling */
    CASE 
        WHEN DATE_FORMAT( CONCAT('2024', '-',  MONTH(l.timestamp), '-', DAY(l.timestamp)), '%Y-%m-%d' ) BETWEEN DATE_FORMAT("2024-06-20", '%Y-%m-%d' ) AND DATE_FORMAT("2024-09-22", '%Y-%m-%d' ) THEN 'Summer' 
        WHEN DATE_FORMAT( CONCAT('2024', '-',  MONTH(l.timestamp), '-', DAY(l.timestamp)), '%Y-%m-%d' ) BETWEEN DATE_FORMAT("2024-09-22", '%Y-%m-%d' ) AND DATE_FORMAT("2024-12-20", '%Y-%m-%d' ) THEN 'Fall' 
        WHEN DATE_FORMAT( CONCAT('2024', '-',  MONTH(l.timestamp), '-', DAY(l.timestamp)), '%Y-%m-%d' ) BETWEEN DATE_FORMAT("2024-12-21", '%Y-%m-%d' ) AND DATE_FORMAT("2025-03-20", '%Y-%m-%d' ) THEN 'Winter' 
        WHEN DATE_FORMAT( CONCAT('2024', '-',  MONTH(l.timestamp), '-', DAY(l.timestamp)), '%Y-%m-%d' ) BETWEEN DATE_FORMAT("2024-03-21", '%Y-%m-%d' ) AND DATE_FORMAT("2024-06-19", '%Y-%m-%d' ) THEN 'Spring'
        ELSE 'Winter'
        
    END AS `season`

FROM legacy_gasoline_pricing l 
WHERE IFNULL(l.timestamp, 'N/A') != 'N/A'

"""
legacy_pricing: 'ODINSpark' = ODINSpark(app_name='gasoline_legacy_pricing').exec_odin_mysql_query(query=query)
legacy_pricing = legacy_pricing.drop_duplicates(subset=['timestamp', 'latitude', 'longitude' ] )
legacy_pricing = legacy_pricing.withColumn('price', f.col('price').cast('double').alias('price'))
DEBUG:odin_eia_db.engine.odin_spark:
        
                  ..`                                 ./sdmds:`                                             
                +dmmds+-`                          -+ymmmmmmmmdo:`                            ./osyyy/`     
              `ommmmmmmmdhso:`                 ./shmmmmmmmmmmmmmmds/.                     ./ohmmmdmdmm/     
              +mmmmmmmmmmmmmmmhy+//:://///++shdmmmmmmmmmmhsdmmmmmmmmmddyso+/::--.-://+syhdmmmmmmmmmmmmh`    
             :dmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm+    
            .hdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmsooodmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd.   
            ommmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmh+osoymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm/   
           :dmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+oohmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmms   
          `hdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdsoo+ososmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd.  
          /dmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmyoo++ooooymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmo  
         `ydmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmhoooooooooodmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd` 
         :mmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+o+ooooosmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm/ 
         ommmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmy+o+o+o+o+oooymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmy 
         ydmmmdddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmhooo+ooo+oo+ooodmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd.
        .dmmmddddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+oo+o+ooo+oosmmmmmmmmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmd:
        -dmmmmddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmsoo+oooo+oo+oooo+hmmmmmmmmmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmdo
        +dmmmmdmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmh+oo+oooooooooooooodmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdy
        sdmmmmddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+ooooo+o+oooooo+smmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdh
        ymmmmddsoosssssyyyyyyyhhhhhhhhhhhhhhhhhs+o+o++o+o++oooooooo+hhhddddddddddddddmdmmmddmdmdmdmdmmmmmmmd
        hmmmmdddyo+++++++++ooo+++o+oooooooooooooo+oosyyyhhhddhdddddyssssoooooooooooooooooooosoooooooymmmmmmd
        hmmmmddmmdhso++++++++o++o+++ooo++++o+++ohhdddmddddddddddddddddddhy+++o++oo+oooooooooo+++++shdmmmmmmh
        ymmmmdddmmmmdyso++++o+o+ooooooooooo+oshdmddddddddmddddddddddddhyso+oooooo++o++o+o++o++oshdmmmmmmmmdh
        ydmmmddmmmmmdmmdyso+o++oooooooo+++o+ddddddmddddmddddddddddddddddhyo+oo+o+++o+++++++oshddmmmmmmmmmmdy
        odmmmdddmdmddmmmmmhyoo++o+o+ooo+o+osddmmdddddddddddddddddddddddhyso++o++++++++++ooydmmmmmdmmmmmmmmdo
        /dmmmdddmddmdmmmmmmmdysoo++++oo+o+oddddmdddddddddddddddddddddddddyo++++o++++++oyddmmmmmmmmmmmmmmmmd/
        .dmmmmdddddmddmmmmmmmmmdyso+++o+ooyddmmdmddddddddmddddddddddddddysoooo++++ooyhdmmdmmmmmmmmmmmmmmmdd`
        `ydmmmdddddmmddmmmmmmmmmmdhso++++sdddmmmmdddddddmmddddddddddddmddyo+++++oyhdmmmmmmmmmmmmmmmmmmmmmds 
         +dmmmdddddddddmmmmmmmmmmmmmdysoohdmdmmmmmdddddddmmdddddddddddddhyo++oyhdmmmmmmmmmmmmmmmmmmmmmmmmd: 
         -dmmmddddddddmmmmmmmmmmmmmmmmdhooyddmdmmmddddmdddddddddddddddddy+oyhdmmmmmmmmmmmmmmmmmmmmmdmmmmdy` 
         `ydmmmdddddddddddmmmmmmmmmmmmmyoohddddmmmdddmmmddmddmdddddddddds+sdmmmmmmmmmmmmmmmmmmmmmmmmmmmmd/  
          :dmmmddddmddddddmmmmmmmmmmmddoo+yddddddmdmmmmmdddddddddddddddddy+ymmmmmmdmmmmmdmmmmmmmmmmmmmmdh`  
           sdmmmddddddmddmmmmmmmmmmmmdso+oosyyyydddmmddddddddddddddddddyos+odmmmmmddmmmmmmmmdmmmmmmmmmmdo   
           -dmmmdddddddmdmmmmmmmmdmmmyoo++o+o++oyddmdddddddddddddddddddh+o++sdmmmmdmmmmmmmmmmmmmmmmmmmmy`   
            odmmdddddddmmmmmmmmmmddmhoooooo+oo++ydddddhhyosssdddddddddso+++o+hmdmmmmmmmmmmmmdmmdmmmmmmd-    
            `hdmmmdddddmdmmmdmmmmdddoo++oo+++o++hddhso+++++++oydddddddyo+++o+oddmmmmmmdmmmmmmmmmmmmmmdo     
             -hdmmdddddmdddddmddmmdy++++oo++++o+hys++ooshyso+++yddddyso++++o++ydmmmmmmmmmdmmmmddmmmmdy`     
              /ddmdddddddmdddmmmmmh+o+++oo+++++++++oshdddddhyoooyhddy++++++++++ddmdmmmdmdmmmmmddmmmdh-      
              `sdmmmdddddddmdmmmmdo++++oo++++++++oydddmdddddddhyoosyo+++++o++o+ydddddddmdmmmmmmmmmmd:       
               `yddmmdddddmmmmmmms++++o++++++ooyddddmmdmdmdddmdddys+++++++o+++oodddmddddddmmddmmmdd+        
                -ydmmmddddddddmmh++++++o++oshddddddmmmdddddddddddddhyoo+++o+++o+sdddmddmmmddddmmmd+         
                 .hdmmmdmddddddds++++++osydmmdddddmmddddddddddddddddddhso++++++oohdmmmddddddmmmddo`         
                  -ydmmmmdddddds++++ooyhdmmdmmmddddmdddmdmddddddmddddddddyso+++++odddmddddddmmddo`          
                   .ydmmmddddddoo+oyhdmmmmmdmmmddddddddmdddmddddddddddddddddyo++++sddmdddddmmdd+`           
                    `sdmmmmmdds+shddmdmmmmmmddmdddddddddddddddddddddddddddddddhs+++hdddddmmmdd+`            
                     `+ddmmmddyhddmmmmmmmmmdddmdddddddddddddddddddddddddddddddmdhsosddddmmmdh/              
                       :hdmmmddddddmmmmdmmmddddddddddddddddddddddddddddddddddddddddddddmmmdh-               
                        .sdmmmdddmmmmmmmdmdmmddddddddddddddddddddddddddddddddddddddddmmmddy.                
                          /hdmmmmmmmmmddddddmddddddddddddddddddddddddddddddddddddddmmmmdh/`                 
                           .sddmmmmddddddddddddddddddddddddddddddddddddddddddddddddddddo.                   
                             /hdmdmmdmddddddmdmddddddddddddddddddddddddddddddddddddddo-                     
                              .odddmdmddddddmdmddddddddddddddddmdddddddddddddmdddddo-                       
                                .+hddmdmddddmdddddddddddddddddddddddddddddddddddh+.                         
                                  `/yddddmddddmddddddddddddddddddmddddddddddddy/`                           
                                    `:sddddddddddddddddddddddddmddddddddmddds:                              
                                       ./ydddddddddddddddddddddddddddddddy/`                                
                                          -+hdddddddddddddddddddddddddy+.                                   
                                            `-ohdddddddddddddddddddy/.                                      
                                                -+yddddddddddddyo-`                                         
                                                   .:oyddddhs/.    
           ▄▄▄▄    ▄▄▄▄▄      ▄▄▄▄▄▄   ▄▄▄   ▄▄              ▄▄▄▄    ▄▄▄▄▄▄       ▄▄     ▄▄▄▄▄▄    ▄▄   ▄▄▄ 
          ██▀▀██   ██▀▀▀██    ▀▀██▀▀   ███   ██            ▄█▀▀▀▀█   ██▀▀▀▀█▄    ████    ██▀▀▀▀██  ██  ██▀  
         ██    ██  ██    ██     ██     ██▀█  ██            ██▄       ██    ██    ████    ██    ██  ██▄██    
         ██    ██  ██    ██     ██     ██ ██ ██             ▀████▄   ██████▀    ██  ██   ███████   █████    
         ██    ██  ██    ██     ██     ██  █▄██   █████         ▀██  ██         ██████   ██  ▀██▄  ██  ██▄  
          ██▄▄██   ██▄▄▄██    ▄▄██▄▄   ██   ███            █▄▄▄▄▄█▀  ██        ▄██  ██▄  ██    ██  ██   ██▄ 
           ▀▀▀▀    ▀▀▀▀▀      ▀▀▀▀▀▀   ▀▀   ▀▀▀             ▀▀▀▀▀    ▀▀        ▀▀    ▀▀  ▀▀    ▀▀▀ ▀▀    ▀▀ 

        [ OK ] Launching ODIN @2024-08-14T19:36:03.234524                                                                   
        
DEBUG:odin_eia_db.engine.odin_spark:[ OK ] Loading Spark Configuration For ODIN-Project...
DEBUG:odin_eia_db.engine.odin_spark:[ OK ] Loading Hadoop Configuration For ODIN-Project...
In [29]:
legacy_pricing.printSchema() 
root
 |-- gas_station: string (nullable = true)
 |-- timestamp: string (nullable = true)
 |-- transc_date: string (nullable = true)
 |-- store_loc: string (nullable = true)
 |-- price: double (nullable = true)
 |-- longitude: string (nullable = true)
 |-- latitude: string (nullable = true)
 |-- star_rating: string (nullable = true)
 |-- state: string (nullable = true)
 |-- season: string (nullable = true)

In [19]:
legacy_pricing.show(5) 
[Stage 30:>                                                         (0 + 1) / 1]
+-----------+--------------------+-----------+--------------------+-----+-----------------+---------------+-----------+-----+------+
|gas_station|           timestamp|transc_date|           store_loc|price|        longitude|       latitude|star_rating|state|season|
+-----------+--------------------+-----------+--------------------+-----+-----------------+---------------+-----------+-----+------+
|Hanson Mart|2023-05-19T16:42:...| 2023-05-19|1713 Hanson Rd, E...| 3.19| -76.312236785888|39.424574738401|        3.7|   MD|Spring|
|    Safeway|2023-05-19T16:45:...| 2023-05-19|91-1119 Keaunui D...| 4.57|-158.021469712257| 21.32794650025|        3.8|   HI|Spring|
|    Chevron|2023-05-19T21:28:...| 2023-05-19|4860 Bill Gardner...| 2.89| -84.120844602584|33.353470102796|        3.7|   GA|Spring|
|    Raceway|2023-05-20T12:30:...| 2023-05-20|641 US-1 S, Ediso...| 3.16|  -74.39227938652|40.507942465825|        4.0|   NJ|Spring|
| Royal Mart|2023-05-20T12:32:...| 2023-05-20|56 2nd St, Highsp...| 3.29|        -76.79547|       40.21131|        3.6|   PA|Spring|
+-----------+--------------------+-----------+--------------------+-----+-----------------+---------------+-----------+-----+------+
only showing top 5 rows

                                                                                

Merged Gasoline Pricing¶

In [7]:
odin_gasoline_pricing = gasoline_pricing.union(legacy_pricing)
odin_gasoline_pricing = odin_gasoline_pricing.withColumn('padd_district', get_padd_district('state') ) # PADD DISTRICT REGION 
odin_gasoline_pricing = odin_gasoline_pricing.withColumn('month', f.month('transc_date') )
odin_gasoline_pricing = odin_gasoline_pricing.withColumn('month_name', get_month_name('month') )
odin_gasodin_gasoline_pricingoline_pricing = odin_gasoline_pricing.withColumn('state_name', get_state_name('state') )
odin_gasoline_pricing.show(5) 
                                                                                
+--------------+-------------------+-----------+--------------------+-----+-----------+---------+-----------+-----+------+-------------+-----+----------+
|   gas_station|          timestamp|transc_date|           store_loc|price|  longitude| latitude|star_rating|state|season|padd_district|month|month_name|
+--------------+-------------------+-----------+--------------------+-----+-----------+---------+-----------+-----+------+-------------+-----+----------+
|Murphy Express|2024-08-02 00:00:14| 2024-08-02|221 S White Sands...| 2.86|-105.959781|32.888533|        0.0|   NM|Summer|       PADD 5|    8|    August|
|Murphy Express|2024-08-02 00:00:16| 2024-08-02|199 Maryport Dr, ...| 2.85| -78.956583|33.635091|        0.0|   SC|Summer|       PADD 3|    8|    August|
|Murphy Express|2024-08-02 00:00:16| 2024-08-02|13443 South Hamil...| 3.15|-111.976158|40.506858|        0.0|   UT|Summer|       PADD 5|    8|    August|
|    Murphy USA|2024-08-02 00:00:20| 2024-08-02|2010 West Grant S...| 2.78| -97.249409| 34.73856|        0.0|   OK|Summer|       PADD 3|    8|    August|
|    Murphy USA|2024-08-02 00:00:21| 2024-08-02|210 Haynes Street...| 2.82| -86.081775|33.439887|        0.0|   AL|Summer|       PADD 3|    8|    August|
+--------------+-------------------+-----------+--------------------+-----+-----------+---------+-----------+-----+------+-------------+-----+----------+
only showing top 5 rows

In [31]:
odin_gasoline_pricing.printSchema() 
root
 |-- gas_station: string (nullable = true)
 |-- timestamp: string (nullable = true)
 |-- transc_date: string (nullable = true)
 |-- store_loc: string (nullable = true)
 |-- price: double (nullable = true)
 |-- longitude: string (nullable = true)
 |-- latitude: string (nullable = true)
 |-- star_rating: string (nullable = true)
 |-- state: string (nullable = true)
 |-- season: string (nullable = true)
 |-- padd_district: string (nullable = true)
 |-- month: integer (nullable = true)
 |-- month_name: string (nullable = true)

Distribution of Gasoline Price in the US¶

In [7]:
gas_prices = odin_gasoline_pricing.select('price').rdd.flatMap(lambda row: row).collect()
                                                                                
In [28]:
fig = plt.figure(figsize=(12,4))
ax = fig.add_subplot()
sns.distplot(gas_prices, ax=ax)
ax.set_title("Distribution of Gasoline Price in the US", fontweight='bold', fontsize=14)

mean: float = round( np.mean(gas_prices) , 2 )
median: float = round( np.median(gas_prices) , 2 )
ymin, ymax = ax.get_ylim() 
ax.axvline(x=mean, ymin=ymin, ymax=ymax, linestyle="--", color='black', label=f"Mean Gas Price: ${mean}")
ax.axvline(x=median, ymin=ymin, ymax=ymax, linestyle="-.", color='blue', label=f"Median Gas Price: ${median}")
ax.legend(title='Stats Desc', fancybox=True, shadow=True)

plt.tight_layout() 
No description has been provided for this image

Daily Average Gasoline Price in the US¶

In [29]:
avg_daily_prices: List[Dict] = [ item.asDict() for item in odin_gasoline_pricing.groupby('transc_date').agg(f.round( f.avg('price').alias('avg_price') , 2).alias('avg_price') ).orderBy('transc_date').collect() ]
avg_price: 'DataFrame' = pd.DataFrame( avg_daily_prices ).iloc[1:] 
avg_price['transc_date'] = pd.to_datetime(avg_price['transc_date'] ) 
                                                                                
In [64]:
fig = plt.figure() 
ax = fig.add_subplot() 
avg_price.set_index('transc_date')['avg_price'].plot(figsize=(12,4), ax=ax) # some desc July,2023
ax.set_title("Daily Average Gasoline Price in the US", fontweight='bold', fontsize=14)
ax.set_ylabel("$/Gallon", fontweight='bold') 

plt.tight_layout() 
No description has been provided for this image

Average Gas Price by Padd District and Season¶

In [59]:
odin_gasoline_pricing.filter("padd_district != 'N/A' ").filter("padd_district != 'Not Applicable' ").groupby('padd_district').pivot('season').agg(f.round(f.avg('price') , 2).alias('avg_price') ).show()
[Stage 158:======================================>                  (2 + 1) / 3]
+-------------+----+------+------+------+
|padd_district|Fall|Spring|Summer|Winter|
+-------------+----+------+------+------+
|       PADD 1| 3.1|  3.26|  3.36|  2.92|
|       PADD 2|2.87|  3.08|  3.22|  2.64|
|       PADD 3|2.69|  2.91|  3.01|  2.61|
|       PADD 5|3.42|  3.56|  3.75|   2.9|
+-------------+----+------+------+------+

                                                                                

Average Gas Price by Padd District and Month¶

In [84]:
padd_district: 'DataFrame' = pd.DataFrame([ item.asDict() for item in  odin_gasoline_pricing.groupby('padd_district').pivot('month').mean('price').alias('avg_price').collect() ] )
                                                                                
In [93]:
padd_district.rename({ str(k):v for k,v in MONTH_LOOKUP.items()}, axis=1).set_index('padd_district').applymap(lambda item: round(item, 2)).iloc[:, 1:].sort_index().iloc[2:, :]  
# avg_price increase 
Out[93]:
January February March April May June July August September October November December
padd_district
PADD 1 2.84 2.95 3.06 3.24 3.30 3.26 3.24 3.41 3.45 3.21 3.02 2.91
PADD 2 2.47 2.68 2.97 3.08 3.06 3.10 3.10 3.27 3.35 3.04 2.78 2.54
PADD 3 2.47 2.65 2.85 2.95 2.91 2.87 2.86 3.06 3.12 2.83 2.59 2.46
PADD 5 2.77 2.86 3.26 3.62 3.54 3.60 3.68 3.73 3.92 3.70 3.25 2.92

Average Gasoline Price in Warm/Hot Weather for PADD 1 District¶

  • Connecticut
  • Delaware
  • Maine
  • Maryland
  • Massachusetts
  • New Hampshire
  • New Jersey
  • New York
  • Pennsylvania
  • Rhode Island
  • Vermont
In [114]:
padd1_pricing_df: 'DataFrame' = pd.read_excel("/mnt/odin_repo/misc/PET_PRI_GND_DCUS_R10_W.xls", sheet_name='Data 1')
padd1_df: 'DataFrame' = pd.DataFrame( [ item.asDict() for item in odin_gasoline_pricing.filter("padd_district == 'PADD 5' ").groupby('transc_date').agg(f.round( f.avg('price'), 2).alias('avg_price') ).orderBy('transc_date').collect() ] )
padd1_df['transc_date'] = pd.to_datetime( padd1_df['transc_date'] ) 

padd1_pricing_df.columns = padd1_pricing_df.iloc[1].tolist() 
padd1_pricing_df = padd1_pricing_df.iloc[2:]
padd1_pricing_df['Date'] = pd.to_datetime(padd1_pricing_df['Date'] ) 
padd1_pricing_df = padd1_pricing_df.set_index('Date').replace({np.nan:0.0} ).astype(float) 
cols: List[str] = ['Weekly East Coast Regular All Formulations Retail Gasoline Prices  (Dollars per Gallon)', 
                   'Weekly East Coast Midgrade All Formulations Retail Gasoline Prices  (Dollars per Gallon)', 
                   'Weekly East Coast Premium All Formulations Retail Gasoline Prices  (Dollars per Gallon)' ]

padd1_pricing_df = padd1_pricing_df[ cols].rename(dict( zip(cols, ['Regular', 'Midgrade', 'Premium' ],)), axis=1) 
padd1_pricing_df['month_name'] = pd.Series(padd1_pricing_df.index).apply(lambda row: row.month_name() )
padd1_pricing_df['month_name'] = pd.Series(padd1_pricing_df.index).apply(lambda row: row.month_name() ).tolist() 
padd1_pricing_df['season'] = pd.Series( padd1_pricing_df.index).apply(lambda row: 'Winter' if (row.month in SEASON_LOOKUP.get('Winter') or row.month == 12) else 'Fall' if row.month in SEASON_LOOKUP.get('Fall') else 'Spring'  if row.month in SEASON_LOOKUP.get('Spring') else 'Summer' ).tolist() 
padd1_pricing_df['year'] = pd.Series(padd1_pricing_df.index).apply(lambda row: row.year).tolist() 
                                                                                

Average Gas Price in PADD District by (Gasoline Formulation)¶

In [86]:
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot() 
padd1_pricing_df["2000":].plot(ax=ax) # TODO: decompose by season, month 
ax.set_title("Weekly Gas Price in PADD 1 District", fontweight='bold', fontsize=14)
ax.set_ylabel("$/Gallon", fontweight='bold' )
ax.legend(title="Gasoline Formulations", fancybox=True, shadow=True)


plt.tight_layout() 
No description has been provided for this image

Average Gas Price in PADD District¶

In [139]:
fig = plt.figure(figsize=(12,4)) 
ax = fig.add_subplot() 

padd1_df.set_index('transc_date')['avg_price'].plot(ax=ax)
ax.set_title("Average Gasoline Price in PADD 1 District", fontweight='bold', fontsize=14)
ax.set_ylabel("$/Gallon", fontweight='bold')
ax.axvspan(xmin=19482, xmax=19610, hatch='/', alpha=0.2, ec='k', label='Summer 2023 Price', color='grey') 
ax.axvspan(xmin=19850, xmax=ax.get_xlim()[-1], hatch='/', alpha=0.2, ec='k', label='Summer 2024 Price', color='grey') 

ax.legend(title="Average Gasoline Price", fancybox=True, shadow=True, bbox_to_anchor=(1.23,1))
plt.tight_layout() 
No description has been provided for this image

Weekly Gas Price Comparison in PADD 1 District¶

  • Year: 2020 - Present (Weekly Avg Price Inc during SPR - SUM 2022)
In [200]:
fig = plt.figure(figsize=(12,6))
ax = fig.add_subplot()
padd1_pricing_df.query("year >= 2020").groupby(['year', 'season'] )[padd1_pricing_df.columns.tolist()[:3]].mean().map(lambda row: round(row,2) ).plot(kind='bar', ax=ax, hatch='//', ec='k')
ax.set_title("Weekly Gas Price Comparison in PADD 1 District", fontweight='bold', fontsize=14)
ax.set_ylabel("$/Gallon",  fontweight='bold' )
ax.legend(bbox_to_anchor=(1.13, 1), fancybox=True, shadow=True )
ax.set_xticklabels(ax.get_xticklabels(), rotation=90, fontweight='bold' )
plt.tight_layout() 
No description has been provided for this image

PADD 1: Gas Price From 2022 - 2024¶

In [202]:
fig = plt.figure(figsize=(20,12))
ax = fig.add_subplot(2,2,1)
padd1_pricing_df["2021-01":"2021-12"][padd1_pricing_df.columns.tolist()[:3]].plot(ax=ax) 
ax.legend(title="Gasoline Formulation", fancybox=True, shadow=True)
ax.set_title("PADD 1: Gas Price 2021", fontweight='bold')
ax.set_ylabel("$/Gallon")

ax = fig.add_subplot(2,2,2)
padd1_pricing_df["2022-01":"2022-12"][padd1_pricing_df.columns.tolist()[:3]].plot(ax=ax) 
ax.legend(title="Gasoline Formulation", fancybox=True, shadow=True)
ax.set_title("PADD 1: Gas Price 2022", fontweight='bold')
ax.set_ylabel("$/Gallon")

ax = fig.add_subplot(2,2,3)
padd1_pricing_df["2023-01":"2023-12"][padd1_pricing_df.columns.tolist()[:3]].plot(ax=ax) 
ax.legend(title="Gasoline Formulation", fancybox=True, shadow=True)
ax.set_title("PADD 1: Gas Price 2023", fontweight='bold')
ax.set_ylabel("$/Gallon")

ax = fig.add_subplot(2,2,4)
padd1_pricing_df["2022-01":][padd1_pricing_df.columns.tolist()[:3]].plot(ax=ax) 
ax.legend(title="Gasoline Formulation", fancybox=True, shadow=True)
ax.set_title("PADD 1: Gas Price 2024", fontweight='bold')
ax.set_ylabel("$/Gallon")

plt.tight_layout() 
No description has been provided for this image

Average Gasoline Price by Season in PADD 1 District¶

Reid Vapor Pressure¶

The difference involves the fuel’s Reid Vapor Pressure (RVP), which is a measure of how easily the fuel evaporates at a given temperature. Winter-blend gas has a higher RVP because the fuel must be able to evaporate at low temperatures for the engine to operate properly. Summer-blend gas has a lower RVP to prevent excessive evaporation when outside temperatures rise. That evaporation can cause vapor lock in an engine on hot days and contributes to unhealthy ground-level ozone and smog levels.

It’s All About Butane, More or Less¶

Summer-blend gas contains 2 percent butane, but that percentage is higher in the winter blend. Butane is used as lighter fluid and is a secondary ingredient in fuel canisters used for gas grills and camping. As a fuel, butane is less expensive than other gasoline components, but its high volatility limits how much can be included in summer-grade fuel.

In [67]:
.odin_gasoline_pricing.filter("padd_district == 'PADD 1' ").groupby('state_name').pivot('season').agg(f.round(f.avg('price') , 2).alias('avg_price') ).orderBy('state_name').show()
[Stage 313:======================================>                  (2 + 1) / 3]
+-------------+----+------+------+------+
|   state_name|Fall|Spring|Summer|Winter|
+-------------+----+------+------+------+
|  Connecticut|3.15|  3.34|  3.44|  2.91|
|     Delaware| 3.0|  3.28|  3.41|  2.96|
|        Maine|3.13|   3.3|  3.43|  2.94|
|     Maryland|3.02|  3.29|  3.32|  2.97|
|Massachusetts| 3.1|  3.15|  3.29|   2.8|
|New Hampshire| 3.1|  3.21|  3.33|  2.86|
|   New Jersey|3.02|  3.18|  3.31|  2.84|
|     New York|2.99|  3.13|   3.3|  2.78|
| Pennsylvania|3.22|  3.31|  3.28|  3.04|
| Rhode Island|3.23|  3.31|  3.44|  2.95|
|      Vermont|3.39|  3.41|  3.56|  3.06|
+-------------+----+------+------+------+

                                                                                

Correlation between Season and Price in PADD 1 District¶

  1. Summer = 0
  2. Spring = 1
  3. Fall = 2
  4. Winter = 3
In [8]:
df1: 'DataFrame' = pd.DataFrame([ item.asDict() for item in odin_gasoline_pricing.select('season', 'price' ).collect() ] )
df1['encoded_season'] = pd.factorize(df1['season'] )[0] 
                                                                                
In [28]:
%%time
ax = sns.lmplot(data=df1, x='encoded_season', y='price', scatter_kws={'alpha': 0.3}, height=6, aspect=2,  palette='flare', x_jitter=0.3, y_jitter=0.3)
ax.ax.set_title("Correlations between Season and Gasoline Price in PADD 1 District", fontweight='bold', fontsize=14)

plt.tight_layout() 
CPU times: user 3min 19s, sys: 1min 45s, total: 5min 5s
Wall time: 2min 34s
No description has been provided for this image
In [39]:
seasonal_padd1: List[Dict] = odin_gasoline_pricing.filter("padd_district == 'PADD 1' ").select('transc_date', 'state', 'price', 'season' ).collect() 
seasonal_padd1_df: 'DataFrame' = pd.DataFrame([ item.asDict() for item in seasonal_padd1 ] ) 
seasonal_padd1_df['state_name'] = seasonal_padd1_df['state'].apply(lambda row: INVERSE_STATES.get(row)) 
seasonal_padd1_df['transc_date'] = pd.to_datetime(seasonal_padd1_df['transc_date'] ) 

Boxplot: Seasonal Gasoline Price in PADD 1 District¶

In [55]:
fig = plt.figure(figsize=(20,10))
for index,state_name in enumerate( seasonal_padd1_df['state_name'].unique().tolist() , 1): 
    ax = fig.add_subplot(4,3,index)
    seasonal_padd1_df.query(f"state_name == '{state_name}' ").boxplot(by='season', column='price', ax=ax)
    ax.set_title(f"Avg Price in {state_name}", fontweight='bold', )
    ax.set_ylabel("$/Gallon", fontweight='bold') 

plt.tight_layout() 
No description has been provided for this image

Monthly Gasoline Price in PADD 1 District¶

  • The average gasoline price slightly increases between June - September
In [34]:
odin_gasoline_pricing = odin_gasoline_pricing.withColumn('state_name', get_state_name('state'))
odin_gasoline_pricing.filter("padd_district == 'PADD 1'").groupby('month').pivot('state_name').agg(f.round(f.avg('price') , 2).alias('avg_price') ).orderBy('month').show() 
[Stage 73:===================>                                      (1 + 2) / 3]
+-----+-----------+--------+-----+--------+-------------+-------------+----------+--------+------------+------------+-------+
|month|Connecticut|Delaware|Maine|Maryland|Massachusetts|New Hampshire|New Jersey|New York|Pennsylvania|Rhode Island|Vermont|
+-----+-----------+--------+-----+--------+-------------+-------------+----------+--------+------------+------------+-------+
|    1|       2.82|    2.84| 2.88|    2.85|         2.74|         2.79|      2.77|    2.65|        2.98|        2.87|   3.01|
|    2|       2.95|    3.01| 2.97|    3.01|         2.81|         2.87|      2.88|    2.78|        3.07|        2.97|   3.06|
|    3|       3.07|     3.1| 3.06|    3.18|          2.9|         2.98|      2.97|    2.97|        3.15|        3.08|   3.17|
|    4|        3.3|    3.25| 3.28|    3.39|         3.11|         3.17|      3.12|    3.08|        3.33|        3.28|   3.36|
|    5|       3.39|    3.35| 3.34|    3.28|         3.19|         3.25|      3.24|    3.18|        3.34|        3.36|   3.46|
|    6|       3.35|    3.26| 3.33|    3.21|         3.18|         3.24|      3.21|    3.19|        3.26|        3.33|   3.46|
|    7|       3.38|    3.34| 3.36|    3.12|         3.21|         3.26|      3.25|    3.19|        3.07|        3.35|   3.49|
|    8|       3.48|    3.45| 3.43|    3.46|         3.29|         3.33|      3.32|     3.3|        3.41|        3.46|   3.56|
|    9|       3.53|    3.42| 3.53|    3.34|         3.41|         3.42|       3.4|    3.43|        3.49|        3.56|   3.68|
|   10|       3.33|    3.03| 3.28|    3.07|         3.23|         3.27|      3.13|    3.11|        3.29|        3.38|   3.52|
|   11|       3.02|    2.93| 3.06|    2.97|         2.98|         3.01|      2.95|    2.89|        3.15|        3.18|   3.31|
|   12|       2.89|     2.9| 2.91|    2.89|         2.88|         2.84|      2.85|    2.77|        3.08|        2.98|   3.16|
+-----+-----------+--------+-----+--------+-------------+-------------+----------+--------+------------+------------+-------+

                                                                                

Daily Average Gasoline Price Trends in PADD 1 District¶

In [72]:
fig = plt.figure(figsize=(20,10))
for index,state_name in enumerate( seasonal_padd1_df['state_name'].unique().tolist() , 1): 
    ax = fig.add_subplot(4,3,index)
    seasonal_padd1_df.query(f"state_name == '{state_name}' ").set_index('transc_date')['price'].resample('D').mean().interpolate().plot(ax=ax) 
    ax.set_title(f"Daily Avg Gasoline Price Trends in {state_name}", fontweight='bold', )
    ax.set_ylabel("$/Gallon", fontweight='bold') 

plt.tight_layout() 
No description has been provided for this image

Gas Price by City¶

In [5]:
query_execution_id: str = "64703f3f-0ac4-4941-bb53-116ec83e65d4"
s3_path: str = odin_athena.get_query_execution(query_execution_id=query_execution_id).get('s3_path') 
pricing_df: 'DataFrame' = pd.read_csv(s3_path)  
pricing_df['state_name'] = pricing_df['state'].apply(lambda row: INVERSE_STATES.get(row))
pricing_df['padd_district'] = pricing_df['state_name'].apply(lambda row: PADD_AREAS.get( INVERSE_US_REGIONS.get(row)) ) 
pricing_df = pricing_df.dropna(subset='transc_date') 

pricing_df['transc_date'] = pd.to_datetime(pricing_df['transc_date'] ) 
padd1_pricing: 'DataFrame' = pricing_df[ pricing_df['state_name'].apply(lambda row: row in US_REGIONS.get('Northeast') ) ]
padd1_pricing['month'] = padd1_pricing['transc_date'].apply(lambda row: row.month) 

padd1_pricing['season'] = padd1_pricing['month'].apply(lambda row: 'Summer' if row in SEASON_LOOKUP.get('Summer') else 'Fall' if row in SEASON_LOOKUP.get('Fall') else 'Spring' if row in SEASON_LOOKUP.get('Spring') else 'Winter' )
INFO:aiobotocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
In [203]:
fig = plt.figure(figsize=(25,25))
for index,state in enumerate( padd1_pricing['state_name'].unique() ,1):
    ax = fig.add_subplot(4,3,index)
    padd1_pricing.query(f"state_name == '{state}' ").groupby('city')['price'].mean().map(lambda row: round(row, 2)).sort_values(ascending=False).head(10).plot(kind='bar', hatch='/', ax=ax, ec='k')
    ax.set_title(f"Avg Gas Price: Top 10 City in {state}", fontweight='bold', fontsize=12)
    ax.set_ylabel("$/Gallon")

plt.tight_layout() 
No description has been provided for this image

Weekly Gas Price in Massachusetts¶

  • Conventional Premium Gasoline
In [41]:
query: str = """
SELECT 
    p.period, 
    p.value, 
    p.units, 
    CASE 
        WHEN DATE_FORMAT( CONCAT('2024', '-',  MONTH(p.period), '-', DAY(p.period)), '%Y-%m-%d' ) BETWEEN DATE_FORMAT("2024-06-20", '%Y-%m-%d' ) AND DATE_FORMAT("2024-09-22", '%Y-%m-%d' ) THEN 'Summer' 
        WHEN DATE_FORMAT( CONCAT('2024', '-',  MONTH(p.period), '-', DAY(p.period)), '%Y-%m-%d' ) BETWEEN DATE_FORMAT("2024-09-22", '%Y-%m-%d' ) AND DATE_FORMAT("2024-12-20", '%Y-%m-%d' ) THEN 'Fall' 
        WHEN DATE_FORMAT( CONCAT('2024', '-',  MONTH(p.period), '-', DAY(p.period)), '%Y-%m-%d' ) BETWEEN DATE_FORMAT("2024-03-21", '%Y-%m-%d' ) AND DATE_FORMAT("2024-06-19", '%Y-%m-%d' ) THEN 'Spring' 
        ELSE 'Winter'
        
    END AS `season`
    
FROM pricing_massachusetts p 
"""

ma_weekly_pricing: 'DataFrame' = ODINSpark(app_name="Massachusetts Weekly Gas Pricing").exec_odin_mysql_query(query=query)
ma_weekly_pricing = ma_weekly_pricing.withColumn('month', f.month('period') )
ma_weekly_pricing = ma_weekly_pricing.withColumnRenamed('value', 'price' )
ma_weekly_pricing = ma_weekly_pricing.withColumn('year', f.year('period')) 
ma_weekly_pricing = ma_weekly_pricing.withColumn('price', f.col('price').cast('double') )
DEBUG:odin_eia_db.engine.odin_spark:
        
                  ..`                                 ./sdmds:`                                             
                +dmmds+-`                          -+ymmmmmmmmdo:`                            ./osyyy/`     
              `ommmmmmmmdhso:`                 ./shmmmmmmmmmmmmmmds/.                     ./ohmmmdmdmm/     
              +mmmmmmmmmmmmmmmhy+//:://///++shdmmmmmmmmmmhsdmmmmmmmmmddyso+/::--.-://+syhdmmmmmmmmmmmmh`    
             :dmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm+    
            .hdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmsooodmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd.   
            ommmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmh+osoymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm/   
           :dmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+oohmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmms   
          `hdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdsoo+ososmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd.  
          /dmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmyoo++ooooymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmo  
         `ydmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmhoooooooooodmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd` 
         :mmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+o+ooooosmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm/ 
         ommmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmy+o+o+o+o+oooymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmy 
         ydmmmdddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmhooo+ooo+oo+ooodmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd.
        .dmmmddddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+oo+o+ooo+oosmmmmmmmmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmd:
        -dmmmmddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmsoo+oooo+oo+oooo+hmmmmmmmmmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmdo
        +dmmmmdmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmh+oo+oooooooooooooodmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdy
        sdmmmmddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+ooooo+o+oooooo+smmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdh
        ymmmmddsoosssssyyyyyyyhhhhhhhhhhhhhhhhhs+o+o++o+o++oooooooo+hhhddddddddddddddmdmmmddmdmdmdmdmmmmmmmd
        hmmmmdddyo+++++++++ooo+++o+oooooooooooooo+oosyyyhhhddhdddddyssssoooooooooooooooooooosoooooooymmmmmmd
        hmmmmddmmdhso++++++++o++o+++ooo++++o+++ohhdddmddddddddddddddddddhy+++o++oo+oooooooooo+++++shdmmmmmmh
        ymmmmdddmmmmdyso++++o+o+ooooooooooo+oshdmddddddddmddddddddddddhyso+oooooo++o++o+o++o++oshdmmmmmmmmdh
        ydmmmddmmmmmdmmdyso+o++oooooooo+++o+ddddddmddddmddddddddddddddddhyo+oo+o+++o+++++++oshddmmmmmmmmmmdy
        odmmmdddmdmddmmmmmhyoo++o+o+ooo+o+osddmmdddddddddddddddddddddddhyso++o++++++++++ooydmmmmmdmmmmmmmmdo
        /dmmmdddmddmdmmmmmmmdysoo++++oo+o+oddddmdddddddddddddddddddddddddyo++++o++++++oyddmmmmmmmmmmmmmmmmd/
        .dmmmmdddddmddmmmmmmmmmdyso+++o+ooyddmmdmddddddddmddddddddddddddysoooo++++ooyhdmmdmmmmmmmmmmmmmmmdd`
        `ydmmmdddddmmddmmmmmmmmmmdhso++++sdddmmmmdddddddmmddddddddddddmddyo+++++oyhdmmmmmmmmmmmmmmmmmmmmmds 
         +dmmmdddddddddmmmmmmmmmmmmmdysoohdmdmmmmmdddddddmmdddddddddddddhyo++oyhdmmmmmmmmmmmmmmmmmmmmmmmmd: 
         -dmmmddddddddmmmmmmmmmmmmmmmmdhooyddmdmmmddddmdddddddddddddddddy+oyhdmmmmmmmmmmmmmmmmmmmmmdmmmmdy` 
         `ydmmmdddddddddddmmmmmmmmmmmmmyoohddddmmmdddmmmddmddmdddddddddds+sdmmmmmmmmmmmmmmmmmmmmmmmmmmmmd/  
          :dmmmddddmddddddmmmmmmmmmmmddoo+yddddddmdmmmmmdddddddddddddddddy+ymmmmmmdmmmmmdmmmmmmmmmmmmmmdh`  
           sdmmmddddddmddmmmmmmmmmmmmdso+oosyyyydddmmddddddddddddddddddyos+odmmmmmddmmmmmmmmdmmmmmmmmmmdo   
           -dmmmdddddddmdmmmmmmmmdmmmyoo++o+o++oyddmdddddddddddddddddddh+o++sdmmmmdmmmmmmmmmmmmmmmmmmmmy`   
            odmmdddddddmmmmmmmmmmddmhoooooo+oo++ydddddhhyosssdddddddddso+++o+hmdmmmmmmmmmmmmdmmdmmmmmmd-    
            `hdmmmdddddmdmmmdmmmmdddoo++oo+++o++hddhso+++++++oydddddddyo+++o+oddmmmmmmdmmmmmmmmmmmmmmdo     
             -hdmmdddddmdddddmddmmdy++++oo++++o+hys++ooshyso+++yddddyso++++o++ydmmmmmmmmmdmmmmddmmmmdy`     
              /ddmdddddddmdddmmmmmh+o+++oo+++++++++oshdddddhyoooyhddy++++++++++ddmdmmmdmdmmmmmddmmmdh-      
              `sdmmmdddddddmdmmmmdo++++oo++++++++oydddmdddddddhyoosyo+++++o++o+ydddddddmdmmmmmmmmmmd:       
               `yddmmdddddmmmmmmms++++o++++++ooyddddmmdmdmdddmdddys+++++++o+++oodddmddddddmmddmmmdd+        
                -ydmmmddddddddmmh++++++o++oshddddddmmmdddddddddddddhyoo+++o+++o+sdddmddmmmddddmmmd+         
                 .hdmmmdmddddddds++++++osydmmdddddmmddddddddddddddddddhso++++++oohdmmmddddddmmmddo`         
                  -ydmmmmdddddds++++ooyhdmmdmmmddddmdddmdmddddddmddddddddyso+++++odddmddddddmmddo`          
                   .ydmmmddddddoo+oyhdmmmmmdmmmddddddddmdddmddddddddddddddddyo++++sddmdddddmmdd+`           
                    `sdmmmmmdds+shddmdmmmmmmddmdddddddddddddddddddddddddddddddhs+++hdddddmmmdd+`            
                     `+ddmmmddyhddmmmmmmmmmdddmdddddddddddddddddddddddddddddddmdhsosddddmmmdh/              
                       :hdmmmddddddmmmmdmmmddddddddddddddddddddddddddddddddddddddddddddmmmdh-               
                        .sdmmmdddmmmmmmmdmdmmddddddddddddddddddddddddddddddddddddddddmmmddy.                
                          /hdmmmmmmmmmddddddmddddddddddddddddddddddddddddddddddddddmmmmdh/`                 
                           .sddmmmmddddddddddddddddddddddddddddddddddddddddddddddddddddo.                   
                             /hdmdmmdmddddddmdmddddddddddddddddddddddddddddddddddddddo-                     
                              .odddmdmddddddmdmddddddddddddddddmdddddddddddddmdddddo-                       
                                .+hddmdmddddmdddddddddddddddddddddddddddddddddddh+.                         
                                  `/yddddmddddmddddddddddddddddddmddddddddddddy/`                           
                                    `:sddddddddddddddddddddddddmddddddddmddds:                              
                                       ./ydddddddddddddddddddddddddddddddy/`                                
                                          -+hdddddddddddddddddddddddddy+.                                   
                                            `-ohdddddddddddddddddddy/.                                      
                                                -+yddddddddddddyo-`                                         
                                                   .:oyddddhs/.    
           ▄▄▄▄    ▄▄▄▄▄      ▄▄▄▄▄▄   ▄▄▄   ▄▄              ▄▄▄▄    ▄▄▄▄▄▄       ▄▄     ▄▄▄▄▄▄    ▄▄   ▄▄▄ 
          ██▀▀██   ██▀▀▀██    ▀▀██▀▀   ███   ██            ▄█▀▀▀▀█   ██▀▀▀▀█▄    ████    ██▀▀▀▀██  ██  ██▀  
         ██    ██  ██    ██     ██     ██▀█  ██            ██▄       ██    ██    ████    ██    ██  ██▄██    
         ██    ██  ██    ██     ██     ██ ██ ██             ▀████▄   ██████▀    ██  ██   ███████   █████    
         ██    ██  ██    ██     ██     ██  █▄██   █████         ▀██  ██         ██████   ██  ▀██▄  ██  ██▄  
          ██▄▄██   ██▄▄▄██    ▄▄██▄▄   ██   ███            █▄▄▄▄▄█▀  ██        ▄██  ██▄  ██    ██  ██   ██▄ 
           ▀▀▀▀    ▀▀▀▀▀      ▀▀▀▀▀▀   ▀▀   ▀▀▀             ▀▀▀▀▀    ▀▀        ▀▀    ▀▀  ▀▀    ▀▀▀ ▀▀    ▀▀ 

        [ OK ] Launching ODIN @2024-08-24T15:49:29.080684                                                                   
        
DEBUG:odin_eia_db.engine.odin_spark:[ OK ] Loading Spark Configuration For ODIN-Project...
DEBUG:odin_eia_db.engine.odin_spark:[ OK ] Loading Hadoop Configuration For ODIN-Project...
In [57]:
ma_weekly_pricing.printSchema() 
root
 |-- period: string (nullable = true)
 |-- price: string (nullable = true)
 |-- units: string (nullable = true)
 |-- season: string (nullable = true)
 |-- month: integer (nullable = true)
 |-- year: integer (nullable = true)

Boxplot: Weekly Conventional Premium Gasoline¶

In [65]:
ma_weekly_pricing.groupby('year').pivot('season').agg(f.round(f.avg('price'), 2) ).orderBy('year').na.fill(0.0).show(50) 
+----+----+------+------+------+
|year|Fall|Spring|Summer|Winter|
+----+----+------+------+------+
|1994|1.29|   0.0|   0.0|  1.27|
|1995|1.25|  1.33|  1.33|  1.25|
|1996|1.38|   1.4|  1.37|  1.28|
|1997|1.34|  1.36|  1.36|  1.39|
|1998|1.15|   1.2|  1.17|  1.21|
|1999|1.41|  1.22|  1.32|  1.11|
|2000|1.63|  1.63|  1.67|  1.53|
|2001|1.35|   1.7|  1.54|  1.53|
|2002|1.56|  1.53|  1.52|  1.33|
|2003|1.67|  1.66|  1.72|  1.71|
|2004|2.13|  2.03|  2.04|  1.83|
|2005|2.69|  2.35|  2.67|  2.11|
|2006|2.47|  2.99|  3.05|  2.55|
|2007|3.18|  3.12|  3.06|  2.63|
|2008|2.78|  3.89|  4.12|  3.13|
|2009|2.84|  2.47|  2.81|  2.24|
|2010|3.09|  3.04|   2.9|  2.99|
|2011|3.66|  4.01|  3.88|  3.48|
|2012|3.88|  3.98|  3.86|  3.84|
|2013|3.71|  3.86|  3.89|  3.91|
|2014| 3.4|  4.03|  3.88|  3.66|
|2015|2.64|  2.95|  2.95|  2.67|
|2016|2.76|  2.67|  2.69|  2.41|
|2017|3.05|  2.88|  2.91|  2.86|
|2018|3.19|   3.3|  3.31|  3.05|
|2019|3.07|  3.24|  3.13|   2.9|
|2020|2.69|   2.5|  2.71|  2.95|
|2021|3.87|  3.44|  3.62|  3.13|
|2022|4.07|  4.87|  4.71|  4.07|
|2023| 4.0|  4.14|  4.28|  4.02|
|2024| 0.0|  4.21|  4.17|  3.93|
+----+----+------+------+------+

In [102]:
fig = plt.figure(figsize=(20,5)) 
for index,year in enumerate(sorted(ma_df.query("year >= 2020")['year'].unique()), 1):
    ax = fig.add_subplot(1,5,index)
    ma_df.query(f"year == {year}").boxplot(by='season', column='price', ax=ax)
    ax.set_ylabel("$/Gallon")
    ax.set_title(f"{year}", fontweight='bold', fontsize=12)

plt.tight_layout() 
No description has been provided for this image

Trends in Weekly Conventional Premium Gasoline¶

In [94]:
ma_df: 'DataFrame' = pd.DataFrame([ item.asDict() for item in ma_weekly_pricing.select('period', 'price', 'season', 'month').collect() ] ) 
ma_df['period'] = pd.to_datetime(ma_df['period'] ) 
ma_df['year'] = ma_df['period'].apply(lambda row: row.year ) 

fig = plt.figure(figsize=(12,4))
ax = fig.add_subplot()
ma_df.query("year > 2020").set_index("period")['price'].plot(ax=ax)
ax.set_title("Weekly Conventional Premium Gasoline in Massachusetts", fontweight='bold', fontsize=14)
ax.set_ylabel("$/Gallon")

plt.tight_layout() 
No description has been provided for this image

Crude Oils Imports to the US¶

In [4]:
query: str = """ 
SELECT 
    DISTINCT 
    c.originName, 
    c.destinationName,
    c.period, 
    c.gradeName, 
    c.quantity, 
    c.start_date, 
    c.end_date, 
    c.destinationTypeName,
    c.quantity_units
    
FROM crude_oil_imports c
"""
crude_oils: 'ODINSpark' = ODINSpark(app_name='Crude Oils Imports').exec_odin_mysql_query(query=query)
INFO:botocore.credentials:Found credentials in shared credentials file: ~/.aws/credentials
DEBUG:odin_eia_db.engine.odin_spark:
        
                  ..`                                 ./sdmds:`                                             
                +dmmds+-`                          -+ymmmmmmmmdo:`                            ./osyyy/`     
              `ommmmmmmmdhso:`                 ./shmmmmmmmmmmmmmmds/.                     ./ohmmmdmdmm/     
              +mmmmmmmmmmmmmmmhy+//:://///++shdmmmmmmmmmmhsdmmmmmmmmmddyso+/::--.-://+syhdmmmmmmmmmmmmh`    
             :dmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm+    
            .hdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmsooodmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd.   
            ommmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmh+osoymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm/   
           :dmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+oohmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmms   
          `hdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdsoo+ososmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd.  
          /dmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmyoo++ooooymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmo  
         `ydmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmhoooooooooodmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd` 
         :mmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+o+ooooosmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm/ 
         ommmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmy+o+o+o+o+oooymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmy 
         ydmmmdddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmhooo+ooo+oo+ooodmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd.
        .dmmmddddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+oo+o+ooo+oosmmmmmmmmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmd:
        -dmmmmddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmsoo+oooo+oo+oooo+hmmmmmmmmmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmdo
        +dmmmmdmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmh+oo+oooooooooooooodmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdy
        sdmmmmddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+ooooo+o+oooooo+smmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdh
        ymmmmddsoosssssyyyyyyyhhhhhhhhhhhhhhhhhs+o+o++o+o++oooooooo+hhhddddddddddddddmdmmmddmdmdmdmdmmmmmmmd
        hmmmmdddyo+++++++++ooo+++o+oooooooooooooo+oosyyyhhhddhdddddyssssoooooooooooooooooooosoooooooymmmmmmd
        hmmmmddmmdhso++++++++o++o+++ooo++++o+++ohhdddmddddddddddddddddddhy+++o++oo+oooooooooo+++++shdmmmmmmh
        ymmmmdddmmmmdyso++++o+o+ooooooooooo+oshdmddddddddmddddddddddddhyso+oooooo++o++o+o++o++oshdmmmmmmmmdh
        ydmmmddmmmmmdmmdyso+o++oooooooo+++o+ddddddmddddmddddddddddddddddhyo+oo+o+++o+++++++oshddmmmmmmmmmmdy
        odmmmdddmdmddmmmmmhyoo++o+o+ooo+o+osddmmdddddddddddddddddddddddhyso++o++++++++++ooydmmmmmdmmmmmmmmdo
        /dmmmdddmddmdmmmmmmmdysoo++++oo+o+oddddmdddddddddddddddddddddddddyo++++o++++++oyddmmmmmmmmmmmmmmmmd/
        .dmmmmdddddmddmmmmmmmmmdyso+++o+ooyddmmdmddddddddmddddddddddddddysoooo++++ooyhdmmdmmmmmmmmmmmmmmmdd`
        `ydmmmdddddmmddmmmmmmmmmmdhso++++sdddmmmmdddddddmmddddddddddddmddyo+++++oyhdmmmmmmmmmmmmmmmmmmmmmds 
         +dmmmdddddddddmmmmmmmmmmmmmdysoohdmdmmmmmdddddddmmdddddddddddddhyo++oyhdmmmmmmmmmmmmmmmmmmmmmmmmd: 
         -dmmmddddddddmmmmmmmmmmmmmmmmdhooyddmdmmmddddmdddddddddddddddddy+oyhdmmmmmmmmmmmmmmmmmmmmmdmmmmdy` 
         `ydmmmdddddddddddmmmmmmmmmmmmmyoohddddmmmdddmmmddmddmdddddddddds+sdmmmmmmmmmmmmmmmmmmmmmmmmmmmmd/  
          :dmmmddddmddddddmmmmmmmmmmmddoo+yddddddmdmmmmmdddddddddddddddddy+ymmmmmmdmmmmmdmmmmmmmmmmmmmmdh`  
           sdmmmddddddmddmmmmmmmmmmmmdso+oosyyyydddmmddddddddddddddddddyos+odmmmmmddmmmmmmmmdmmmmmmmmmmdo   
           -dmmmdddddddmdmmmmmmmmdmmmyoo++o+o++oyddmdddddddddddddddddddh+o++sdmmmmdmmmmmmmmmmmmmmmmmmmmy`   
            odmmdddddddmmmmmmmmmmddmhoooooo+oo++ydddddhhyosssdddddddddso+++o+hmdmmmmmmmmmmmmdmmdmmmmmmd-    
            `hdmmmdddddmdmmmdmmmmdddoo++oo+++o++hddhso+++++++oydddddddyo+++o+oddmmmmmmdmmmmmmmmmmmmmmdo     
             -hdmmdddddmdddddmddmmdy++++oo++++o+hys++ooshyso+++yddddyso++++o++ydmmmmmmmmmdmmmmddmmmmdy`     
              /ddmdddddddmdddmmmmmh+o+++oo+++++++++oshdddddhyoooyhddy++++++++++ddmdmmmdmdmmmmmddmmmdh-      
              `sdmmmdddddddmdmmmmdo++++oo++++++++oydddmdddddddhyoosyo+++++o++o+ydddddddmdmmmmmmmmmmd:       
               `yddmmdddddmmmmmmms++++o++++++ooyddddmmdmdmdddmdddys+++++++o+++oodddmddddddmmddmmmdd+        
                -ydmmmddddddddmmh++++++o++oshddddddmmmdddddddddddddhyoo+++o+++o+sdddmddmmmddddmmmd+         
                 .hdmmmdmddddddds++++++osydmmdddddmmddddddddddddddddddhso++++++oohdmmmddddddmmmddo`         
                  -ydmmmmdddddds++++ooyhdmmdmmmddddmdddmdmddddddmddddddddyso+++++odddmddddddmmddo`          
                   .ydmmmddddddoo+oyhdmmmmmdmmmddddddddmdddmddddddddddddddddyo++++sddmdddddmmdd+`           
                    `sdmmmmmdds+shddmdmmmmmmddmdddddddddddddddddddddddddddddddhs+++hdddddmmmdd+`            
                     `+ddmmmddyhddmmmmmmmmmdddmdddddddddddddddddddddddddddddddmdhsosddddmmmdh/              
                       :hdmmmddddddmmmmdmmmddddddddddddddddddddddddddddddddddddddddddddmmmdh-               
                        .sdmmmdddmmmmmmmdmdmmddddddddddddddddddddddddddddddddddddddddmmmddy.                
                          /hdmmmmmmmmmddddddmddddddddddddddddddddddddddddddddddddddmmmmdh/`                 
                           .sddmmmmddddddddddddddddddddddddddddddddddddddddddddddddddddo.                   
                             /hdmdmmdmddddddmdmddddddddddddddddddddddddddddddddddddddo-                     
                              .odddmdmddddddmdmddddddddddddddddmdddddddddddddmdddddo-                       
                                .+hddmdmddddmdddddddddddddddddddddddddddddddddddh+.                         
                                  `/yddddmddddmddddddddddddddddddmddddddddddddy/`                           
                                    `:sddddddddddddddddddddddddmddddddddmddds:                              
                                       ./ydddddddddddddddddddddddddddddddy/`                                
                                          -+hdddddddddddddddddddddddddy+.                                   
                                            `-ohdddddddddddddddddddy/.                                      
                                                -+yddddddddddddyo-`                                         
                                                   .:oyddddhs/.    
           ▄▄▄▄    ▄▄▄▄▄      ▄▄▄▄▄▄   ▄▄▄   ▄▄              ▄▄▄▄    ▄▄▄▄▄▄       ▄▄     ▄▄▄▄▄▄    ▄▄   ▄▄▄ 
          ██▀▀██   ██▀▀▀██    ▀▀██▀▀   ███   ██            ▄█▀▀▀▀█   ██▀▀▀▀█▄    ████    ██▀▀▀▀██  ██  ██▀  
         ██    ██  ██    ██     ██     ██▀█  ██            ██▄       ██    ██    ████    ██    ██  ██▄██    
         ██    ██  ██    ██     ██     ██ ██ ██             ▀████▄   ██████▀    ██  ██   ███████   █████    
         ██    ██  ██    ██     ██     ██  █▄██   █████         ▀██  ██         ██████   ██  ▀██▄  ██  ██▄  
          ██▄▄██   ██▄▄▄██    ▄▄██▄▄   ██   ███            █▄▄▄▄▄█▀  ██        ▄██  ██▄  ██    ██  ██   ██▄ 
           ▀▀▀▀    ▀▀▀▀▀      ▀▀▀▀▀▀   ▀▀   ▀▀▀             ▀▀▀▀▀    ▀▀        ▀▀    ▀▀  ▀▀    ▀▀▀ ▀▀    ▀▀ 

        [ OK ] Launching ODIN @2024-08-17T12:46:27.371798                                                                   
        
DEBUG:odin_eia_db.engine.odin_spark:[ OK ] Loading Spark Configuration For ODIN-Project...
:: loading settings :: url = jar:file:/opt/spark-3.5.1-bin-hadoop3/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml
Ivy Default Cache set to: /home/ubuntu/.ivy2/cache
The jars for the packages stored in: /home/ubuntu/.ivy2/jars
org.apache.hadoop#hadoop-aws added as a dependency
com.amazonaws#aws-java-sdk-bundle added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-c7099faa-4fbb-4400-b4d8-c5dfb8a129a3;1.0
	confs: [default]
	found org.apache.hadoop#hadoop-aws;3.3.1 in central
	found com.amazonaws#aws-java-sdk-bundle;1.11.901 in central
	found org.wildfly.openssl#wildfly-openssl;1.0.7.Final in central
:: resolution report :: resolve 451ms :: artifacts dl 14ms
	:: modules in use:
	com.amazonaws#aws-java-sdk-bundle;1.11.901 from central in [default]
	org.apache.hadoop#hadoop-aws;3.3.1 from central in [default]
	org.wildfly.openssl#wildfly-openssl;1.0.7.Final from central in [default]
	---------------------------------------------------------------------
	|                  |            modules            ||   artifacts   |
	|       conf       | number| search|dwnlded|evicted|| number|dwnlded|
	---------------------------------------------------------------------
	|      default     |   3   |   0   |   0   |   0   ||   3   |   0   |
	---------------------------------------------------------------------
:: retrieving :: org.apache.spark#spark-submit-parent-c7099faa-4fbb-4400-b4d8-c5dfb8a129a3
	confs: [default]
	0 artifacts copied, 3 already retrieved (0kB/5ms)
24/08/17 12:46:33 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
DEBUG:odin_eia_db.engine.odin_spark:[ OK ] Loading Hadoop Configuration For ODIN-Project...
In [6]:
crude_oils.printSchema() 
root
 |-- originName: string (nullable = true)
 |-- destinationName: string (nullable = true)
 |-- gradeName: string (nullable = true)
 |-- quantity: long (nullable = true)
 |-- start_date: string (nullable = true)
 |-- end_date: string (nullable = true)
 |-- destinationTypeName: string (nullable = true)
 |-- quantity_units: string (nullable = true)

In [7]:
crude_oils.show(5) 
                                                                                
+----------+---------------+-----------+--------+-------------------+-------------------+-------------------+----------------+
|originName|destinationName|  gradeName|quantity|         start_date|           end_date|destinationTypeName|  quantity_units|
+----------+---------------+-----------+--------+-------------------+-------------------+-------------------+----------------+
|   Algeria|  United States|Light Sweet|     376|2024-05-01 00:00:00|2024-05-31 00:00:00|      United States|thousand barrels|
|     World|  United States|     Medium|   60583|2024-05-01 00:00:00|2024-05-31 00:00:00|      United States|thousand barrels|
|     World|  United States|Light Sweet|   13151|2024-05-01 00:00:00|2024-05-31 00:00:00|      United States|thousand barrels|
|     World|  United States| Light Sour|    9427|2024-05-01 00:00:00|2024-05-31 00:00:00|      United States|thousand barrels|
|     World|  United States|Heavy Sweet|   10436|2024-05-01 00:00:00|2024-05-31 00:00:00|      United States|thousand barrels|
+----------+---------------+-----------+--------+-------------------+-------------------+-------------------+----------------+
only showing top 5 rows

Top Crude Oils Importer to the US¶

  • Unit measurement: Thousand Barrels
In [19]:
crude_oils.groupby('originName').agg(f.sum('quantity').alias('total_quantity')).orderBy('total_quantity', ascending=False).show(25)  
+-------------------+--------------+
|         originName|total_quantity|
+-------------------+--------------+
|              World|      13418377|
|           Non-OPEC|      11295804|
|             Canada|       8051193|
|    Canada (Region)|       7937144|
|     Other Americas|       2902756|
|               OPEC|       2118304|
|        Middle East|       1516824|
|             Mexico|       1347899|
|       Saudi Arabia|        982260|
|             Africa|        654750|
|           Colombia|        495460|
|               Iraq|        493918|
|            Ecuador|        331919|
|             Brazil|        278898|
|            Nigeria|        266747|
|            Eurasia|        220633|
|          Venezuela|        185937|
|             Russia|        169785|
|             Europe|        165639|
|              Libya|        136668|
|             Guyana|        133373|
|     United Kingdom|         95560|
|          Argentina|         89782|
|Trinidad and Tobago|         84291|
|              Ghana|         77096|
+-------------------+--------------+
only showing top 25 rows

In [85]:
importer_df: 'DataFrame' = pd.DataFrame( [ item.asDict() for item in crude_oils.groupby('originName').agg(f.sum('quantity').alias('total_quantity')).orderBy('total_quantity', ascending=False).collect() ] )
countries, quantities = zip(*importer_df.iloc[7:29].head(10).set_index('originName').to_dict().get('total_quantity').items())
labels: List[str] = [ "%s: %s%%" % (item[0], round( (item[1]/importer_df['total_quantity'].sum()) *100,2)  ) for item in zip(countries, quantities) ]
regions_df: 'DataFrame' = importer_df[ importer_df['originName'].isin(['World', 'Non-OPEC', 'Other Americas', 'OPEC', 'Middle East'] )]
regions_df['labels'] = regions_df.apply(lambda row: "%s: %s %%" % (row['originName'] ,round(row['total_quantity']/importer_df['total_quantity'].sum() ,2 ) * 100), axis=1  )
countries, percentage = zip( *regions_df.set_index('labels')['total_quantity'].to_dict().items()) 
fig = plt.figure(figsize=(12,5))

ax = fig.add_subplot(1,2,1) 
ax.pie(x=list(percentage), labels=list(countries), colors=sns.color_palette(palette='flare', n_colors=10).as_hex() )
ax.set_title("Top Crude Oils Importer to the US", fontweight='bold', fontsize=14)
plt.gcf().gca().add_artist(plt.Circle(xy=(0,0), color='white', radius=0.45))
plt.tight_layout() 


ax = fig.add_subplot(1,2,2) 
ax.pie(x=list(quantities), labels=labels, colors=sns.color_palette(palette='flare', n_colors=10).as_hex() )
ax.set_title("Top 10 Crude Oils Importer Countries to the US", fontweight='bold', fontsize=14)
plt.gcf().gca().add_artist(plt.Circle(xy=(0,0), color='white', radius=0.45))
plt.tight_layout() 
No description has been provided for this image

Crude Oils import from the Middle East to the US¶

In [98]:
crude_oils.filter("originName == 'Middle East' ").groupby('gradename').count().orderBy('count', ascending=False).show() # type of crude oils imported to the US from the Middle East 
+-----------+-----+
|  gradename|count|
+-----------+-----+
|     Medium|   69|
| Heavy Sour|   69|
| Light Sour|   64|
|Light Sweet|    9|
|Heavy Sweet|    3|
+-----------+-----+

In [105]:
me_imports: 'DataFrame' = pd.DataFrame( [item.asDict()  for item in crude_oils.filter("originName == 'Middle East' ").select('start_date', 'quantity', 'gradeName' ).collect() ] ) 
me_imports['start_date'] = pd.to_datetime(me_imports['start_date'] ) 
me_imports = me_imports.rename({'start_date': 'period'}, axis=1)

fig = plt.figure(figsize=(20,10)) 

for index,grade_name in enumerate( me_imports['gradeName'].unique().tolist() , 1) :
    ax = fig.add_subplot(3,2,index) 
    me_imports.set_index('period').query(f"gradeName == '{grade_name}'")['quantity'].plot(ax=ax) 
    ax.set_title(f"Crude Oils {grade_name} import from the Middle East", fontweight='bold', fontsize=14)
    ax.set_ylabel("Thousand Barrels", fontweight='bold')

plt.tight_layout()
No description has been provided for this image

Petroleum Weekly Stocks¶

In [56]:
query: str = """
/* 
DESCRIPTION 
-----------
- Motor Gasoline related (i.e., Reformulated, Finished, Conventional) 

*/
SELECT 
    p.period, 
    MONTH(p.period) AS `month`,
    p.area_name, 
    p.product_name, 
    p.process_name, 
    p.series, 
    p.series_description, 
    p.value, 
    p.units, 
    CASE 
        WHEN DATE_FORMAT( CONCAT('2024', '-',  MONTH(p.period), '-', DAY(p.period)), '%Y-%m-%d' ) BETWEEN DATE_FORMAT("2024-06-20", '%Y-%m-%d' ) AND DATE_FORMAT("2024-09-22", '%Y-%m-%d' ) THEN 'Summer' 
        WHEN DATE_FORMAT( CONCAT('2024', '-',  MONTH(p.period), '-', DAY(p.period)), '%Y-%m-%d' ) BETWEEN DATE_FORMAT("2024-09-22", '%Y-%m-%d' ) AND DATE_FORMAT("2024-12-20", '%Y-%m-%d' ) THEN 'Fall' 
        WHEN DATE_FORMAT( CONCAT('2024', '-',  MONTH(p.period), '-', DAY(p.period)), '%Y-%m-%d' ) BETWEEN DATE_FORMAT("2024-12-21", '%Y-%m-%d' ) AND DATE_FORMAT("2025-03-20", '%Y-%m-%d' ) THEN 'Winter' 
        WHEN DATE_FORMAT( CONCAT('2024', '-',  MONTH(p.period), '-', DAY(p.period)), '%Y-%m-%d' ) BETWEEN DATE_FORMAT("2024-03-21", '%Y-%m-%d' ) AND DATE_FORMAT("2024-06-19", '%Y-%m-%d' ) THEN 'Spring'
        ELSE 'Winter'
        
    END AS `season`
    
FROM petroleum_weekly_stocks p 
WHERE p.series_description LIKE '%motor%' AND p.area_name LIKE '%PADD 1%' 
"""
petrol_weekly_stocks: 'DataFrame' = ODINSpark(app_name="Weekly Petroleum Stocks").exec_odin_mysql_query(query=query)  
# exclude conv_motor_gas with alcohol/ finished_motor  > 55 
petrol_weekly_stocks = petrol_weekly_stocks.filter("product_name !=  'Conventional Motor Gasoline with Alcohol' ").filter("product_name != 'Finished Motor Gasoline Conventional >55' ") 
DEBUG:odin_eia_db.engine.odin_spark:
        
                  ..`                                 ./sdmds:`                                             
                +dmmds+-`                          -+ymmmmmmmmdo:`                            ./osyyy/`     
              `ommmmmmmmdhso:`                 ./shmmmmmmmmmmmmmmds/.                     ./ohmmmdmdmm/     
              +mmmmmmmmmmmmmmmhy+//:://///++shdmmmmmmmmmmhsdmmmmmmmmmddyso+/::--.-://+syhdmmmmmmmmmmmmh`    
             :dmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm+    
            .hdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmsooodmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd.   
            ommmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmh+osoymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm/   
           :dmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+oohmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmms   
          `hdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdsoo+ososmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd.  
          /dmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmyoo++ooooymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmo  
         `ydmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmhoooooooooodmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd` 
         :mmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+o+ooooosmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmm/ 
         ommmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmy+o+o+o+o+oooymmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmy 
         ydmmmdddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmhooo+ooo+oo+ooodmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmd.
        .dmmmddddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+oo+o+ooo+oosmmmmmmmmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmd:
        -dmmmmddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmsoo+oooo+oo+oooo+hmmmmmmmmmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmdo
        +dmmmmdmmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmh+oo+oooooooooooooodmmmmdmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdy
        sdmmmmddmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdooo+ooooo+o+oooooo+smmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmdh
        ymmmmddsoosssssyyyyyyyhhhhhhhhhhhhhhhhhs+o+o++o+o++oooooooo+hhhddddddddddddddmdmmmddmdmdmdmdmmmmmmmd
        hmmmmdddyo+++++++++ooo+++o+oooooooooooooo+oosyyyhhhddhdddddyssssoooooooooooooooooooosoooooooymmmmmmd
        hmmmmddmmdhso++++++++o++o+++ooo++++o+++ohhdddmddddddddddddddddddhy+++o++oo+oooooooooo+++++shdmmmmmmh
        ymmmmdddmmmmdyso++++o+o+ooooooooooo+oshdmddddddddmddddddddddddhyso+oooooo++o++o+o++o++oshdmmmmmmmmdh
        ydmmmddmmmmmdmmdyso+o++oooooooo+++o+ddddddmddddmddddddddddddddddhyo+oo+o+++o+++++++oshddmmmmmmmmmmdy
        odmmmdddmdmddmmmmmhyoo++o+o+ooo+o+osddmmdddddddddddddddddddddddhyso++o++++++++++ooydmmmmmdmmmmmmmmdo
        /dmmmdddmddmdmmmmmmmdysoo++++oo+o+oddddmdddddddddddddddddddddddddyo++++o++++++oyddmmmmmmmmmmmmmmmmd/
        .dmmmmdddddmddmmmmmmmmmdyso+++o+ooyddmmdmddddddddmddddddddddddddysoooo++++ooyhdmmdmmmmmmmmmmmmmmmdd`
        `ydmmmdddddmmddmmmmmmmmmmdhso++++sdddmmmmdddddddmmddddddddddddmddyo+++++oyhdmmmmmmmmmmmmmmmmmmmmmds 
         +dmmmdddddddddmmmmmmmmmmmmmdysoohdmdmmmmmdddddddmmdddddddddddddhyo++oyhdmmmmmmmmmmmmmmmmmmmmmmmmd: 
         -dmmmddddddddmmmmmmmmmmmmmmmmdhooyddmdmmmddddmdddddddddddddddddy+oyhdmmmmmmmmmmmmmmmmmmmmmdmmmmdy` 
         `ydmmmdddddddddddmmmmmmmmmmmmmyoohddddmmmdddmmmddmddmdddddddddds+sdmmmmmmmmmmmmmmmmmmmmmmmmmmmmd/  
          :dmmmddddmddddddmmmmmmmmmmmddoo+yddddddmdmmmmmdddddddddddddddddy+ymmmmmmdmmmmmdmmmmmmmmmmmmmmdh`  
           sdmmmddddddmddmmmmmmmmmmmmdso+oosyyyydddmmddddddddddddddddddyos+odmmmmmddmmmmmmmmdmmmmmmmmmmdo   
           -dmmmdddddddmdmmmmmmmmdmmmyoo++o+o++oyddmdddddddddddddddddddh+o++sdmmmmdmmmmmmmmmmmmmmmmmmmmy`   
            odmmdddddddmmmmmmmmmmddmhoooooo+oo++ydddddhhyosssdddddddddso+++o+hmdmmmmmmmmmmmmdmmdmmmmmmd-    
            `hdmmmdddddmdmmmdmmmmdddoo++oo+++o++hddhso+++++++oydddddddyo+++o+oddmmmmmmdmmmmmmmmmmmmmmdo     
             -hdmmdddddmdddddmddmmdy++++oo++++o+hys++ooshyso+++yddddyso++++o++ydmmmmmmmmmdmmmmddmmmmdy`     
              /ddmdddddddmdddmmmmmh+o+++oo+++++++++oshdddddhyoooyhddy++++++++++ddmdmmmdmdmmmmmddmmmdh-      
              `sdmmmdddddddmdmmmmdo++++oo++++++++oydddmdddddddhyoosyo+++++o++o+ydddddddmdmmmmmmmmmmd:       
               `yddmmdddddmmmmmmms++++o++++++ooyddddmmdmdmdddmdddys+++++++o+++oodddmddddddmmddmmmdd+        
                -ydmmmddddddddmmh++++++o++oshddddddmmmdddddddddddddhyoo+++o+++o+sdddmddmmmddddmmmd+         
                 .hdmmmdmddddddds++++++osydmmdddddmmddddddddddddddddddhso++++++oohdmmmddddddmmmddo`         
                  -ydmmmmdddddds++++ooyhdmmdmmmddddmdddmdmddddddmddddddddyso+++++odddmddddddmmddo`          
                   .ydmmmddddddoo+oyhdmmmmmdmmmddddddddmdddmddddddddddddddddyo++++sddmdddddmmdd+`           
                    `sdmmmmmdds+shddmdmmmmmmddmdddddddddddddddddddddddddddddddhs+++hdddddmmmdd+`            
                     `+ddmmmddyhddmmmmmmmmmdddmdddddddddddddddddddddddddddddddmdhsosddddmmmdh/              
                       :hdmmmddddddmmmmdmmmddddddddddddddddddddddddddddddddddddddddddddmmmdh-               
                        .sdmmmdddmmmmmmmdmdmmddddddddddddddddddddddddddddddddddddddddmmmddy.                
                          /hdmmmmmmmmmddddddmddddddddddddddddddddddddddddddddddddddmmmmdh/`                 
                           .sddmmmmddddddddddddddddddddddddddddddddddddddddddddddddddddo.                   
                             /hdmdmmdmddddddmdmddddddddddddddddddddddddddddddddddddddo-                     
                              .odddmdmddddddmdmddddddddddddddddmdddddddddddddmdddddo-                       
                                .+hddmdmddddmdddddddddddddddddddddddddddddddddddh+.                         
                                  `/yddddmddddmddddddddddddddddddmddddddddddddy/`                           
                                    `:sddddddddddddddddddddddddmddddddddmddds:                              
                                       ./ydddddddddddddddddddddddddddddddy/`                                
                                          -+hdddddddddddddddddddddddddy+.                                   
                                            `-ohdddddddddddddddddddy/.                                      
                                                -+yddddddddddddyo-`                                         
                                                   .:oyddddhs/.    
           ▄▄▄▄    ▄▄▄▄▄      ▄▄▄▄▄▄   ▄▄▄   ▄▄              ▄▄▄▄    ▄▄▄▄▄▄       ▄▄     ▄▄▄▄▄▄    ▄▄   ▄▄▄ 
          ██▀▀██   ██▀▀▀██    ▀▀██▀▀   ███   ██            ▄█▀▀▀▀█   ██▀▀▀▀█▄    ████    ██▀▀▀▀██  ██  ██▀  
         ██    ██  ██    ██     ██     ██▀█  ██            ██▄       ██    ██    ████    ██    ██  ██▄██    
         ██    ██  ██    ██     ██     ██ ██ ██             ▀████▄   ██████▀    ██  ██   ███████   █████    
         ██    ██  ██    ██     ██     ██  █▄██   █████         ▀██  ██         ██████   ██  ▀██▄  ██  ██▄  
          ██▄▄██   ██▄▄▄██    ▄▄██▄▄   ██   ███            █▄▄▄▄▄█▀  ██        ▄██  ██▄  ██    ██  ██   ██▄ 
           ▀▀▀▀    ▀▀▀▀▀      ▀▀▀▀▀▀   ▀▀   ▀▀▀             ▀▀▀▀▀    ▀▀        ▀▀    ▀▀  ▀▀    ▀▀▀ ▀▀    ▀▀ 

        [ OK ] Launching ODIN @2024-08-20T17:29:48.504807                                                                   
        
DEBUG:odin_eia_db.engine.odin_spark:[ OK ] Loading Spark Configuration For ODIN-Project...
DEBUG:odin_eia_db.engine.odin_spark:[ OK ] Loading Hadoop Configuration For ODIN-Project...
In [59]:
petrol_weekly_stocks.withColumn('month_name', get_month_name('month')  ).select('month_name').distinct().show() 
+----------+
|month_name|
+----------+
|      July|
|  November|
|   October|
|       May|
|    August|
|      June|
|  December|
| September|
+----------+

In [41]:
petrol_weekly_stocks.printSchema() 
root
 |-- period: string (nullable = true)
 |-- month: long (nullable = true)
 |-- area_name: string (nullable = true)
 |-- product_name: string (nullable = true)
 |-- process_name: string (nullable = true)
 |-- series: string (nullable = true)
 |-- series_description: string (nullable = true)
 |-- value: double (nullable = true)
 |-- units: string (nullable = true)
 |-- season: string (nullable = true)

In [54]:
petrol_weekly_stocks.show(5)
+----------+-----+---------+--------------------+-------------+--------------------+--------------------+-------+-----+------+
|    period|month|area_name|        product_name| process_name|              series|  series_description|  value|units|season|
+----------+-----+---------+--------------------+-------------+--------------------+--------------------+-------+-----+------+
|2023-12-22|   12|   PADD 1|Reformulated Moto...|Ending Stocks|         WG1ST_R10_1|East Coast (PADD ...|    4.0| MBBL|Winter|
|2023-12-22|   12|   PADD 1|Finished Motor Ga...|Ending Stocks|W_EPM0F_SAE_R10_MBBL|East Coast (PADD ...| 2405.0| MBBL|Winter|
|2023-12-22|   12|   PADD 1|Other Conventiona...|Ending Stocks|         WG6ST_R10_1|East Coast (PADD ...| 2401.0| MBBL|Winter|
|2023-12-22|   12|   PADD 1|Reformulated Moto...|Ending Stocks|            WGRSTP11|East Coast (PADD ...|    4.0| MBBL|Winter|
|2023-12-22|   12|   PADD 1|Motor Gasoline Bl...|Ending Stocks|W_EPOBGRR_SAE_R10...|East Coast (PADD ...|18867.0| MBBL|Winter|
+----------+-----+---------+--------------------+-------------+--------------------+--------------------+-------+-----+------+
only showing top 5 rows

Conventional Motorl Gasoline Weekly Stocks (PADD 1)¶

In [60]:
conv_motor: 'DataFrame' = pd.DataFrame( [ item.asDict() for item in petrol_weekly_stocks.filter(" product_name == 'Conventional Motor Gasoline' ").select('period', 'value', 'season').orderBy('period').collect() ] ) 
conv_motor['period'] = pd.to_datetime(conv_motor['period']) 
conv_motor['month_name'] =conv_motor['period'].apply(lambda row: MONTH_LOOKUP.get( row.month ) )
conv_motor['month'] =conv_motor['period'].apply(lambda row: row.month  )
In [93]:
fig = plt.figure(figsize=(16,4))
ax = fig.add_subplot(1,2,1) 
conv_motor.set_index('period')['value'].plot(ax=ax, marker='s', mec='black')
ax.set_title("Conventional Motor Gasoline Weekly Stocks", fontweight='bold', fontsize=14)
ax.set_ylabel("MBBL")

ax = fig.add_subplot(1,2,2) 
months, avg_vals = zip(*conv_motor.groupby('month')['value'].mean().to_dict().items())
ax.bar(x=list(months), height=list(avg_vals), hatch='//', ec='k')
ax.set_xticklabels(['May'] + conv_motor.sort_values(by='month')['month_name'].unique().tolist() , fontweight='bold' ) 
ax.set_title("Conventional Motor Gasoline Stocks by Month Name", fontweight='bold', fontsize=14)
ax.set_ylabel("MBBL")

for index,val in enumerate(avg_vals, 5): 
    ax.annotate(text=val, xy=(index - 0.3,val + 50))
    
plt.tight_layout() 
No description has been provided for this image

Conventional Motorl Gasoline Stocks by Season¶

In [67]:
conv_motor.groupby('season')['value'].agg(['sum', 'mean']).applymap(lambda row: f"{row:.2f} MBBL" )
Out[67]:
sum mean
season
Fall 38233.00 MBBL 3186.08 MBBL
Spring 13953.00 MBBL 3488.25 MBBL
Summer 42657.00 MBBL 3046.93 MBBL
Winter 2401.00 MBBL 2401.00 MBBL
In [97]:
petrol_weekly_stocks.filter("product_name == 'Finished Motor Gasoline' ").select('period', 'value').orderBy('period').show(30) 
+----------+------+
|    period| value|
+----------+------+
|2023-06-02|3442.0|
|2023-06-09|3784.0|
|2023-06-16|3936.0|
|2023-06-23|3106.0|
|2023-06-30|2751.0|
|2023-07-07|3578.0|
|2023-07-14|3193.0|
|2023-07-21|2909.0|
|2023-07-28|2849.0|
|2023-08-04|2621.0|
|2023-08-11|2529.0|
|2023-08-18|3259.0|
|2023-08-25|3642.0|
|2023-09-01|3090.0|
|2023-09-08|2800.0|
|2023-09-15|3117.0|
|2023-09-22|3276.0|
|2023-09-29|3357.0|
|2023-10-06|3007.0|
|2023-10-13|3111.0|
|2023-10-20|2904.0|
|2023-10-27|3349.0|
|2023-11-03|3534.0|
|2023-11-10|3117.0|
|2023-11-17|3273.0|
|2023-11-24|3223.0|
|2023-12-01|3048.0|
|2023-12-08|3449.0|
|2023-12-15|2908.0|
|2023-12-22|2405.0|
+----------+------+