Josh Levy-Kramer joshlk

## faster_toPandas.py
import pandas as pd

def _map_to_pandas(rdds):
    """ Needs to be here due to pickling issues """
    return [pd.DataFrame(list(rdds))]

def toPandas(df, n_partitions=None):
    """
    Returns the contents of `df` as a local `pandas.DataFrame` in a speedy fashion. The DataFrame is
    repartitioned if `n_partitions` is passed.

## pandas_to_sqlite.py
import pandas as pd
from sqlalchemy import create_engine

df = pd.read_csv('<INPUT_FILE_PATH>.csv', encoding='utf')  # Needs to be unicode for sqlite
disk_engine = create_engine('sqlite:///<DB_PATH>.sqlite')
df.to_sql('<TABLE_NAME>', disk_engine, if_exists='append')

## rolling_stats.py
import pandas as pd

def nunique_rolling_time_series(data_series, step_freqency, window_size, output_name=''):
    """
    Calculate a rolling statistic of nunique of a time series. The input series has a DateTime index.
    """

    data_series = data_series.sort_index()

    min_date = data_series.index.min()

## acorn_segments_lookup.csv

          
            Acorn_Category
            Acorn_Category_Name
            Acorn_Group
            Acorn_Group_Name
            Acorn_Type
            Acorn_Type_Code
            Acorn_Type_Name

            
              1
              1 Affluent Achievers
              A
              1.A Lavish Lifestyles
              1
              1.A.1
              1.A.1 Exclusive enclaves

            
              1
              1 Affluent Achievers
              A
              1.A Lavish Lifestyles
              2
              1.A.2
              1.A.2 Metropolitan money

            
              1
              1 Affluent Achievers
              A
              1.A Lavish Lifestyles
              3
              1.A.3
              1.A.3 Large house luxury

            
              1
              1 Affluent Achievers
              B
              1.B Executive Wealth
              4
              1.B.4
              1.B.4 Asset rich families

            
              1
              1 Affluent Achievers
              B
              1.B Executive Wealth
              5
              1.B.5
              1.B.5 Wealthy countryside commuters

            
              1
              1 Affluent Achievers
              B
              1.B Executive Wealth
              6
              1.B.6
              1.B.6 Financially comfortable families

            
              1
              1 Affluent Achievers
              B
              1.B Executive Wealth
              7
              1.B.7
              1.B.7 Affluent professionals

            
              1
              1 Affluent Achievers
              B
              1.B Executive Wealth
              8
              1.B.8
              1.B.8 Prosperous suburban families

            
              1
              1 Affluent Achievers
              B
              1.B Executive Wealth
              9
              1.B.9
              1.B.9 Well-off edge of towners

## python_anaconda_3_and_2
To have both Anaconda Python installed with both Python 2 and 3 running simultaneously:
1. Download Python Anconda 3
2. Create a Python 2.7 environment `conda create -n py3k python=3.4 anaconda`
3. Navigate to the install directory. Something like `Anaconda3\envs\py2k`
4. Rename python to python2. And in scripts pip to pip2
5 Add those two directories to the path

## pandas_utils.py
import pandas as pd

def clean_str_cols(df, encoding='ascii'):
    """
    As string columns are stored as 'objects' it can cause many problems, especially when reading and writig CSVs. This function
    forces the columns to be strings and be encoded as a specified encoding.

    Solves `UnicodeEncodeError` errors when using `to_csv`.
    """
    df = df.copy()

## SQLAlchemy_quickstart.md

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                joshlk
                / SQLAlchemy_quickstart.md
            
            
              Last active
              December 8, 2017 14:43
            
              
                SQLAlchemy quickstart, tutorial
              
          
    SQLAlchemy has two parts:
* ORM: this maps table and relationships between tables to Python objects
* Core: alows you to write and execute SQL using Python exspressions

Core

Select statment:
import pandas as pd

  
## postgresql.md

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                joshlk
                / postgresql.md
            
            
              Last active
              August 15, 2018 13:23
            
              
                PostgreSQL codesheet
              
          
    Force cast conversion to integer or output null:
CREATE OR REPLACE FUNCTION convert_to_integer(v_input text)
RETURNS BIGINT AS $$
DECLARE v_int_value BIGINT DEFAULT NULL;
BEGIN
    BEGIN
        v_int_value := v_input::BIGINT;
    EXCEPTION WHEN OTHERS THEN
 RAISE NOTICE 'Invalid integer value: "%". Returning NULL.', v_input;

  
## graph_tool_jupyter_notebook_inline_draw.ipynb

      
              1 file
            
          
              1 fork
            
          
                0 comments
              
            
              0 stars
            
          
                joshlk
                / graph_tool_jupyter_notebook_inline_draw.ipynb
            
            
              Created
              April 20, 2018 14:01
            
              
                Inline graph-tool figures in Jupyter notebook (graph_draw)
              
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## or_tools_SimpleMinCostFlow_cython_benchmark.ipynb

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              2 stars
            
          
                joshlk
                / or_tools_SimpleMinCostFlow_cython_benchmark.ipynb
            
            
              Created
              April 27, 2018 16:05
            
              
                Benchmark of Google's or-tools and a prototype cython interface
              
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	import pandas as pd

	def _map_to_pandas(rdds):
	""" Needs to be here due to pickling issues """
	return [pd.DataFrame(list(rdds))]

	def toPandas(df, n_partitions=None):
	"""
	Returns the contents of `df` as a local `pandas.DataFrame` in a speedy fashion. The DataFrame is
	repartitioned if `n_partitions` is passed.
	import pandas as pd
	from sqlalchemy import create_engine

	df = pd.read_csv('<INPUT_FILE_PATH>.csv', encoding='utf') # Needs to be unicode for sqlite
	disk_engine = create_engine('sqlite:///<DB_PATH>.sqlite')
	df.to_sql('<TABLE_NAME>', disk_engine, if_exists='append')
	import pandas as pd

	def nunique_rolling_time_series(data_series, step_freqency, window_size, output_name=''):
	"""
	Calculate a rolling statistic of nunique of a time series. The input series has a DateTime index.
	"""

	data_series = data_series.sort_index()

	min_date = data_series.index.min()
Acorn_Category	Acorn_Category_Name	Acorn_Group	Acorn_Group_Name	Acorn_Type	Acorn_Type_Code	Acorn_Type_Name
1	1 Affluent Achievers	A	1.A Lavish Lifestyles	1	1.A.1	1.A.1 Exclusive enclaves
1	1 Affluent Achievers	A	1.A Lavish Lifestyles	2	1.A.2	1.A.2 Metropolitan money
1	1 Affluent Achievers	A	1.A Lavish Lifestyles	3	1.A.3	1.A.3 Large house luxury
1	1 Affluent Achievers	B	1.B Executive Wealth	4	1.B.4	1.B.4 Asset rich families
1	1 Affluent Achievers	B	1.B Executive Wealth	5	1.B.5	1.B.5 Wealthy countryside commuters
1	1 Affluent Achievers	B	1.B Executive Wealth	6	1.B.6	1.B.6 Financially comfortable families
1	1 Affluent Achievers	B	1.B Executive Wealth	7	1.B.7	1.B.7 Affluent professionals
1	1 Affluent Achievers	B	1.B Executive Wealth	8	1.B.8	1.B.8 Prosperous suburban families
1	1 Affluent Achievers	B	1.B Executive Wealth	9	1.B.9	1.B.9 Well-off edge of towners
	To have both Anaconda Python installed with both Python 2 and 3 running simultaneously:
	1. Download Python Anconda 3
	2. Create a Python 2.7 environment `conda create -n py3k python=3.4 anaconda`
	3. Navigate to the install directory. Something like `Anaconda3\envs\py2k`
	4. Rename python to python2. And in scripts pip to pip2
	5 Add those two directories to the path
	import pandas as pd

	def clean_str_cols(df, encoding='ascii'):
	"""
	As string columns are stored as 'objects' it can cause many problems, especially when reading and writig CSVs. This function
	forces the columns to be strings and be encoded as a specified encoding.

	Solves `UnicodeEncodeError` errors when using `to_csv`.
	"""
	df = df.copy()