library(scales)
library(knitr)
library(ggplot2)
library(hutils)
#> 
#> Attaching package: 'hutils'
#> The following objects are masked from 'package:dplyr':
#> 
#>     coalesce, if_else
library(magrittr)
library(data.table)
if (requireNamespace("taxstats", quietly = TRUE)){
  library(taxstats)
} else {
  templib <- tempfile()
  hutils::provide.dir(templib)
  install.packages("taxstats",
                   lib = templib,
                   repos = "https://hughparsonage.github.io/drat/",
                   type = "source")
  library("taxstats", lib.loc = templib)
}
library(grattan)

Introduction

This vignette uses a recent op-ed to demonstrate several tools within the grattan package:

  1. projection of sample files for analysis over the budget forward estimates, including a flexible assumption about future wage growth
  2. calculating income tax, both under present settings and changes to thresholds or rates.

Average change of tax rates by decile

Say we want to compare the expected income tax paid by individuals in 2016-17 vs 2020-21. We have 2% sample files from 2012-13 and 2013-14. To do distributional analysis for years beyond 2013-14, we project a sample file as many years ahead as required. So for 2016-17, we use project with h = 4L. (The h stands for ‘’horizon’‘. The L means ’integer’ in R: project insists that the h value is strictly an integer as that is the only type of value that makes sense here.)

sample_file_1617 <-
  project(sample_file_1213,
          h = 4L,
          fy.year.of.sample.file = "2012-13")

and similarly for 2020-21 we use h = 8L:

sample_file_2021 <-
  project(sample_file_1213,
          h = 8L,
          fy.year.of.sample.file = "2012-13")

I use fy.year.of.sample.file = "2012-13" rather than = "2013-14" as the former seems to give more accurate forecasts when compared with final budget outcomes in the years that follow.

The next step is to calculate the tax paid for each entry in the sample file for those years. The grattan package provides income_tax for this purpose, with the optional argument .dots.ATO accepting a sample file to take care of the variables that are needed for the complex calculations of offsets and the Medicare levy. The argument fy.year specifies what tax scales to use; for future years, we assume the current settings, unless the Government has announced changes in a future year, such as the expected increase to the Medicare levy. Since the income_tax function only currently works for years as far ahead as 2019-20, we use that year for the 2020-21 projection of the sample file.

sample_file_1617[, tax_paid := income_tax(Taxable_Income,
                                          .dots.ATO = copy(sample_file_1617),
                                          fy.year = "2016-17")]
sample_file_1617[, avg_tax := tax_paid / Taxable_Income]
sample_file_2021[, tax_paid := income_tax(Taxable_Income,
                                          .dots.ATO = copy(sample_file_2021),
                                          fy.year = "2019-20")]
sample_file_2021[, avg_tax := tax_paid / Taxable_Income]

To calculate the average (average) tax by decile, we use weighted_ntile with the optional arguments weights left to the default (as the sample file is equiweighted).

avg_tax_by_decile_1617 <- 
  sample_file_1617 %>%
  .[, .(avg_tax = mean(avg_tax)),
    keyby = .(decile = weighted_ntile(Taxable_Income, n = 10))]

avg_tax_by_decile_2021 <- 
  sample_file_2021 %>%
  .[, .(avg_tax = mean(avg_tax)),
    keyby = .(decile = weighted_ntile(Taxable_Income, n = 10))]

We can then plot a comparison of these table by joining them and using ggplot2. Since the tables are already keyed by decile, we can use the X[Y] method from data.table to join by decile. This creates a three-column table: the first is decile, the second is avg_tax which is the avg_tax from the 2016-17 table and i.avg_tax which is the avg_tax from the 2020-21 table. (In the result of X[Y], any column in Y which has the same name as a column in X (but isn’t a key) is prefixed with i. to distinguish it.) We discard the lowest decile as the average tax is NaN for those with zero taxable income. Lastly, for cosmetic reasons, we convert decile to a factor so that the labels on the chart are 1, 2, 3, ... rather than 0, 2.5, 5, ....

avg_tax_by_decile_1617[avg_tax_by_decile_2021] %>%
  .[decile > 1] %>%
  .[, ppt_increase := 100*(i.avg_tax - avg_tax)] %>%
  .[, decile := factor(decile)] %>%
  ggplot(aes(x = decile, y = ppt_increase)) + 
  geom_col()

This chart and the underlying data are a reflection of the assumptions in the project function. In particular, the project function uses a particular forecast of the wage price index to uprate the salary column in the sample file. This differs from the assumptions in the 2017 Budget.

Budget_wage_series <-
  data.table(fy_year = c("2017-18", "2018-19", "2019-20", "2020-21"),
             r = c(0.025, 0.03, 0.035, 0.0375))

kable(Budget_wage_series)
fy_year r
2017-18 0.0250
2018-19 0.0300
2019-20 0.0350
2020-21 0.0375

The project function expects wages to grow by 7.69% over the period 2016-17 to 2020-21, whereas by the forecast in the Budget this number would be 13.4%. Whatever the merits of each forecast, the project function allows you to specify a particular wage series in the future through the argument wage.series =. We can then repeat the analysis above using those estimates:

sample_file_1617 <- project(sample_file_1213,
                            h = 4L,
                            fy.year.of.sample.file = "2012-13")

sample_file_2021 <- project(sample_file_1213,
                            fy.year.of.sample.file = "2012-13",
                            h = 8L,
                            wage.series = Budget_wage_series)

sample_file_1617[, tax_paid := income_tax(Taxable_Income,
                                          .dots.ATO = copy(sample_file_1617),
                                          fy.year = "2016-17")]
sample_file_1617[, avg_tax := tax_paid / Taxable_Income]
sample_file_2021[, tax_paid := income_tax(Taxable_Income,
                                          .dots.ATO = copy(sample_file_2021),
                                          fy.year = "2019-20")]
sample_file_2021[, avg_tax := tax_paid / Taxable_Income]

avg_tax_by_decile_1617 <- 
  sample_file_1617 %>%
  .[, .(avg_tax = mean(avg_tax)),
    keyby = .(decile = weighted_ntile(Taxable_Income, n = 10))]

avg_tax_by_decile_2021 <- 
  sample_file_2021 %>%
  .[, .(avg_tax = mean(avg_tax)),
    keyby = .(decile = weighted_ntile(Taxable_Income, n = 10))]

difference_2021_Budget <-
  avg_tax_by_decile_1617[avg_tax_by_decile_2021] %>%
  .[decile > 1] %>%
  .[, ppt_increase := 100*(i.avg_tax - avg_tax)]

difference_2021_Budget %>%
  copy %>%
  .[, decile := factor(decile)] %>%
  ggplot(aes(x = decile, y = ppt_increase)) + 
  geom_col()

Through some intermediate calculations, we can obtain the sentence that was used in the op-ed:

middle_income_avg_inc <-
  difference_2021_Budget %>%
  .[decile %between% c(3, 7)] %$%
  range(round(ppt_increase, 1))
sample_file_1617[, percentile := weighted_ntile(Taxable_Income, n = 100)]
stopifnot(56 %in% sample_file_1617[Taxable_Income %between% c(49500, 50500)][["percentile"]])

avg_tax_rate_2017_50k <- 
  sample_file_1617[percentile == 56] %$% 
  mean(avg_tax) %>%
  round(3)

avg_tax_rate_2021_50k <- 
  sample_file_2021 %>%
  .[, percentile := weighted_ntile(Taxable_Income, n = 100)] %>%
  .[percentile == 56] %$% 
  mean(avg_tax) %>%
  round(3)

Middle-income earners are particularly hurt by bracket creep. Based on the wages growth projected in the 2016 budget, the average tax rates for people in middle-income groups will increase by between 1.6 and 2.5 percentage points by 2021. For example, a person earning $50,000 a year will go from paying an average tax rate of 17.2 per cent in 2017 to 19.1 per cent in 2021.

Income tax with changes to rates

We can also use the package to estimate the revenue difference under changes to the marginal tax rates. The following function accepts two arguments: the bracket number to modified, and a rate increase to that bracket. The function returns the change in revenue (as a loss).

tax_delta <- function(bracket_number, rate_increase = -0.01) {
  current_tax <-
    sample_file_2021[, .(tax = sum(tax_paid), 
                         WEIGHT = WEIGHT[1])] %$% 
    sum(tax * WEIGHT)
  
  orig_rates <- c(0, 0.19, 0.325, 0.37, 0.45)
  new_rates <- orig_rates
  new_rates[bracket_number] <- new_rates[bracket_number] + rate_increase
    
  # rebate_income is an internal function
  .ri <- grattan:::rebate_income
  
  new_tax <- 
    sample_file_2021 %>%
    copy %>%
    .[, base_tax. := IncomeTax(Taxable_Income,
                              thresholds = c(0, 18200, 37000, 87000, 180e3),
                              rates = new_rates)] %>%
    .[, medicare_levy. := medicare_levy(income = Taxable_Income, fy.year = "2019-20",
                                       Spouse_income = Spouse_adjusted_taxable_inc,
                                       sapto.eligible = (age_range <= 1),
                                       family_status = if_else(Spouse_adjusted_taxable_inc > 0, "family", "individual"))] %>%
    .[, lito. := lito(Taxable_Income, max_lito = 445, lito_taper = 0.015, min_bracket = 37000)] %>%
    .[, rebate_income := .ri(Taxable_Income,
                             Rptbl_Empr_spr_cont_amt = Rptbl_Empr_spr_cont_amt,
                             Net_fincl_invstmt_lss_amt = Net_fincl_invstmt_lss_amt,
                             Net_rent_amt = Net_rent_amt,
                             Rep_frng_ben_amt = Rep_frng_ben_amt)] %>%
    .[, sapto. := sapto(rebate_income, fy.year = "2019-20", sapto.eligible = (age_range <= 1))] %>%
    .[, tax_payable := pmaxC(base_tax. - lito. - sapto., 0) + medicare_levy.] %>%
    .[, .(tax = sum(tax_payable), 
          WEIGHT = WEIGHT[1])] %$% 
    sum(tax * WEIGHT)
  
  current_tax - new_tax
}

which leads to the sentence and table in the op-ed:

For example, if the government was to reduce the tax rate only in the middle (37,000-87,000) bracket from 32.5% to 30%, the promised $7.8 billion surplus in 2021 would all but be swallowed up by the 2.4 bn revenue loss.

data.table(tax_bracket = c("<18,200",
                           "18,200-37,000",
                           "37,000-87,000",
                           "87,000-180,000",
                           "180,000+"),
           budget_impact = c(NA, round(vapply(2:5, tax_delta, FUN.VALUE = double(1)) / 1e9, 2))) %>%
  kable
tax_bracket budget_impact
<18,200 NA
18,200-37,000 -3.11
37,000-87,000 -2.10
87,000-180,000 -3.52
180,000+ -3.83