mgwrsar package estimates linear and local linear models with spatial autocorrelation of the following forms:
\[y=\beta_v(u_i,v_i) X_v+\,\epsilon_i\;\;\;\;\;\;\;\; (GWR)\]
\[y=\beta_c X_c+\beta_v(u_i,v_i) X_v+\,\epsilon_i\;\;\;\;\;\;\;\; (MGWR)\]
\[y=\lambda Wy+\beta_c X_c+\,\epsilon_i\;\;\;\;\;\;\;\; (MGWR-SAR(0,k,0))\]
\[y=\lambda Wy+\beta_v(u_i,v_i)X_v+\,\epsilon_i\;\;\;\;\;\;\;\; (MGWR-SAR(0,0,k))\]
\[y=\lambda Wy+\beta_c X_c+\beta_v(u_i,v_i)X_v+\,\epsilon_i\;\;\;\;\;\;\;\; (MGWR-SAR(0,k_c,k_v))\]
\[y=\lambda(u_i,v_i) Wy+\beta_c X_c+\,\epsilon_i\;\;\;\;\;\;\;\; (MGWR-SAR(1,k,0))\]
\[y=\lambda(u_i,v_i)Wy+\beta_v(u_i,v_i)X_v+\,\epsilon_i\;\;\;\;\;\;\;\; (MGWR-SAR(1,0,k))\]
\[y=\lambda(u_i,v_i)Wy+\beta_cX_c+\beta_v(u_i,v_i)X_v+\,\epsilon_i\;\;\;\;\;\;\;\; (MGWR-SAR(1,k_c,k_v))\]
For more details on the estimation method, see Geniaux, G. and Martinetti, D. (2017). A new method for dealing simultaneously with spatial autocorrelation and spatial heterogeneity in regression models. Regional Science and Urban Economics. (https://doi.org/10.1016/j.regsciurbeco.2017.04.001)
The estimation of the previous model can be done using the MGWRSAR wrapper function which requires a dataframe and a matrix of coordinates. Note that:
In addition to the ability of considering spatial autocorrelation in GWR/MGWR like models, MGWRSAR function introduces several useful technics for estimating local regression with space coordinates:
The MGWRSAR function requires a dataframe and a matrix of coordinates, and eventually a spatial weight matrix if model include spatiale dependence. The package includes a simulated data example which is used for this vignette.
library(mgwrsar)
## loading data example
data(mydata)
=as.matrix(mydata[,c("x_lat","y_lon")])
coord## Creating a spatial weight matrix (sparce dgCMatrix) of 8 nearest neighbors with 0 in diagonal
=kernel_matW(H=4,kernels='rectangle',coord_i=coord,NN=4,adaptive=TRUE,diagnull=TRUE,rowNorm=T) W
Estimation of a GWR with a gauss kernel without parallel computation:
### without parallel computing with distance computation for all points
<-proc.time()
ptm1<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss'),H=0.13, Model = 'GWR',control=list(SE=T))
model_GWR0proc.time()-ptm1)[3]
(#> elapsed
#> 0.423
summary_mgwrsar(model_GWR0)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord,
#> fixed_vars = NULL, kernels = c("gauss"), H = 0.13, Model = "GWR",
#> control = list(SE = T))
#> Model: GWR
#> Kernels function: gauss
#> Kernels adaptive: NO
#> Kernels type: GD
#> Bandwidth: 0.13
#> Computation time: 0.418
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: NO
#> Use of Target Points: NO
#> Number of data points: 1000
#> Varying parameters: Intercept X1 X2 X3
#> Intercept X1 X2 X3
#> Min. -2.63740 0.16880 -0.37306 -1.7588
#> 1st Qu. -1.46100 0.38590 1.52729 -1.3070
#> Median -1.08812 0.51409 1.94806 -1.0194
#> Mean -0.97597 0.48879 1.94778 -1.0289
#> 3rd Qu. -0.62307 0.59454 2.58871 -0.7439
#> Max. 1.06100 0.79570 3.37755 -0.3083
#> Effective degrees of freedom: 955.0674
#> AIC 135.0056
#> Residual sum of squares: 64.0684
#> RMSE: 0.2531174
### with parallel computing with distance computation for all points
<-proc.time()
ptm1<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss'),H=0.03, Model = 'GWR',control=list(SE=T,ncore=2,doMC=TRUE))
model_GWR0_parallelproc.time()-ptm1)[3]
(#> elapsed
#> 0.539
# W<-spdep::mat2listw(W)
#
# gp2mM <- lagsarlm(Y_gwr ~ X1+X2+X3,data=mydata, lW, method="Matrix")
#
# mdo<-s2sls(Y_gwr ~ X1+X2+X3,data=mydata,Produc,W)
#
# mm<-lagmess(Y_gwr ~ X1+X2+X3,data=mydata, lW)
## how to speed up computation using rough kernels (0 weights for nearest neighbors > 300th position)
<-proc.time()
ptm1<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss'),H=0.03, Model = 'GWR',control=list(SE=TRUE,NN=300))
model_GWRproc.time()-ptm1)[3]
(#> elapsed
#> 0.245
summary_mgwrsar(model_GWR)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord,
#> fixed_vars = NULL, kernels = c("gauss"), H = 0.03, Model = "GWR",
#> control = list(SE = TRUE, NN = 300))
#> Model: GWR
#> Kernels function: gauss
#> Kernels adaptive: NO
#> Kernels type: GD
#> Bandwidth: 0.03
#> Computation time: 0.244
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: YES, 300 neighbors / 1000
#> Use of Target Points: NO
#> Number of data points: 1000
#> Varying parameters: Intercept X1 X2 X3
#> Intercept X1 X2 X3
#> Min. -3.793217 0.016931 -0.765588 -2.0329
#> 1st Qu. -1.480176 0.351177 1.095174 -1.2921
#> Median -0.835614 0.542020 1.631293 -1.0081
#> Mean -0.830668 0.500057 1.674643 -1.0011
#> 3rd Qu. -0.236283 0.637046 2.453404 -0.7241
#> Max. 2.102565 0.942520 4.035453 0.0457
#> Effective degrees of freedom: 572.4888
#> AIC -3055.735
#> Residual sum of squares: 1.797923
#> RMSE: 0.04240192
## how to speed up computation using rough kernels ( 0 weights for nearest neighbors > 300th position)
=find_TP(formula = 'Y_gwr~X1+X2+X3', data =mydata,coord=coord,K=10,type='residuals')
TP# only 90 targets points are used
length(TP)
#> [1] 31
## how to speed up computation using Target points
<-proc.time()
ptm1<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss'),H=0.03, Model = 'GWR',control=list(SE=TRUE,TP=TP))
model_GWRproc.time()-ptm1)[3]
(#> elapsed
#> 0.036
## how to speed up computation using rough kernels ( 0 weights for nearest neighbors > 300th position) and Target points
<-proc.time()
ptm1<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss'),H=0.1, Model = 'GWR',control=list(SE=TRUE,NN=300,TP=TP))
model_GWRproc.time()-ptm1)[3]
(#> elapsed
#> 0.023
## how to speed up computation using adapative kernels
<-proc.time()
ptm1<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('bisq'),H=20, Model = 'GWR',control=list(SE=TRUE,adaptive=TRUE))
model_GWRproc.time()-ptm1)[3]
(#> elapsed
#> 0.166
## how to speed up computation using adapative kernels and Target points
<-proc.time()
ptm1<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('bisq'),H=20, Model = 'GWR',control=list(SE=TRUE,adaptive=TRUE,TP=TP))
model_GWRproc.time()-ptm1)[3]
(#> elapsed
#> 0.02
library(microbenchmark)
=microbenchmark(MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss'),H=0.1, Model = 'GWR',control=list(SE=TRUE)),MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss'),H=0.1, Model = 'GWR',control=list(SE=TRUE,NN=300,TP=TP)),times=2)
res#> Warning in microbenchmark(MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, :
#> less accurate nanosecond times to avoid potential integer overflows
res#> Unit: milliseconds
#> expr
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord, fixed_vars = NULL, kernels = c("gauss"), H = 0.1, Model = "GWR", control = list(SE = TRUE))
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord, fixed_vars = NULL, kernels = c("gauss"), H = 0.1, Model = "GWR", control = list(SE = TRUE, NN = 300, TP = TP))
#> min lq mean median uq max neval cld
#> 405.26647 405.26647 527.16998 527.16998 649.07350 649.07350 2 a
#> 22.69682 22.69682 22.72083 22.72083 22.74483 22.74483 2 a
=microbenchmark(MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss'),H=0.1, Model = 'GWR',control=list(SE=TRUE)),MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('bisq'),H=20, Model = 'GWR',control=list(SE=TRUE,adaptive=T,TP=TP)),times=2)
res
res#> Unit: milliseconds
#> expr
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord, fixed_vars = NULL, kernels = c("gauss"), H = 0.1, Model = "GWR", control = list(SE = TRUE))
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord, fixed_vars = NULL, kernels = c("bisq"), H = 20, Model = "GWR", control = list(SE = TRUE, adaptive = T, TP = TP))
#> min lq mean median uq max neval cld
#> 399.78526 399.78526 520.51419 520.51419 641.24312 641.24312 2 a
#> 18.55939 18.55939 18.59155 18.59155 18.62372 18.62372 2 a
Summary and plot of mgwrsar object using leaflet:
summary_mgwrsar(model_GWR0)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord,
#> fixed_vars = NULL, kernels = c("gauss"), H = 0.13, Model = "GWR",
#> control = list(SE = T))
#> Model: GWR
#> Kernels function: gauss
#> Kernels adaptive: NO
#> Kernels type: GD
#> Bandwidth: 0.13
#> Computation time: 0.418
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: NO
#> Use of Target Points: NO
#> Number of data points: 1000
#> Varying parameters: Intercept X1 X2 X3
#> Intercept X1 X2 X3
#> Min. -2.63740 0.16880 -0.37306 -1.7588
#> 1st Qu. -1.46100 0.38590 1.52729 -1.3070
#> Median -1.08812 0.51409 1.94806 -1.0194
#> Mean -0.97597 0.48879 1.94778 -1.0289
#> 3rd Qu. -0.62307 0.59454 2.58871 -0.7439
#> Max. 1.06100 0.79570 3.37755 -0.3083
#> Effective degrees of freedom: 955.0674
#> AIC 135.0056
#> Residual sum of squares: 64.0684
#> RMSE: 0.2531174
plot_mgwrsar(model_GWR0,type='B_coef',var='X2')
Plot of the effects of spatially varying coefficients:
plot_effect(model_GWR0,title='Effects')
Estimation of a GWR with spgwr package:
library(spgwr)
=mydata
mydataSPcoordinates(mydataSP)=c('x_lat','y_lon')
<-proc.time()
ptm1<- gwr(Y_gwr~X1+X2+X3, data=mydataSP, bandwidth=0.13,hatmatrix=TRUE)
model_spgwr proc.time()-ptm1)[3]
(
head(model_spgwr$SDF@data$gwr.e)
model_spgwr
all(abs(model_GWR0$residuals-model_spgwr$SDF@data$gwr.e)<0.00000000001)
[1] -0.1585824 -0.1861144 -0.4450174 -0.1767796 -0.2014330
[6] 0.2670349
Call:
gwr(formula = Y_gwr ~ X1 + X2 + X3, data = mydataSP, bandwidth = 0.13,
hatmatrix = TRUE)
Kernel function: gwr.Gauss
Fixed bandwidth: 0.13
Summary of GWR coefficient estimates at data points:
Min. 1st Qu. Median 3rd Qu. Max. Global
X.Intercept. -2.63740 -1.46100 -1.08812 -0.62307 1.06100 -1.4845
X1 0.16880 0.38590 0.51409 0.59454 0.79570 0.5000
X2 -0.37306 1.52729 1.94806 2.58871 3.37755 2.5481
X3 -1.75876 -1.30704 -1.01941 -0.74386 -0.30828 -1.0762
Number of data points: 1000
Effective number of parameters (residual: 2traceS - traceS'S): 62.85371
Effective degrees of freedom (residual: 2traceS - traceS'S): 937.1463
Sigma (residual: 2traceS - traceS'S): 0.2614678
Effective number of parameters (model: traceS): 44.93259
Effective degrees of freedom (model: traceS): 955.0674
Sigma (model: traceS): 0.2590031
Sigma (ML): 0.2531174
AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 186.462
AIC (GWR p. 96, eq. 4.22): 135.0056
Residual sum of squares: 64.0684
Quasi-global R2: 0.9813492
[1] TRUE
Estimation of a GWR with leave-one out CV, using an adaptive bisquare kernel (based on 20 nearest neighbors) and a 1% local Outlier Detection/Removal procedure:
<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('bisq'),H=20, Model = 'GWR',control=list(isgcv=TRUE,adaptive=TRUE,remove_local_outlier=TRUE,outv=0.01))
model_GWR_loo_no_outliersummary_mgwrsar(model_GWR_loo_no_outlier)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord,
#> fixed_vars = NULL, kernels = c("bisq"), H = 20, Model = "GWR",
#> control = list(isgcv = TRUE, adaptive = TRUE, remove_local_outlier = TRUE,
#> outv = 0.01))
#> Model: GWR
#> Kernels function: bisq
#> Kernels adaptive: YES
#> Kernels type: GD
#> Bandwidth: 20
#> Computation time: 0.472
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: NO
#> Use of Target Points: NO
#> Number of data points: 1000
#> Varying parameters: Intercept X1 X2 X3
#> Intercept X1 X2 X3
#> Min. -3.987250 0.014183 -1.178085 -2.1323
#> 1st Qu. -1.448621 0.344246 1.043053 -1.2999
#> Median -0.771106 0.537214 1.667348 -1.0234
#> Mean -0.795571 0.497221 1.664745 -1.0090
#> 3rd Qu. -0.178540 0.640381 2.456413 -0.7320
#> Max. 2.245736 0.910930 4.000195 -0.0957
#> Residual sum of squares: 16.30561
#> RMSE: 0.1276934
### leave-one out CV estimation
<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss'),H=20, Model = 'GWR',control=list(isgcv=TRUE,adaptive=T))
model_GWR_loosummary_mgwrsar(model_GWR_loo)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord,
#> fixed_vars = NULL, kernels = c("gauss"), H = 20, Model = "GWR",
#> control = list(isgcv = TRUE, adaptive = T))
#> Model: GWR
#> Kernels function: gauss
#> Kernels adaptive: YES
#> Kernels type: GD
#> Bandwidth: 20
#> Computation time: 0.44
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: NO
#> Use of Target Points: NO
#> Number of data points: 1000
#> Varying parameters: Intercept X1 X2 X3
#> Intercept X1 X2 X3
#> Min. -2.93954 0.11195 -0.71370 -1.7989
#> 1st Qu. -1.45727 0.36087 1.27737 -1.2853
#> Median -0.93246 0.53507 1.79654 -1.0009
#> Mean -0.84962 0.49566 1.74331 -1.0134
#> 3rd Qu. -0.39856 0.61670 2.52555 -0.7331
#> Max. 1.43824 0.84881 3.50148 -0.2385
#> Residual sum of squares: 24.73994
#> RMSE: 0.1572894
Estimation of a MGWR with stationary intercept using an adapative gaussian kernel (based on 20 nearest neighbors):
<-MGWRSAR(formula = 'Y_mgwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars='X3',kernels=c('gauss'),H=20, Model = 'MGWR',control=list(SE=T,adaptive=T))
model_MGWRsummary_mgwrsar(model_MGWR)
#> Call:
#> MGWRSAR(formula = "Y_mgwr~X1+X2+X3", data = mydata, coord = coord,
#> fixed_vars = "X3", kernels = c("gauss"), H = 20, Model = "MGWR",
#> control = list(SE = T, adaptive = T))
#> Model: MGWR
#> Kernels function: gauss
#> Kernels adaptive: YES
#> Kernels type: GD
#> Bandwidth: 20
#> Computation time: 0.533
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: NO
#> Use of Target Points: NO
#> Number of data points: 1000
#> Constant parameters: X3
#> -1.00471
#> Varying parameters: Intercept X1 X2
#> Intercept X1 X2
#> Min. -2.81276 0.11756 -0.4245
#> 1st Qu. -1.52541 0.36076 1.3433
#> Median -0.99545 0.53148 1.7245
#> Mean -0.86301 0.49545 1.7449
#> 3rd Qu. -0.38685 0.61695 2.4882
#> Max. 1.22304 0.84347 3.3954
#> Effective degrees of freedom: 932.5146
#> AIC -1067.641
#> Residual sum of squares: 18.81683
#> RMSE: 0.1371744
Estimation of a mgwrsar(0,kc,kv) model with stationary intercept and stationnary spatial multiplier using an adapative gaussian kernel (based on 20 nearest neighbors):
<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars='X2',kernels=c('gauss'),H=20, Model = 'MGWRSAR_0_kc_kv',control=list(SE=F,adaptive=T,W=W))
mgwrsar_0_kc_kvsummary_mgwrsar(mgwrsar_0_kc_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_0_kc_kv~X1+X2+X3", data = mydata,
#> coord = coord, fixed_vars = "X2", kernels = c("gauss"), H = 20,
#> Model = "MGWRSAR_0_kc_kv", control = list(SE = F, adaptive = T,
#> W = W))
#> Model: MGWRSAR_0_kc_kv
#> Method for spatial autocorrelation: 2SLS
#> Kernels function: gauss
#> Kernels adaptive: YES
#> Kernels type: GD
#> Bandwidth: 20
#> Computation time: 0.584
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: NO
#> Use of Target Points: NO
#> Number of data points: 1000
#> Constant parameters: X2 lambda
#> 0.870593 0.4120387
#> Varying parameters: Intercept X1 X3
#> Intercept X1 X3
#> Min. -1.254418 0.029588 -1.2702
#> 1st Qu. -0.444425 0.366633 -1.0810
#> Median -0.033372 0.540500 -1.0026
#> Mean 0.065257 0.498703 -1.0016
#> 3rd Qu. 0.445573 0.627798 -0.9243
#> Max. 2.099437 0.873585 -0.6744
#> Residual sum of squares: 137.9687
#> RMSE: 0.3714414
Estimation of a mgwrsar(0,0,kv) model with stationnary spatial multiplier using an adapative gaussian kernel (based on 20 nearest neighbors):
<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss'),H=20, Model = 'MGWRSAR_0_0_kv',control=list(SE=F,adaptive=T,W=W))
mgwrsar_0_0_kvsummary_mgwrsar(mgwrsar_0_0_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_0_kc_kv~X1+X2+X3", data = mydata,
#> coord = coord, fixed_vars = NULL, kernels = c("gauss"), H = 20,
#> Model = "MGWRSAR_0_0_kv", control = list(SE = F, adaptive = T,
#> W = W))
#> Model: MGWRSAR_0_0_kv
#> Method for spatial autocorrelation: 2SLS
#> Kernels function: gauss
#> Kernels adaptive: YES
#> Kernels type: GD
#> Bandwidth: 20
#> Computation time: 0.482
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: NO
#> Use of Target Points: NO
#> Number of data points: 1000
#> Constant parameters: lambda
#> 0.3676871
#> Varying parameters: Intercept X1 X2 X3
#> Intercept X1 X2 X3
#> Min. -3.763567 0.080428 -2.209824 -1.2308
#> 1st Qu. -1.219268 0.357217 0.706989 -1.0647
#> Median -0.618269 0.536381 1.425772 -0.9997
#> Mean -0.513883 0.499074 1.398492 -1.0014
#> 3rd Qu. -0.017472 0.627690 2.638912 -0.9437
#> Max. 2.108716 0.889871 4.369973 -0.6752
#> Residual sum of squares: 151.7064
#> RMSE: 0.3894951
Estimation of a mgwrsar(1,0,kv) model with all parameter spatially varying using an adapative gaussian kernel (based on 20 nearest neighbors):
<-MGWRSAR(formula = 'Y_mgwrsar_1_0_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss'),H=20, Model = 'MGWRSAR_1_0_kv',control=list(SE=F,adaptive=T,W=W))
mgwrsar_1_0_kvsummary_mgwrsar(mgwrsar_1_0_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_0_kv~X1+X2+X3", data = mydata,
#> coord = coord, fixed_vars = NULL, kernels = c("gauss"), H = 20,
#> Model = "MGWRSAR_1_0_kv", control = list(SE = F, adaptive = T,
#> W = W))
#> Model: MGWRSAR_1_0_kv
#> Method for spatial autocorrelation: 2SLS
#> Kernels function: gauss
#> Kernels adaptive: YES
#> Kernels type: GD
#> Bandwidth: 20
#> Computation time: 0.446
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: NO
#> Use of Target Points: NO
#> Number of data points: 1000
#> Varying parameters: Intercept X1 X2 X3
#> Intercept X1 X2 X3 lambda
#> Min. -5.55249 0.11207 -1.20584 -1.86027 -0.2375
#> 1st Qu. -1.47283 0.36956 0.64156 -1.24644 0.1831
#> Median -0.75406 0.54369 1.72877 -0.99875 0.3845
#> Mean -0.79631 0.50721 1.75803 -1.00349 0.3568
#> 3rd Qu. -0.15439 0.63505 2.86983 -0.71468 0.5382
#> Max. 2.06110 0.95748 5.14466 -0.27205 0.8808
#> Residual sum of squares: 868.1985
#> RMSE: 0.9317717
Estimation of a mgwrsar(1,kc,kv) model with stationary X1 using an adapative gaussian kernel (based on 20 nearest neighbors):
<-MGWRSAR(formula = 'Y_mgwrsar_1_kc_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars='X2',kernels=c('gauss'),H=20, Model = 'MGWRSAR_1_kc_kv',control=list(SE=F,adaptive=T,W=W))
mgwrsar_1_kc_kvsummary_mgwrsar(mgwrsar_1_kc_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_kc_kv~X1+X2+X3", data = mydata,
#> coord = coord, fixed_vars = "X2", kernels = c("gauss"), H = 20,
#> Model = "MGWRSAR_1_kc_kv", control = list(SE = F, adaptive = T,
#> W = W))
#> Model: MGWRSAR_1_kc_kv
#> Method for spatial autocorrelation: 2SLS
#> Kernels function: gauss
#> Kernels adaptive: YES
#> Kernels type: GD
#> Bandwidth: 20
#> Computation time: 1.441
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: NO
#> Use of Target Points: NO
#> Number of data points: 1000
#> Constant parameters: X2
#> 2.459628
#> Varying parameters: Intercept X1 X3
#> Intercept X1 X3 lambda
#> Min. -2.785597 0.064134 -1.373242 -0.6412
#> 1st Qu. -0.783444 0.371202 -1.092831 0.0467
#> Median -0.454995 0.534294 -1.006845 0.3538
#> Mean -0.435127 0.501815 -1.009762 0.3553
#> 3rd Qu. -0.111683 0.633466 -0.933401 0.8027
#> Max. 2.392965 0.910200 -0.617579 0.9900
#> Residual sum of squares: 6519.053
#> RMSE: 2.553244
Estimation of a mgwrsar(1,kc,0) model with stationary X1 using an adapative gaussian kernel (based on 20 nearest neighbors):
<-MGWRSAR(formula = 'Y_mgwrsar_1_kc_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept','X1','X2','X3'),kernels=c('gauss'),H=20, Model = 'MGWRSAR_1_kc_0',control=list(SE=F,adaptive=T,W=W))
mgwrsar_1_kc_0summary_mgwrsar(mgwrsar_1_kc_0)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_kc_kv~X1+X2+X3", data = mydata,
#> coord = coord, fixed_vars = c("Intercept", "X1", "X2", "X3"),
#> kernels = c("gauss"), H = 20, Model = "MGWRSAR_1_kc_0", control = list(SE = F,
#> adaptive = T, W = W))
#> Model: MGWRSAR_1_kc_0
#> Method for spatial autocorrelation: 2SLS
#> Kernels function: gauss
#> Kernels adaptive: YES
#> Kernels type: GD
#> Bandwidth: 20
#> Computation time: 1.057
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: NO
#> Use of Target Points: NO
#> Number of data points: 1000
#> Constant parameters: Intercept X1 X2 X3
#> -0.8205521 0.509378 1.69077 -1.035463
#> Varying parameters:
#> lambda
#> Min. -0.9137
#> 1st Qu. 0.2291
#> Median 0.6009
#> Mean 0.4822
#> 3rd Qu. 0.9900
#> Max. 0.9995
#> Residual sum of squares: 2736.795
#> RMSE: 1.654326
In this example, we use an in-sample of size 800 for model estimation and an out-sample of size 200 for prediction. Note that for model with spatial autocorrelation you must provide a global weight matrix of size 1000, ordered by in-sample then out-sample locations.
For GWR and MGWR_1_0_kv, where all coefficients vary in space, the predictions are carried out using the corresponding model estimate with the out-sample location as target points, so the estimated coefficients are not used: only the call and the data of the estimated model are used. For mixed model that have some constant coefficients (MGWR, MGXWR_0_0_kv, MGXWR_1_kc_kv, MGXWR_1_kc_0), the estimated coefficients are extrapolated using a weighting matrix. By default, the weighting matrix for prediction is based on the expected weights of outsample data if they were had been added to insample data to estimate the corresponding MGWRSAR (see Geniaux 2022 for further detail). The user can also choose to extrapolate the model coefficients using a shepperd kernel with custom number of neighbours or using the same kernel and bandwidth as the estimated model.
Prediction of GWR model using sheppard kernel with 8 neighbors:
=800
length_out=sample(1:1000,length_out)
index_in=(1:1000)[-index_in]
index_out
<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata[index_in,],coord=coord[index_in,], fixed_vars=NULL,kernels=c('gauss'),H=8, Model = 'GWR',control=list(adaptive=T))
model_GWR_insamplesummary_mgwrsar(model_GWR_insample)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata[index_in, ],
#> coord = coord[index_in, ], fixed_vars = NULL, kernels = c("gauss"),
#> H = 8, Model = "GWR", control = list(adaptive = T))
#> Model: GWR
#> Kernels function: gauss
#> Kernels adaptive: YES
#> Kernels type: GD
#> Bandwidth: 8
#> Computation time: 0.36
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: NO
#> Use of Target Points: NO
#> Number of data points: 800
#> Varying parameters: Intercept X1 X2 X3
#> Intercept X1 X2 X3
#> Min. -3.431046 0.057831 -0.613649 -1.9259
#> 1st Qu. -1.481228 0.347417 1.190307 -1.2686
#> Median -0.888871 0.545087 1.748743 -1.0012
#> Mean -0.822152 0.499287 1.704531 -1.0095
#> 3rd Qu. -0.272541 0.628847 2.489687 -0.7335
#> Max. 1.585348 0.897832 3.579135 -0.1883
#> Residual sum of squares: 5.988363
#> RMSE: 0.08651852
=mydata[index_out,]
newdata=coord[index_out,]
newdata_coord$Y_mgwrsar_1_0_kv=0
newdata
=predict_mgwrsar(model_GWR_insample, newdata=newdata, newdata_coord=newdata_coord)
Y_predhead(Y_pred)
#> [1] 0.4664548 2.3094268 4.5104714 1.8733030 3.9355001 2.9333140
head(mydata$Y_gwr[index_out])
#> [1] 0.3224649 2.0656554 4.5648040 1.9159209 3.8348415 2.9941564
sqrt(mean((mydata$Y_gwr[index_out]-Y_pred)^2)) # RMSE
#> [1] 0.13121
## to get prediction without computing the initial model
=as.numeric((MGWRSAR( 'Y_gwr~X1+X2+X3', data = mydata[index_in,],coord=coord[index_in,], fixed_vars=NULL,kernels=c('gauss'),H=8, Model = 'GWR',control=list(adaptive=T, S_out = coord[index_out,], new_data = mydata[index_out,])))$pred)
Y_pred2
<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata[index_in,],coord=coord[index_in,], fixed_vars='X3',kernels=c('gauss'),H=8, Model = 'MGWR',control=list(adaptive=T))
model_MGWR_insamplesummary_mgwrsar(model_MGWR_insample)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata[index_in, ],
#> coord = coord[index_in, ], fixed_vars = "X3", kernels = c("gauss"),
#> H = 8, Model = "MGWR", control = list(adaptive = T))
#> Model: MGWR
#> Kernels function: gauss
#> Kernels adaptive: YES
#> Kernels type: GD
#> Bandwidth: 8
#> Computation time: 0.321
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: NO
#> Use of Target Points: NO
#> Number of data points: 800
#> Constant parameters: X3
#> -1.006481
#> Varying parameters: Intercept X1 X2
#> Intercept X1 X2
#> Min. -3.936929 0.060461 -0.6137
#> 1st Qu. -1.498654 0.346186 1.0902
#> Median -0.948519 0.541705 1.7289
#> Mean -0.855135 0.498004 1.7338
#> 3rd Qu. -0.235306 0.644812 2.6282
#> Max. 1.640384 0.897495 3.7508
#> Residual sum of squares: 36.93015
#> RMSE: 0.214855
=mydata[index_out,]
newdata=coord[index_out,]
newdata_coord$Y_mgwrsar_1_0_kv=0
newdata
## Prediction with method_pred='tWtp'
=predict_mgwrsar(model_MGWR_insample, newdata=newdata, newdata_coord=newdata_coord)
Y_pred#> Warnings: method_pred=='TP' is not implemented for Model in ('MGWR','MGWRSAR_1_kc_0','MGWRSAR_1_kc_kv','MGWRSAR_0_kc_kv','MGWRSAR_0_0_kv'): automatic swith to method_pred='tWtp_model'
head(Y_pred)
#> [1] 0.4585569 2.7000676 4.7308344 1.7981054 4.0253231 3.0013471
head(mydata$Y_gwr[index_out])
#> [1] 0.3224649 2.0656554 4.5648040 1.9159209 3.8348415 2.9941564
sqrt(mean((mydata$Y_gwr[index_out]-Y_pred)^2)) # RMSE
#> [1] 0.2826883
Prediction of MGWRSAR_1_0_kv model using sheppard kernel with 4 neighbors and Best Linear Unbiased Predictor (BLUP) :
=800
length_out=sample(1:1000,length_out)
index_in=(1:1000)[-index_in]
index_out
### Global Spatial Weight matrix W should be ordered by in_sample (S) then out_sample
=kernel_matW(H=4,kernels='rectangle',coord_i=rbind(coord[index_in,],coord[index_out,]),NN=4,adaptive=TRUE,diagnull=TRUE,rowNorm=T)
W_in_out=W_in_out[1:length(index_in),1:length(index_in)]
W_in=mgwrsar::normW(W_in)
W_in=mydata[index_out,]
newdata=coord[index_out,]
newdata_coord
### SAR Model
<-MGWRSAR(formula = 'Y_sar~X1+X2+X3', data = mydata[index_in,],coord=coord[index_in,], fixed_vars=NULL,kernels=NULL,H=11, Model = 'SAR',control=list(W=W_in))
model_SAR_insample$RMSE
model_SAR_insample#> [1] 0.1303956
summary_mgwrsar(model_SAR_insample)
#> Call:
#> MGWRSAR(formula = "Y_sar~X1+X2+X3", data = mydata[index_in, ],
#> coord = coord[index_in, ], fixed_vars = NULL, kernels = NULL,
#> H = 11, Model = "SAR", control = list(W = W_in))
#> Model: SAR
#> Method for spatial autocorrelation: 2SLS
#> Kernels function:
#> Kernels adaptive: NO
#> Kernels type: GD
#> Bandwidth: 11
#> Computation time: 0.002
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel:
#> Use of Target Points: NO
#> Number of data points: 800
#> Constant parameters: Intercept X1 X2 X3 lambda
#> 0.1515706 0.5040549 1.153973 -1.012715 0.3147484
#> Effective degrees of freedom: 795
#> AIC
#> Residual sum of squares: 13.6024
#> RMSE: 0.1303956
## without Best Linear Unbiased Predictor
# prediction YTC
=predict_mgwrsar(model_SAR_insample, newdata=newdata, newdata_coord=newdata_coord,W=W_in_out,type='YTC')
Y_predhead(Y_pred)
#> [1] 2.027912 3.731384 5.910859 2.949735 4.971490 5.277246
=sqrt(mean((mydata$Y_sar[index_out]-Y_pred)^2))
RMSE_YTC
RMSE_YTC#> [1] 0.08441952
## Using Best Linear Unbiased Predictor
=predict_mgwrsar(model_SAR_insample, newdata=newdata, newdata_coord=newdata_coord,W=W_in_out,type='BPN')
Y_predhead(Y_pred)
#> [1] 2.015560 3.680499 5.923186 2.923057 4.940062 5.313581
=sqrt(mean((mydata$Y_sar[index_out]-Y_pred)^2))
RMSE_BPN
RMSE_BPN#> [1] 0.06039921
#### MGWRSAR_1_0_kv
<-MGWRSAR(formula = 'Y_mgwrsar_1_0_kv~X1+X2+X3', data = mydata[index_in,],coord=coord[index_in,], fixed_vars=NULL,kernels=c('gauss'),H=11, Model = 'MGWRSAR_1_0_kv',control=list(W=W_in,adaptive=TRUE,isgcv=F))
model_MGWRSAR_1_0_kv_insample$RMSE
model_MGWRSAR_1_0_kv_insample#> [1] 0.708789
summary_mgwrsar(model_MGWRSAR_1_0_kv_insample)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_0_kv~X1+X2+X3", data = mydata[index_in,
#> ], coord = coord[index_in, ], fixed_vars = NULL, kernels = c("gauss"),
#> H = 11, Model = "MGWRSAR_1_0_kv", control = list(W = W_in,
#> adaptive = TRUE, isgcv = F))
#> Model: MGWRSAR_1_0_kv
#> Method for spatial autocorrelation: 2SLS
#> Kernels function: gauss
#> Kernels adaptive: YES
#> Kernels type: GD
#> Bandwidth: 11
#> Computation time: 0.223
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: NO
#> Use of Target Points: NO
#> Number of data points: 800
#> Varying parameters: Intercept X1 X2 X3
#> Intercept X1 X2 X3 lambda
#> Min. -12.479607 0.077377 -1.241909 -1.984773 -0.4086
#> 1st Qu. -1.793509 0.356913 0.973790 -1.258653 0.1056
#> Median -0.874141 0.547879 1.882220 -1.014936 0.2745
#> Mean -1.075548 0.515188 2.245951 -0.997859 0.2801
#> 3rd Qu. -0.145199 0.658945 3.482105 -0.688769 0.4424
#> Max. 2.143243 1.040762 12.859764 -0.202572 0.8068
#> Residual sum of squares: 401.9055
#> RMSE: 0.708789
# prediction with method_pred='tWtp'
=predict_mgwrsar(model_MGWRSAR_1_0_kv_insample, newdata=newdata, newdata_coord=newdata_coord,W=W_in_out,type='BPN',method_pred='tWtp')
Y_predhead(Y_pred)
#> [1] 0.5246521 3.4805124 5.6597370 2.6974158 6.5664039 4.4513093
=sqrt(mean((mydata$Y_mgwrsar_1_0_kv[index_out]-Y_pred)^2))
RMSE_tWtp_BPN
RMSE_tWtp_BPN#> [1] 0.5200496
## Using Best Linear Unbiased Predictor with method_pred='model'
=predict_mgwrsar(model_MGWRSAR_1_0_kv_insample, newdata=newdata, newdata_coord=newdata_coord,W=W,type='BPN',method_pred='model' )
Y_predhead(Y_pred)
#> [1] 0.9431835 2.4151135 5.1652828 2.1911860 6.2417757 4.9202018
=sqrt(mean((mydata$Y_mgwrsar_1_0_kv[index_out]-Y_pred)^2))
RMSE_model_BPN
RMSE_model_BPN#> [1] 0.9717156
## Using Best Linear Unbiased Predictor with method_pred='sheppard'
=kernel_matW(H=4,kernels='rectangle',coord_i=coord[index_out,],NN=4,adaptive=TRUE,diagnull=TRUE,rowNorm=T)
W_out=predict_mgwrsar(model_MGWRSAR_1_0_kv_insample, newdata=newdata, newdata_coord=newdata_coord,W=W,type='BPN',method_pred='sheppard',k_extra=8)
Y_predhead(Y_pred)
#> [1] 1.318562 2.380605 5.191412 2.228458 6.211813 4.734835
=sqrt(mean((mydata$Y_mgwrsar_1_0_kv[index_out]-Y_pred)^2))
RMSE_sheppard_BPN
RMSE_sheppard_BPN#> [1] 0.9695924
Prediction of MGWRSAR_1_kc_kv model using sheppard kernel with 4 neighbors and Best Linear Unbiased Predictor (BLUP) :
=800
length_out=sample(1:1000,length_out)
index_in=(1:1000)[-index_in]
index_out
### Global Spatial Weight matrix W should be ordered by in_ sample (S) then out_sample
=kernel_matW(H=4,kernels='rectangle',coord_i=rbind(coord[index_in,],coord[index_out,]),NN=4,adaptive=TRUE,diagnull=TRUE,rowNorm=T)
W
=W[index_in,index_in]
W_in=mgwrsar::normW(W_in)
W_in
<-MGWRSAR(formula = 'Y_mgwrsar_1_kc_kv~X1+X2+X3', data = mydata[index_in,],coord=coord[index_in,], fixed_vars='X3',kernels=c('gauss'),H=11, Model = 'MGWRSAR_1_kc_kv',control=list(W=W_in,adaptive=TRUE,isgcv=F))
model_MGWRSAR_1_kc_kv_insample$RMSE
model_MGWRSAR_1_kc_kv_insample#> [1] 0.3502106
summary_mgwrsar(model_MGWRSAR_1_kc_kv_insample)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_1_kc_kv~X1+X2+X3", data = mydata[index_in,
#> ], coord = coord[index_in, ], fixed_vars = "X3", kernels = c("gauss"),
#> H = 11, Model = "MGWRSAR_1_kc_kv", control = list(W = W_in,
#> adaptive = TRUE, isgcv = F))
#> Model: MGWRSAR_1_kc_kv
#> Method for spatial autocorrelation: 2SLS
#> Kernels function: gauss
#> Kernels adaptive: YES
#> Kernels type: GD
#> Bandwidth: 11
#> Computation time: 1.151
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: NO
#> Use of Target Points: NO
#> Number of data points: 800
#> Constant parameters: X3
#> -1.042863
#> Varying parameters: Intercept X1 X2
#> Intercept X1 X2 lambda
#> Min. -10.72384 0.08488 -0.77331 -0.2800
#> 1st Qu. -1.70402 0.35135 2.04683 -0.0410
#> Median -0.78341 0.53193 3.01252 -0.0047
#> Mean -0.99026 0.50042 3.42218 -0.0116
#> 3rd Qu. 0.06569 0.63682 5.18079 0.0268
#> Max. 1.62136 0.98688 12.43667 0.1318
#> Residual sum of squares: 98.11797
#> RMSE: 0.3502106
## without Best Linear Unbiased Predictor
=mydata[index_out,]
newdata=coord[index_out,]
newdata_coord$Y_mgwrsar_1_kc_kv=0
newdata
=predict_mgwrsar(model_MGWRSAR_1_kc_kv_insample, newdata=newdata, newdata_coord=newdata_coord,W=W,type='YTC')
Y_pred#> Warnings: method_pred=='TP' is not implemented for Model in ('MGWR','MGWRSAR_1_kc_0','MGWRSAR_1_kc_kv','MGWRSAR_0_kc_kv','MGWRSAR_0_0_kv'): automatic swith to method_pred='tWtp_model'
head(Y_pred)
#> [1] 0.6663112 3.6869125 5.8678272 2.6071807 7.9223965 4.2593302
=sqrt(mean((mydata$Y_mgwrsar_1_kc_kv[index_out]-Y_pred)^2))
RMSE_YTC
RMSE_YTC#> [1] 0.4522916
## Using Best Linear Unbiased Predictor
=predict_mgwrsar(model_MGWRSAR_1_kc_kv_insample, newdata=newdata, newdata_coord=newdata_coord,W=W,type='BPN')#,method_pred='sheppard')
Y_pred#> Warnings: method_pred=='TP' is not implemented for Model in ('MGWR','MGWRSAR_1_kc_0','MGWRSAR_1_kc_kv','MGWRSAR_0_kc_kv','MGWRSAR_0_0_kv'): automatic swith to method_pred='tWtp_model'
head(Y_pred)
#> [1] 0.667970 3.849695 5.867610 2.607104 7.938369 4.259173
=sqrt(mean((mydata$Y_mgwrsar_1_kc_kv[index_out]-Y_pred)^2))
RMSE_BPN
RMSE_BPN#> [1] 0.4636561
In the following example, we show how to find optimal bandwidth by hand.
######################
#### Finding bandwidth by hand
#####################
<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss'),H=0.03, Model = 'GWR',control=list(isgcv=TRUE,adaptive=FALSE))
model_GWRsummary_mgwrsar(model_GWR)
<-function(H){
myCV<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss'),H, Model = 'GWR',control=list(isgcv=TRUE,adaptive=FALSE))
model_GWRcat('... Try h=',H,' ')
=model_GWR$RMSE
CV
CV
}
=optimize(myCV,lower=0.01,upper=0.4)
resCV
resCV
## model with optimal bandwith with gaussian kernel
<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss'),H=resCV$minimum, Model = 'GWR',control=list(isgcv=F,adaptive=F))
model_GWRsummary_mgwrsar(model_GWR)
Call:
MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord,
fixed_vars = NULL, kernels = c("gauss"), H = 0.03, Model = "GWR",
control = list(isgcv = TRUE, adaptive = FALSE))
Model: GWR
Kernels function: gauss
Kernels adaptive: NO
Kernels type: GD
Bandwidth: 0.03
Computation time: 0.319
Use of parallel computing: FALSE ncore = 1
Use of rough kernel: NO
Use of Target Points: NO
Number of data points: 1000
Varying parameters: Intercept X1 X2 X3
Intercept X1 X2 X3
Min. -4.396980 0.016334 -0.745746 -2.0461
1st Qu. -1.483364 0.350953 1.087729 -1.2907
Median -0.827288 0.543547 1.637012 -1.0072
Mean -0.827471 0.499977 1.673425 -1.0023
3rd Qu. -0.211782 0.636800 2.446022 -0.7158
Max. 2.077957 0.950236 4.189594 0.0364
Residual sum of squares: 8.096586
RMSE: 0.08998103
... Try h= 0.1589667 ... Try h= 0.2510333 ... Try h= 0.1020665 ... Try h= 0.06690023 ... Try h= 0.04516628 ... Try h= 0.03173396 ... Try h= 0.01560834 ... Try h= 0.02557452 ... Try h= 0.03239781 ... Try h= 0.03102658 ... Try h= 0.02894408 ... Try h= 0.03069048 ... Try h= 0.0307406 ... Try h= 0.03078129 ... Try h= 0.0307406 $minimum
[1] 0.0307406
$objective
[1] 0.0899334
Call:
MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord,
fixed_vars = NULL, kernels = c("gauss"), H = resCV$minimum,
Model = "GWR", control = list(isgcv = F, adaptive = F))
Model: GWR
Kernels function: gauss
Kernels adaptive: NO
Kernels type: GD
Bandwidth: 0.0307406
Computation time: 0.316
Use of parallel computing: FALSE ncore = 1
Use of rough kernel: NO
Use of Target Points: NO
Number of data points: 1000
Varying parameters: Intercept X1 X2 X3
Intercept X1 X2 X3
Min. -3.775736 0.017713 -0.751288 -2.0367
1st Qu. -1.490655 0.351921 1.104662 -1.2919
Median -0.830822 0.541941 1.629693 -1.0085
Mean -0.831229 0.500069 1.675354 -1.0013
3rd Qu. -0.235924 0.637217 2.450903 -0.7251
Max. 2.079699 0.941899 3.938823 0.0426
Residual sum of squares: 1.900295
RMSE: 0.04359237
mgwrsar package provides a wrapper that allows to find optimal bandwidth for a selection of model and kernel types. It is designed only for spatial kernel (type=‘GD’). It stores results for all models and kernels using an optimal bandwidth (leave-one out CV criteria) and provides both results with and without leave-one-out sampling. It also allows to find an optimal spatial weight matrix for a given kernel when the model includes spatial dependence.
Example of automatic search of optimal bandwidth for GWR and MGWR (stationnary intercept) models using either an adaptive gaussian kernel or adapative bisquare kernel or gaussian kernel, with at least 8 neigbhors for adaptive kernel and at least a bandwidth of 0.03 for gaussian kernel:
<-bandwidths_mgwrsar(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept','X1'),Models=c('GWR','MGWR'),Kernels=c('bisq','gauss'),control=list(NN=300,adaptive=TRUE),control_search=list())
mytab
names(mytab)
names(mytab[['GWR_bisq_adaptive']])
'GWR_bisq_adaptive']]$config_model
mytab[['GWR_bisq_adaptive']]$CV
mytab[[summary(mytab[['GWR_bisq_adaptive']]$model$Betav)
=mytab[['GWR_gauss_adaptive']]$model mybestmodel
##### GWR #####
##### bisq adaptive= TRUE #####
########
Search bandwidth stage
########
....kernel = bisq adaptive objective = 0.098356 minimum = 21
##### gauss adaptive= TRUE #####
########
Search bandwidth stage
########
....kernel = gauss adaptive objective = 0.09718914 minimum = 5
##### MGWR #####
##### bisq adaptive= TRUE #####
########
Search bandwidth stage
########
.....
...try larger suppport
........
Border solution !!! Try to increase NNkernel = bisq adaptive objective = 0.4496839 minimum = 94
##### gauss adaptive= TRUE #####
########
Search bandwidth stage
########
....kernel = gauss adaptive objective = 0.4523508 minimum = 16
[1] "GWR_bisq_adaptive" "GWR_gauss_adaptive"
[3] "MGWR_bisq_adaptive" "MGWR_gauss_adaptive"
[1] "config_model" "CV" "SSR" "model"
$Model
[1] "GWR"
$kernels
[1] "bisq"
$adaptive
[1] TRUE
$H
[1] 21
$kernel_w
[1] "rectangle"
$h_w
NULL
[1] 0.098356
Intercept X1 X2
Min. :-3.6889 Min. :0.0165 Min. :-0.8843
1st Qu.:-1.4614 1st Qu.:0.3526 1st Qu.: 1.0836
Median :-0.8098 Median :0.5425 Median : 1.6525
Mean :-0.8239 Mean :0.4992 Mean : 1.6717
3rd Qu.:-0.2008 3rd Qu.:0.6358 3rd Qu.: 2.4624
Max. : 2.1706 Max. :0.9191 Max. : 3.7134
X3
Min. :-2.12121
1st Qu.:-1.29786
Median :-1.00958
Mean :-1.00068
3rd Qu.:-0.71484
Max. :-0.03991
Example of automatic search of optimal bandwidth for MGWRSAR_0_kc_kv (stationnary intercept) model using an adaptive gaussian kernel + Automatic search of optimal spatial weight matrix for SAR part of the model using either a bisquare or adpative bisquare kernel, not run.
<-bandwidths_mgwrsar(formula='Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,Models=c('MGWRSAR_0_0_kv'),Kernels_candidates=c('bisq'),control=list(adaptive=TRUE,NN=500),control_search=list(search_W=TRUE,kernel_w='rectangle',adaptive_W=TRUE))
mytab2
names(mytab2)
'MGWRSAR_0_0_kv_bisq_adaptive']]$config_model
mytab2[['MGWRSAR_0_0_kv_bisq_adaptive']]$CV
mytab2[['MGWRSAR_0_0_kv_bisq_adaptive']]$model$Betac
mytab2[[summary(mytab2[['MGWRSAR_0_0_kv_bisq_adaptive']]$model$Betav)
##### MGWRSAR_0_0_kv #####
##### bisq adaptive= TRUE #####
########
Search W stage
########
.........
.....
W : kernel = rectangle adaptive : objective = 0.226692 minimum = 16
########
Search bandwidth stage
########
.....kernel = bisq adaptive objective = 0.1016582 minimum = 22
[1] "MGWRSAR_0_0_kv_bisq_adaptive"
$Model
[1] "MGWRSAR_0_0_kv"
$kernels
[1] "bisq"
$adaptive
[1] TRUE
$H
[1] 22
$kernel_w
[1] "rectangle"
$h_w
NULL
[1] 0.1016582
lambda
0.0419462
Intercept X1 X2
Min. :-3.7090 Min. :0.02378 Min. :-0.9346
1st Qu.:-1.5217 1st Qu.:0.35387 1st Qu.: 1.0550
Median :-0.8644 Median :0.54467 Median : 1.5860
Mean :-0.8622 Mean :0.50028 Mean : 1.6214
3rd Qu.:-0.2208 3rd Qu.:0.63670 3rd Qu.: 2.3904
Max. : 2.1615 Max. :0.91634 Max. : 3.6352
X3
Min. :-2.12126
1st Qu.:-1.29790
Median :-1.01142
Mean :-1.00342
3rd Qu.:-0.71371
Max. :-0.05576
The package provides additional functions that allow to estimate locally linear model with other dimensions than space. Using the control variable ‘type’, it is possible to add time in the kernel and a limited set of other variables (continuous or categorical). If several dimensions are involved in the kernel, a general product kernel is used and you need to provide a list of bandwidths and a list of kernel types. For time kernel, it uses asymetric kernel, eventually with a decay. For categorical variable, it uses the propositions of Li and Racine (2010); see also np package. Optimization of bandwidths has to be done by yourself.
Note that when time or other additional variables are used for kernels, then two small bandwidths could lead to empty local subsamples. We recommend to use gauss and gauss_adapt kernels that suffering less this issue.
## space + time kernel
=sort(rep(1:500,2))
mytime_index1:150]
mytime_index[#> [1] 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 10 10 11 11 12 12 13
#> [26] 13 14 14 15 15 16 16 17 17 18 18 19 19 20 20 21 21 22 22 23 23 24 24 25 25
#> [51] 26 26 27 27 28 28 29 29 30 30 31 31 32 32 33 33 34 34 35 35 36 36 37 37 38
#> [76] 38 39 39 40 40 41 41 42 42 43 43 44 44 45 45 46 46 47 47 48 48 49 49 50 50
#> [101] 51 51 52 52 53 53 54 54 55 55 56 56 57 57 58 58 59 59 60 60 61 61 62 62 63
#> [126] 63 64 64 65 65 66 66 67 67 68 68 69 69 70 70 71 71 72 72 73 73 74 74 75 75
=kernel_matW(H=8,kernels='rectangle',coord_i=coord,NN=10,adaptive=TRUE,diagnull=TRUE,rowNorm=T)
W
<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X2+X3', data = mydata,coord=coord, fixed_vars=c('Intercept'),kernels=c('gauss','gauss'),H=c(50,50), Model = 'MGWRSAR_0_kc_kv',control=list(Z=mytime_index,W=W,adaptive=c(TRUE,TRUE),Type='GDT'))
model_MGWRSART_0_kc_kv
summary_mgwrsar(model_MGWRSART_0_kc_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_0_kc_kv~X1+X2+X3", data = mydata,
#> coord = coord, fixed_vars = c("Intercept"), kernels = c("gauss",
#> "gauss"), H = c(50, 50), Model = "MGWRSAR_0_kc_kv", control = list(Z = mytime_index,
#> W = W, adaptive = c(TRUE, TRUE), Type = "GDT"))
#> Model: MGWRSAR_0_kc_kv
#> Method for spatial autocorrelation: 2SLS
#> Kernels function: gauss gauss
#> Kernels adaptive: YES YES
#> Kernels type: GDT
#> Bandwidth: 50 50
#> Computation time: 0.529
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: NO NO
#> Use of Target Points: NO
#> Number of data points: 1000
#> Constant parameters: Intercept lambda
#> -0.4460581 0.2657839
#> Varying parameters: X1 X2 X3
#> X1 X2 X3
#> Min. 0.0034395 -0.8162181 -2.6362
#> 1st Qu. 0.3715499 1.4244360 -1.3558
#> Median 0.4585512 2.1506881 -1.1208
#> Mean 0.4571082 2.0970701 -1.1146
#> 3rd Qu. 0.5556836 2.8302374 -0.8747
#> Max. 0.9022406 5.0097027 0.4869
#> Residual sum of squares: 107.8133
#> RMSE: 0.3283494
### space + continuous variable kernel
<-MGWRSAR(formula = 'Y_gwr~X1+X2+X3', data = mydata,coord=coord, fixed_vars=NULL,kernels=c('gauss','gauss'),H=c(120,120), Model = 'GWR',control=list(Z=mydata$X2,Type='GDX',adaptive=c(T,T)))
model_GWRXsummary_mgwrsar(model_GWRX)
#> Call:
#> MGWRSAR(formula = "Y_gwr~X1+X2+X3", data = mydata, coord = coord,
#> fixed_vars = NULL, kernels = c("gauss", "gauss"), H = c(120,
#> 120), Model = "GWR", control = list(Z = mydata$X2, Type = "GDX",
#> adaptive = c(T, T)))
#> Model: GWR
#> Kernels function: gauss gauss
#> Kernels adaptive: YES YES
#> Kernels type: GDX
#> Bandwidth: 120 120
#> Computation time: 0.505
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: NO NO
#> Use of Target Points: NO
#> Number of data points: 1000
#> Varying parameters: Intercept X1 X2 X3
#> Intercept X1 X2 X3
#> Min. -6.45292 0.15257 -0.84387 -1.8089
#> 1st Qu. -2.30342 0.32481 1.00002 -1.2329
#> Median -1.13830 0.45537 1.91297 -0.9922
#> Mean -1.30274 0.46953 2.06651 -1.0200
#> 3rd Qu. -0.03019 0.62710 3.05773 -0.7868
#> Max. 1.54070 0.73745 6.44482 -0.2804
#> Residual sum of squares: 202.5684
#> RMSE: 0.4500759
### space + categorical kernel (Li and Racine 2010)
=1+as.numeric(mydata$X2>quantile(mydata$X2,0.9))*2+as.numeric(mydata$X2<=quantile(mydata$X2,0.1))
Ztable(Z)
#> Z
#> 1 2 3
#> 800 100 100
<-MGWRSAR(formula = 'Y_mgwrsar_0_kc_kv~X1+X3', data = mydata,coord=coord, fixed_vars=c('Intercept'),kernels=c('gauss','li','li','li'),H=c(120,0.1,0.9,0.9), Model = 'MGWRSAR_0_kc_kv',control=list(Z=Z,W=W,Type='GDC',adaptive=c(T,F,F,F)))
model_MGWRSARC_0_kc_kvsummary_mgwrsar(model_MGWRSARC_0_kc_kv)
#> Call:
#> MGWRSAR(formula = "Y_mgwrsar_0_kc_kv~X1+X3", data = mydata, coord = coord,
#> fixed_vars = c("Intercept"), kernels = c("gauss", "li", "li",
#> "li"), H = c(120, 0.1, 0.9, 0.9), Model = "MGWRSAR_0_kc_kv",
#> control = list(Z = Z, W = W, Type = "GDC", adaptive = c(T,
#> F, F, F)))
#> Model: MGWRSAR_0_kc_kv
#> Method for spatial autocorrelation: 2SLS
#> Kernels function: gauss li li li
#> Kernels adaptive: YES NO NO NO
#> Kernels type: GDC
#> Bandwidth: 120 0.1 0.9 0.9
#> Computation time: 0.461
#> Use of parallel computing: FALSE ncore = 1
#> Use of rough kernel: NO NO NO NO
#> Use of Target Points: NO
#> Number of data points: 1000
#> Constant parameters: Intercept lambda
#> 1.149469 0.3528613
#> Varying parameters: X1 X3
#> X1 X3
#> Min. 0.13802 -1.3391
#> 1st Qu. 0.34306 -1.2704
#> Median 0.45967 -1.2161
#> Mean 0.43625 -1.1279
#> 3rd Qu. 0.53071 -1.0092
#> Max. 0.65289 -0.6043
#> Residual sum of squares: 1067.886
#> RMSE: 1.033386