Skip to content

Commit e707c2e

Browse files
author
Jon Schlipf
committed
Taylor done
1 parent c91fd54 commit e707c2e

File tree

3 files changed

+44
-71
lines changed

3 files changed

+44
-71
lines changed

README.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,13 @@ The CUDA implementation is heavily inspired by [this fork](https://github.com/al
143143
A major exception is the solution of the non-hermitian Eigenvalue problems, which is not implemented in CUDA. Here, a fallback CPU routine (`LinearAlgebra.eigen`) is used.
144144
For a truncation order of `N=10`, GPU acceleration achieves a speedup of a factor of 5 in a system with dual AMD EPYC 7713 64-Core Processor and NVIDIA H100 accelerator.
145145

146+
## Eigenvalue-free algorithm
147+
148+
Xu and Charlton (references below) have introduced an alternative way, computing the matrix exponential by a Taylor series instead of an Eigentransform. This allows for efficient parallelism and thus improves performance drastically, about two orders of magnitude compared to ETM using high-end GPU. Only reflection and transmission are implemented so far, accessed via:
149+
150+
```julia
151+
E,H=taylor_reftra(mdl,grd,xypoints,zpoints,λ,ste) #compute the electric and magnetic field
152+
```
146153

147154
## Mathematics
148155

@@ -163,3 +170,5 @@ Marco Liscidini, Dario Gerace, Lucio Claudio Andreani, and J. E. Sipe, "Scatteri
163170
M. G. Moharam and T. K. Gaylord, "Rigorous coupled-wave analysis of planar-grating diffraction," J. Opt. Soc. Am. 71, 811-818 (1981)
164171

165172
Raymond Rumpf, "Improved formulation of scattering matrices for semi-analytical methods thatis consistent with convention," Progress In Electromagnetics Research B35, 241–261 (2011)
173+
174+
J. Xu and M. D. B. Charlton, "Highly Efficient Rigorous Coupled-Wave Analysis Implementation Without Eigensystem Calculation," IEEE Access 12, 127966-127975 (2024)

src/Taylor/Taylor.jl

Lines changed: 12 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,18 @@
1-
using LinearAlgebra
21
using LinearAlgebra,CUDA
32
using ..Common
43

5-
export squarescale_reftra
6-
taylorx(dnx,dny,Kx,Ky,λ,l::SimpleLayer)=eigenmodes(dnx,dny,Kx,Ky,λ,l).X
4+
function taylorx(dnx,dny,Kx,Ky,λ,l::SimpleLayer)
5+
#Xa=(eigenmodes(dnx,dny,Kx,Ky,λ,l).X
6+
#return [Xa 0Xa;0Xa Xa]
7+
IMa=Diagonal(Kx*0 .+1)
8+
k0=2π/λ
9+
za=0*IMa
10+
ε=get_permittivity(l.material,λ)
11+
m1=sqrt.(Complex.(Kx.*Kx+Ky.*Ky-ε*I))
12+
m2=exp.(m1*k0*l.thickness)
13+
lm=[m2 za za za; za m2 za za;za za m2 za;za za za m2]
14+
return lm
15+
end
716
function taylorx(dnx,dny,Kx,Ky,λ,l::AnisotropicLayer)
817
# probably doable without taylor as well?
918
IMa=Diagonal(Kx*0 .+1)
@@ -156,49 +165,6 @@ function taylorx(dnx,dny,Kx,Ky,λ,l::PatternedLayer)
156165
X=B2+(B3+A9)*A9
157166
return X^(2^m)
158167
end
159-
function squarescalex(dnx,dny,Kx,Ky,λ,l::PatternedLayer)
160-
IMa=Diagonal(Kx*0 .+1)
161-
IM=[IMa 0*IMa;0*IMa IMa]
162-
k0=2π/real(λ)
163-
#get the base permittivity
164-
εxx=get_permittivity(l.materials[1],λ,1)*I
165-
#add the permittivity for all inclusions
166-
if minimum([typeof(m)<:Common.Isotropic for m in l.materials])
167-
#all isotropic
168-
εxx=get_permittivity(l.materials[1],λ)*I
169-
for ct=1:length(l.geometries)
170-
rec=reciprocal(l.geometries[ct],dnx,dny)
171-
εxx+=rec*(get_permittivity(l.materials[ct+1],λ)-get_permittivity(l.materials[ct],λ))
172-
end
173-
εzz=εyy=εxx
174-
εxy=εyx=0I
175-
else
176-
#anisotropic
177-
εxx=get_permittivity(l.materials[1],λ,1)*I
178-
εxy=get_permittivity(l.materials[1],λ,2)*I
179-
εyx=get_permittivity(l.materials[1],λ,3)*I
180-
εyy=get_permittivity(l.materials[1],λ,4)*I
181-
εzz=get_permittivity(l.materials[1],λ,5)*I
182-
for ct=1:length(l.geometries)
183-
rec=reciprocal(l.geometries[ct],dnx,dny)
184-
εxx+=rec*(get_permittivity(l.materials[ct+1],λ,1)-get_permittivity(l.materials[ct],λ,1))
185-
εxy+=rec*(get_permittivity(l.materials[ct+1],λ,2)-get_permittivity(l.materials[ct],λ,2))
186-
εyx+=rec*(get_permittivity(l.materials[ct+1],λ,3)-get_permittivity(l.materials[ct],λ,3))
187-
εyy+=rec*(get_permittivity(l.materials[ct+1],λ,4)-get_permittivity(l.materials[ct],λ,4))
188-
εzz+=rec*(get_permittivity(l.materials[ct+1],λ,5)-get_permittivity(l.materials[ct],λ,5))
189-
end
190-
end
191-
η=inv(εzz)
192-
P=[Kx*η*Ky I-Kx*η*Kx;Ky*η*Ky-I -Ky*η*Kx]
193-
Q=[Kx*Ky+εyx εyy-Kx*Kx;Ky*Ky-εxx -εxy-Ky*Kx]
194-
A0=[0IM P;Q 0IM]*k0*l.thickness
195-
nrm=maximum(sum(abs.(A0),dims=1))
196-
m=Int(ceil(log2(nrm)))
197-
m=0
198-
A=A0*2.0^-m
199-
X=exp(A)
200-
return X^(2^m)
201-
end
202168
function taylor_reftra(ψin,m::RCWAModel,grd::RCWAGrid,λ)
203169
IMa=Diagonal(grd.Kx*0 .+1)
204170
IM=[IMa 0*IMa;0*IMa IMa]
@@ -221,25 +187,3 @@ function taylor_reftra(ψin,m::RCWAModel,grd::RCWAGrid,λ)
221187
return R,T
222188

223189
end
224-
function squarescale_reftra(ψin,m::RCWAModel,grd::RCWAGrid,λ)
225-
IMa=Diagonal(grd.Kx*0 .+1)
226-
IM=[IMa 0*IMa;0*IMa IMa]
227-
IMb=[IM 0*IM;0*IM IM]
228-
X=[squarescalex(grd.dnx,grd.dny,grd.Kx,grd.Ky,λ,l) for l in m.layers]
229-
Xp=IMb
230-
for X in X
231-
Xp*=X
232-
end
233-
ref=halfspace(grd.Kx,grd.Ky,m.εsup,λ) #superstrate and substrate
234-
tra=halfspace(grd.Kx,grd.Ky,m.εsub,λ)
235-
Y=Xp*[IM;-tra.V]
236-
S=[Y [-IM;-ref.V]]\[IM;-ref.V]*ψin
237-
to,ro=slicehalf(S)
238-
239-
kzin=grd.k0[3]#*real(sqrt(get_permittivity(m.εsup,λ)))
240-
R=a2p(0ro,ro,ref.V,IM,kzin) #compute amplitudes to powers
241-
T=-a2p(to,0to,tra.V,IM,kzin)
242-
243-
return R,T
244-
245-
end

test/runtests.jl

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ n1=1+10rand()
5959
θ=88rand()+1
6060
α=360rand()
6161
grd=rcwagrid(0,0,100rand(),100rand(),θ,α,λ,ConstantPerm(n1^2))
62-
ste,stm=rcwasource(grd,real(n1))
62+
ste,stm=rcwasource(grd,(n1))
6363
ref=RigorousCoupledWaveAnalysis.halfspace(grd.Kx,grd.Ky,ConstantPerm(n1^2),λ)
6464
P1=-RigorousCoupledWaveAnalysis.a2p(ste,0ste,ref.V,I,grd.k0[3])
6565
P2=-RigorousCoupledWaveAnalysis.a2p(stm,0ste,ref.V,I,grd.k0[3])
@@ -75,7 +75,7 @@ eps4=10rand()+10im*rand()
7575
λ=1000rand()
7676
mdl=RCWAModel([PatternedLayer(100rand(),[ConstantPerm(eps2),ConstantPerm(eps3)],[Circle(rand())])],ConstantPerm(eps1),ConstantPerm(eps4))
7777
grd=rcwagrid(1,1,100rand(),100rand(),88rand()+.1,360rand(),λ,ConstantPerm(eps1))
78-
ste,stm=rcwasource(grd,real(eps1))
78+
ste,stm=rcwasource(grd,(eps1))
7979
Rte1,Tte1=etm_reftra(ste,mdl,grd,λ)
8080
Rtm1,Ttm1=etm_reftra(stm,mdl,grd,λ)
8181
Rte2,Tte2=srcwa_reftra(ste,mdl,grd,λ)
@@ -95,7 +95,7 @@ eps4=10rand()
9595
λ=1000rand()
9696
mdl=RCWAModel([PatternedLayer(100rand(),[ConstantPerm(eps2),ConstantPerm(eps3)],[Circle(rand())])],ConstantPerm(eps1),ConstantPerm(eps4))
9797
grd=rcwagrid(1,1,100rand(),100rand(),88rand()+.1,360rand(),λ,ConstantPerm(eps1))
98-
ste,stm=rcwasource(grd,real(eps1))
98+
ste,stm=rcwasource(grd,(eps1))
9999
Rte1,Tte1=etm_reftra(ste,mdl,grd,λ)
100100
Rtm1,Ttm1=etm_reftra(stm,mdl,grd,λ)
101101
Rte2,Tte2=srcwa_reftra(ste,mdl,grd,λ)
@@ -111,4 +111,24 @@ Rtm2,Ttm2=srcwa_reftra(stm,mdl,grd,λ)
111111
end
112112

113113

114+
@testset "RT_TaylorVSETM_simple" begin
115+
eps1=10rand()+10im*rand()
116+
eps2=10rand()+10im*rand()
117+
eps3=10rand()+10im*rand()
118+
eps4=10rand()+10im*rand()
119+
λ=1000rand()
120+
px=100rand()
121+
py=100rand()
122+
mdl=RCWAModel([PatternedLayer(minimum([px,py])*rand(),[ConstantPerm(eps2),ConstantPerm(eps3)],[Circle(rand())])],ConstantPerm(eps1),ConstantPerm(eps4)) # thickness > pitch for sufficiently accurate Taylor
123+
grd=rcwagrid(1,1,px,py,88rand()+.1,360rand(),λ,ConstantPerm(eps1))
124+
ste,stm=rcwasource(grd,(eps1))
125+
Rte1,Tte1=etm_reftra(ste,mdl,grd,λ)
126+
Rtm1,Ttm1=etm_reftra(stm,mdl,grd,λ)
127+
Rte2,Tte2=taylor_reftra(ste,mdl,grd,λ)
128+
Rtm2,Ttm2=taylor_reftra(stm,mdl,grd,λ)
129+
@test isapprox(Rte1,Rte2,atol=1E-5)
130+
@test isapprox(Rtm1,Rtm2,atol=1E-5)
131+
@test isapprox(Tte1,Tte2,atol=1E-5)
132+
@test isapprox(Ttm1,Ttm2,atol=1E-5)
133+
end
114134

0 commit comments

Comments
 (0)