This small patch series introduces a generic iommu_num_pages function into the IOMMU helper code. This function can be used to calculate the number of pages an IOMMU has to map for a specific mapping request. The patchset converts the x86 IOMMU implementations for AMD IOMMU and the GART to use this function. Other IOMMU implementations are not changed because I can't test them. diffstat: arch/x86/kernel/amd_iommu.c | 13 +++++-------- arch/x86/kernel/pci-gart_64.c | 11 ++++------- include/linux/iommu-helper.h | 1 + lib/iommu-helper.c | 8 ++++++++ 4 files changed, 18 insertions(+), 15 deletions(-) --
This patch removes the to_pages macro from AMD IOMMU code and calls the generic
iommu_num_pages function instead.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kernel/amd_iommu.c | 13 +++++--------
1 files changed, 5 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 7469740..22d7d05 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -29,9 +29,6 @@
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
-#define to_pages(addr, size) \
- (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
-
#define EXIT_LOOP_COUNT 10000000
static DEFINE_RWLOCK(amd_iommu_devtable_lock);
@@ -185,7 +182,7 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid,
u64 address, size_t size)
{
int s = 0;
- unsigned pages = to_pages(address, size);
+ unsigned pages = iommu_num_pages(address, size);
address &= PAGE_MASK;
@@ -557,8 +554,8 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
if (iommu->exclusion_start &&
iommu->exclusion_start < dma_dom->aperture_size) {
unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT;
- int pages = to_pages(iommu->exclusion_start,
- iommu->exclusion_length);
+ int pages = iommu_num_pages(iommu->exclusion_start,
+ iommu->exclusion_length);
dma_ops_reserve_addresses(dma_dom, startpage, pages);
}
@@ -767,7 +764,7 @@ static dma_addr_t __map_single(struct device *dev,
unsigned int pages;
int i;
- pages = to_pages(paddr, size);
+ pages = iommu_num_pages(paddr, size);
paddr &= PAGE_MASK;
address = dma_ops_alloc_addresses(dev, dma_dom, pages);
@@ -802,7 +799,7 @@ static void __unmap_single(struct amd_iommu *iommu,
if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size))
return;
- pages = to_pages(dma_addr, size);
+ pages = iommu_num_pages(dma_addr, size);
dma_addr &= PAGE_MASK;
start = ...Calculating the number of pages from given address and length numbers is a task
required in multiple IOMMU implementations. So implement this as a generic
function into the IOMMU helper code.
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
include/linux/iommu-helper.h | 1 +
lib/iommu-helper.c | 8 ++++++++
2 files changed, 9 insertions(+), 0 deletions(-)
diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h
index c975caf..f8598f5 100644
--- a/include/linux/iommu-helper.h
+++ b/include/linux/iommu-helper.h
@@ -8,3 +8,4 @@ extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
unsigned long align_mask);
extern void iommu_area_free(unsigned long *map, unsigned long start,
unsigned int nr);
+extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len);
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index a3b8d4c..889ddce 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -80,3 +80,11 @@ void iommu_area_free(unsigned long *map, unsigned long start, unsigned int nr)
}
}
EXPORT_SYMBOL(iommu_area_free);
+
+unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
+{
+ unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
+
+ return size >> PAGE_SHIFT;
+}
+EXPORT_SYMBOL(iommu_num_pages);
--
1.5.3.7
--
On Fri, 25 Jul 2008 14:57:58 +0200 This doesn't work since PAGE_SIZE != IOMMU_PAGE_SIZE on the majority of architectures. --
A quick grep in include/ tells me that only powerpc is declaring
IOMMU_PAGE_SIZE by now.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
--
On Fri, 25 Jul 2008 15:21:21 +0200 Ah, I just meant the I/O size of IOMMUs. This macro doesn't work for IA64, POWER, and SPARC. --
Is there any other architecture independent way to find out the page
size the IOMMU is using? If yes I can rework that patchset. If not, I
suggest that we (you?) introduce one in a follow-on patch series and fix
the iommu_num_pages function then.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
--
On Fri, 25 Jul 2008 15:41:59 +0200 That's fine by me. Or I could try to find a solution and submit new patchset with your AMD and GART patches. BTW, I prefer to push this patchset via -mm rather than x86 since after all we need to clean up all the IOMMUs. --
Thats also possible. But I prefer to fix that incrementally and merge
Push it through -mm would make sense since it touches generic code in
lib/. On the other side in x86 we would avoid merge conflicts with
updates in AMD IOMMU and GART code.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
--
On Fri, 25 Jul 2008 16:14:25 +0200 Ok, I have no preference. It's not urgent and I can work on this later on. I leave it to you and x86 maintainers. Thanks, --
ok, i'll apply the 3 patches to tip/x86/iommu. As usual, feel free to send IOMMU cleanups/generalizations/fixes against tip/master: http://people.redhat.com/mingo/tip.git/README Ingo --
Hi Ingo, So, I assume a patch created on July 25 is not destined for 2.6.27, right? Especially since it breaks another architecture build and has only made it to linux-next on July 29 ... and needs more work given the comments already on this patch. --=20 Cheers, Stephen Rothwell sfr@canb.auug.org.au http://www.canb.auug.org.au/~sfr/
On Tue, 29 Jul 2008 11:07:49 +1000 Hmm, seems that they will go into 2.6.27: It doesn't break another architecture build since only x86 IOMMUs use the helper function. But as I pointed out, it doesn't work for some architectures. --
Hi,=20 On Tue, 29 Jul 2008 10:46:31 +0900 FUJITA Tomonori <fujita.tomonori@lab.ntt= Um, did you see the linux-next build report I posted (possibly not): arch/powerpc/kernel/iommu.c:56: error: static declaration of 'iommu_num_pag= es' follows non-static declaration include/linux/iommu-helper.h:11: error: previous declaration of 'iommu_num_= pages' was here This is now a build of 2.6.27-rc1 for powerpc64 ppc64_defconfig ... So we have a patch first posted to lkml on July 25 first hits linux-next on July 29 and is no in Linus' tree on July 29. A grep would have found the problem. More care required ... --=20 Cheers, Stephen Rothwell sfr@canb.auug.org.au http://www.canb.auug.org.au/~sfr/
On Tue, 29 Jul 2008 13:56:52 +1000 Agreed... As I said before, this is not urgent at all so we didn't need to rush. A fix has already been posted? == From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Subject: [PATCH] move iommu_num_pages helper to x86 This IOMMU helper function doesn't work for some architectures: http://marc.info/?l=linux-kernel&m=121699304403202&w=2 It also breaks POWER and SPARC builds: http://marc.info/?l=linux-kernel&m=121730388001890&w=2 Currently, only x86 IOMMUs use this so let's move it to x86 for now. Reported-by: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> --- arch/x86/kernel/pci-dma.c | 8 ++++++++ include/asm-x86/iommu.h | 2 ++ include/linux/iommu-helper.h | 1 - lib/iommu-helper.c | 8 -------- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 8dbffb8..87d4d69 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -123,6 +123,14 @@ void __init pci_iommu_alloc(void) pci_swiotlb_init(); } + +unsigned long iommu_num_pages(unsigned long addr, unsigned long len) +{ + unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); + + return size >> PAGE_SHIFT; +} +EXPORT_SYMBOL(iommu_num_pages); #endif /* diff --git a/include/asm-x86/iommu.h b/include/asm-x86/iommu.h index ecc8061..5f888cc 100644 --- a/include/asm-x86/iommu.h +++ b/include/asm-x86/iommu.h @@ -7,6 +7,8 @@ extern struct dma_mapping_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; +extern unsigned long iommu_num_pages(unsigned long addr, unsigned long len); + #ifdef CONFIG_GART_IOMMU extern int gart_iommu_aperture; extern int gart_iommu_aperture_allowed; diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h index f8598f5..c975caf 100644 --- a/include/linux/iommu-helper.h +++ ...
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Please, if you are going to add such interface, add "const unsigned long io_page_size" argument so that powerpc and sparc can use it too. --
On Mon, 28 Jul 2008 22:26:18 -0700 (PDT) I didn't add this function: http://marc.info/?l=linux-kernel&m=121699080831750&w=2 I pointed out that it doesn't work for some architectures: http://marc.info/?l=linux-kernel&m=121699304403202&w=2 But sorry about not noticing that it breaks SPARC and POWER. --
From: Stephen Rothwell <sfr@canb.auug.org.au> It breaks sparc64 the same exact way. :-/ sparc64: Fix build after addition of iommu_num_pages() helper. It would have been much more pleasant if that new interface: 1) Took an "io_page_size" argument so powerpc and sparc64 could make use of it. 2) Was build tested on something other than x86. Signed-off-by: David S. Miller <davem@davemloft.net> diff --git a/arch/sparc64/kernel/iommu.c b/arch/sparc64/kernel/iommu.c index 2a37a6c..600f712 100644 --- a/arch/sparc64/kernel/iommu.c +++ b/arch/sparc64/kernel/iommu.c @@ -575,7 +575,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, } /* Allocate iommu entries for that segment */ paddr = (unsigned long) SG_ENT_PHYS_ADDRESS(s); - npages = iommu_num_pages(paddr, slen); + npages = sparc64_iommu_num_pages(paddr, slen); entry = iommu_range_alloc(dev, iommu, npages, &handle); /* Handle failure */ @@ -647,7 +647,7 @@ iommu_map_failed: iopte_t *base; vaddr = s->dma_address & IO_PAGE_MASK; - npages = iommu_num_pages(s->dma_address, s->dma_length); + npages = sparc64_iommu_num_pages(s->dma_address, s->dma_length); iommu_range_free(iommu, vaddr, npages); entry = (vaddr - iommu->page_table_map_base) @@ -715,7 +715,7 @@ static void dma_4u_unmap_sg(struct device *dev, struct scatterlist *sglist, if (!len) break; - npages = iommu_num_pages(dma_handle, len); + npages = sparc64_iommu_num_pages(dma_handle, len); iommu_range_free(iommu, dma_handle, npages); entry = ((dma_handle - iommu->page_table_map_base) diff --git a/arch/sparc64/kernel/iommu_common.h b/arch/sparc64/kernel/iommu_common.h index 53b19c8..e64152b 100644 --- a/arch/sparc64/kernel/iommu_common.h +++ b/arch/sparc64/kernel/iommu_common.h @@ -35,8 +35,8 @@ #define SG_ENT_PHYS_ADDRESS(SG) (__pa(sg_virt((SG)))) -static inline unsigned long iommu_num_pages(unsigned long vaddr, - unsigned long slen) +static inline ...
On Mon, 28 Jul 2008 22:24:39 -0700 (PDT) Yeah, it breaks both. It's not a proper helper function that all the architectures can use so how about making it x86 stuff? http://marc.info/?l=linux-kernel&m=121730644704448&w=2 --
Hi Dave, On Mon, 28 Jul 2008 22:24:39 -0700 (PDT) David Miller <davem@davemloft.net>= I added this to the end of linux-next for today. Hopefully we will have a fix in Linus' tree tomorrow. --=20 Cheers, Stephen Rothwell sfr@canb.auug.org.au http://www.canb.auug.org.au/~sfr/
Yes, my fault, sorry for that breakage :-/
I think moving that function to x86-only code is the best solution for
now. Before moving it back to generic code we need an architecture
independent way to find out the IOMMU page size.
FUJITA,
do you want to work on that or should I try to do this and send it to
you for review?
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
--
On Tue, 29 Jul 2008 11:50:27 +0200 I've already sent a patch that fixes the breakages in such way: http://marc.info/?l=linux-kernel&m=121730644704448&w=2 --
Yes I have seen this patch. I meant the patches that introduce something
like IOMMU_PAGE_SIZE for all architectures so that we can make this
function generic and move it back to lib/.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
--
That will not work in the light of some new stuff coming on powerpc in the future. The iommu page size may/will be dynamic per instance of iommu. IE, it will be mostly decided at runtime, I don't expect machines to have multiple iommus of different page sizes off hand but it -can- happen. Ben. --
On Thu, 31 Jul 2008 08:44:14 +1000 You could do something like this: #define IOMMU_PAGE_SIZE power_iommu_page_size But I prefer the current proposal to have the size as an argument. --
From: Joerg Roedel <joerg.roedel@amd.com> My suggestion (again) is to pass it in as a "const unsigned long io_page_size" argument and update the callers. --
Ok, this has the advantage that it supports more than one IOMMU page
size per architecture and its also less intrusive.
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
--
On Tue, 29 Jul 2008 03:10:05 -0700 (PDT) Either is fine by me. Joerg, if you like, please finish this (for all the architectures, please). As I said before, I planed to work on this cleanup after rc1 (and this should have been 2.6.28 stuff). But I'd appreciate if you would do. Thanks, --
i've queued up your quick fix in tip/x86/urgent to resolve the immediate -git build breakage. The wider patch can then be done on top of that, without being rushed. Any objections to this approach? Ingo --
From: Ingo Molnar <mingo@elte.hu> I do not object :-) --
On Tue, 29 Jul 2008 03:33:42 -0700 (PDT) David Miller <davem@davemloft.net>= Neither do I. :-) --=20 Cheers, Stephen Rothwell sfr@canb.auug.org.au http://www.canb.auug.org.au/~sfr/
Ok, after setting up cross compile environments for sparc64 and
powerpc64 I wrote a patch which adds the io_page_size argument to the
iommu_num pages function and moves the function back to
lib/iommu-helper.c. The patch touches all 3 architectures and can not be
split in a bisectable way.
Who is the best person to send this patch to?
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
--
On Wed, 30 Jul 2008 14:18:05 +0200 You need ia64, parisc, and alpha too. :) I have cross compile environments for them so I can take care of them How about naming a new helper function iommu_nr_pages (or something)? Then you can do the conversion independently. If we like, we use The -mm is an appropriate tree, I think. --
Ok, so you suggest to create a patchset to convert the other IOMMU
implementations too. Thats fine for me, I will try to do it :)
Do you have hardware to do runtime tests of the code? I have hardware
Ok, this would mean only 3 renaming patches to keep the patch series
Ok, then I submit it to Andrew once its tested.
Thanks,
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
--
Ok, I looked into the different IOMMU drivers for pa-risc and ia64. The
changes needed there are non-trival and I can't test them on real
hardware (I only have an pa-risc 1.1). So I think I skip these 2
architectures for now. They can be changed by somebody who has the
hardware :)
(Alpha instead was trival. I changed this driver)
Joerg
--
| AMD Saxony Limited Liability Company & Co. KG
Operating | Wilschdorfer Landstr. 101, 01109 Dresden, Germany
System | Register Court Dresden: HRA 4896
Research | General Partner authorized to represent:
Center | AMD Saxony LLC (Wilmington, Delaware, US)
| General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy
--
Correct, they were not intended for immediate upstream merging, they werent even in auto-x86-next so there was no meaningful linux-next exposure. I was surprised they went upstream via the PCI tree yesterday, just hours after i sent them over to Jesse and about an hour before -rc1 was released. [ Nor did intend the core/generic-dma-coherent bits to go upstream right before -rc1 - although those seem to be problem-free so far. ] I stopped sending non-urgent patches to Linus days ago, to let things calm down for a smooth -rc1 release. Ingo --
Yeah I had a few things that I wanted to get upstream before -rc1, so I was probably a bit hasty with these bits (I thought they had had more soak time). Sounds like things are all worked out now though... Thanks, Jesse --
This patch removes the to_pages macro from x86 GART code and calls the generic
iommu_num_pages function instead.
Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
arch/x86/kernel/pci-gart_64.c | 11 ++++-------
1 files changed, 4 insertions(+), 7 deletions(-)
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 1062dc1..31bc068 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -67,9 +67,6 @@ static u32 gart_unmapped_entry;
(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
-#define to_pages(addr, size) \
- (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
-
#define EMERGENCY_PAGES 32 /* = 128KB */
#ifdef CONFIG_AGP
@@ -241,7 +238,7 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size)
static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem,
size_t size, int dir)
{
- unsigned long npages = to_pages(phys_mem, size);
+ unsigned long npages = iommu_num_pages(phys_mem, size);
unsigned long iommu_page = alloc_iommu(dev, npages);
int i;
@@ -304,7 +301,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr,
return;
iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT;
- npages = to_pages(dma_addr, size);
+ npages = iommu_num_pages(dma_addr, size);
for (i = 0; i < npages; i++) {
iommu_gatt_base[iommu_page + i] = gart_unmapped_entry;
CLEAR_LEAK(iommu_page + i);
@@ -387,7 +384,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start,
}
addr = phys_addr;
- pages = to_pages(s->offset, s->length);
+ pages = iommu_num_pages(s->offset, s->length);
while (pages--) {
iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr);
SET_LEAK(iommu_page);
@@ -470,7 +467,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir)
seg_size += s->length;
need = ...if it's straightforward and expected to work with a high likelyhood then please do it nevertheless. As long as it's finegrained patches we ought to be able to filter out broken changes. Ingo --
