VisionFive2 Linux kernel

StarFive Tech Linux Kernel for VisionFive (JH7110) boards (mirror)

More than 9999 Commits   32 Branches   54 Tags
author: Sean Paul <seanpaul@chromium.org> 2017-06-20 11:50:41 -0400 committer: Sean Paul <seanpaul@chromium.org> 2017-06-20 11:50:41 -0400 commit: b15cdca5b5de52dc2262c41917e8727b96b30fb0 parent: d4e0045c4ed300781d2d4cbab57d05ed5e665a37
Commit Summary:
Merge remote-tracking branch 'airlied/drm-next' into drm-misc-next-fixes
Diffstat:
11 files changed, 358 insertions, 146 deletions
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 561831e1ae2c..a048e3ac523d 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -40,6 +40,9 @@ struct host1x_subdev {
 
 /**
  * host1x_subdev_add() - add a new subdevice with an associated device node
+ * @device: host1x device to add the subdevice to
+ * @driver: host1x driver
+ * @np: device node
  */
 static int host1x_subdev_add(struct host1x_device *device,
 			     struct device_node *np)
@@ -62,6 +65,7 @@ static int host1x_subdev_add(struct host1x_device *device,
 
 /**
  * host1x_subdev_del() - remove subdevice
+ * @subdev: subdevice to remove
  */
 static void host1x_subdev_del(struct host1x_subdev *subdev)
 {
@@ -72,6 +76,8 @@ static void host1x_subdev_del(struct host1x_subdev *subdev)
 
 /**
  * host1x_device_parse_dt() - scan device tree and add matching subdevices
+ * @device: host1x logical device
+ * @driver: host1x driver
  */
 static int host1x_device_parse_dt(struct host1x_device *device,
 				  struct host1x_driver *driver)
@@ -166,6 +172,16 @@ static void host1x_subdev_unregister(struct host1x_device *device,
 	mutex_unlock(&device->subdevs_lock);
 }
 
+/**
+ * host1x_device_init() - initialize a host1x logical device
+ * @device: host1x logical device
+ *
+ * The driver for the host1x logical device can call this during execution of
+ * its &host1x_driver.probe implementation to initialize each of its clients.
+ * The client drivers access the subsystem specific driver data using the
+ * &host1x_client.parent field and driver data associated with it (usually by
+ * calling dev_get_drvdata()).
+ */
 int host1x_device_init(struct host1x_device *device)
 {
 	struct host1x_client *client;
@@ -192,6 +208,15 @@ int host1x_device_init(struct host1x_device *device)
 }
 EXPORT_SYMBOL(host1x_device_init);
 
+/**
+ * host1x_device_exit() - uninitialize host1x logical device
+ * @device: host1x logical device
+ *
+ * When the driver for a host1x logical device is unloaded, it can call this
+ * function to tear down each of its clients. Typically this is done after a
+ * subsystem-specific data structure is removed and the functionality can no
+ * longer be used.
+ */
 int host1x_device_exit(struct host1x_device *device)
 {
 	struct host1x_client *client;
@@ -446,6 +471,14 @@ static void host1x_detach_driver(struct host1x *host1x,
 	mutex_unlock(&host1x->devices_lock);
 }
 
+/**
+ * host1x_register() - register a host1x controller
+ * @host1x: host1x controller
+ *
+ * The host1x controller driver uses this to register a host1x controller with
+ * the infrastructure. Note that all Tegra SoC generations have only ever come
+ * with a single host1x instance, so this function is somewhat academic.
+ */
 int host1x_register(struct host1x *host1x)
 {
 	struct host1x_driver *driver;
@@ -464,6 +497,13 @@ int host1x_register(struct host1x *host1x)
 	return 0;
 }
 
+/**
+ * host1x_unregister() - unregister a host1x controller
+ * @host1x: host1x controller
+ *
+ * The host1x controller driver uses this to remove a host1x controller from
+ * the infrastructure.
+ */
 int host1x_unregister(struct host1x *host1x)
 {
 	struct host1x_driver *driver;
@@ -513,6 +553,16 @@ static void host1x_device_shutdown(struct device *dev)
 		driver->shutdown(device);
 }
 
+/**
+ * host1x_driver_register_full() - register a host1x driver
+ * @driver: host1x driver
+ * @owner: owner module
+ *
+ * Drivers for host1x logical devices call this function to register a driver
+ * with the infrastructure. Note that since these drive logical devices, the
+ * registration of the driver actually triggers tho logical device creation.
+ * A logical device will be created for each host1x instance.
+ */
 int host1x_driver_register_full(struct host1x_driver *driver,
 				struct module *owner)
 {
@@ -541,6 +591,13 @@ int host1x_driver_register_full(struct host1x_driver *driver,
 }
 EXPORT_SYMBOL(host1x_driver_register_full);
 
+/**
+ * host1x_driver_unregister() - unregister a host1x driver
+ * @driver: host1x driver
+ *
+ * Unbinds the driver from each of the host1x logical devices that it is
+ * bound to, effectively removing the subsystem devices that they represent.
+ */
 void host1x_driver_unregister(struct host1x_driver *driver)
 {
 	driver_unregister(&driver->driver);
@@ -551,6 +608,17 @@ void host1x_driver_unregister(struct host1x_driver *driver)
 }
 EXPORT_SYMBOL(host1x_driver_unregister);
 
+/**
+ * host1x_client_register() - register a host1x client
+ * @client: host1x client
+ *
+ * Registers a host1x client with each host1x controller instance. Note that
+ * each client will only match their parent host1x controller and will only be
+ * associated with that instance. Once all clients have been registered with
+ * their parent host1x controller, the infrastructure will set up the logical
+ * device and call host1x_device_init(), which will in turn call each client's
+ * &host1x_client_ops.init implementation.
+ */
 int host1x_client_register(struct host1x_client *client)
 {
 	struct host1x *host1x;
@@ -576,6 +644,13 @@ int host1x_client_register(struct host1x_client *client)
 }
 EXPORT_SYMBOL(host1x_client_register);
 
+/**
+ * host1x_client_unregister() - unregister a host1x client
+ * @client: host1x client
+ *
+ * Removes a host1x client from its host1x controller instance. If a logical
+ * device has already been initialized, it will be torn down.
+ */
 int host1x_client_unregister(struct host1x_client *client)
 {
 	struct host1x_client *c;
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
index ec170a78f4e1..286d49386be9 100644
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h
@@ -88,7 +88,6 @@ struct host1x_cdma {
 
 int host1x_cdma_init(struct host1x_cdma *cdma);
 int host1x_cdma_deinit(struct host1x_cdma *cdma);
-void host1x_cdma_stop(struct host1x_cdma *cdma);
 int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job);
 void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2);
 void host1x_cdma_end(struct host1x_cdma *cdma, struct host1x_job *job);
diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 8f437d924c10..db9b91d1384c 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -24,19 +24,33 @@
 #include "job.h"
 
 /* Constructor for the host1x device list */
-int host1x_channel_list_init(struct host1x *host)
+int host1x_channel_list_init(struct host1x_channel_list *chlist,
+			     unsigned int num_channels)
 {
-	INIT_LIST_HEAD(&host->chlist.list);
-	mutex_init(&host->chlist_mutex);
-
-	if (host->info->nb_channels > BITS_PER_LONG) {
-		WARN(1, "host1x hardware has more channels than supported by the driver\n");
-		return -ENOSYS;
+	chlist->channels = kcalloc(num_channels, sizeof(struct host1x_channel),
+				   GFP_KERNEL);
+	if (!chlist->channels)
+		return -ENOMEM;
+
+	chlist->allocated_channels =
+		kcalloc(BITS_TO_LONGS(num_channels), sizeof(unsigned long),
+			GFP_KERNEL);
+	if (!chlist->allocated_channels) {
+		kfree(chlist->channels);
+		return -ENOMEM;
 	}
 
+	bitmap_zero(chlist->allocated_channels, num_channels);
+
 	return 0;
 }
 
+void host1x_channel_list_free(struct host1x_channel_list *chlist)
+{
+	kfree(chlist->allocated_channels);
+	kfree(chlist->channels);
+}
+
 int host1x_job_submit(struct host1x_job *job)
 {
 	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
@@ -47,86 +61,107 @@ EXPORT_SYMBOL(host1x_job_submit);
 
 struct host1x_channel *host1x_channel_get(struct host1x_channel *channel)
 {
-	int err = 0;
+	kref_get(&channel->refcount);
 
-	mutex_lock(&channel->reflock);
+	return channel;
+}
+EXPORT_SYMBOL(host1x_channel_get);
 
-	if (channel->refcount == 0)
-		err = host1x_cdma_init(&channel->cdma);
+/**
+ * host1x_channel_get_index() - Attempt to get channel reference by index
+ * @host: Host1x device object
+ * @index: Index of channel
+ *
+ * If channel number @index is currently allocated, increase its refcount
+ * and return a pointer to it. Otherwise, return NULL.
+ */
+struct host1x_channel *host1x_channel_get_index(struct host1x *host,
+						unsigned int index)
+{
+	struct host1x_channel *ch = &host->channel_list.channels[index];
 
-	if (!err)
-		channel->refcount++;
+	if (!kref_get_unless_zero(&ch->refcount))
+		return NULL;
 
-	mutex_unlock(&channel->reflock);
+	return ch;
+}
+
+static void release_channel(struct kref *kref)
+{
+	struct host1x_channel *channel =
+		container_of(kref, struct host1x_channel, refcount);
+	struct host1x *host = dev_get_drvdata(channel->dev->parent);
+	struct host1x_channel_list *chlist = &host->channel_list;
+
+	host1x_hw_cdma_stop(host, &channel->cdma);
+	host1x_cdma_deinit(&channel->cdma);
 
-	return err ? NULL : channel;
+	clear_bit(channel->id, chlist->allocated_channels);
 }
-EXPORT_SYMBOL(host1x_channel_get);
 
 void host1x_channel_put(struct host1x_channel *channel)
 {
-	mutex_lock(&channel->reflock);
+	kref_put(&channel->refcount, release_channel);
+}
+EXPORT_SYMBOL(host1x_channel_put);
 
-	if (channel->refcount == 1) {
-		struct host1x *host = dev_get_drvdata(channel->dev->parent);
+static struct host1x_channel *acquire_unused_channel(struct host1x *host)
+{
+	struct host1x_channel_list *chlist = &host->channel_list;
+	unsigned int max_channels = host->info->nb_channels;
+	unsigned int index;
 
-		host1x_hw_cdma_stop(host, &channel->cdma);
-		host1x_cdma_deinit(&channel->cdma);
+	index = find_first_zero_bit(chlist->allocated_channels, max_channels);
+	if (index >= max_channels) {
+		dev_err(host->dev, "failed to find free channel\n");
+		return NULL;
 	}
 
-	channel->refcount--;
+	chlist->channels[index].id = index;
 
-	mutex_unlock(&channel->reflock);
+	set_bit(index, chlist->allocated_channels);
+
+	return &chlist->channels[index];
 }
-EXPORT_SYMBOL(host1x_channel_put);
 
+/**
+ * host1x_channel_request() - Allocate a channel
+ * @device: Host1x unit this channel will be used to send commands to
+ *
+ * Allocates a new host1x channel for @device. If there are no free channels,
+ * this will sleep until one becomes available. May return NULL if CDMA
+ * initialization fails.
+ */
 struct host1x_channel *host1x_channel_request(struct device *dev)
 {
 	struct host1x *host = dev_get_drvdata(dev->parent);
-	unsigned int max_channels = host->info->nb_channels;
-	struct host1x_channel *channel = NULL;
-	unsigned long index;
+	struct host1x_channel_list *chlist = &host->channel_list;
+	struct host1x_channel *channel;
 	int err;
 
-	mutex_lock(&host->chlist_mutex);
+	channel = acquire_unused_channel(host);
+	if (!channel)
+		return NULL;
 
-	index = find_first_zero_bit(&host->allocated_channels, max_channels);
-	if (index >= max_channels)
-		goto fail;
+	kref_init(&channel->refcount);
+	mutex_init(&channel->submitlock);
+	channel->dev = dev;
 
-	channel = kzalloc(sizeof(*channel), GFP_KERNEL);
-	if (!channel)
+	err = host1x_hw_channel_init(host, channel, channel->id);
+	if (err < 0)
 		goto fail;
 
-	err = host1x_hw_channel_init(host, channel, index);
+	err = host1x_cdma_init(&channel->cdma);
 	if (err < 0)
 		goto fail;
 
-	/* Link device to host1x_channel */
-	channel->dev = dev;
-
-	/* Add to channel list */
-	list_add_tail(&channel->list, &host->chlist.list);
-
-	host->allocated_channels |= BIT(index);
-
-	mutex_unlock(&host->chlist_mutex);
 	return channel;
 
 fail:
-	dev_err(dev, "failed to init channel\n");
-	kfree(channel);
-	mutex_unlock(&host->chlist_mutex);
-	return NULL;
-}
-EXPORT_SYMBOL(host1x_channel_request);
+	clear_bit(channel->id, chlist->allocated_channels);
 
-void host1x_channel_free(struct host1x_channel *channel)
-{
-	struct host1x *host = dev_get_drvdata(channel->dev->parent);
+	dev_err(dev, "failed to initialize channel\n");
 
-	host->allocated_channels &= ~BIT(channel->id);
-	list_del(&channel->list);
-	kfree(channel);
+	return NULL;
 }
-EXPORT_SYMBOL(host1x_channel_free);
+EXPORT_SYMBOL(host1x_channel_request);
diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h
index df767cf90d51..7068e42d42df 100644
--- a/drivers/gpu/host1x/channel.h
+++ b/drivers/gpu/host1x/channel.h
@@ -20,17 +20,21 @@
 #define __HOST1X_CHANNEL_H
 
 #include <linux/io.h>
+#include <linux/kref.h>
 
 #include "cdma.h"
 
 struct host1x;
+struct host1x_channel;
 
-struct host1x_channel {
-	struct list_head list;
+struct host1x_channel_list {
+	struct host1x_channel *channels;
+	unsigned long *allocated_channels;
+};
 
-	unsigned int refcount;
+struct host1x_channel {
+	struct kref refcount;
 	unsigned int id;
-	struct mutex reflock;
 	struct mutex submitlock;
 	void __iomem *regs;
 	struct device *dev;
@@ -38,9 +42,10 @@ struct host1x_channel {
 };
 
 /* channel list operations */
-int host1x_channel_list_init(struct host1x *host);
-
-#define host1x_for_each_channel(host, channel)				\
-	list_for_each_entry(channel, &host->chlist.list, list)
+int host1x_channel_list_init(struct host1x_channel_list *chlist,
+			     unsigned int num_channels);
+void host1x_channel_list_free(struct host1x_channel_list *chlist);
+struct host1x_channel *host1x_channel_get_index(struct host1x *host,
+						unsigned int index);
 
 #endif
diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index d9330fcc62ad..2aae0e63214c 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -43,24 +43,19 @@ void host1x_debug_output(struct output *o, const char *fmt, ...)
 	o->fn(o->ctx, o->buf, len);
 }
 
-static int show_channels(struct host1x_channel *ch, void *data, bool show_fifo)
+static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
 {
 	struct host1x *m = dev_get_drvdata(ch->dev->parent);
 	struct output *o = data;
 
-	mutex_lock(&ch->reflock);
+	mutex_lock(&ch->cdma.lock);
 
-	if (ch->refcount) {
-		mutex_lock(&ch->cdma.lock);
+	if (show_fifo)
+		host1x_hw_show_channel_fifo(m, ch, o);
 
-		if (show_fifo)
-			host1x_hw_show_channel_fifo(m, ch, o);
+	host1x_hw_show_channel_cdma(m, ch, o);
 
-		host1x_hw_show_channel_cdma(m, ch, o);
-		mutex_unlock(&ch->cdma.lock);
-	}
-
-	mutex_unlock(&ch->reflock);
+	mutex_unlock(&ch->cdma.lock);
 
 	return 0;
 }
@@ -94,28 +89,22 @@ static void show_syncpts(struct host1x *m, struct output *o)
 	host1x_debug_output(o, "\n");
 }
 
-static void show_all(struct host1x *m, struct output *o)
+static void show_all(struct host1x *m, struct output *o, bool show_fifo)
 {
-	struct host1x_channel *ch;
+	int i;
 
 	host1x_hw_show_mlocks(m, o);
 	show_syncpts(m, o);
 	host1x_debug_output(o, "---- channels ----\n");
 
-	host1x_for_each_channel(m, ch)
-		show_channels(ch, o, true);
-}
-
-static void show_all_no_fifo(struct host1x *host1x, struct output *o)
-{
-	struct host1x_channel *ch;
-
-	host1x_hw_show_mlocks(host1x, o);
-	show_syncpts(host1x, o);
-	host1x_debug_output(o, "---- channels ----\n");
+	for (i = 0; i < m->info->nb_channels; ++i) {
+		struct host1x_channel *ch = host1x_channel_get_index(m, i);
 
-	host1x_for_each_channel(host1x, ch)
-		show_channels(ch, o, false);
+		if (ch) {
+			show_channel(ch, o, show_fifo);
+			host1x_channel_put(ch);
+		}
+	}
 }
 
 static int host1x_debug_show_all(struct seq_file *s, void *unused)
@@ -125,7 +114,7 @@ static int host1x_debug_show_all(struct seq_file *s, void *unused)
 		.ctx = s
 	};
 
-	show_all(s->private, &o);
+	show_all(s->private, &o, true);
 
 	return 0;
 }
@@ -137,7 +126,7 @@ static int host1x_debug_show(struct seq_file *s, void *unused)
 		.ctx = s
 	};
 
-	show_all_no_fifo(s->private, &o);
+	show_all(s->private, &o, false);
 
 	return 0;
 }
@@ -216,7 +205,7 @@ void host1x_debug_dump(struct host1x *host1x)
 		.fn = write_to_printk
 	};
 
-	show_all(host1x, &o);
+	show_all(host1x, &o, true);
 }
 
 void host1x_debug_dump_syncpts(struct host1x *host1x)
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index ac65f52850a6..2c58a390123a 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -198,7 +198,8 @@ static int host1x_probe(struct platform_device *pdev)
 		host->iova_end = geometry->aperture_end;
 	}
 
-	err = host1x_channel_list_init(host);
+	err = host1x_channel_list_init(&host->channel_list,
+				       host->info->nb_channels);
 	if (err) {
 		dev_err(&pdev->dev, "failed to initialize channel list\n");
 		goto fail_detach_device;
@@ -207,7 +208,7 @@ static int host1x_probe(struct platform_device *pdev)
 	err = clk_prepare_enable(host->clk);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to enable clock\n");
-		goto fail_detach_device;
+		goto fail_free_channels;
 	}
 
 	err = reset_control_deassert(host->rst);
@@ -244,6 +245,8 @@ fail_reset_assert:
 	reset_control_assert(host->rst);
 fail_unprepare_disable:
 	clk_disable_unprepare(host->clk);
+fail_free_channels:
+	host1x_channel_list_free(&host->channel_list);
 fail_detach_device:
 	if (host->domain) {
 		put_iova_domain(&host->iova);
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 229d08b6a45e..ffdbc15b749b 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -129,10 +129,8 @@ struct host1x {
 	struct host1x_syncpt *nop_sp;
 
 	struct mutex syncpt_mutex;
-	struct mutex chlist_mutex;
-	struct host1x_channel chlist;
-	unsigned long allocated_channels;
-	unsigned int num_allocated_channels;
+
+	struct host1x_channel_list channel_list;
 
 	struct dentry *debugfs;
 
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 5e8df78b7acd..8447a56c41ca 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -181,10 +181,6 @@ error:
 static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
 			       unsigned int index)
 {
-	ch->id = index;
-	mutex_init(&ch->reflock);
-	mutex_init(&ch->submitlock);
-
 	ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE;
 	return 0;
 }
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 5f5f8ee6143d..bee504406cfc 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -31,6 +31,8 @@
 #include "job.h"
 #include "syncpt.h"
 
+#define HOST1X_WAIT_SYNCPT_OFFSET 0x8
+
 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
 				    u32 num_cmdbufs, u32 num_relocs,
 				    u32 num_waitchks)
@@ -137,8 +139,9 @@ static void host1x_syncpt_patch_offset(struct host1x_syncpt *sp,
  * avoid a wrap condition in the HW).
  */
 static int do_waitchks(struct host1x_job *job, struct host1x *host,
-		       struct host1x_bo *patch)
+		       struct host1x_job_gather *g)
 {
+	struct host1x_bo *patch = g->bo;
 	int i;
 
 	/* compare syncpt vs wait threshold */
@@ -165,7 +168,8 @@ static int do_waitchks(struct host1x_job *job, struct host1x *host,
 				wait->syncpt_id, sp->name, wait->thresh,
 				host1x_syncpt_read_min(sp));
 
-			host1x_syncpt_patch_offset(sp, patch, wait->offset);
+			host1x_syncpt_patch_offset(sp, patch,
+						   g->offset + wait->offset);
 		}
 
 		wait->bo = NULL;
@@ -269,11 +273,12 @@ unpin:
 	return err;
 }
 
-static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf)
+static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
 {
 	int i = 0;
 	u32 last_page = ~0;
 	void *cmdbuf_page_addr = NULL;
+	struct host1x_bo *cmdbuf = g->bo;
 
 	/* pin & patch the relocs for one gather */
 	for (i = 0; i < job->num_relocs; i++) {
@@ -286,6 +291,13 @@ static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf)
 		if (cmdbuf != reloc->cmdbuf.bo)
 			continue;
 
+		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
+			target = (u32 *)job->gather_copy_mapped +
+					reloc->cmdbuf.offset / sizeof(u32) +
+						g->offset / sizeof(u32);
+			goto patch_reloc;
+		}
+
 		if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) {
 			if (cmdbuf_page_addr)
 				host1x_bo_kunmap(cmdbuf, last_page,
@@ -302,6 +314,7 @@ static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf)
 		}
 
 		target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK);
+patch_reloc:
 		*target = reloc_addr;
 	}
 
@@ -319,6 +332,21 @@ static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
 	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
 		return false;
 
+	/* relocation shift value validation isn't implemented yet */
+	if (reloc->shift)
+		return false;
+
+	return true;
+}
+
+static bool check_wait(struct host1x_waitchk *wait, struct host1x_bo *cmdbuf,
+		       unsigned int offset)
+{
+	offset *= sizeof(u32);
+
+	if (wait->bo != cmdbuf || wait->offset != offset)
+		return false;
+
 	return true;
 }
 
@@ -329,6 +357,9 @@ struct host1x_firewall {
 	unsigned int num_relocs;
 	struct host1x_reloc *reloc;
 
+	unsigned int num_waitchks;
+	struct host1x_waitchk *waitchk;
+
 	struct host1x_bo *cmdbuf;
 	unsigned int offset;
 
@@ -341,6 +372,9 @@ struct host1x_firewall {
 
 static int check_register(struct host1x_firewall *fw, unsigned long offset)
 {
+	if (!fw->job->is_addr_reg)
+		return 0;
+
 	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
 		if (!fw->num_relocs)
 			return -EINVAL;
@@ -352,6 +386,33 @@ static int check_register(struct host1x_firewall *fw, unsigned long offset)
 		fw->reloc++;
 	}
 
+	if (offset == HOST1X_WAIT_SYNCPT_OFFSET) {
+		if (fw->class != HOST1X_CLASS_HOST1X)
+			return -EINVAL;
+
+		if (!fw->num_waitchks)
+			return -EINVAL;
+
+		if (!check_wait(fw->waitchk, fw->cmdbuf, fw->offset))
+			return -EINVAL;
+
+		fw->num_waitchks--;
+		fw->waitchk++;
+	}
+
+	return 0;
+}
+
+static int check_class(struct host1x_firewall *fw, u32 class)
+{
+	if (!fw->job->is_valid_class) {
+		if (fw->class != class)
+			return -EINVAL;
+	} else {
+		if (!fw->job->is_valid_class(fw->class))
+			return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -428,11 +489,9 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
 {
 	u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
 		(g->offset / sizeof(u32));
+	u32 job_class = fw->class;
 	int err = 0;
 
-	if (!fw->job->is_addr_reg)
-		return 0;
-
 	fw->words = g->words;
 	fw->cmdbuf = g->bo;
 	fw->offset = 0;
@@ -452,7 +511,9 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
 			fw->class = word >> 6 & 0x3ff;
 			fw->mask = word & 0x3f;
 			fw->reg = word >> 16 & 0xfff;
-			err = check_mask(fw);
+			err = check_class(fw, job_class);
+			if (!err)
+				err = check_mask(fw);
 			if (err)
 				goto out;
 			break;
@@ -480,7 +541,6 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
 				goto out;
 			break;
 		case 4:
-		case 5:
 		case 14:
 			break;
 		default:
@@ -504,7 +564,9 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
 	fw.dev = dev;
 	fw.reloc = job->relocarray;
 	fw.num_relocs = job->num_relocs;
-	fw.class = 0;
+	fw.waitchk = job->waitchk;
+	fw.num_waitchks = job->num_waitchk;
+	fw.class = job->class;
 
 	for (i = 0; i < job->num_gathers; i++) {
 		struct host1x_job_gather *g = &job->gathers[i];
@@ -512,12 +574,20 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
 		size += g->words * sizeof(u32);
 	}
 
+	/*
+	 * Try a non-blocking allocation from a higher priority pools first,
+	 * as awaiting for the allocation here is a major performance hit.
+	 */
 	job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy,
-					       GFP_KERNEL);
-	if (!job->gather_copy_mapped) {
-		job->gather_copy_mapped = NULL;
+					       GFP_NOWAIT);
+
+	/* the higher priority allocation failed, try the generic-blocking */
+	if (!job->gather_copy_mapped)
+		job->gather_copy_mapped = dma_alloc_wc(dev, size,
+						       &job->gather_copy,
+						       GFP_KERNEL);
+	if (!job->gather_copy_mapped)
 		return -ENOMEM;
-	}
 
 	job->gather_copy_size = size;
 
@@ -542,8 +612,8 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
 		offset += g->words * sizeof(u32);
 	}
 
-	/* No relocs should remain at this point */
-	if (fw.num_relocs)
+	/* No relocs and waitchks should remain at this point */
+	if (fw.num_relocs || fw.num_waitchks)
 		return -EINVAL;
 
 	return 0;
@@ -573,6 +643,12 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
 	if (err)
 		goto out;
 
+	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
+		err = copy_gathers(job, dev);
+		if (err)
+			goto out;
+	}
+
 	/* patch gathers */
 	for (i = 0; i < job->num_gathers; i++) {
 		struct host1x_job_gather *g = &job->gathers[i];
@@ -581,7 +657,9 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
 		if (g->handled)
 			continue;
 
-		g->base = job->gather_addr_phys[i];
+		/* copy_gathers() sets gathers base if firewall is enabled */
+		if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+			g->base = job->gather_addr_phys[i];
 
 		for (j = i + 1; j < job->num_gathers; j++) {
 			if (job->gathers[j].bo == g->bo) {
@@ -590,24 +668,18 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
 			}
 		}
 
-		err = do_relocs(job, g->bo);
+		err = do_relocs(job, g);
 		if (err)
 			break;
 
-		err = do_waitchks(job, host, g->bo);
+		err = do_waitchks(job, host, g);
 		if (err)
 			break;
 	}
 
-	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !err) {
-		err = copy_gathers(job, dev);
-		if (err) {
-			host1x_job_unpin(job);
-			return err;
-		}
-	}
-
 out:
+	if (err)
+		host1x_job_unpin(job);
 	wmb();
 
 	return err;
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index 878239c476d2..4bda51d503ec 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -27,20 +27,6 @@ struct host1x_job_gather {
 	bool handled;
 };
 
-struct host1x_cmdbuf {
-	u32 handle;
-	u32 offset;
-	u32 words;
-	u32 pad;
-};
-
-struct host1x_waitchk {
-	struct host1x_bo *bo;
-	u32 offset;
-	u32 syncpt_id;
-	u32 thresh;
-};
-
 struct host1x_job_unpin_data {
 	struct host1x_bo *bo;
 	struct sg_table *sgt;
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 0ac026cdc30c..048ac9e344ce 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -99,14 +99,24 @@ unlock:
 	return NULL;
 }
 
+/**
+ * host1x_syncpt_id() - retrieve syncpoint ID
+ * @sp: host1x syncpoint
+ *
+ * Given a pointer to a struct host1x_syncpt, retrieves its ID. This ID is
+ * often used as a value to program into registers that control how hardware
+ * blocks interact with syncpoints.
+ */
 u32 host1x_syncpt_id(struct host1x_syncpt *sp)
 {
 	return sp->id;
 }
 EXPORT_SYMBOL(host1x_syncpt_id);
 
-/*
- * Updates the value sent to hardware.
+/**
+ * host1x_syncpt_incr_max() - update the value sent to hardware
+ * @sp: host1x syncpoint
+ * @incrs: number of increments
  */
 u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs)
 {
@@ -175,8 +185,9 @@ u32 host1x_syncpt_load_wait_base(struct host1x_syncpt *sp)
 	return sp->base_val;
 }
 
-/*
- * Increment syncpoint value from cpu, updating cache
+/**
+ * host1x_syncpt_incr() - increment syncpoint value from CPU, updating cache
+ * @sp: host1x syncpoint
  */
 int host1x_syncpt_incr(struct host1x_syncpt *sp)
 {
@@ -195,8 +206,12 @@ static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh)
 	return host1x_syncpt_is_expired(sp, thresh);
 }
 
-/*
- * Main entrypoint for syncpoint value waits.
+/**
+ * host1x_syncpt_wait() - wait for a syncpoint to reach a given value
+ * @sp: host1x syncpoint
+ * @thresh: threshold
+ * @timeout: maximum time to wait for the syncpoint to reach the given value
+ * @value: return location for the syncpoint value
  */
 int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
 		       u32 *value)
@@ -402,6 +417,16 @@ int host1x_syncpt_init(struct host1x *host)
 	return 0;
 }
 
+/**
+ * host1x_syncpt_request() - request a syncpoint
+ * @dev: device requesting the syncpoint
+ * @flags: flags
+ *
+ * host1x client drivers can use this function to allocate a syncpoint for
+ * subsequent use. A syncpoint returned by this function will be reserved for
+ * use by the client exclusively. When no longer using a syncpoint, a host1x
+ * client driver needs to release it using host1x_syncpt_free().
+ */
 struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
 					    unsigned long flags)
 {
@@ -411,6 +436,16 @@ struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
 }
 EXPORT_SYMBOL(host1x_syncpt_request);
 
+/**
+ * host1x_syncpt_free() - free a requested syncpoint
+ * @sp: host1x syncpoint
+ *
+ * Release a syncpoint previously allocated using host1x_syncpt_request(). A
+ * host1x client driver should call this when the syncpoint is no longer in
+ * use. Note that client drivers must ensure that the syncpoint doesn't remain
+ * under the control of hardware after calling this function, otherwise two
+ * clients may end up trying to access the same syncpoint concurrently.
+ */
 void host1x_syncpt_free(struct host1x_syncpt *sp)
 {
 	if (!sp)
@@ -438,9 +473,12 @@ void host1x_syncpt_deinit(struct host1x *host)
 		kfree(sp->name);
 }
 
-/*
- * Read max. It indicates how many operations there are in queue, either in
- * channel or in a software thread.
+/**
+ * host1x_syncpt_read_max() - read maximum syncpoint value
+ * @sp: host1x syncpoint
+ *
+ * The maximum syncpoint value indicates how many operations there are in
+ * queue, either in channel or in a software thread.
  */
 u32 host1x_syncpt_read_max(struct host1x_syncpt *sp)
 {
@@ -450,8 +488,12 @@ u32 host1x_syncpt_read_max(struct host1x_syncpt *sp)
 }
 EXPORT_SYMBOL(host1x_syncpt_read_max);
 
-/*
- * Read min, which is a shadow of the current sync point value in hardware.
+/**
+ * host1x_syncpt_read_min() - read minimum syncpoint value
+ * @sp: host1x syncpoint
+ *
+ * The minimum syncpoint value is a shadow of the current sync point value in
+ * hardware.
  */
 u32 host1x_syncpt_read_min(struct host1x_syncpt *sp)
 {
@@ -461,6 +503,10 @@ u32 host1x_syncpt_read_min(struct host1x_syncpt *sp)
 }
 EXPORT_SYMBOL(host1x_syncpt_read_min);
 
+/**
+ * host1x_syncpt_read() - read the current syncpoint value
+ * @sp: host1x syncpoint
+ */
 u32 host1x_syncpt_read(struct host1x_syncpt *sp)
 {
 	return host1x_syncpt_load(sp);
@@ -482,6 +528,11 @@ unsigned int host1x_syncpt_nb_mlocks(struct host1x *host)
 	return host->info->nb_mlocks;
 }
 
+/**
+ * host1x_syncpt_get() - obtain a syncpoint by ID
+ * @host: host1x controller
+ * @id: syncpoint ID
+ */
 struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, unsigned int id)
 {
 	if (id >= host->info->nb_pts)
@@ -491,12 +542,20 @@ struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, unsigned int id)
 }
 EXPORT_SYMBOL(host1x_syncpt_get);
 
+/**
+ * host1x_syncpt_get_base() - obtain the wait base associated with a syncpoint
+ * @sp: host1x syncpoint
+ */
 struct host1x_syncpt_base *host1x_syncpt_get_base(struct host1x_syncpt *sp)
 {
 	return sp ? sp->base : NULL;
 }
 EXPORT_SYMBOL(host1x_syncpt_get_base);
 
+/**
+ * host1x_syncpt_base_id() - retrieve the ID of a syncpoint wait base
+ * @base: host1x syncpoint wait base
+ */
 u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base)
 {
 	return base->id;