/*
 * TUX - Integrated Application Protocols Layer and Object Cache
 *
 * Copyright (C) 2000, 2001, Ingo Molnar <mingo@redhat.com>
 *
 * input.c: handle requests arriving on accepted connections
 */

#include <net/tux.h>
#include <linux/kmod.h>

/*
 * Get rid of a request that will not be served normally.
 *
 * With req->error == 3 and no cachemiss the keepalive state is
 * cleared and the request is flushed right here. In every other
 * case an atom is queued - flush_request for error 3,
 * redirect_request otherwise - and the request is put back on the
 * workqueue.
 */
void zap_request (tux_req_t *req, int cachemiss)
{
	const int dead_client = (req->error == 3);

	if (dead_client && !cachemiss) {
		/*
		 * There is no valid client connection anymore,
		 * so zap the connection as fast as possible:
		 */
		clear_keepalive(req);
		flush_request(req, 0);
		return;
	}

	if (dead_client)
		add_tux_atom(req, flush_request);
	else
		/*
		 * Potentially redirect to the secondary server:
		 */
		add_tux_atom(req, redirect_request);
	add_req_to_workqueue(req);
}

/*
 * Walk 'filename' starting at the position held in 'base'.
 *
 * mntget() is applied to base->mnt before the walk, and mntput() is
 * applied to base->mnt after a successful walk; when path_walk()
 * fails no mntput() is done here and ERR_PTR(error) is returned.
 * On success the result is base->dentry as left by path_walk().
 *
 * 'flags' is accepted but not used by this helper.
 */
static struct dentry * __tux_lookup (const char *filename,
			 struct nameidata *base, const unsigned int flags)
{
	int rc;

	if (!base)
		TUX_BUG();
	mntget(base->mnt);

	rc = path_walk(filename, base);
	if (rc) {
		Dprintk("path_walk() returned with %d!\n", rc);
		return ERR_PTR(rc);
	}

	mntput(base->mnt);
	return base->dentry;
}

/*
 * Check whether an inode may be served as a URL object.
 *
 * Returns:
 *   -1  the inode is not a regular file
 *   -2  a bit from tux_mode_forbidden is set in the mode
 *   -3  no bit from tux_mode_allowed is set in the mode
 * otherwise the result of permission(inode, MAY_READ).
 */
int url_permission (struct inode *inode)
{
	const umode_t mode = inode->i_mode;

	Dprintk("URL inode mode: %08x.\n", mode);

	/* Only regular files allowed. */
	if (!S_ISREG(mode))
		return -1;

	/* Paranoia: first forbid things, then maybe allow. */
	if (mode & tux_mode_forbidden)
		return -2;

	/*
	 * Access needs at least one bit of the 'allowed' set
	 * to be present.
	 */
	if (!(mode & tux_mode_allowed))
		return -3;

	return permission(inode, MAY_READ);
}

/*
 * Look up 'filename' for a request, relative to the request's
 * current directory (req->cwd / req->cwdmnt).
 *
 * If req->objectname starts with '/', a remembered cwd is released
 * first; a request without cwd gets references on the docroot
 * dentry and mount. When virtual_server is set and the request has
 * a host name, the host name is walked first and 'filename' is only
 * walked if that succeeded.
 *
 * Returns whatever __tux_lookup() returned last: a dentry or an
 * ERR_PTR() value.
 */
struct dentry * tux_lookup (tux_req_t *req, const char *filename,
				const unsigned int flag)
{
	struct nameidata nd;
	struct dentry *result = NULL;
	int walk_file = 1;

	if (req->cwd && (req->objectname[0] == '/')) {
		dput(req->cwd);
		mntput(req->cwdmnt);
		req->cwd = NULL;
		req->cwdmnt = NULL;
	}
	if (!req->cwd) {
		req->cwd = dget(docroot.dentry);
		req->cwdmnt = mntget(docroot.mnt);
	}

	/* The walk gets its own dentry reference on the start point. */
	nd.dentry = dget(req->cwd);
	nd.mnt = req->cwdmnt;
	nd.last_type = LAST_ROOT;
	nd.flags = LOOKUP_POSITIVE|LOOKUP_FOLLOW|flag;

	if (virtual_server && req->host_len) {
		/* Per-host subtree first; give up if it is not there. */
		result = __tux_lookup (req->host, &nd, flag);
		if (!result || IS_ERR(result))
			walk_file = 0;
	}
	if (walk_file)
		result = __tux_lookup (filename, &nd, flag);

	Dprintk("looked up {%s} == dentry %p.\n", filename, result);

	/* A successful lookup must not hand back a negative dentry. */
	if (result && !IS_ERR(result) && !result->d_inode)
		TUX_BUG();
	return result;
}

/*
 * Attach 'dentry' (which may be NULL) to the request.
 *
 * The request must not already carry a dentry, and its private
 * input file must not already reference one; both conditions are
 * checked with TUX_BUG(). For a non-NULL dentry the private file
 * req->in_file is initialized for reading.
 */
void install_req_dentry (tux_req_t *req, struct dentry *dentry)
{
	if (req->dentry)
		TUX_BUG();
	if (req->in_file.f_dentry)
		TUX_BUG();

	req->dentry = dentry;
	if (!dentry)
		return;

	init_private_file(&req->in_file, dentry, FMODE_READ);
}

/*
 * Undo install_req_dentry(): call the file's ->release() method if
 * there is one, wipe req->in_file, and drop the dentry reference.
 *
 * A request without dentry is left alone (after checking that the
 * private file does not reference a dentry either).
 */
void release_req_dentry (tux_req_t *req)
{
	struct file *filp = &req->in_file;

	if (!req->dentry) {
		if (filp->f_dentry)
			TUX_BUG();
		return;
	}

	if (filp->f_op && filp->f_op->release)
		filp->f_op->release(req->dentry->d_inode, filp);
	memset(filp, 0, sizeof(*filp));

	dput(req->dentry);
	req->dentry = NULL;
}

/*
 * Resolve req->objectname to a dentry and install it in the request.
 *
 * The lookup may be repeated with a rewritten object name:
 *  - once with tux_404_page if the first lookup failed
 *    (req->lookup_404 guards against a second retry, and
 *    req->status is set to 404),
 *  - once with "/index.html" appended if the object is a directory
 *    whose name ends in '/' (req->lookup_dir guards this).
 *
 * Return values:
 *   0  install_req_dentry() was called - either with the dentry
 *      that was found, or with NULL after req_err() on the abort
 *      path (lookup failure, permission failure, object too big).
 *   1  tux_lookup() returned -EWOULDBLOCKIO (cachemiss); nothing
 *      was installed.
 *   2  the object is a directory, url_permission() rejected it and
 *      the non-empty object name does not end in '/'; the dentry
 *      reference is dropped and nothing is installed.
 */
int lookup_url (tux_req_t *req, const unsigned int flag)
{
	int perm = 0, i;
	struct dentry *dentry = NULL;
	struct inode *inode;
	const char *filename;

repeat_lookup:
	if (req->dentry)
		TUX_BUG();

	filename = req->objectname;
	Dprintk("will look up {%s} (%d)\n", filename, req->objectname_len);
	Dprintk("current->fsuid: %d, current->fsgid: %d, ngroups: %d\n",
		current->fsuid, current->fsgid, current->ngroups);
	for (i = 0; i < current->ngroups; i++)
		Dprintk(".. group #%d: %d.\n", i, current->groups[i]);

	dentry = tux_lookup(req, filename, flag);

	if (!dentry || IS_ERR(dentry)) {
		if (PTR_ERR(dentry) == -EWOULDBLOCKIO)
			goto cachemiss;

		/*
		 * First failure: retry once with the configured 404
		 * page as the object name.
		 * NOTE(review): the copy is not bounded against the size
		 * of req->objectname here - presumably tux_404_page is
		 * limited elsewhere; confirm.
		 */
		if (!req->lookup_404) {
			int len = strlen(tux_404_page);
			memcpy(req->objectname, tux_404_page, len);
			req->objectname[len] = 0;
			req->objectname_len = len;
			req->lookup_404 = 1;
			req->status = 404;
			goto repeat_lookup;
		}
		Dprintk("abort - lookup error.\n");
		goto abort;
	}

	Dprintk("SUCCESS, looked up {%s} == dentry %p (inode %p, count %d.)\n", filename, dentry, dentry->d_inode, atomic_read(&dentry->d_count));
	inode = dentry->d_inode;

	/*
	 * At this point we have a real, non-negative dentry.
	 */
	perm = url_permission(inode);

	if (perm < 0) {
		Dprintk("FAILED trusted dentry %p permission %d.\n", dentry, perm);
/*
 * sizeof(INDEX) counts the terminating NUL, so the memcpy() below
 * also terminates the new object name. The macro stays defined for
 * the rest of the file.
 */
#define INDEX "/index.html"
		if (S_ISDIR(dentry->d_inode->i_mode) && !req->lookup_dir &&
				(req->objectname_len + sizeof(INDEX) < 
							MAX_OBJECTNAME_LEN)) {
			/*
			 * Directory named without a trailing slash:
			 * drop the reference and report it via return
			 * value 2 instead of trying the index file.
			 */
			if (req->objectname_len && (req->objectname[req->objectname_len-1] != '/')) {
				dput(dentry);
				return 2;
			}
			memcpy(req->objectname + req->objectname_len,
							INDEX, sizeof(INDEX));
			req->objectname_len += sizeof(INDEX)-1;
			req->lookup_dir = 1;
			dput(dentry);
			goto repeat_lookup;
		}
		req->status = 403;
		goto abort;
	}
	/* A zero tux_max_object_size disables the size limit. */
	if (tux_max_object_size && (inode->i_size > tux_max_object_size)) {
		Dprintk("too big object, %d bytes.\n", (int)inode->i_size);
		req->status = 403;
		goto abort;
	}
	req->filelen = inode->i_size;

	Dprintk("looked up cached dentry %p, (count %d.)\n", dentry, dentry ? atomic_read(&dentry->d_count) : -1 );

	url_hist_hit(req->filelen);
out:
	/* Also reached from 'abort' below, with dentry == NULL. */
	install_req_dentry(req, dentry);
	return 0;

cachemiss:
	return 1;

abort:
	/* Drop the reference on a real dentry; ERR_PTR values have none. */
	if (dentry) {
		if (!IS_ERR(dentry))
			dput(dentry);
		dentry = NULL;
	}
	TDprintk("req %p has lookup errors!\n", req);
	req_err(req);
	goto out;
}

/*
 * Compare the request's average output rate against
 * tux_max_output_bandwidth.
 *
 * Returns:
 *   1  no bandwidth limit is configured, or no bytes are accounted
 *      to the request yet
 *   2  the average rate (bytes * HZ / jiffies elapsed since
 *      req->first_timestamp) is above the limit
 *   0  the average rate is within the limit
 */
int connection_too_fast (tux_req_t *req)
{
	unsigned long sent, elapsed, rate;

	if (!tux_max_output_bandwidth)
		return 1;

	sent = req->total_bytes + req->bytes_sent;
	if (!sent)
		return 1;

	/* Never divide by zero: count at least one tick. */
	elapsed = jiffies - req->first_timestamp;
	if (!elapsed)
		elapsed = 1;

	rate = sent * HZ / elapsed;
	if (rate > tux_max_output_bandwidth)
		return 2;

	return 0;
}

/*
 * Take a request out of whatever waiting state it is in, under
 * ti->work_lock.
 *
 * If the idle_input bit was set it is cleared and the keepalive
 * timer is removed. If it was not set, the request is expected to
 * sit on a work list and is unlinked from it. The matching
 * statistics counter is decremented in both cases.
 */
void unidle_req (tux_req_t *req)
{
	threadinfo_t *ti = req->ti;

	Dprintk("UNIDLE req %p <%p> (sock %p, sk %p) (keepalive: %d, status: %d)\n", req, __builtin_return_address(0), req->sock, req->sock->sk, req->keep_alive, req->status);

	spin_lock_irq(&ti->work_lock);

	if (req->magic != TUX_MAGIC)
		TUX_BUG();

	if (test_and_clear_bit(0, &req->idle_input)) {
		/* Was idle: only the keepalive timer has to go. */
		del_keepalive_timer(req);
		DEC_STAT(nr_idle_input_pending);
		Dprintk("unidled %p.\n", req);
	} else {
		/* Was not idle: it has to be queued for work. */
		Dprintk("unidling %p, wasnt idle!\n", req);
		if (list_empty(&req->work))
			TUX_BUG();
		list_del(&req->work);
		DEBUG_DEL_LIST(&req->work);
		DEC_STAT(nr_work_pending);
	}

	if (req->idle_input)
		TUX_BUG();

	spin_unlock_irq(&ti->work_lock);
}

/*
 * Debugging helpers for parse_request(): each macro logs the source
 * position it was used at and then jumps to the label of the same
 * name ('incomplete', 'redirect', 'redirect_nonidle'). They can only
 * be used inside a function that defines the respective label.
 */
#define GOTO_INCOMPLETE do { Dprintk("incomplete at %s:%d.\n", __FILE__, __LINE__); goto incomplete; } while (0)
#define GOTO_REDIRECT do { TDprintk("redirect at %s:%d.\n", __FILE__, __LINE__); goto redirect; } while (0)
#define GOTO_REDIRECT_NONIDLE do { TDprintk("redirect at %s:%d.\n", __FILE__, __LINE__); goto redirect_nonidle; } while (0)

static int read_request (struct socket *sock, char *buf, int max_size)
{
	mm_segment_t oldmm;
	struct msghdr msg;
	struct iovec iov;
	int len;

	msg.msg_name     = 0;
	msg.msg_namelen  = 0;
	msg.msg_iov	 = &iov;
	msg.msg_iovlen   = 1;
	msg.msg_control  = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags    = 0;
	
	msg.msg_iov->iov_base = buf;
	msg.msg_iov->iov_len  = max_size;
	
	oldmm = get_fs(); set_fs(KERNEL_DS);

read_again:
	len = sock->sk->prot->recvmsg(sock->sk, &msg, max_size,
					MSG_DONTWAIT, MSG_PEEK, NULL);

	/*
	 * We must not get a signal inbetween
	 */
	if ((len == -EAGAIN) || (len == -ERESTARTSYS)) {
		if (!signal_pending(current)) {
			len = 0;
			goto out;
		}
		reap_kids();
		goto read_again;
	}
out:
	set_fs(oldmm);
	return len;
}

static int zap_urg_data (struct socket *sock)
{
	mm_segment_t oldmm;
	struct msghdr msg;
	struct iovec iov;
	int len;
	char buf[10];

	oldmm = get_fs(); set_fs(KERNEL_DS);

	msg.msg_name     = 0;
	msg.msg_namelen  = 0;
	msg.msg_iov	 = &iov;
	msg.msg_iovlen   = 1;
	msg.msg_control  = NULL;
	msg.msg_controllen = 0;
	msg.msg_flags    = 0; // MSG_TRUNC | MSG_OOB;
	
	msg.msg_iov->iov_base = buf;
	msg.msg_iov->iov_len  = 2;

read_again:
	len = sock->sk->prot->recvmsg(sock->sk, &msg, 2,
				MSG_DONTWAIT, 0 /*MSG_TRUNC | MSG_OOB*/, NULL);
	Dprintk("recvmsg(MSG_OOB) returned %d.\n", len);

	/*
	 * We must not get a signal inbetween
	 */
	if ((len == -EAGAIN) || (len == -ERESTARTSYS)) {
		if (!signal_pending(current)) {
			len = 0;
			goto out;
		}
		reap_kids();
		goto read_again;
	}
out:
//	if (len > 0)
//		goto read_more;
	set_fs(oldmm);

	return len;
}

/*
 * Remove the already parsed request bytes (req->parsed_len of them)
 * from the socket's receive side and reset req->parsed_len to 0.
 *
 * Two ways of doing it:
 *  - if the request holds an input skb whose length is exactly
 *    parsed_len, the skb is unlinked from the receive queue under
 *    the socket lock and copied_seq is advanced by hand;
 *  - otherwise the bytes are discarded through recvmsg(MSG_TRUNC).
 *
 * Calling this with parsed_len == 0 is a bug.
 */
void trunc_headers (tux_req_t *req)
{
	struct sock *sk = req->sock->sk;
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	int len, addr_len = 0;
	struct sk_buff *skb = req->input_skb;

	if (!req->parsed_len)
		TUX_BUG();
	if (skb && (skb->len == req->parsed_len)) {
		lock_sock(sk);
		tp->copied_seq += req->parsed_len;
		/*
		 * We unlink the skb here, but free it only
		 * after the TUX request is finished - we
		 * need to access headers.
		 */
		__skb_unlink(skb, &sk->receive_queue);
		skb_orphan(skb);
		release_sock(sk);
		/*
		 * NOTE(review): kfree_skb() is called right away although
		 * the comment above talks about freeing later. Presumably
		 * this only drops the receive queue's reference while the
		 * skb_get() reference taken in parse_request() keeps the
		 * headers alive - confirm against the code that finally
		 * releases req->input_skb.
		 */
		kfree_skb(skb);

		Dprintk("truncated (skb) %d bytes at %p. (wanted: %d.)\n", req->parsed_len, __builtin_return_address(0), req->parsed_len);
	} else {
repeat_trunc:
		len = sk->prot->recvmsg(sk, NULL, req->parsed_len, 1, MSG_TRUNC, &addr_len);
		/*
		 * NOTE(review): unlike read_request() this retries
		 * unconditionally on -EAGAIN/-ERESTARTSYS, without
		 * looking at signal_pending() - verify this cannot spin.
		 */
		if ((len == -ERESTARTSYS) || (len == -EAGAIN)) {
			reap_kids();
			goto repeat_trunc;
		}
		Dprintk("truncated (TRUNC) %d bytes at %p. (wanted: %d.)\n", len, __builtin_return_address(0), req->parsed_len);
		/* A short truncation is only reported, not handled. */
		if (len != req->parsed_len)
			printk("hm, truncated only %d bytes, wanted: %d.\n",
				len, req->parsed_len);
	}
	req->parsed_len = 0;
}

/*
 * Debugging aid: dump the state of a request to the kernel log -
 * atom index, socket and TCP state (if the request has a socket),
 * the parsed method/URI/query/version strings, POST data and the
 * raw headers.
 */
void print_req (tux_req_t *req)
{
	printk("PRINT req %p <%p>, sock %p\n",
			req, __builtin_return_address(0), req->sock);
	printk("... idx: %d\n", req->atom_idx);

	if (req->sock) {
		struct sock *sk = req->sock->sk;
		struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;

		printk("... sock %p, sk %p, sk->state: %d, sk->err: %d\n",
			req->sock, sk, sk->state, sk->err);
		printk("... write_queue: %d, receive_queue: %d, error_queue: %d, keepalive: %d, status: %d\n",
			!skb_queue_empty(&sk->write_queue),
			!skb_queue_empty(&sk->receive_queue),
			!skb_queue_empty(&sk->error_queue),
			req->keep_alive, req->status);
		printk("...tp->send_head: %p\n", tp->send_head);
		printk("...tp->snd_una: %08x\n", tp->snd_una);
		printk("...tp->snd_nxt: %08x\n", tp->snd_nxt);
		printk("...tp->packets_out: %08x\n", tp->packets_out);
	}

	/* Unset request strings are shown as "<null>". */
	printk("... meth:{%s}, uri:{%s}, query:{%s}, ver:{%s}\n",
		req->method_str ? req->method_str : "<null>",
		req->uri_str ? req->uri_str : "<null>",
		req->query_str ? req->query_str : "<null>",
		req->version_str ? req->version_str : "<null>");
	printk("... post_data:{%s}(%d).\n",
		req->post_data_str, req->post_data_len);
	printk("... headers: {%s}\n", req->headers);
}
/*
 * parse_request() reads all available TCP/IP data and prepares
 * the request if the TUX request is complete. (we can get TUX
 * requests in several packets.) Invalid requests are redirected
 * to the secondary server.
 *
 * The request is first marked idle (keepalive timer armed,
 * idle_input bit set). Then the header bytes are obtained in one of
 * two ways:
 *
 *  - fast path (only if tux_zerocopy_parse is set): the first skb on
 *    the receive queue is parsed in place, provided the socket is
 *    established and error-free, the skb has no fragments, carries
 *    no FIN/URG, starts exactly at copied_seq and is shorter than
 *    tux_max_header_len-1;
 *  - slow path: the data is peeked into req->headers_buf via
 *    read_request().
 *
 * Outcomes:
 *  - the protocol parser returns > 0: the request is unidled and
 *    added to the workqueue;
 *  - the parser returns 0 (incomplete): a fast-path attempt is
 *    repeated through the slow path, a full buffer is redirected,
 *    otherwise parse_request is re-queued as an atom and the request
 *    stays idle;
 *  - the parser returns < 0, the read fails, or the header buffer
 *    cannot be allocated: the request is unidled and, if req->error
 *    is set, handed to zap_request().
 *
 * 'cachemiss' is only passed on to zap_request().
 */

void parse_request (tux_req_t *req, int cachemiss)
{
	u32 peek_seq;
	struct sk_buff *skb = NULL;
	int len, parsed_len, offset;
	struct sock *sk = req->sock->sk;
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	char *data;
	int was_keepalive = req->keep_alive;

	if (req->magic != TUX_MAGIC)
		TUX_BUG();

	SET_TIMESTAMP(req->parse_timestamp);

	/* Mark the request as waiting for input. */
	spin_lock_irq(&req->ti->work_lock);
	add_keepalive_timer(req);
	if (test_and_set_bit(0, &req->idle_input))
		TUX_BUG();
	INC_STAT(nr_idle_input_pending);
	spin_unlock_irq(&req->ti->work_lock);

	Dprintk("idled request %p.\n", req);

	if (!tux_zerocopy_parse)
		goto slow_path_nonlocked;
	lock_sock(sk);
	if ((sk->state != TCP_ESTABLISHED) || sk->err)
		goto slow_path;

	/* Only a linear skb at the head of the queue qualifies. */
	skb = skb_peek(&sk->receive_queue);
	if (!skb || skb_shinfo(skb)->nr_frags)
		goto slow_path;

	Dprintk("got skb %p. (frags: %d)\n", skb, skb_shinfo(skb)->nr_frags);

	if (skb->h.th->fin || skb->h.th->urg)
		goto slow_path;
	peek_seq = tp->copied_seq;
	offset = tp->copied_seq - TCP_SKB_CB(skb)->seq;
	if (skb->h.th->syn)
		offset--;

	Dprintk("peek_seq: %u, offset: %u.\n", peek_seq, offset);

	/* Part of this skb was consumed already - not for the fast path. */
	if (offset)
		goto slow_path;
	len = skb->len;
	/* Payload starts right behind the TCP header (doff is in 32-bit words). */
	data = (char *)skb->h.th + skb->h.th->doff*4;

	Dprintk("len: %d, data: %p (dataoff: %d).\n", len, data, skb->h.th->doff*4);
	release_sock(sk);
	if (len >= tux_max_header_len-1)
		goto slow_path_nonlocked;

	INC_STAT(input_fastpath);
	req->headers = data;
	req->headers_len = len;
	if (req->input_skb)
		BUG();
	/* Hold a reference for as long as req->headers points into the skb. */
	skb_get(skb);
	req->input_skb = skb;

	/*
	 * NOTE(review): this writes the terminator one byte behind the
	 * skb payload - presumably tailroom is guaranteed; confirm.
	 */
	data[len] = 0;
	goto parse;

slow_path:
	release_sock(sk);
slow_path_nonlocked:

	/* Get pending urgent data out of the way first. */
	while (tp->urg_data && !(tp->urg_data & TCP_URG_READ))
		zap_urg_data(req->sock);

	skb = NULL;
	req->input_skb = NULL;
	INC_STAT(input_slowpath);

	if (!req->headers_buf) {
		req->headers_buf = kmalloc(tux_max_header_len, GFP_KERNEL);
		if (!req->headers_buf) {
			/*
			 * No buffer to read into. Treat this like a
			 * failed read instead of passing a NULL buffer
			 * to read_request().
			 */
			len = -ENOMEM;
			GOTO_REDIRECT;
		}
	}
	req->headers = req->headers_buf;

	/* First, read the data */
	len = read_request(req->sock, req->headers, tux_max_header_len-1);
	if (len < 0) {
		Dprintk("got %d from read_request().\n", len);
		GOTO_REDIRECT;
	}
	if (!len)
		GOTO_INCOMPLETE;

parse:
	/*
	 * Make it a zero-delimited string to automatically get
	 * protection against various buffer overflow situations.
	 * Then pass it to the TUX application protocol stack.
	 */
	req->headers[len] = 0;
	req->headers_len = len;

	parsed_len = req->proto->parse_message(req, len);

	/*
	 * Is the request fully read? (or is there any error)
	 */
	if (parsed_len < 0) {
		req->error = 3;
		goto redirect_error;
	}
	if (!parsed_len) {
		/*
		 * Push pending ACK which was delayed due to the
		 * pingpong optimization:
		 */
		if (was_keepalive) {
			lock_sock(sk);
			tp->ack.pingpong = 0;
			tp->ack.pending |= TCP_ACK_PUSHED;
			cleanup_rbuf(sk, 1);
			release_sock(sk);
		}
		/* Buffer full and still no complete request: give up. */
		if (len >= tux_max_header_len-1)
			GOTO_REDIRECT;
		/*
		 * The fast path saw only one skb; retry through the
		 * slow path, which reads everything that is queued.
		 */
		if (skb) {
			DEC_STAT(input_fastpath);
			goto slow_path_nonlocked;
		}
		GOTO_INCOMPLETE;
	}
	unidle_req(req);

	req->sock->sk->tp_pinfo.af_tcp.nonagle = tux_nonagle;

	add_req_to_workqueue(req);
	return;

redirect:
	TDprintk("req %p will be redirected!\n", req);
	req_err(req);

redirect_error:
	unidle_req(req);

	/* Remember how many bytes were looked at; nothing on a failure. */
	if (len < 0)
		req->parsed_len = 0;
	else
		req->parsed_len = len;

	INC_STAT(parse_static_redirect);
	req->input_skb = NULL;
	req->headers = NULL;
	if (req->error)
		zap_request(req, cachemiss);
	return;

incomplete:
	if (req->error)
		goto redirect_error;
	if (tp->urg_data && !(tp->urg_data & TCP_URG_READ))
		goto slow_path_nonlocked;

	/* Stay idle; parse_request runs again as the request's next atom. */
	add_tux_atom(req, parse_request);
	INC_STAT(parse_static_incomplete);
	req->input_skb = NULL;
	req->headers = NULL;
}

/*
 * Work through the thread's pending-work list.
 *
 * Each request is unlinked under ti->work_lock and then handled
 * with the lock dropped: a request with queued atoms is passed to
 * tux_schedule_atom(), a request without atoms is flushed.
 *
 * A request without atoms that is marked 'usermode' ends the loop:
 * it is stored in *user_req and the function returns immediately
 * (without counting it). *user_req is NULL otherwise.
 *
 * Returns the number of requests handled.
 */
int process_requests (threadinfo_t *ti, tux_req_t **user_req)
{
	struct list_head *first;
	tux_req_t *req;
	int handled = 0;
	int i;

	*user_req = NULL;

	for (;;) {
		spin_lock_irq(&ti->work_lock);
		first = ti->work_pending.next;
		if (first == &ti->work_pending)
			break;

		req = list_entry(first, tux_req_t, work);
		Dprintk("PROCESS req %p <%p>.\n",
			req, __builtin_return_address(0));
		for (i = 0; i < req->atom_idx; i++)
			Dprintk("... atom %d: %p\n", i, req->atoms[i]);

		if (req->ti != ti)
			TUX_BUG();
		if (req->magic != TUX_MAGIC)
			TUX_BUG();

		if (list_empty(&req->work))
			TUX_BUG();
		list_del(first);
		DEBUG_DEL_LIST(&req->work);
		spin_unlock_irq(&ti->work_lock);

		if (req->atom_idx) {
			tux_schedule_atom(req, 0);
		} else {
			if (req->usermode) {
				*user_req = req;
				return handled;
			}
			/*
			 * idx == 0 requests are flushed automatically.
			 */
			flush_request(req, 0);
		}
		handled++;
	}
	/* The list is empty; the lock is still held from the loop. */
	spin_unlock_irq(&ti->work_lock);

	return handled;
}

/*
 * Throw away everything on the thread's pending-work list.
 *
 * Each request is unlinked under ti->work_lock with its idle_input
 * and wait_output_space bits cleared. With the lock dropped, its
 * atoms, file position, keepalive and usermode state are reset, the
 * status is set to -1, and the request is flushed.
 *
 * Returns the number of requests flushed.
 */
int flush_workqueue (threadinfo_t *ti)
{
	struct list_head *first;
	tux_req_t *req;
	int flushed = 0;

	for (;;) {
		spin_lock_irq(&ti->work_lock);
		first = ti->work_pending.next;
		if (first == &ti->work_pending)
			break;

		req = list_entry(first, tux_req_t, work);
		clear_bit(0, &req->idle_input);
		clear_bit(0, &req->wait_output_space);
		if (list_empty(&req->work))
			TUX_BUG();
		list_del(first);
		DEBUG_DEL_LIST(first);
		DEC_STAT(nr_input_pending);
		spin_unlock_irq(&ti->work_lock);

#if CONFIG_TUX_DEBUG
		req->bytes_expected = 0;
#endif
		req->in_file.f_pos = 0;
		req->atom_idx = 0;
		clear_keepalive(req);
		req->status = -1;
		if (req->usermode) {
			req->usermode = 0;
			req->private = NULL;
		}
		flush_request(req, 0);
		flushed++;
	}
	/* The list is empty; the lock is still held from the loop. */
	spin_unlock_irq(&ti->work_lock);

	return flushed;
}

/*
 * Debugging aid: print every request on the thread's all_requests
 * list via print_req().
 *
 * Note that this is destructive - each request is unlinked from
 * ti->all_requests before it is printed, so the list is empty
 * afterwards. Unlinking and printing happen under ti->work_lock.
 *
 * Returns the number of requests printed.
 */
int print_all_requests (threadinfo_t *ti)
{
	struct list_head *first;
	tux_req_t *req;
	int printed = 0;

	for (;;) {
		spin_lock_irq(&ti->work_lock);
		first = ti->all_requests.next;
		if (first == &ti->all_requests)
			break;

		req = list_entry(first, tux_req_t, all);
		list_del(first);
		DEBUG_DEL_LIST(first);
		print_req(req);
		spin_unlock_irq(&ti->work_lock);
		printed++;
	}
	/* The list is empty; the lock is still held from the loop. */
	spin_unlock_irq(&ti->work_lock);

	return printed;
}

