# -*- coding: utf-8 -*-
#
#   cms.py - simple WSGI/Python based CMS script
#
#   Copyright (C) 2011-2019 Michael Buesch <m@bues.ch>
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program.  If not, see <http://www.gnu.org/licenses/>.

#from cms.cython_support cimport * #@cy

from cms.exception import *
from cms.util import * #+cimport

import re
import os

# Names exported by a star-import of this module.
# Only the page identifier class is public.
__all__ = [
	"CMSPageIdent",
]

class CMSPageIdent(object):
	# Page identifier.
	#
	# A page identifier is an ordered list of path components
	# (for example ["blog", "2019", "post"]).
	# Components are stored unvalidated. They are validated lazily,
	# right before they are handed out via get(), getUrl() or
	# getFilesystemPath(). Validation failures raise CMSException.

	__slots__ = (
		# Path components.
		# List of str.
		"__elements",

		# Boolean.
		# True, if all __elements have been validated.
		"__allValidated",
	)

	# Matches a path with a trailing page file extension
	# (.htm, .html, .py or .php). Group 1 is the path without the extension.
	__pageFileName_re	= re.compile(
		r'^(.*)((?:\.html?)|(?:\.py)|(?:\.php))$', re.DOTALL)
	# Paths that refer to the root page.
	__indexPages		= {"", "index"}

	# Parse a page identifier from a string.
	# That string may contain malicious components such as backwards
	# traversals (".." in the file path). This class takes care to reject
	# such page identifiers before use as filesystem path.
	#
	# path: The path string to parse.
	# maxPathLen: The maximum allowed length of 'path' (before stripping).
	# maxIdentDepth: The maximum allowed number of path components.
	# Returns a new page identifier. It is empty for the root page.
	# Raises CMSException(400), if one of the limits is exceeded.
	# Note that the components are not validated here. Validation
	# happens when the identifier is used.
	@classmethod
	def parse(cls, path, maxPathLen=512, maxIdentDepth=32):
		if len(path) > maxPathLen:
			raise CMSException(400, "Invalid URL")

		pageIdent = cls()

		# Strip whitespace and slashes
		path = path.strip(' \t/')

		# Remove page file extensions like .html and such.
		m = cls.__pageFileName_re.match(path)
		if m:
			path = m.group(1)

		# Use the ident elements, if this is not the root page.
		if path not in cls.__indexPages:
			pageIdent.extend(path.split("/"))

		if len(pageIdent.__elements) > maxIdentDepth:
			raise CMSException(400, "Invalid URL")

		return pageIdent

	# The separator used by validateSafePath() to split a path.
	__pathSep = os.path.sep
	# The characters that are allowed in a path component.
	# (LOWERCASE, UPPERCASE and NUMBERS come from the star-imports;
	# presumably the ASCII letters and digits.)
	__validPathChars = LOWERCASE + UPPERCASE + NUMBERS + "-_."

	# Validate a path component. Avoid any directory change.
	# Returns the unmodified component on success.
	# Raises CMSException on failure.
	@classmethod
	def validateSafePathComponent(cls, pcomp):
		if pcomp.startswith('.'):
			# No ".", ".." and hidden files.
			raise CMSException(404, "Invalid page path")
		# Stop at the first invalid character instead of
		# collecting all of them.
		validChars = cls.__validPathChars
		if any(c not in validChars for c in pcomp):
			raise CMSException(404, "Invalid page path")
		return pcomp

	# Validate a path. Avoid going back in the hierarchy (. and ..)
	# The path is split at the OS path separator and each
	# component is validated with validateSafePathComponent().
	# Returns the unmodified path on success.
	# Raises CMSException on failure.
	@classmethod
	def validateSafePath(cls, path):
		for pcomp in path.split(cls.__pathSep):
			cls.validateSafePathComponent(pcomp)
		return path

	# Validate a page name.
	# Returns the unmodified name on success.
	# Raises CMSException on failure.
	# If allowSysNames is True, system names starting with "__" are allowed.
	@classmethod
	def validateName(cls, name, allowSysNames=False):
		if name.startswith("__") and not allowSysNames:
			# Page names with __ are system folders.
			raise CMSException(404, "Invalid page name")
		return cls.validateSafePathComponent(name)

	# Initialize this page identifier.
	# initialElements: None, another page identifier or an
	#                  iterable of path components.
	def __init__(self, initialElements=None):
		self.__elements = []
		# extend() does not touch the flag, if initialElements is None.
		# So always initialize the slot here.
		self.__allValidated = False
		self.extend(initialElements)

	# Add a list of path elements to this identifier.
	# other: None (no-op), another page identifier or an
	#        iterable of path components.
	# Returns self.
	# Raises CMSException(500), if 'other' has an unsupported type.
	def extend(self, other):
		if other is not None:
			# The new elements have not been validated, yet.
			self.__allValidated = False

			if isinstance(other, self.__class__):
				self.__elements.extend(other.__elements)
			elif isiterable(other):
				self.__elements.extend(other)
			else:
				raise CMSException(500, "Invalid 'other' in CMSPageIdent.extend()")
		return self

	# Add a list of path elements to this identifier.
	# (In-place variant: ident += other)
	def __iadd__(self, other):
		return self.extend(other)

	# Create a new page identifier from 'self' and add 'other'.
	# 'self' is not modified.
	def __add__(self, other):
		return self.__class__(self).extend(other)

	# Get the number of path components in this path identifier.
	def __len__(self):
		return len(self.__elements)

	# Validate all page identifier name components.
	# (Do not allow system name components)
	# Raises CMSException, if a component is invalid.
	def __validateAll(self):
		if not self.__allValidated:
			for pcomp in self.__elements:
				self.validateName(pcomp)
			# Remember that we validated.
			# (This flag must be reset to false, if components are added.)
			self.__allValidated = True

	# Get one page identifier component by index.
	# Returns 'default', if there is no component at 'index'.
	# Raises CMSException, if the component is not a valid name.
	# If allowSysNames is True, system names starting with "__" are allowed.
	def get(self, index, default=None, allowSysNames=False):
		# Only a missing index selects the default.
		# The validation is done outside of the try block.
		try:
			pcomp = self.__elements[index]
		except IndexError:
			return default
		return self.validateName(pcomp, allowSysNames)

	# Get the page identifier as URL.
	# protocol: Optional protocol name (without "://").
	# domain: Optional domain name.
	# urlBase: Optional base path. Surrounding slashes are stripped.
	# pageSuffix: Suffix that is appended, if the identifier is
	#             not the root page.
	# Raises CMSException, if a component is not a valid name.
	def getUrl(self, protocol=None, domain=None,
		   urlBase=None, pageSuffix=".html"):
		self.__validateAll()
		url = []
		if protocol:
			# The second slash of "://" is added by the join below.
			url.append(protocol + ":/")
		if domain:
			url.append(domain)
		if urlBase:
			url.append(urlBase.strip("/"))
		# Skip empty components.
		localPath = [elem for elem in self.__elements if elem]
		url.extend(localPath)
		if not protocol and not domain:
			# Local URL. Make the join below emit a leading slash.
			url.insert(0, "")
		urlStr = "/".join(url)
		if localPath and pageSuffix:
			urlStr += pageSuffix
		return urlStr

	# Get the page identifier as filesystem path.
	# rstrip: The number of trailing components to leave out.
	# Returns an empty string, if there are no components (left).
	# The path is built by fs.mkpath() (provided by the star-imports).
	# Raises CMSException, if a component is not a valid name.
	def getFilesystemPath(self, rstrip=0):
		self.__validateAll()
		if self.__elements:
			if rstrip > 0:
				pcomps = self.__elements[ : 0 - rstrip]
				if pcomps:
					return fs.mkpath(*pcomps)
				return ""
			return fs.mkpath(*(self.__elements))
		return ""

	# Test if this identifier starts with the same elements
	# as another one.
	# other: None or another page identifier.
	# Returns False, if 'other' is None.
	def startswith(self, other):
		return other is not None and\
		       len(self.__elements) >= len(other.__elements) and\
		       self.__elements[ : len(other.__elements)] == other.__elements

	# Hash of the path components.
	# The hash depends on the order of the components, just like
	# __eq__ does. So identifiers that only differ in the order of
	# their components or in repeated components do not
	# systematically collide.
	def __hash__(self):
		return hash(tuple(self.__elements))

	# Two page identifiers are equal, if they have the same
	# components in the same order.
	def __eq__(self, other):
		return (isinstance(other, self.__class__) and
			self.__elements == other.__elements)