const cheerio = require('cheerio')
const findPage = require('./find-page')
const renderContent = require('./render-content')
const rewriteLocalLinks = require('./rewrite-local-links')
const getApplicableVersions = require('./get-applicable-versions')
const { getPathWithoutLanguage } = require('./path-utils')
const { getEnterpriseVersionNumber } = require('./patterns')
const { deprecated } = require('./enterprise-server-releases')

// internal links will have a language code by the time we're testing them
// we also want to capture same-page anchors (#foo)
const languageCode = 'en'
const internalHrefs = ['/en', '#']

const renderedPageCache = {}
const checkedAnchorCache = {}

module.exports = async function checkLinks ($, page, context, version, checkedLinkCache = {}) {
  // run rewriteLocalLinks to version links and add language codes
  rewriteLocalLinks($, version, languageCode)

  const brokenLinks = {
    anchors: [],
    links: []
  }

  // internal link check
  for (const href of internalHrefs) {
    const internalLinks = $(`a[href^="${href}"]`).get()

    for (const internalLink of internalLinks) {
      const href = $(internalLink).attr('href')

      // enable caching so we don't check links more than once
      // anchor links are cached locally (within this run) since they are specific to the page
      if (checkedLinkCache[href] || checkedAnchorCache[href]) continue

      const [link, anchor] = href.split('#')

      // if anchor only (e.g., #foo), look for heading on same page
      if (anchor && !link) {
        // ignore anchors that are autogenerated from headings
        if (anchor === $(internalLink).parent().attr('id')) continue

        const matchingHeadings = getMatchingHeadings($, anchor)

        if (matchingHeadings.length === 0) {
          brokenLinks.anchors.push({ 'broken same-page anchor': `#${anchor}`, reason: 'heading not found on page' })
        }
        checkedAnchorCache[href] = true
        continue
      }
      checkedLinkCache[href] = true

      // skip rare hardcoded links to old GHE versions
      // these paths will always be in the old versioned form
      // example: /enterprise/11.10.340/admin/articles/upgrading-to-the-latest-release
      const gheVersionInLink = link.match(getEnterpriseVersionNumber)
      if (gheVersionInLink && deprecated.includes(gheVersionInLink[1])) continue

      // look for linked page
      const linkedPage = findPage(link, context.pages, context.redirects, languageCode)

      if (!linkedPage) {
        brokenLinks.links.push({ 'broken link': link, reason: 'linked page not found' })
        continue
      }

      if (linkedPage.relativePath.includes('rest/reference/') && linkedPage.relativePath !== 'rest/reference/index.md') {
        const linkedPageRelevantPermalink = linkedPage.permalinks.find(permalink => permalink.pageVersion === version)
        const docsPath = linkedPageRelevantPermalink.href
          .split('rest/reference/')[1]
          .split('#')[0] // do not include #fragments

        // find all operations that with an operationID that matches the requested docs path
        context.currentRestOperations = context.operationsForCurrentProduct
          .filter(operation => operation.operationId.startsWith(docsPath))
      }

      // finding the linked page isn't enough if it's a github.com page; also need to check versions
      if (linkedPage.relativePath.startsWith('github')) {
        const linkedPageVersions = getApplicableVersions(linkedPage.versions, linkedPage.relativePath)

        if (!linkedPageVersions.includes(version) && $(internalLink).attr('class') !== 'dotcom-only') {
          brokenLinks.links.push({ 'broken link': link, reason: `${version} not found in linked page versions`, 'linked page': linkedPage.fullPath })
          continue
        }
      }

      // collect elements of the page that may contain links
      const linkedPageContent = linkedPage.relativePath.includes('graphql/reference/objects')
        ? linkedPage.markdown + context.graphql.prerenderedObjectsForCurrentVersion.html
        : linkedPage.markdown

      // create a unique string for caching purposes
      const pathToCache = version + linkedPage.relativePath

      const anchorToCheck = anchor

      // if link with anchor (e.g., /some/path#foo), look for heading on linked page
      if (anchorToCheck) {
        // either render page or fetch it from cache if we've already rendered it
        let linkedPageObject
        if (!renderedPageCache[pathToCache]) {
          const linkedPageHtml = await renderContent(linkedPageContent, context)
          linkedPageObject = cheerio.load(linkedPageHtml, { xmlMode: true })
          renderedPageCache[pathToCache] = linkedPageObject
        } else {
          linkedPageObject = renderedPageCache[pathToCache]
        }

        const matchingHeadings = getMatchingHeadings(linkedPageObject, anchorToCheck)

        if (matchingHeadings.length === 0) {
          if (anchor) {
            brokenLinks.anchors.push({ 'broken anchor': `#${anchor}`, 'full link': `${getPathWithoutLanguage(link)}#${anchor}`, reason: 'heading not found on linked page', 'linked page': linkedPage.fullPath })
          }
          continue
        }
      }
    }
  }

  return { brokenLinks, checkedLinkCache }
}

// article titles are h1s; headings can be any subsequent level
/**
 * Find the elements an anchor can legitimately point at: h2-h6 headings whose
 * `id` equals the anchor, or `<a>` elements whose `name` equals the anchor.
 *
 * @param {Function} $ - cheerio-style selector function for the document to search
 * @param {string} anchor - anchor text without the leading `#`
 * @returns {Array} the matched elements, as returned by `$(selector).get()`
 */
function getMatchingHeadings ($, anchor) {
  // escape backslashes and double quotes so the anchor cannot terminate (or
  // start an escape sequence inside) the quoted attribute value
  const escapedAnchor = String(anchor).replace(/["\\]/g, '\\$&')

  const selector = [
    ...['h2', 'h3', 'h4', 'h5', 'h6'].map(tag => `${tag}[id="${escapedAnchor}"]`),
    `a[name="${escapedAnchor}"]`
  ].join(', ')

  return $(selector).get()
}
