1276 lines
38 KiB
JavaScript
1276 lines
38 KiB
JavaScript
require('dotenv').config();

const crypto = require('crypto');
const fs = require('fs').promises;
const path = require('path');

const axios = require('axios');
const cheerio = require('cheerio');
const cors = require('cors');
const express = require('express');
const session = require('express-session');
const passport = require('passport');
const LdapStrategy = require('passport-ldapauth').Strategy;
const { Op } = require('sequelize');
const { v4: uuidv4 } = require('uuid');

const db = require('./models');
const MigrationRunner = require('./migrations/runner');

// puppeteer-core is required lazily, on first use, rather than at startup.
let puppeteer = null;
// Tri-state load flag: null = not attempted, true = loaded, false = load failed.
let puppeteerAvailable = null;

/**
 * Returns the puppeteer-core module, requiring it on first use.
 *
 * A failed load is remembered, so later calls return null immediately
 * instead of retrying the require.
 *
 * @returns {Promise<object|null>} The puppeteer-core module, or null when it
 *   could not be loaded.
 */
async function getPuppeteer() {
  if (puppeteer) {
    return puppeteer;
  }
  if (puppeteerAvailable === false) {
    return null; // A previous attempt already failed
  }

  try {
    puppeteer = require('puppeteer-core');
  } catch (loadError) {
    console.warn('Puppeteer-core not available:', loadError.message);
    puppeteerAvailable = false;
    return null;
  }

  puppeteerAvailable = true;
  console.log('Puppeteer-core loaded successfully');
  return puppeteer;
}

// Path found by a previous successful discovery. Only successful lookups are
// cached, so installing a browser later is still picked up without a restart.
let cachedChromeExecutable = null;

/**
 * Locates a system Chromium/Chrome executable.
 *
 * Lookup order:
 *   1. the CHROME_EXECUTABLE_PATH environment variable (read on every call),
 *   2. a previously discovered path,
 *   3. `which` for a list of common binary names,
 *   4. a `find` over /nix/store for an executable named "chromium".
 *
 * Steps 3 and 4 spawn synchronous shell commands, so a successful result is
 * memoized to avoid repeating that blocking work on every call.
 *
 * @returns {string|null} Path to the executable, or null when none was found.
 */
function findChromeExecutable() {
  // Explicit configuration always wins and needs no process spawning.
  if (process.env.CHROME_EXECUTABLE_PATH) {
    return process.env.CHROME_EXECUTABLE_PATH;
  }

  if (cachedChromeExecutable) {
    return cachedChromeExecutable;
  }

  const { execSync } = require('child_process');

  // Runs a shell command and returns its trimmed stdout; a failing command
  // (non-zero exit, e.g. `which` finding nothing) yields an empty string.
  const runQuietly = (command) => {
    try {
      return execSync(command, { encoding: 'utf8' }).trim();
    } catch (e) {
      return '';
    }
  };

  // Try which command for common names
  const commands = ['chromium', 'chromium-browser', 'google-chrome', 'google-chrome-stable'];
  for (const cmd of commands) {
    const located = runQuietly(`which ${cmd} 2>/dev/null`);
    if (located) {
      cachedChromeExecutable = located;
      return located;
    }
  }

  // Try common NixOS paths
  const nixPath = runQuietly('find /nix/store -name chromium -type f -executable 2>/dev/null | head -1');
  if (nixPath) {
    cachedChromeExecutable = nixPath;
    return nixPath;
  }

  return null;
}

// Express application and the port it listens on (PORT env var, else 3000).
const app = express();
const PORT = process.env.PORT || 3000;

// Legacy JSON data files; read by the JSON-to-database migration.
const DATA_DIR = path.join(__dirname, 'data');
const DATA_FILE = path.join(DATA_DIR, 'links.json');
const LISTS_FILE = path.join(DATA_DIR, 'lists.json');

// Trust proxy - required when behind a reverse proxy (Traefik) so Express
// honours X-Forwarded-* headers. Enabled unless TRUST_PROXY is exactly 'false'.
app.set('trust proxy', !(process.env.TRUST_PROXY === 'false'));

// Session configuration

// Secure cookies are used when COOKIE_SECURE is 'true', or when running in
// production unless COOKIE_SECURE is explicitly 'false'.
const isSecure = process.env.COOKIE_SECURE === 'true' ||
  (process.env.COOKIE_SECURE !== 'false' && process.env.NODE_ENV === 'production');

// Never fall back to a hard-coded secret: a publicly known value lets anyone
// forge signed session cookies. Without SESSION_SECRET a random per-process
// secret is generated instead. No session store is configured here, so
// sessions do not survive a restart regardless of the secret.
let sessionSecret = process.env.SESSION_SECRET;
if (!sessionSecret) {
  sessionSecret = crypto.randomBytes(32).toString('hex');
  console.warn('SESSION_SECRET is not set; generated a random session secret for this process. Set SESSION_SECRET to keep the signing key stable.');
}

app.use(session({
  secret: sessionSecret,
  resave: false,
  saveUninitialized: false,
  name: process.env.SESSION_NAME || 'connect.sid', // Custom session name to avoid conflicts
  cookie: {
    secure: isSecure, // Use secure cookies when behind HTTPS proxy
    httpOnly: true,
    maxAge: 24 * 60 * 60 * 1000, // 24 hours
    // NOTE(review): 'none' sends the cookie on cross-site requests; combined
    // with a permissive CORS policy this weakens CSRF protection. Confirm it
    // is needed, or set COOKIE_SAMESITE explicitly.
    sameSite: process.env.COOKIE_SAMESITE || (isSecure ? 'none' : 'lax'), // 'none' for cross-site, 'lax' for same-site
    domain: process.env.COOKIE_DOMAIN || undefined, // Set if cookies need to be shared across subdomains
    path: process.env.COOKIE_PATH || '/' // Cookie path
  }
}));

// Initialize Passport
app.use(passport.initialize());
app.use(passport.session());

// Configure LDAP Strategy

/**
 * Expands template placeholders in an LDAP search filter.
 *
 * Supported placeholders (every occurrence is replaced):
 *   {username_attribute} -> attributeNames.username (when set)
 *   {mail_attribute}     -> attributeNames.mail (when set)
 *   {input}              -> '{{username}}', the placeholder passport-ldapauth
 *                           substitutes with the submitted login name
 *
 * A filter that already uses '{{username}}' contains none of these
 * placeholders and is returned unchanged.
 *
 * @param {string|undefined} rawFilter - Filter template from configuration.
 * @param {{username?: string, mail?: string}} [attributeNames] - LDAP attribute names.
 * @returns {string|undefined} The expanded filter, or rawFilter itself when it is empty.
 */
function buildLdapSearchFilter(rawFilter, attributeNames = {}) {
  if (!rawFilter) {
    return rawFilter;
  }

  const substitutions = [
    ['{username_attribute}', attributeNames.username],
    ['{mail_attribute}', attributeNames.mail],
    ['{input}', '{{username}}']
  ];

  let filter = rawFilter;
  for (const [placeholder, replacement] of substitutions) {
    if (replacement) {
      // split/join replaces every occurrence and treats the replacement literally.
      filter = filter.split(placeholder).join(replacement);
    }
  }
  return filter;
}

// Combine base DN with additional users DN if provided
const baseDN = process.env.LDAP_BASE_DN;
const additionalUsersDN = process.env.LDAP_ADDITIONAL_USERS_DN || '';
const searchBase = additionalUsersDN && baseDN
  ? `${additionalUsersDN},${baseDN}`
  : baseDN;

// Request only the configured attributes; unset ones are skipped.
const searchAttributes = [
  process.env.LDAP_ATTRIBUTE_USERNAME,
  process.env.LDAP_ATTRIBUTE_MAIL,
  process.env.LDAP_ATTRIBUTE_DISTINGUISHED_NAME,
  process.env.LDAP_ATTRIBUTE_MEMBER_OF
].filter(Boolean);

// Parse the timeout as base 10; an unset or non-numeric value means "not configured".
const parsedLdapTimeout = Number.parseInt(process.env.LDAP_TIMEOUT, 10);
const ldapTimeout = Number.isNaN(parsedLdapTimeout) ? undefined : parsedLdapTimeout;

const ldapOptions = {
  server: {
    url: process.env.LDAP_ADDRESS,
    bindDN: process.env.LDAP_USER,
    bindCredentials: process.env.LDAP_PASSWORD,
    searchBase: searchBase,
    searchFilter: buildLdapSearchFilter(process.env.LDAP_USERS_FILTER, {
      username: process.env.LDAP_ATTRIBUTE_USERNAME,
      mail: process.env.LDAP_ATTRIBUTE_MAIL
    }),
    searchAttributes: searchAttributes.length > 0 ? searchAttributes : undefined,
    timeout: ldapTimeout,
    connectTimeout: ldapTimeout,
    tlsOptions: {
      rejectUnauthorized: process.env.LDAP_TLS_SKIP_VERIFY !== 'true',
      servername: process.env.LDAP_TLS_SERVER_NAME || undefined
    }
  },
  usernameField: 'username',
  passwordField: 'password'
};

// Register the LDAP strategy. The verify callback maps the LDAP entry onto the
// user object kept in the session, using the configured attribute names and
// falling back to uid / mail / dn when an attribute name is not configured.
passport.use(new LdapStrategy(ldapOptions, (ldapUser, done) => {
  const readAttribute = (envName, fallbackKey) => {
    const configuredKey = process.env[envName];
    return configuredKey ? ldapUser[configuredKey] : ldapUser[fallbackKey];
  };

  const username = readAttribute('LDAP_ATTRIBUTE_USERNAME', 'uid');

  return done(null, {
    id: username,
    username: username,
    email: readAttribute('LDAP_ATTRIBUTE_MAIL', 'mail'),
    dn: readAttribute('LDAP_ATTRIBUTE_DISTINGUISHED_NAME', 'dn')
  });
}));

// The whole user object is stored in the session as-is, so serialization and
// deserialization are both identity operations.
const passUserThrough = (user, done) => {
  done(null, user);
};

// Serialize user for session
passport.serializeUser(passUserThrough);

// Deserialize user from session
passport.deserializeUser(passUserThrough);

// Middleware

// NOTE(review): `origin: true` reflects the request's Origin header, and with
// `credentials: true` any website may issue credentialed requests to this API.
// Consider restricting this to an allowlist.
const corsOptions = {
  origin: true,
  credentials: true
};

app.use(cors(corsOptions));
app.use(express.json());
app.use(express.static('public'));

/**
 * Authentication middleware: passes the request on when it is authenticated,
 * otherwise answers 401 with a JSON error body.
 *
 * @param {object} req - Request; must expose isAuthenticated().
 * @param {object} res - Response used for the 401 reply.
 * @param {Function} next - Next middleware in the chain.
 */
function isAuthenticated(req, res, next) {
  if (!req.isAuthenticated()) {
    res.status(401).json({ error: 'Authentication required' });
    return;
  }
  return next();
}

/**
 * Database initialization and migration.
 *
 * Verifies the connection, runs the schema migrations, then imports the
 * legacy JSON files. Any failure is logged and rethrown to the caller.
 *
 * @returns {Promise<void>}
 * @throws Whatever error the failing step raised.
 */
async function initializeDatabase() {
  const { sequelize } = db;

  try {
    // Test database connection
    await sequelize.authenticate();
    console.log('Database connection established successfully.');

    // Run migrations
    await new MigrationRunner(sequelize).runMigrations();

    // Migrate JSON files if they exist
    await migrateJsonFiles();

    console.log('Database initialization completed.');
  } catch (initError) {
    console.error('Database initialization failed:', initError);
    throw initError;
  }
}

/**
 * Migrate JSON files to database.
 *
 * Imports the legacy lists.json and links.json files, then renames each to
 * `<file>.bak`; an existing `.bak` marks that file as already migrated.
 * Lists are imported first so that links can be attached to them.
 *
 * Each file is inserted inside one transaction: a failure part-way rolls the
 * inserts back and leaves the JSON file in place, so the next start retries
 * from a clean state instead of duplicating rows. Errors other than a missing
 * file are logged, not thrown.
 *
 * @returns {Promise<void>}
 */
async function migrateJsonFiles() {
  const linksFile = DATA_FILE;
  const listsFile = LISTS_FILE;
  const linksBackup = linksFile + '.bak';
  const listsBackup = listsFile + '.bak';

  // True when the path can be accessed, i.e. the backup marker exists.
  const exists = async (filePath) => {
    try {
      await fs.access(filePath);
      return true;
    } catch {
      return false;
    }
  };

  // Rejects with code 'ENOENT' when the file is missing.
  const readJson = async (filePath) => JSON.parse(await fs.readFile(filePath, 'utf8'));

  const [linksAlreadyMigrated, listsAlreadyMigrated] = await Promise.all([
    exists(linksBackup),
    exists(listsBackup)
  ]);

  // Step 1: Migrate lists first (so we can create relationships)
  // Maps old JSON list ID -> created List record, so attaching links needs no
  // extra lookup per relationship.
  const migratedLists = new Map();

  if (!listsAlreadyMigrated) {
    try {
      const listsData = await readJson(listsFile);

      if (Array.isArray(listsData) && listsData.length > 0) {
        console.log(`Migrating ${listsData.length} lists from JSON file...`);

        try {
          await db.sequelize.transaction(async (transaction) => {
            for (const list of listsData) {
              const record = await db.List.create({
                id: uuidv4(),
                name: list.name,
                created_at: list.createdAt ? new Date(list.createdAt) : new Date(),
                created_by: null, // No user info in JSON
                public: list.public || false
              }, { transaction });
              migratedLists.set(list.id, record);
            }
          });
        } catch (transactionError) {
          // Rolled back: none of the collected records exist in the database.
          migratedLists.clear();
          throw transactionError;
        }

        // Rename file to backup
        await fs.rename(listsFile, listsBackup);
        console.log('Lists migration completed.');
      }
    } catch (error) {
      if (error.code !== 'ENOENT') {
        console.error('Error migrating lists:', error);
      }
    }
  }

  // Step 2: Migrate links and set up relationships
  if (!linksAlreadyMigrated) {
    try {
      const linksData = await readJson(linksFile);

      if (Array.isArray(linksData) && linksData.length > 0) {
        console.log(`Migrating ${linksData.length} links from JSON file...`);

        await db.sequelize.transaction(async (transaction) => {
          for (const link of linksData) {
            // Create link
            const linkRecord = await db.Link.create({
              id: uuidv4(),
              url: link.url,
              title: link.title || null,
              description: link.description || null,
              image: link.image || null,
              created_at: link.createdAt ? new Date(link.createdAt) : new Date(),
              created_by: null, // No user info in JSON
              archived: link.archived || false
            }, { transaction });

            // Create relationships for list IDs migrated in this run. Unknown
            // IDs are skipped; the Set guards against duplicate IDs producing
            // duplicate join rows.
            const oldListIds = Array.isArray(link.listIds) ? link.listIds : [];
            const listRecords = [...new Set(
              oldListIds.map((oldListId) => migratedLists.get(oldListId)).filter(Boolean)
            )];
            if (listRecords.length > 0) {
              await linkRecord.setLists(listRecords, { transaction });
            }
          }
        });

        // Rename file to backup
        await fs.rename(linksFile, linksBackup);
        console.log('Links migration completed.');
      }
    } catch (error) {
      if (error.code !== 'ENOENT') {
        console.error('Error migrating links:', error);
      }
    }
  }
}

/**
 * Helper function to format link for API response.
 *
 * Maps the snake_case record fields to the camelCase API shape and flattens
 * the associated lists into an array of their IDs.
 *
 * @param {object} link - Link record, optionally carrying a `lists` array.
 * @returns {object} Plain object for the JSON response.
 */
function formatLink(link) {
  const { id, url, title, description, image } = link;
  const attachedLists = link.lists || [];

  return {
    id,
    url,
    title,
    description,
    image,
    createdAt: link.created_at,
    createdBy: link.created_by,
    modifiedAt: link.modified_at,
    modifiedBy: link.modified_by,
    archived: link.archived ? link.archived : false,
    listIds: attachedLists.map(({ id: listId }) => listId)
  };
}

/**
 * Helper function to format list for API response.
 *
 * Maps the snake_case record fields to the camelCase API shape; a falsy
 * `public` value is reported as false.
 *
 * @param {object} list - List record.
 * @returns {object} Plain object for the JSON response.
 */
function formatList(list) {
  const { id, name } = list;
  const isPublic = list.public ? list.public : false;

  return {
    id,
    name,
    createdAt: list.created_at,
    createdBy: list.created_by,
    modifiedAt: list.modified_at,
    modifiedBy: list.modified_by,
    public: isPublic
  };
}

/**
 * Extract metadata using Puppeteer (for JavaScript-heavy sites).
 *
 * Launches the system Chrome/Chromium headlessly, loads the page, waits
 * briefly and scrolls to trigger lazy loading, then passes the rendered HTML
 * to extractMetadataFromHTML.
 *
 * The browser is closed in a `finally` block, so it is released exactly once
 * on every path. A failure while closing is only logged: once the HTML has
 * been captured, a cleanup error does not discard the result.
 *
 * @param {string} url - Page to render.
 * @returns {Promise<{title: string, description: string, image: string}>}
 * @throws {Error} When Puppeteer or a browser executable is unavailable, or
 *   when launching, navigating or extracting fails.
 */
async function extractMetadataWithPuppeteer(url) {
  const pptr = await getPuppeteer();
  if (!pptr) {
    throw new Error('Puppeteer not available');
  }

  // Helper function to wait (replacement for deprecated waitForTimeout)
  const wait = (ms) => new Promise(resolve => setTimeout(resolve, ms));

  try {
    let html;
    let browser = null;

    try {
      console.log('Launching Puppeteer browser...');

      // Find system Chrome/Chromium executable
      const executablePath = findChromeExecutable();
      if (!executablePath) {
        throw new Error('Chrome/Chromium not found. Please install it via NixOS or set CHROME_EXECUTABLE_PATH environment variable.');
      }

      console.log(`Using Chrome executable: ${executablePath}`);

      browser = await pptr.launch({
        headless: 'new',
        executablePath: executablePath,
        args: [
          '--no-sandbox',
          '--disable-setuid-sandbox',
          '--disable-dev-shm-usage',
          '--disable-blink-features=AutomationControlled',
          '--disable-features=IsolateOrigins,site-per-process',
          '--disable-gpu'
        ]
      });

      const page = await browser.newPage();

      // Set realistic viewport and user agent
      await page.setViewport({ width: 1920, height: 1080 });
      await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36');

      // Add extra headers to look more like a real browser
      await page.setExtraHTTPHeaders({
        'Accept-Language': 'en-US,en;q=0.9,de;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
      });

      console.log(`Navigating to ${url}...`);
      // Navigate to the page with longer timeout
      await page.goto(url, {
        waitUntil: 'networkidle2',
        timeout: 60000
      });

      // Wait a bit for any lazy-loaded content and images
      console.log('Waiting for content to load...');
      await wait(3000);

      // Scroll a bit to trigger lazy loading
      await page.evaluate(() => {
        window.scrollTo(0, 300);
      });
      await wait(1000);

      // Get the rendered HTML
      console.log('Extracting HTML content...');
      html = await page.content();
    } finally {
      // Release the browser before the HTML is processed, success or failure.
      if (browser) {
        try {
          await browser.close();
        } catch (closeError) {
          console.warn('Failed to close Puppeteer browser:', closeError.message);
        }
      }
    }

    console.log('Browser closed, processing HTML...');

    // Use the same extraction logic as the regular function
    return await extractMetadataFromHTML(html, url);
  } catch (error) {
    console.error('Puppeteer extraction error:', error.message);
    throw error;
  }
}

/**
 * Common extraction logic that works with an HTML string.
 *
 * Derives a title, description and image for a page, trying in order:
 *   - title: JSON-LD, og:title, twitter:title, first <h1>, <title>, then 'Untitled'
 *   - description: JSON-LD, og/twitter/meta description, product description
 *     containers, then the first paragraph of 51-999 characters
 *   - image: product containers, og/twitter image, JSON-LD image,
 *     Galaxus-specific selectors, generic selectors, then the first <img>
 *     that does not look like a logo/icon
 *
 * JSON-LD values are only used when they are strings (or, for images, an
 * object with a string `url`). Structured values such as an `about` object or
 * an array of ImageObject entries are skipped or unwrapped instead of causing
 * a TypeError later in the function.
 *
 * @param {string} html - Page markup.
 * @param {string} url - Absolute page URL; used to resolve relative image URLs.
 * @returns {Promise<{title: string, description: string, image: (string|undefined)}>}
 *   `image` is falsy ('' or undefined) when no candidate was found.
 * @throws {TypeError} When `url` is not a valid absolute URL.
 */
async function extractMetadataFromHTML(html, url) {
  const $ = cheerio.load(html);
  const urlObj = new URL(url);

  // Only plain strings are usable as text; anything else counts as missing.
  const asText = (value) => (typeof value === 'string' ? value : '');
  const includesAny = (text, needles) => needles.some((needle) => text.includes(needle));

  // Try to extract JSON-LD structured data (common in e-commerce sites)
  const structuredTypes = ['Product', 'WebPage', 'Offer'];
  const isStructuredNode = (node) => Boolean(node) && structuredTypes.includes(node['@type']);

  let jsonLdData = null;
  $('script[type="application/ld+json"]').each(function () {
    try {
      const parsed = JSON.parse($(this).html());

      // Handle arrays of structured data
      const candidate = Array.isArray(parsed)
        ? parsed.find(isStructuredNode) || parsed[0]
        : parsed;

      if (isStructuredNode(candidate)) {
        jsonLdData = candidate;
        return false; // break
      }
    } catch (e) {
      // Ignore parse errors: try the next script tag
    }
    return undefined;
  });

  // Extract title with priority order
  let title = jsonLdData
    ? asText(jsonLdData.name) || asText(jsonLdData.headline) || asText(jsonLdData.title)
    : '';
  if (!title) {
    title = $('meta[property="og:title"]').attr('content') ||
      $('meta[name="twitter:title"]').attr('content') ||
      $('h1').first().text().trim() ||
      $('title').text().trim() ||
      '';
  }
  title = title || 'Untitled';

  // Extract description with priority order
  let description = jsonLdData
    ? asText(jsonLdData.description) || asText(jsonLdData.about)
    : '';
  if (!description) {
    description = $('meta[property="og:description"]').attr('content') ||
      $('meta[name="twitter:description"]').attr('content') ||
      $('meta[name="description"]').attr('content') ||
      '';
  }

  // If still no description, try to find product description sections
  if (!description) {
    // Try common product description selectors
    const descSelectors = [
      '[data-testid="product-description"]',
      '.product-description',
      '.description',
      '[itemprop="description"]',
      'section[aria-label*="description" i]',
      'section[aria-label*="beschreibung" i]' // German
    ];

    for (const selector of descSelectors) {
      const descText = $(selector).first().text().trim();
      if (descText && descText.length > 20) {
        description = descText;
        break;
      }
    }
  }

  // Fallback to first paragraph if still no description
  if (!description) {
    $('p').each(function () {
      const text = $(this).text().trim();
      if (text.length > 50 && text.length < 1000) {
        description = text;
        return false; // break
      }
      return undefined;
    });
  }

  // Extract image with multiple strategies
  let image = '';

  // Helper function to extract image source from an img element
  const extractImgSrc = (img) => {
    return img.attr('src') ||
      img.attr('data-src') ||
      img.attr('data-lazy-src') ||
      img.attr('data-original') ||
      img.attr('data-image') ||
      img.attr('data-lazy') ||
      img.attr('data-url');
  };

  // Helper function to extract best image from srcset
  const extractFromSrcset = (img) => {
    const srcset = img.attr('srcset');
    if (!srcset) return null;

    // Candidates with a width ("640w") or density ("2x", "1.5x") descriptor.
    // Integer densities are accepted as well as fractional ones.
    const srcsetMatches = srcset.match(/([^\s,]+)\s+(\d+w|\d+(?:\.\d+)?x)/g);
    if (srcsetMatches && srcsetMatches.length > 0) {
      // Get the last entry which is usually the highest resolution
      const lastMatch = srcsetMatches[srcsetMatches.length - 1];
      const srcMatch = lastMatch.match(/^([^\s]+)/);
      return srcMatch ? srcMatch[1] : null;
    }

    // Fallback: just get first URL from srcset
    const firstUrl = srcset.match(/^([^\s,]+)/);
    return firstUrl ? firstUrl[1] : null;
  };

  // Unwraps a JSON-LD `image` value: a string, an array (first entry used),
  // or an object carrying a string `url`.
  const imageFromJsonLd = (value) => {
    const first = Array.isArray(value) ? value[0] : value;
    if (typeof first === 'string') return first;
    if (first && typeof first.url === 'string') return first.url;
    return '';
  };

  // Priority 1: Product container images (most specific - check BEFORE meta tags)
  const productContainerSelectors = [
    '.product-container img',
    '[class*="product-container" i] img',
    '#product-container img',
    '.product-container picture img',
    '[class*="product-container" i] picture img'
  ];

  for (const selector of productContainerSelectors) {
    const imgs = $(selector);
    if (imgs.length === 0) {
      continue;
    }

    // Try to find the main product image (usually the first one that's not a thumbnail)
    for (let i = 0; i < imgs.length; i++) {
      const img = $(imgs[i]);
      const src = extractImgSrc(img);
      if (src && !includesAny(src, ['thumb', 'thumbnail', 'icon'])) {
        image = extractFromSrcset(img) || src;
        break;
      }
    }

    // If no good image found, just take the first one
    if (!image) {
      const firstImg = $(imgs[0]);
      image = extractFromSrcset(firstImg) || extractImgSrc(firstImg);
    }

    if (image) break;
  }

  // Priority 2: Other product-specific containers (before meta tags)
  if (!image) {
    const productImageContainers = [
      '[data-testid="product-image"] img',
      '[data-testid="productImage"] img',
      '.product-image img',
      '.product-gallery img',
      '.product__image img',
      '.product-images img',
      '[class*="product-image" i] img',
      '[class*="product-gallery" i] img',
      '[id*="product-image" i] img'
    ];

    for (const selector of productImageContainers) {
      const img = $(selector).first();
      if (img.length) {
        const imgSrc = extractImgSrc(img);
        if (imgSrc) {
          image = extractFromSrcset(img) || imgSrc;
          if (image) break;
        }
      }
    }
  }

  // Priority 3: Try Open Graph and Twitter Card images (after product containers)
  if (!image) {
    image = $('meta[property="og:image"]').attr('content') ||
      $('meta[name="twitter:image"]').attr('content') ||
      $('meta[name="twitter:image:src"]').attr('content');
  }

  // Priority 4: Try JSON-LD image
  if (!image && jsonLdData && jsonLdData.image) {
    const jsonLdImage = imageFromJsonLd(jsonLdData.image);
    if (jsonLdImage) {
      image = jsonLdImage;
    }
  }

  // Priority 5: Galaxus-specific (keep existing logic)
  if (!image && url.includes('galaxus.')) {
    const galaxusImg = $('img[alt*="Produktbild" i], img[alt*="Produkt" i]').first();
    if (galaxusImg.length) {
      image = extractImgSrc(galaxusImg);
    }

    if (!image) {
      const galleryImg = $('[class*="product" i] img, [class*="image" i] img, [class*="gallery" i] img').first();
      if (galleryImg.length) {
        image = extractImgSrc(galleryImg);
      }
    }
  }

  // URL fragments that indicate decorative rather than content images.
  const decorativeMarkers = ['logo', 'icon', 'avatar', 'spacer', 'pixel'];

  // Priority 6: Generic product selectors
  if (!image) {
    const genericSelectors = [
      '[itemprop="image"]',
      'picture img',
      'figure img',
      'main img',
      '[role="img"]',
      'article img',
      '[class*="main-image" i] img',
      '[id*="main-image" i] img'
    ];

    for (const selector of genericSelectors) {
      const img = $(selector).first();
      if (img.length) {
        const imgSrc = extractImgSrc(img);
        if (imgSrc && !includesAny(imgSrc, decorativeMarkers)) {
          image = extractFromSrcset(img) || imgSrc;
          if (image) break;
        }
      }
    }
  }

  // Fallback to first meaningful image
  if (!image) {
    $('img').each(function () {
      const img = $(this);
      const src = img.attr('src') ||
        img.attr('data-src') ||
        img.attr('data-lazy-src');

      // Skip icons, logos and similar decorative images
      if (src && !includesAny(src, decorativeMarkers)) {
        image = src;
        return false; // break
      }
      return undefined;
    });
  }

  // Convert relative URLs to absolute
  if (image && !image.startsWith('http')) {
    if (image.startsWith('//')) {
      image = urlObj.protocol + image;
    } else if (image.startsWith('/')) {
      image = urlObj.origin + image;
    } else {
      try {
        image = new URL(image, url).href;
      } catch (e) {
        // An unresolvable reference is unusable as an image URL.
        image = '';
      }
    }
  }

  // Clean up title and description
  title = title.trim().replace(/\s+/g, ' ');
  description = description.trim().replace(/\s+/g, ' ').substring(0, 500);

  return {
    title: title,
    description: description,
    image: image
  };
}

/**
 * Extract metadata from URL.
 *
 * Fetches the page with browser-like headers. On HTTP 403/429 it first tries
 * the Puppeteer fallback (when available) and then retries once with a
 * smaller header set. Never rejects: any failure is logged and a placeholder
 * metadata object is returned instead.
 *
 * @param {string} url - Page to inspect.
 * @returns {Promise<{title: string, description: string, image: string}>}
 */
async function extractMetadata(url) {
  const userAgent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

  const isBlocked = (status) => status === 403 || status === 429;

  // 4xx responses are returned rather than thrown so they can be handled here.
  const fetchPage = (headers) => axios.get(url, {
    headers: headers,
    timeout: 20000,
    maxRedirects: 5,
    validateStatus: (status) => status >= 200 && status < 500
  });

  // Use shared extraction function on a 200 response; reject anything else.
  const parseResponse = async (pageResponse) => {
    if (pageResponse.status !== 200) {
      throw new Error(`Request failed with status code ${pageResponse.status}`);
    }
    return extractMetadataFromHTML(pageResponse.data, url);
  };

  try {
    const urlObj = new URL(url);

    // More realistic browser headers to avoid 403 errors
    const response = await fetchPage({
      'User-Agent': userAgent,
      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
      'Accept-Language': 'en-US,en;q=0.9,de;q=0.8',
      'Accept-Encoding': 'gzip, deflate, br',
      'DNT': '1',
      'Connection': 'keep-alive',
      'Upgrade-Insecure-Requests': '1',
      'Sec-Fetch-Dest': 'document',
      'Sec-Fetch-Mode': 'navigate',
      'Sec-Fetch-Site': 'none',
      'Sec-Fetch-User': '?1',
      'Cache-Control': 'max-age=0',
      'Referer': urlObj.origin + '/'
    });

    if (!isBlocked(response.status)) {
      return await parseResponse(response);
    }

    // We got blocked - use Puppeteer as fallback
    console.log(`Received ${response.status} status, trying Puppeteer fallback...`);
    const pptr = await getPuppeteer();
    if (pptr) {
      try {
        console.log('Using Puppeteer to extract metadata...');
        return await extractMetadataWithPuppeteer(url);
      } catch (puppeteerError) {
        console.error('Puppeteer extraction failed:', puppeteerError.message);
        // Fall through to retry with simpler headers
      }
    }

    // Fallback: try simpler headers if Puppeteer not available or failed
    console.log('Trying with simpler headers...');
    const retryResponse = await fetchPage({
      'User-Agent': userAgent,
      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
      'Accept-Language': 'en-US,en;q=0.9'
    });

    if (isBlocked(retryResponse.status)) {
      throw new Error(`Site is blocking requests. Please try again later or the site may require JavaScript rendering.`);
    }

    return await parseResponse(retryResponse);
  } catch (error) {
    console.error('Error extracting metadata:', error.message);
    return {
      title: 'Error loading page',
      description: 'Could not extract metadata from this URL',
      image: ''
    };
  }
}

// Authentication Routes

// Check authentication status: reports whether the session is logged in and,
// if so, the session's user object.
app.get('/api/auth/status', (req, res) => {
  const authenticated = req.isAuthenticated();
  const user = authenticated ? req.user : null;

  res.json({ authenticated, user });
});

// Login endpoint: authenticates against LDAP and, on success, establishes the
// session. Responds 500 on an LDAP or session error and 401 when the
// credentials are rejected.
app.post('/api/auth/login', (req, res, next) => {
  const finishLogin = (user) => (loginErr) => {
    if (loginErr) {
      return res.status(500).json({ error: 'Login failed', details: loginErr.message });
    }
    return res.json({
      authenticated: true,
      user: user
    });
  };

  const onAuthenticated = (err, user, info) => {
    if (err) {
      console.error('LDAP authentication error:', err);
      return res.status(500).json({ error: 'Authentication failed', details: err.message });
    }
    if (!user) {
      return res.status(401).json({ error: 'Invalid credentials' });
    }
    req.logIn(user, finishLogin(user));
  };

  passport.authenticate('ldapauth', onAuthenticated)(req, res, next);
});

// Logout endpoint: ends the login session and confirms the new state.
app.post('/api/auth/logout', (req, res) => {
  req.logout((logoutErr) => {
    if (!logoutErr) {
      res.json({ authenticated: false });
      return;
    }
    res.status(500).json({ error: 'Logout failed' });
  });
});

// API Routes

/**
 * Loads the links the requester may see, newest first, each with the IDs of
 * its lists.
 *
 * Authenticated users see every link. Anonymous users only see links that
 * belong to at least one public list, and only those public lists are
 * included on each link. The restriction is applied as a join condition, so
 * no separate query for the public list IDs is needed.
 *
 * @param {object} req - Express request; only isAuthenticated() is used.
 * @param {object} [where] - Extra Sequelize conditions on the links.
 * @returns {Promise<object[]>} Matching Link records.
 */
const findVisibleLinks = (req, where = {}) => {
  const listsInclude = {
    model: db.List,
    as: 'lists',
    attributes: ['id']
  };

  if (!req.isAuthenticated()) {
    listsInclude.where = { public: true };
    listsInclude.required = true;
  }

  return db.Link.findAll({
    where,
    include: [listsInclude],
    order: [['created_at', 'DESC']]
  });
};

// Get all links
app.get('/api/links', async (req, res) => {
  try {
    const links = await findVisibleLinks(req);
    res.json(links.map(formatLink));
  } catch (error) {
    console.error('Error fetching links:', error);
    res.status(500).json({ error: 'Failed to read links' });
  }
});

// Search links by title, description or URL (case-insensitive substring match)
app.get('/api/links/search', async (req, res) => {
  try {
    const rawQuery = req.query.q;

    // A repeated or nested `q` parameter is parsed into an array or object;
    // reject it instead of failing on a missing string method.
    if (rawQuery !== undefined && typeof rawQuery !== 'string') {
      return res.status(400).json({ error: 'q must be a string' });
    }

    const query = rawQuery ? rawQuery.toLowerCase() : '';

    // NOTE(review): `%` and `_` in the query act as LIKE wildcards — confirm
    // whether they should be escaped.
    const whereClause = {};
    if (query) {
      const pattern = `%${query}%`;
      whereClause[Op.or] = [
        { title: { [Op.iLike]: pattern } },
        { description: { [Op.iLike]: pattern } },
        { url: { [Op.iLike]: pattern } }
      ];
    }

    const links = await findVisibleLinks(req, whereClause);
    res.json(links.map(formatLink));
  } catch (error) {
    console.error('Error searching links:', error);
    res.status(500).json({ error: 'Failed to search links' });
  }
});

// Add a new link: validates the URL, rejects duplicates, fetches the page
// metadata and stores the link. Responds 201 with the formatted link,
// 400 for an invalid URL and 409 when the URL is already stored.
app.post('/api/links', isAuthenticated, async (req, res) => {
  const listsInclude = [{ model: db.List, as: 'lists', attributes: ['id'] }];

  try {
    const { url } = req.body;

    const urlIsUsable = Boolean(url) && isValidUrl(url);
    if (!urlIsUsable) {
      return res.status(400).json({ error: 'Invalid URL' });
    }

    // Check if link already exists
    const duplicate = await db.Link.findOne({ where: { url } });
    if (duplicate) {
      return res.status(409).json({ error: 'Link already exists' });
    }

    // Extract metadata
    const { title, description, image } = await extractMetadata(url);

    // Create new link
    const createdLink = await db.Link.create({
      url,
      title,
      description,
      image,
      created_by: req.user?.username || null,
      archived: false
    });

    // Reload with associations to get listIds
    await createdLink.reload({ include: listsInclude });

    res.status(201).json(formatLink(createdLink));
  } catch (error) {
    console.error('Error adding link:', error);
    res.status(500).json({ error: 'Failed to add link' });
  }
});

// Archive/Unarchive a link: sets the `archived` flag from the request body
// (must be a boolean) and records who made the change.
app.patch('/api/links/:id/archive', isAuthenticated, async (req, res) => {
  const withLists = { include: [{ model: db.List, as: 'lists', attributes: ['id'] }] };

  try {
    const linkId = req.params.id;
    const { archived } = req.body;

    if (typeof archived !== 'boolean') {
      return res.status(400).json({ error: 'archived must be a boolean' });
    }

    const link = await db.Link.findByPk(linkId, withLists);
    if (!link) {
      return res.status(404).json({ error: 'Link not found' });
    }

    await link.update({
      archived,
      modified_by: req.user?.username || null
    });

    await link.reload(withLists);

    res.json(formatLink(link));
  } catch (error) {
    console.error('Error updating link:', error);
    res.status(500).json({ error: 'Failed to update link' });
  }
});

// Delete a link: responds 404 when no link has the given ID.
app.delete('/api/links/:id', isAuthenticated, async (req, res) => {
  try {
    const target = await db.Link.findByPk(req.params.id);

    if (target) {
      await target.destroy();
      res.json({ message: 'Link deleted successfully' });
      return;
    }

    res.status(404).json({ error: 'Link not found' });
  } catch (error) {
    console.error('Error deleting link:', error);
    res.status(500).json({ error: 'Failed to delete link' });
  }
});

// Update link's lists
// PATCH /api/links/:id/lists — body: { listIds: Array }.
// Responds 400 when `listIds` is not an array and 404 when the link does not
// exist; otherwise associates the link with the lists found for those ids,
// records the editing user and returns the reloaded link through formatLink.
app.patch('/api/links/:id/lists', isAuthenticated, async (req, res) => {
  try {
    const linkId = req.params.id;
    const requestedIds = req.body.listIds;

    if (!Array.isArray(requestedIds)) {
      res.status(400).json({ error: 'listIds must be an array' });
      return;
    }

    const link = await db.Link.findByPk(linkId);

    if (!link) {
      res.status(404).json({ error: 'Link not found' });
      return;
    }

    // Look up the lists by id; only rows returned by this query are passed on.
    const idFilter = { [Op.in]: requestedIds };
    const matchedLists = await db.List.findAll({ where: { id: idFilter } });

    // Update relationships
    await link.setLists(matchedLists);

    // Record who made the change (null when no username is available).
    const editor = req.user?.username || null;
    await link.update({ modified_by: editor });

    // Reload with associations so the response carries the current list ids.
    const reloadOptions = {
      include: [{ model: db.List, as: 'lists', attributes: ['id'] }]
    };
    await link.reload(reloadOptions);

    res.json(formatLink(link));
  } catch (err) {
    console.error('Error updating link lists:', err);
    res.status(500).json({ error: 'Failed to update link lists' });
  }
});
|
|
|
|
// Lists API Routes
|
|
|
|
// Get all lists
// GET /api/lists — no authentication middleware on this route.
// Unauthenticated requests receive only lists with public === true;
// authenticated requests receive every list. Results are ordered by
// created_at descending and each row is passed through formatList.
app.get('/api/lists', async (req, res) => {
  try {
    const query = { order: [['created_at', 'DESC']] };

    // Restrict anonymous visitors to public lists; signed-in users get no filter.
    if (!req.isAuthenticated()) {
      query.where = { public: true };
    }

    const rows = await db.List.findAll(query);

    res.json(rows.map(formatList));
  } catch (err) {
    console.error('Error fetching lists:', err);
    res.status(500).json({ error: 'Failed to read lists' });
  }
});
|
|
|
|
// Create a new list
// POST /api/lists — body: { name: string }.
// The name is trimmed before use. Responds 400 when the name is missing, not a
// string, or blank; 409 when an existing list has the same name ignoring case;
// otherwise creates the list with public: false and responds 201 with the new
// list passed through formatList.
app.post('/api/lists', isAuthenticated, async (req, res) => {
  try {
    const { name } = req.body;

    if (!name || typeof name !== 'string' || name.trim().length === 0) {
      return res.status(400).json({ error: 'List name is required' });
    }

    const trimmedName = name.trim();

    // iLike is a pattern match, not an equality test: `%` and `_` in the name
    // would act as wildcards (a list called "%" would "collide" with every
    // existing list) and `\` would act as the escape character. Backslash-
    // escape all three so the name is compared literally.
    // NOTE(review): relies on backslash being the LIKE escape character, which
    // is PostgreSQL's default — confirm no custom ESCAPE is configured.
    const literalNamePattern = trimmedName.replace(/[\\%_]/g, '\\$&');

    // Check if list with same name already exists (case-insensitive)
    const existingList = await db.List.findOne({
      where: {
        name: { [Op.iLike]: literalNamePattern }
      }
    });

    if (existingList) {
      return res.status(409).json({ error: 'List with this name already exists' });
    }

    const newList = await db.List.create({
      name: trimmedName,
      created_by: req.user?.username || null,
      public: false
    });

    res.status(201).json(formatList(newList));
  } catch (error) {
    console.error('Error creating list:', error);
    res.status(500).json({ error: 'Failed to create list' });
  }
});
|
|
|
|
// Update a list
// PUT /api/lists/:id — body: { name: string }.
// Renames a list. Responds 400 when the name is missing, not a string, or
// blank; 404 when no list has the given id; 409 when a different list already
// has the same name ignoring case; otherwise stores the trimmed name, records
// the editing user and returns the list passed through formatList.
app.put('/api/lists/:id', isAuthenticated, async (req, res) => {
  try {
    const { id } = req.params;
    const { name } = req.body;

    if (!name || typeof name !== 'string' || name.trim().length === 0) {
      return res.status(400).json({ error: 'List name is required' });
    }

    const list = await db.List.findByPk(id);

    if (!list) {
      return res.status(404).json({ error: 'List not found' });
    }

    const trimmedName = name.trim();

    // iLike is a pattern match, not an equality test: `%` and `_` in the name
    // would act as wildcards (renaming to "%" would "collide" with any other
    // list) and `\` would act as the escape character. Backslash-escape all
    // three so the name is compared literally.
    // NOTE(review): relies on backslash being the LIKE escape character, which
    // is PostgreSQL's default — confirm no custom ESCAPE is configured.
    const literalNamePattern = trimmedName.replace(/[\\%_]/g, '\\$&');

    // Check if another list with same name exists (case-insensitive);
    // the list being renamed is excluded so it never conflicts with itself.
    const existingList = await db.List.findOne({
      where: {
        id: { [Op.ne]: id },
        name: { [Op.iLike]: literalNamePattern }
      }
    });

    if (existingList) {
      return res.status(409).json({ error: 'List with this name already exists' });
    }

    await list.update({
      name: trimmedName,
      modified_by: req.user?.username || null
    });

    res.json(formatList(list));
  } catch (error) {
    console.error('Error updating list:', error);
    res.status(500).json({ error: 'Failed to update list' });
  }
});
|
|
|
|
// Toggle list public status
// PATCH /api/lists/:id/public — body: { public: boolean }.
// Responds 400 when `public` is not a boolean and 404 when no list has the
// given id; otherwise stores the flag, records the editing user and returns
// the list passed through formatList.
app.patch('/api/lists/:id/public', isAuthenticated, async (req, res) => {
  try {
    const listId = req.params.id;
    const requestedVisibility = req.body.public;

    if (typeof requestedVisibility !== 'boolean') {
      res.status(400).json({ error: 'public must be a boolean' });
      return;
    }

    const targetList = await db.List.findByPk(listId);

    if (!targetList) {
      res.status(404).json({ error: 'List not found' });
      return;
    }

    // Falls back to null when the request carries no user/username.
    const editor = req.user?.username || null;
    await targetList.update({ public: requestedVisibility, modified_by: editor });

    res.json(formatList(targetList));
  } catch (err) {
    console.error('Error updating list public status:', err);
    res.status(500).json({ error: 'Failed to update list public status' });
  }
});
|
|
|
|
// Delete a list
// DELETE /api/lists/:id — responds 404 when no list has the given id;
// otherwise destroys the record and returns a confirmation message.
app.delete('/api/lists/:id', isAuthenticated, async (req, res) => {
  try {
    const doomedList = await db.List.findByPk(req.params.id);

    if (doomedList) {
      // CASCADE delete will automatically remove from link_lists junction table
      await doomedList.destroy();
      res.json({ message: 'List deleted successfully' });
    } else {
      res.status(404).json({ error: 'List not found' });
    }
  } catch (err) {
    console.error('Error deleting list:', err);
    res.status(500).json({ error: 'Failed to delete list' });
  }
});
|
|
|
|
// Helper function to validate URL
/**
 * Reports whether the given value is an absolute http(s) URL string.
 *
 * @param {*} string - Candidate value, typically taken from a request body.
 * @returns {boolean} true only when `string` is a string that parses as a URL
 *   whose protocol is `http:` or `https:`; false for anything else.
 */
function isValidUrl(string) {
  // `new URL()` stringifies its argument, so without this guard an array such
  // as ['https://example.com'] (or any object with a suitable toString) would
  // be accepted as a valid URL.
  if (typeof string !== 'string') {
    return false;
  }

  try {
    const { protocol } = new URL(string);
    return protocol === 'http:' || protocol === 'https:';
  } catch (_) {
    // The URL constructor throws on input it cannot parse.
    return false;
  }
}
|
|
|
|
// Initialize server
// Awaits initializeDatabase() and then starts the HTTP listener on PORT.
// An error thrown by either step is logged and the process exits with code 1.
async function startServer() {
  // Invoked by app.listen once the listener is up.
  const announceReady = () => {
    console.log(`LinkDing server running on http://localhost:${PORT}`);
  };

  try {
    // Initialize database (connect, run migrations, migrate JSON files)
    await initializeDatabase();

    // Start server
    app.listen(PORT, announceReady);
  } catch (err) {
    console.error('Failed to start server:', err);
    process.exit(1);
  }
}

startServer();
|
|
|